In [27]:
!pip install efficientnet_pytorch



In [52]:
from torch.utils.data import Subset
def test_train_val_split(dataset, filepath="/kaggle/input/cub-200-2011/CUB_200_2011/train_test_split.txt", train_size = 0.9):
    
    # Read the train_test_split file
    split_file = open("/kaggle/input/cub-200-2011/CUB_200_2011/train_test_split.txt")
    flags = split_file.readlines()
    N = len(flags)
    i = 0
    flags = [int(flags[i].split('\n')[0].split(" ")[1]) for i in range(N)]
    
    # Store the indices of training and testing data
    index1 = []
    index0 = []

    for i in range(N):
        if(flags[i]==0):
            index0.append(i)
        else:
            index1.append(i)
    
    # Split the dataset based on the train_test split
    test_dataset = Subset(dataset, index0)
    train_val_dataset = Subset(dataset, index1)
    
    # Split the training dataset into training and validation set
    train_count = int(train_size * len(train_val_dataset))
    val_count  = len(train_val_dataset) - train_count

    seed = torch.Generator().manual_seed(42)
    train_dataset, val_dataset = random_split(train_val_dataset, [train_count, val_count], generator=seed)
    
    return train_dataset, val_dataset, test_dataset

In [53]:
# Testing loop
def test(model, test_loader, device):
    model.eval()
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to CUDA if available
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
    test_accuracy = 100 * test_correct / test_total
    print(f'Testing Accuracy: {test_accuracy:.2f}%, Epochs: {epoch}')
    f.write(f'Testing Accuracy: {test_accuracy:.2f}%, Epochs: {epoch}\n')

In [54]:
# Import important libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from efficientnet_pytorch import EfficientNet
from torch.utils.data import DataLoader, random_split
from torch.optim.lr_scheduler import StepLR
import copy

In [55]:
# Plots
TRAINING_LOSS = []
VALIDATION_LOSS = []
VALIDATION_ACCURACY = []
LEARNING_RATE = []

f = open("/kaggle/working/Log.txt", 'a')
f.truncate(0)
f.write("\n\n\n\n\n")

# If GPU is available then use GPU else use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define data transforms
    # Resize images to 224x224
    # Normalize
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Dataset path
dataset = datasets.ImageFolder(root='/kaggle/input/cub-200-2011/CUB_200_2011/images', transform=transform)

train_dataset, val_dataset, test_dataset = test_train_val_split(dataset)

# Dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define EfficientNet-B2 model
model = EfficientNet.from_pretrained('efficientnet-b2')
# Set the number of classes to 200 
model._fc=torch.nn.Linear(in_features=model._fc.in_features, out_features=200)
# Freeze all the wights of the model
for name, param in model.named_parameters():
    param.requires_grad = False
# Un-freeze the last block, fully connected layer and batch normalization layers   
for name, param in model.named_parameters():
    if (name.split('.')[0] == "_blocks" and int(name.split('.')[1]) >= len(model._blocks) - 1) or ("fc" in name) or ("bn" in name) :  # Fix first 15 blocks
        param.requires_grad = True
# model._fc = nn.Sequential(
#     nn.Linear(model._fc.in_features, 200),  # Add a fully connected layer
#     nn.ReLU(inplace=True),  # Add activation function
#     nn.Dropout(0.2),  # Add dropout
#     )
# for name, param in model.named_parameters():
#     print(name)


best_model = copy.deepcopy(model)

total_params = sum(p.numel() for p in model.parameters())
print(f"\nTotal number of parameters in the model: {total_params}")
f.write(f"\nTotal number of parameters in the model: {total_params}\n")


trainable_parameters = []
for param in model.parameters():
    if param.requires_grad == True:
        trainable_parameters.append(param)

number_of_trainable_parameters = sum(p.numel() for p in trainable_parameters)
print(f"\nTotal number of trainable parameters in the model: {number_of_trainable_parameters}\n")
f.write(f"Total number of trainable parameters in the model: {number_of_trainable_parameters}\n\n")


# Move the model to the GPU
model.to(device)

# Define loss function, optimizer and learning rate scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1,gamma=0.9)


print_every = 17
total_batches = len(train_loader)
max_val_accuracy = 0
stop_count = 0
running_loss = 0.0
epoch_loss = 0.0
val_loss = 0.0
num_epochs = 20

# Training loop
for epoch in range(num_epochs):
    if(stop_count > 3 or epoch == num_epochs):
        break

    model.train()
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if (batch_idx + 1) % print_every == 0 or (batch_idx + 1) == total_batches:
            avg_loss = running_loss / print_every
            print(f'Training Epoch - [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{total_batches}], Loss: {avg_loss:.4f}')
            f.write(f'Training Epoch - [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{total_batches}], Loss: {avg_loss:.4f}\n')
            epoch_loss += running_loss
            running_loss = 0
    epoch_loss = epoch_loss / len(train_loader)
    TRAINING_LOSS.append(epoch_loss)
    
    print(f'Epoch Loss: {epoch_loss:.4f}')
    f.write(f'Epoch Loss: {epoch_loss:.4f}\n')
    
    epoch_loss = 0
    
    # Validation loop 
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to CUDA if available
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
            
    val_loss = val_loss/len(val_loader)       
    VALIDATION_LOSS.append(val_loss)
    
    val_accuracy = 100 * val_correct / val_total
    VALIDATION_ACCURACY.append(val_accuracy)
    
    LEARNING_RATE.append(optimizer.param_groups[-1]['lr'])
    
    # Stoping criteria
    if(val_accuracy > max_val_accuracy):
        max_val_accuracy = val_accuracy
        torch.save(model.state_dict(), '/kaggle/working//efficientnet_b0_cub.pth')
        stop_count = 0
    else:
        stop_count += 1
    
    scheduler.step()
    
    print(f'Validation - Epoch [{epoch+1}/{num_epochs}], Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%')
    f.write(f'Validation - Epoch [{epoch+1}/{num_epochs}], Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%\n')
    print(f"\nstop_count: {stop_count}")
    
#Testing loop
best_model.load_state_dict(torch.load("/kaggle/working/efficientnet_b0_cub.pth"))
best_model.to(device)
test(best_model, test_loader, device)
f.close()

Loaded pretrained weights for efficientnet-b2

Total number of parameters in the model: 7982794

Total number of trainable parameters in the model: 2229136

Training Epoch - [1/20], Batch [17/85], Loss: 5.2893
Training Epoch - [1/20], Batch [34/85], Loss: 5.2877
Training Epoch - [1/20], Batch [51/85], Loss: 5.2724
Training Epoch - [1/20], Batch [68/85], Loss: 5.2380
Training Epoch - [1/20], Batch [85/85], Loss: 5.2276
Epoch Loss: 5.2630
Validation - Epoch [1/20], Loss: 5.1757, Accuracy: 5.33%

stop_count: 0
Training Epoch - [2/20], Batch [17/85], Loss: 5.1399
Training Epoch - [2/20], Batch [34/85], Loss: 5.1349
Training Epoch - [2/20], Batch [51/85], Loss: 5.0935
Training Epoch - [2/20], Batch [68/85], Loss: 5.0303
Training Epoch - [2/20], Batch [85/85], Loss: 4.9901
Epoch Loss: 5.0777
Validation - Epoch [2/20], Loss: 5.3717, Accuracy: 15.33%

stop_count: 0
Training Epoch - [3/20], Batch [17/85], Loss: 4.8758
Training Epoch - [3/20], Batch [34/85], Loss: 4.7430
Training Epoch - [3/20],