NOTE: After some research, I concluded that EfficientNetV2 is likely a good fit for this task.

In [2]:
# Import Statements
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import wandb
import random

In [3]:
# Check if GPU is available.
# The bare expression is the last line of the cell, so its boolean result is shown as the cell output.
torch.cuda.is_available()

True

In [4]:
# Set up device-agnostic code: use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# `device` is a plain string that is later passed to `.to(device)` for the model and every batch.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

In [5]:
# Mount Google Drive (RUN ONLY IN COLAB).
# `google.colab` is imported here rather than in the import cell because it only exists inside Colab.
# The dataset paths defined below live under this mount point.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [6]:
# Set up data directory paths (both are on the mounted Google Drive).
# NOTE: the test data is read from the dataset's "val" folder; the validation split used during
# training is carved out of the "train" folder by get_train_and_val_dataloaders instead.
TRAIN_DATA_DIR = "/content/drive/My Drive/Data/inaturalist_12K/train"
TEST_DATA_DIR = "/content/drive/My Drive/Data/inaturalist_12K/val"

In [7]:
def data_transformations(image_size, mean, std, data_augment=False):

  """Build the torchvision transform pipeline applied to every image.

  The pipeline always resizes, converts to a tensor and normalizes. When
  `data_augment` is True, random augmentations are inserted between the
  resize and the tensor conversion.

  Args:
    image_size: Target size passed to `transforms.Resize`.
    mean: Per-channel means passed to `transforms.Normalize`.
    std: Per-channel standard deviations passed to `transforms.Normalize`.
    data_augment: Whether to include the random augmentations.

  Returns:
    A `transforms.Compose` object.
  """

  # Geometric / photometric steps operate on the resized image *before* it is
  # turned into a normalized tensor.
  pipeline = [transforms.Resize(image_size)]

  # Augmentations must run before ToTensor/Normalize: ColorJitter on a float
  # tensor assumes values in [0, 1] and clamps to that range, so applying it to
  # an already-normalized tensor would clip away part of the image content.
  if data_augment:
    pipeline += [
      transforms.RandomHorizontalFlip(0.05),
      transforms.RandomVerticalFlip(0.05),
      transforms.RandomRotation(degrees=20),
      transforms.RandomApply([transforms.ColorJitter(0.2, 0.2, 0.2, 0.2)], p=0.05),
      transforms.RandomApply([transforms.GaussianBlur(3)], p=0.05)
    ]

  # Tensor conversion and normalization always come last.
  pipeline += [
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
  ]

  return transforms.Compose(pipeline)

def get_train_and_val_dataloaders(train_data_dir, transforms, valset_size=0.2, batch_size=16, val_transforms=None):

  """Build the train and validation dataloaders from one image folder.

  The folder is split with a stratified, seeded (`random_state=42`)
  `train_test_split`, so the split is the same on every call.

  Args:
    train_data_dir: Root directory read with `torchvision.datasets.ImageFolder`.
    transforms: Transform applied to the training images (and to the validation
      images when `val_transforms` is None).
    valset_size: `test_size` forwarded to `train_test_split`; 0 disables the
      split, in which case the validation loader is empty.
    batch_size: Batch size for both loaders.
    val_transforms: Optional transform for the validation images. Pass an
      augmentation-free transform here so validation metrics are not computed
      on randomly augmented images. Defaults to None (reuse `transforms`).

  Returns:
    (trainloader, valloader, classnames)
  """

  # Load the total_train dataset
  total_trainset = torchvision.datasets.ImageFolder(root = train_data_dir, transform=transforms)

  # Get the classnames
  classnames = total_trainset.classes

  # Split the total_train data into train data and val data
  labels = [label for _, label in total_trainset.samples]

  if valset_size != 0:
    train_indices, val_indices = train_test_split(
                                    range(len(total_trainset)),
                                    test_size=valset_size,
                                    stratify=labels,
                                    random_state=42
                                    )
  else:
    train_indices = range(len(total_trainset))
    val_indices = []

  # A Subset shares the transform of its parent dataset, so a separate
  # ImageFolder is needed to give the validation images their own transform.
  # It is built from the same root, so it is expected to list the same samples
  # in the same order and the indices therefore stay valid.
  if val_transforms is not None:
    val_source = torchvision.datasets.ImageFolder(root = train_data_dir, transform=val_transforms)
  else:
    val_source = total_trainset

  # Create the trainset and valset
  trainset = torch.utils.data.Subset(total_trainset, train_indices)
  valset = torch.utils.data.Subset(val_source, val_indices)

  # Create the dataloaders
  trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                            shuffle=True, num_workers=2)

  valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

  return trainloader, valloader, classnames



def get_test_dataloader(test_data_dir, transforms, batch_size=16):

  """Return a non-shuffled dataloader over the image folder at `test_data_dir`.

  Args:
    test_data_dir: Root directory read with `torchvision.datasets.ImageFolder`.
    transforms: Transform applied to every image.
    batch_size: Number of images per batch.
  """

  # Loader settings: fixed order, two worker processes
  loader_options = dict(batch_size=batch_size, shuffle=False, num_workers=2)

  # Read the images from disk, one sub-folder per class
  image_folder = torchvision.datasets.ImageFolder(transform=transforms, root=test_data_dir)

  return torch.utils.data.DataLoader(image_folder, **loader_options)

In [8]:
# Image transformations used in EfficientNet_V2_S
# (input resolution plus the per-channel mean/std used for normalization)
image_size = (384, 384)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Get training transformer (with random augmentations enabled)
train_transforms = data_transformations(image_size=image_size, mean=mean, std=std, data_augment=True)

# Get the train and validation loaders: 20% of the train folder is held out for validation.
# NOTE(review): both loaders receive `train_transforms`, so validation images are augmented as well.
trainloader, valloader, classnames = get_train_and_val_dataloaders(TRAIN_DATA_DIR, train_transforms, valset_size=0.2, batch_size=16)

In [12]:
# Load the model with its default pretrained weights
model = models.efficientnet_v2_s(weights="DEFAULT")

# Change the output layer: replace the final Linear layer of the classifier head
# with a new one that has `num_classes` outputs.
# NOTE(review): num_classes is hard-coded; presumably it should equal len(classnames) - confirm.
in_features = model.classifier[1].in_features
num_classes = 10
model.classifier[1] = nn.Linear(in_features, num_classes)

# Shift the model to the device being used
# (as the last expression of the cell, this also prints the full model architecture)
model.to(device)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  

In [13]:
def _run_epoch(model, loader, criterion, device, optimizer=None):

  """Run one pass over `loader` and return (mean loss per sample, accuracy in %).

  When `optimizer` is given the model is put in train mode and updated after
  every batch; otherwise the model is put in eval mode and gradients are
  disabled. Both metrics are NaN when the loader yields no samples.
  """

  training = optimizer is not None

  if training:
    model.train()
  else:
    model.eval()

  running_loss = 0
  correct = 0
  total = 0

  with torch.set_grad_enabled(training):
    for data in tqdm(loader):

      inputs, labels = data[0].to(device), data[1].to(device)

      if training:
        optimizer.zero_grad()

      outputs = model(inputs)
      loss = criterion(outputs, labels)

      if training:
        loss.backward()
        optimizer.step()

      # Weight the batch loss by the batch size so the epoch value is a
      # per-sample mean even when the last batch is smaller.
      batch_count = inputs.size(0)
      running_loss += loss.item() * batch_count
      total += batch_count
      correct += torch.sum(labels == torch.argmax(outputs, dim=1)).item()

  # An empty loader (e.g. a validation loader built with valset_size=0) has no
  # samples to average over; report NaN instead of dividing by zero.
  if total == 0:
    return float("nan"), float("nan")

  return running_loss / total, 100 * correct / total


def train_model(model, trainloader, valloader, criterion, optimizer, device, epochs=10):

  """Train the model and evaluate it after every epoch.

  Args:
    model: Network to train; updated in place.
    trainloader: Iterable of (inputs, labels) batches used for training.
    valloader: Iterable of (inputs, labels) batches used for evaluation. It may
      be empty, in which case the validation metrics are NaN.
    criterion: Loss function called as `criterion(outputs, labels)`.
    optimizer: Optimizer stepping the model's parameters.
    device: Device the batches are moved to.
    epochs: Number of passes over `trainloader`.

  Returns:
    Four lists with one entry per epoch: train losses, train accuracies (%),
    validation losses, validation accuracies (%).
  """

  # Metrics to keep track of
  train_epoch_losses = []
  train_epoch_accuracies = []
  val_epoch_losses = []
  val_epoch_accuracies = []

  for epoch in range(epochs):

    # Training pass (updates the weights)
    train_epoch_loss, train_epoch_accuracy = _run_epoch(model, trainloader, criterion, device, optimizer)

    train_epoch_losses.append(train_epoch_loss)
    train_epoch_accuracies.append(train_epoch_accuracy)

    # Evaluation pass (no optimizer, so no weight updates and no gradients)
    val_epoch_loss, val_epoch_accuracy = _run_epoch(model, valloader, criterion, device)

    val_epoch_losses.append(val_epoch_loss)
    val_epoch_accuracies.append(val_epoch_accuracy)

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_epoch_loss:.3f}, Train Acc: {train_epoch_accuracy:.2f}%, Val Loss: {val_epoch_loss:.3f}, Val Acc: {val_epoch_accuracy:.2f}%")

  return train_epoch_losses, train_epoch_accuracies, val_epoch_losses, val_epoch_accuracies

In [14]:
# Define the criterion and optimizer to be used in training.
# Cross-entropy loss for the multi-class output; Adam receives every parameter of `model`
# (no layers are frozen in the visible cells) with a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [None]:
# Train the model for 10 epochs.
# The returned metric lists are not assigned to a variable, so they are only available as this cell's output.
train_model(model, trainloader, valloader, criterion, optimizer, device, 10)

100%|██████████| 501/501 [21:04<00:00,  2.52s/it]
100%|██████████| 126/126 [05:34<00:00,  2.65s/it]


Epoch 1/10, Train Loss: 0.813, Train Acc: 75.62%, Val Loss: 0.352, Val Acc: 89.46%


100%|██████████| 501/501 [01:52<00:00,  4.47it/s]
100%|██████████| 126/126 [00:28<00:00,  4.46it/s]


Epoch 2/10, Train Loss: 0.401, Train Acc: 87.77%, Val Loss: 0.363, Val Acc: 88.46%


100%|██████████| 501/501 [01:50<00:00,  4.55it/s]
100%|██████████| 126/126 [00:27<00:00,  4.55it/s]


Epoch 3/10, Train Loss: 0.295, Train Acc: 90.99%, Val Loss: 1.458, Val Acc: 88.56%


100%|██████████| 501/501 [01:50<00:00,  4.53it/s]
100%|██████████| 126/126 [00:27<00:00,  4.51it/s]


Epoch 4/10, Train Loss: 0.247, Train Acc: 92.48%, Val Loss: 0.384, Val Acc: 88.96%


100%|██████████| 501/501 [01:54<00:00,  4.36it/s]
 48%|████▊     | 61/126 [00:13<00:15,  4.25it/s]