# Multi-class Image Classification using EfficientNetB0 on Tiny ImageNet


In [None]:
!pip install torch torchvision scikit-learn --quiet

In [2]:
import os
import shutil
import requests
import zipfile

def download_and_extract():
    """Download the Tiny ImageNet archive and unpack it into the working directory.

    Both steps are skipped when their result already exists, so the cell can
    be re-run safely:

    * ``tiny-imagenet-200.zip`` is downloaded only if it is not present.
    * The archive is extracted only if the ``tiny-imagenet-200`` directory is
      not present.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        zipfile.BadZipFile: If the archive on disk is not a valid zip file.
    """
    url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
    filename = "tiny-imagenet-200.zip"

    if not os.path.exists(filename):
        print("Downloading Tiny ImageNet...")
        # Stream into a temporary ".part" file and rename it only once the
        # download has finished. An interrupted run would otherwise leave a
        # truncated archive that the existence check above treats as complete.
        partial = filename + ".part"
        try:
            # (connect timeout, read timeout) in seconds.
            with requests.get(url, stream=True, timeout=(10, 60)) as r:
                # Fail on 4xx/5xx instead of saving an error page as the zip.
                r.raise_for_status()
                with open(partial, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1 << 20):
                        f.write(chunk)
            os.replace(partial, filename)
        finally:
            # Only still present when the download did not complete.
            if os.path.exists(partial):
                os.remove(partial)
    else:
        print("Already downloaded.")

    if not os.path.exists("tiny-imagenet-200"):
        print("Extracting...")
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall(".")
    else:
        print("Already extracted.")

def organize_val_images(val_dir="tiny-imagenet-200/val"):
    """Move the validation images into one sub-folder per class.

    Reads ``val_annotations.txt`` inside ``val_dir`` (tab-separated; the first
    column is the image file name, the second the class id), moves every
    listed image from ``<val_dir>/images`` to ``<val_dir>/<class_id>/`` and
    finally deletes the ``images`` folder.

    The function is safe to call repeatedly: once the ``images`` folder is
    gone the work is considered done, and images already moved by an earlier,
    interrupted run are skipped.

    Args:
        val_dir: Directory holding ``val_annotations.txt`` and ``images``.

    Raises:
        FileNotFoundError: If the annotations file does not exist.
    """
    val_annotations_file = os.path.join(val_dir, "val_annotations.txt")
    val_images_dir = os.path.join(val_dir, "images")

    if not os.path.isdir(val_images_dir):
        # A missing images/ folder only means "already done" when the dataset
        # itself is present; otherwise surface the problem to the caller.
        if not os.path.isfile(val_annotations_file):
            raise FileNotFoundError(val_annotations_file)
        print("Validation images already organized.")
        return

    print("Organizing validation images...")
    # Read class annotations
    annotations = {}
    with open(val_annotations_file, "r") as f:
        for line in f:
            tokens = line.rstrip("\r\n").split("\t")
            if len(tokens) < 2:
                # Blank or malformed line: nothing to map.
                continue
            annotations[tokens[0]] = tokens[1]

    # Create class folders and move each image into its class folder
    for img_file, class_id in annotations.items():
        class_dir = os.path.join(val_dir, class_id)
        os.makedirs(class_dir, exist_ok=True)
        src = os.path.join(val_images_dir, img_file)
        if not os.path.exists(src):
            # Already moved by a previous, interrupted run.
            continue
        shutil.move(src, os.path.join(class_dir, img_file))

    # Cleanup
    shutil.rmtree(val_images_dir)
    print("Validation images organized.")

# One-time dataset preparation. Order matters: the archive has to be fetched
# and unpacked before the validation images can be regrouped.
for prepare_step in (download_and_extract, organize_val_images):
    prepare_step()

Downloading Tiny ImageNet...
Extracting...
Organizing validation images...
Validation images organized.


### Import Required Libraries
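- PyTorch and torchvision modules for datasets, transforms, models, and optimization.
- `tqdm` for a progress bar during training.
- `matplotlib`, `seaborn`, `numpy`, and scikit-learn metrics (`classification_report`, `confusion_matrix`) for evaluation and plotting.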

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

### Set Device
- Set the computation device to GPU (if available) or CPU.

In [4]:
# Prefer a CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

### Hyperparameters and Paths
- Define important constants like `BATCH_SIZE`, `EPOCHS`, and `IMAGE_SIZE`.
- Define paths to the Tiny ImageNet `train`, `val`, and `test` folders.
- `NUM_CLASSES = 200` for Tiny ImageNet.

In [5]:
# --- Training configuration ---
BATCH_SIZE = 64      # images per mini-batch
EPOCHS = 2           # full passes over the training set
IMAGE_SIZE = 64      # side length in pixels used by the transforms
NUM_CLASSES = 200    # number of Tiny ImageNet classes

# --- Dataset locations (relative to the working directory) ---
DATA_ROOT = "tiny-imagenet-200"
TRAIN_DIR = f"{DATA_ROOT}/train"
VAL_DIR = f"{DATA_ROOT}/val"
TEST_DIR = f"{DATA_ROOT}/test"

### Data Augmentation and Normalization
- Apply random augmentations to the training images:
  - `RandomResizedCrop`, `RandomHorizontalFlip`, and `ColorJitter`.
- Normalize using ImageNet means and standard deviations.
- Helps the model generalize and prevents overfitting.

In [6]:
# Channel-wise ImageNet statistics used to normalize both pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)

# Training pipeline: random augmentation, then tensor conversion + normalization.
train_augmentations = [
    transforms.RandomResizedCrop(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]
train_transform = transforms.Compose(
    train_augmentations + [transforms.ToTensor(), normalize]
)

# Validation pipeline: deterministic resize only, same normalization.
val_transform = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        normalize,
    ]
)

### Load Dataset with ImageFolder
- Use `ImageFolder` to load datasets assuming subfolders per class.
- `DataLoader` handles batching and shuffling for training and evaluation.

In [7]:
# ImageFolder takes each image's label from the sub-folder it is stored in.
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=train_transform)
val_dataset = datasets.ImageFolder(root=VAL_DIR, transform=val_transform)

# Sanity checks: the classifier head is sized by NUM_CLASSES, and validation
# metrics are only meaningful if both splits map class folders to the same
# integer labels. Fail here rather than train on inconsistent labels.
assert len(train_dataset.classes) == NUM_CLASSES, (
    f"expected {NUM_CLASSES} classes in {TRAIN_DIR}, found {len(train_dataset.classes)}"
)
assert train_dataset.class_to_idx == val_dataset.class_to_idx, (
    "train and val folders do not define the same class-to-index mapping"
)

# Shuffle only the training data; validation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)



### Load Pretrained EfficientNetB0
- Use torchvision's `efficientnet_b0` with ImageNet-pretrained weights as the backbone.
- Freeze early layers if needed (optional).
- Replace the final classification layer (`classifier[1]`) with a new `Linear` layer with output size = 200.


In [8]:
# Load EfficientNet-B0 with ImageNet-pretrained weights.
# `weights=` replaces the deprecated `pretrained=True` flag (torchvision >= 0.13);
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` resolved to.
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Optional: freeze all layers initially (uncomment to enable).
# for param in model.parameters():
#     param.requires_grad = False

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 109MB/s] 


In [9]:
# Replace the final linear layer of the classifier with a new one that keeps
# the same input width but outputs NUM_CLASSES logits, then move the whole
# model to the selected device.
head_in_features = model.classifier[1].in_features
new_head = nn.Linear(head_in_features, NUM_CLASSES)
model.classifier[1] = new_head
model = model.to(device)

<img src="https://www.researchgate.net/publication/360334595/figure/fig2/AS:11431281091326457@1666379680352/EfficientNetB0-Network-Architecture.ppm">

<img src="https://i.sstatic.net/wjMIUl0Y.png">

In [10]:
# Print the model's layer structure for inspection.
print(model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

### Define Loss Function, Optimizer, Scheduler
- Use `CrossEntropyLoss` with `label_smoothing=0.1` to regularize predictions.
- Optimizer: `Adam` is adaptive and works well for vision tasks.
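- Scheduler: `ReduceLROnPlateau` reduces the learning rate when validation loss plateaus (here it is halved after more than 3 epochs without improvement, so it only matters for runs longer than the current `EPOCHS = 2`).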

In [11]:
# Loss: cross-entropy with label smoothing.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# Optimizer: Adam over every parameter of the model.
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
)

# Scheduler: multiplies the learning rate by `factor` once the value passed to
# `scheduler.step(...)` has stopped improving for more than `patience` epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.5,
    patience=3,
)

### Training Loop
- Set model to training mode.
- Loop over all training batches:
  - Forward pass
  - Compute loss
  - Backpropagation
  - Update weights
- Track average training loss and accuracy.

### Validation Loop
- Set model to evaluation mode (`model.eval()`).
- Disable gradient computation using `torch.no_grad()` for efficiency.
- Track validation loss and accuracy.
- Scheduler updates learning rate based on validation loss.

In [12]:
# Dataset sizes do not change during the run, so look them up once.
num_train_samples = len(train_loader.dataset)
num_val_samples = len(val_loader.dataset)

for epoch in range(EPOCHS):
    # ---------------- Training phase ----------------
    model.train()
    train_loss, correct = 0.0, 0

    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
    for images, labels in progress:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The loss is a per-batch average (default reduction); weight it by
        # the batch size so the epoch figure is a per-sample average.
        train_loss += loss.item() * images.size(0)
        predictions = outputs.argmax(1)
        correct += (predictions == labels).sum().item()

    avg_loss = train_loss / num_train_samples
    accuracy = correct / num_train_samples
    print(f"Train Loss: {avg_loss:.4f}, Train Accuracy: {accuracy:.4f}")

    # ---------------- Validation phase ----------------
    model.eval()
    val_loss, val_correct = 0.0, 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * images.size(0)
            predictions = outputs.argmax(1)
            val_correct += (predictions == labels).sum().item()

    avg_val_loss = val_loss / num_val_samples
    val_accuracy = val_correct / num_val_samples

    # The plateau scheduler is driven by the validation loss.
    scheduler.step(avg_val_loss)

    print(f"Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")


Epoch 1/2: 100%|██████████| 1563/1563 [03:22<00:00,  7.71it/s]

Train Loss: 3.8194, Train Accuracy: 0.2348





Val Loss: 3.1270, Val Accuracy: 0.3777


Epoch 2/2: 100%|██████████| 1563/1563 [03:21<00:00,  7.76it/s]

Train Loss: 3.3534, Train Accuracy: 0.3325





Val Loss: 2.9658, Val Accuracy: 0.4153
