Loading the data set

In [1]:
from google.colab import drive
import zipfile
import os
import shutil
from torchvision.datasets import ImageFolder
from sklearn.model_selection import StratifiedShuffleSplit

# Mount Google Drive
drive.mount('/content/drive')

# Path to the ZIP file in your Google Drive
zip_path = '/content/drive/MyDrive/nature_12K.zip'  # Adjust this based on the actual path in your Drive

# Extract the ZIP file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall('/content/')  # Extract to /content/

# Original and new paths
extracted_folder = '/content/inaturalist_12K'
original_val_path = os.path.join(extracted_folder, 'val')
new_test_path = os.path.join(extracted_folder, 'test')

# Rename 'val' to 'test'
if os.path.exists(original_val_path):
    os.rename(original_val_path, new_test_path)

# Define the paths for the training data
original_train_dir = os.path.join(extracted_folder, 'train')
new_train_dir = os.path.join(extracted_folder, 'train_split')  # Keep it same
new_val_dir = os.path.join(extracted_folder, 'val_split')  # This will be used for new validation data

# Load the samples and labels using ImageFolder
dataset = ImageFolder(root=original_train_dir)
samples = dataset.samples  # List of (path, class_index)
labels = [label for _, label in samples]

# Perform a stratified split (80% train, 20% validation)
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(splitter.split(samples, labels))

train_samples = [samples[i] for i in train_idx]
val_samples = [samples[i] for i in val_idx]

# Create new folders for the validation set and training set
for class_name in dataset.classes:
    os.makedirs(os.path.join(new_train_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(new_val_dir, class_name), exist_ok=True)

# Copy files to the new training folders
for path, label in train_samples:
    class_name = dataset.classes[label]
    dest = os.path.join(new_train_dir, class_name, os.path.basename(path))
    shutil.copy2(path, dest)

# Copy files to the new validation folders
for path, label in val_samples:
    class_name = dataset.classes[label]
    dest = os.path.join(new_val_dir, class_name, os.path.basename(path))
    shutil.copy2(path, dest)

print(f"Data prepared with 80% training and 20% validation split. The test data remains in: {new_test_path}")


Mounted at /content/drive
Data prepared with 80% training and 20% validation split. The test data remains in: /content/inaturalist_12K/test


## Trial 1: freeze all layers except the final fully connected layer

In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# ImageNet standard transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.ImageFolder(root=new_train_dir, transform=transform)
val_dataset = datasets.ImageFolder(root=new_val_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=new_test_path, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
import torch.nn as nn
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained ResNet50
model = models.resnet50(pretrained=True)

# Freeze all layers
for param in model.parameters():
    param.requires_grad = False

# Replace final layer
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 10)  # 10 iNaturalist classes
model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 161MB/s]


In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-2)  # Only train final layer


In [None]:
def evaluate(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    running_loss = 0.0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return running_loss / total, correct / total

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / total
    train_acc = correct / total
    val_loss, val_acc = evaluate(model, val_loader)

    print(f"Epoch {epoch+1}/{epochs} --> "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")


Epoch 1/10 --> Train Loss: 0.9050, Train Acc: 0.7810, Val Loss: 1.0967, Val Acc: 0.7590
Epoch 2/10 --> Train Loss: 0.8579, Train Acc: 0.7817, Val Loss: 1.0631, Val Acc: 0.7640
Epoch 3/10 --> Train Loss: 0.8425, Train Acc: 0.7855, Val Loss: 1.0498, Val Acc: 0.7620
Epoch 4/10 --> Train Loss: 0.7997, Train Acc: 0.7906, Val Loss: 1.0274, Val Acc: 0.7665
Epoch 5/10 --> Train Loss: 0.8040, Train Acc: 0.7885, Val Loss: 1.0338, Val Acc: 0.7630
Epoch 6/10 --> Train Loss: 0.7705, Train Acc: 0.7932, Val Loss: 1.0273, Val Acc: 0.7645
Epoch 7/10 --> Train Loss: 0.7533, Train Acc: 0.7956, Val Loss: 1.0080, Val Acc: 0.7640
Epoch 8/10 --> Train Loss: 0.7507, Train Acc: 0.7930, Val Loss: 1.0120, Val Acc: 0.7660
Epoch 9/10 --> Train Loss: 0.7313, Train Acc: 0.7981, Val Loss: 1.0140, Val Acc: 0.7640
Epoch 10/10 --> Train Loss: 0.7281, Train Acc: 0.7976, Val Loss: 0.9869, Val Acc: 0.7670


In [None]:
test_loss, test_acc = evaluate(model, test_loader)
print(f"\nTest Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")


The results are much better than training from scratch. However, let us see if we can improve the performance futher.

In the below code, changing the value of k changes the last k number of layers to unfreeze. I have chnaged it to k=1 and k=2. However k=2 led to overfitting. But validation accuracy was more than k=1. So to prevent overfitting I added data augmentation for k=2.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Paths
data_root = '/content/inaturalist_12K'
train_dir = os.path.join(data_root, 'train_split')
val_dir   = os.path.join(data_root, 'val_split')
test_dir  = os.path.join(data_root, 'test')

# Transforms
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])
val_test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])

train_ds = datasets.ImageFolder(train_dir, transform=train_transform)
val_ds   = datasets.ImageFolder(val_dir,   transform=val_test_transform)
test_ds  = datasets.ImageFolder(test_dir,  transform=val_test_transform)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_ds,  batch_size=32, shuffle=False, num_workers=2)

# Model
model = models.resnet50(pretrained=True)
# add dropout before final layer
in_feats = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(in_feats, 10)
)
model = model.to(device)

# Freeze all but last k layers
def unfreeze_last_k_layers(model, k=1):
    for p in model.parameters(): p.requires_grad = False
    for p in model.fc.parameters(): p.requires_grad = True
    layers = [model.layer4, model.layer3, model.layer2, model.layer1]
    for i in range(k):
        for p in layers[i].parameters(): p.requires_grad = True

unfreeze_last_k_layers(model, k=2)

# Optimizer, loss, scheduler
optimizer = optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-4,
    weight_decay=1e-2
)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, verbose=True)

# Eval helper
def evaluate(model, loader):
    model.eval()
    total_c, total_l = 0, 0.0
    with torch.no_grad():
        for x,y in loader:
            x,y = x.to(device), y.to(device)
            out = model(x)
            loss = criterion(out,y)
            total_l += loss.item()
            preds = out.argmax(dim=1)
            total_c += (preds==y).sum().item()
    return total_c/len(loader.dataset), total_l/len(loader)

# Training with Early Stopping
def train_model(model, train_loader, val_loader, epochs=20, patience=4):
    best_acc, counter = 0.0, 0
    best_wts = model.state_dict()

    for epoch in range(epochs):
        model.train()
        running_loss, running_correct = 0.0, 0

        for x,y in train_loader:
            x,y = x.to(device), y.to(device)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_correct += (out.argmax(1)==y).sum().item()

        train_acc = running_correct/len(train_loader.dataset)
        val_acc, val_loss = evaluate(model, val_loader)
        print(f"Epoch {epoch+1}: Train Acc {train_acc*100:.2f}%, Val Acc {val_acc*100:.2f}%")

        # LR scheduling on val accuracy
        scheduler.step(val_acc)

        # Early stopping tracking
        if val_acc > best_acc:
            best_acc, counter = val_acc, 0
            best_wts = model.state_dict()
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping at epoch", epoch+1)
                break

    model.load_state_dict(best_wts)

# Run
train_model(model, train_loader, val_loader, epochs=20, patience=4)
test_acc, test_loss = evaluate(model, test_loader)
print(f"Test Acc: {test_acc*100:.2f}% | Test Loss: {test_loss:.4f}")


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 177MB/s]


Epoch 1: Train Acc 65.35%, Val Acc 77.15%
Epoch 2: Train Acc 79.26%, Val Acc 78.20%
Epoch 3: Train Acc 84.65%, Val Acc 79.45%
Epoch 4: Train Acc 88.50%, Val Acc 79.40%
Epoch 5: Train Acc 90.64%, Val Acc 78.40%
Epoch 6: Train Acc 92.81%, Val Acc 77.80%
Epoch 7: Train Acc 96.10%, Val Acc 81.60%
Epoch 8: Train Acc 97.52%, Val Acc 81.00%
Epoch 9: Train Acc 98.30%, Val Acc 82.45%
Epoch 10: Train Acc 98.39%, Val Acc 81.25%
Epoch 11: Train Acc 98.55%, Val Acc 81.85%
Epoch 12: Train Acc 98.65%, Val Acc 81.20%
Epoch 13: Train Acc 99.34%, Val Acc 82.40%
Early stopping at epoch 13
Test Acc: 81.75% | Test Loss: 0.9545


Finally, Trying strategy 3: k=2, with data augmentation and logging the results in W&B

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import os
import wandb

# Init Weights & Biases
wandb.init(project="iNaturalist_CNN_da6401_A2", name="resnet50_finetune_k2")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Paths
data_root = '/content/inaturalist_12K'
train_dir = os.path.join(data_root, 'train_split')
val_dir   = os.path.join(data_root, 'val_split')
test_dir  = os.path.join(data_root, 'test')

# Transforms
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])
val_test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])

train_ds = datasets.ImageFolder(train_dir, transform=train_transform)
val_ds   = datasets.ImageFolder(val_dir,   transform=val_test_transform)
test_ds  = datasets.ImageFolder(test_dir,  transform=val_test_transform)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_ds,  batch_size=32, shuffle=False, num_workers=2)

# Model setup
model = models.resnet50(pretrained=True)
in_feats = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(in_feats, 10)
)
model = model.to(device)

# Unfreeze last k blocks
def unfreeze_last_k_layers(model, k=2):
    for p in model.parameters(): p.requires_grad = False
    for p in model.fc.parameters(): p.requires_grad = True
    layers = [model.layer4, model.layer3, model.layer2, model.layer1]
    for i in range(k):
        for p in layers[i].parameters(): p.requires_grad = True

unfreeze_last_k_layers(model, k=2)

# Optimizer, loss, scheduler
optimizer = optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-4,
    weight_decay=1e-2
)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, verbose=True)

# Evaluation function
def evaluate(model, loader):
    model.eval()
    total_correct, total_loss = 0, 0.0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            out = model(x)
            loss = criterion(out, y)
            total_loss += loss.item()
            total_correct += (out.argmax(dim=1) == y).sum().item()
    acc = total_correct / len(loader.dataset)
    avg_loss = total_loss / len(loader)
    return acc, avg_loss

# Training loop with early stopping and wandb logging
def train_model(model, train_loader, val_loader, epochs=20, patience=4):
    best_acc, counter = 0.0, 0
    best_weights = model.state_dict()

    for epoch in range(epochs):
        model.train()
        running_loss, running_correct = 0.0, 0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_correct += (out.argmax(1) == y).sum().item()

        train_acc = running_correct / len(train_loader.dataset)
        train_loss = running_loss / len(train_loader)
        val_acc, val_loss = evaluate(model, val_loader)

        print(f"Epoch {epoch+1}: Train Acc {train_acc*100:.2f}%, Val Acc {val_acc*100:.2f}%")

        # wandb logging
        wandb.log({
            "epoch": epoch + 1,
            "train_acc": train_acc,
            "train_loss": train_loss,
            "val_acc": val_acc,
            "val_loss": val_loss,
            "lr": optimizer.param_groups[0]["lr"]
        })

        scheduler.step(val_acc)

        # Early stopping
        if val_acc > best_acc:
            best_acc = val_acc
            best_weights = model.state_dict()
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered at epoch", epoch+1)
                break

    model.load_state_dict(best_weights)

#Train
train_model(model, train_loader, val_loader, epochs=20, patience=4)

#Test
test_acc, test_loss = evaluate(model, test_loader)
print(f"Test Accuracy: {test_acc*100:.2f}%, Test Loss: {test_loss:.4f}")

#log final test metrics to wandb
wandb.log({
    "test_acc": test_acc,
    "test_loss": test_loss
})
wandb.finish()


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mapoorvaprashanth[0m ([33mapoorvaprashanth-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 150MB/s]


Epoch 1: Train Acc 65.93%, Val Acc 77.10%
Epoch 2: Train Acc 79.27%, Val Acc 78.25%
Epoch 3: Train Acc 84.77%, Val Acc 78.45%
Epoch 4: Train Acc 88.32%, Val Acc 77.95%
Epoch 5: Train Acc 90.47%, Val Acc 77.30%
Epoch 6: Train Acc 92.55%, Val Acc 78.35%
Epoch 7: Train Acc 95.99%, Val Acc 81.75%
Epoch 8: Train Acc 97.32%, Val Acc 81.95%
Epoch 9: Train Acc 98.22%, Val Acc 81.40%
Epoch 10: Train Acc 98.41%, Val Acc 81.55%
Epoch 11: Train Acc 98.69%, Val Acc 82.25%
Epoch 12: Train Acc 98.80%, Val Acc 81.75%
Epoch 13: Train Acc 98.99%, Val Acc 80.90%
Epoch 14: Train Acc 98.75%, Val Acc 82.15%
Epoch 15: Train Acc 99.40%, Val Acc 81.95%
Early stopping triggered at epoch 15
Test Accuracy: 82.35%, Test Loss: 0.9587


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
lr,██████▃▃▃▃▃▃▃▃▁
test_acc,▁
test_loss,▁
train_acc,▁▄▅▆▆▇▇████████
train_loss,█▅▄▃▃▃▂▂▁▁▁▁▁▁▁
val_acc,▁▃▃▂▁▃▇█▇▇█▇▆██
val_loss,▅▆▆▆██▂▂▂▂▁▂▃▂▂

0,1
epoch,15.0
lr,3e-05
test_acc,0.8235
test_loss,0.95865
train_acc,0.994
train_loss,0.54647
val_acc,0.8195
val_loss,0.97015
