In [1]:
# Mount Google Drive at /content/drive so later cells can read files stored
# on Drive. The google.colab package is only available inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import torch
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset
from torchvision import transforms, models


class CUB200Dataset(Dataset):
    """Image-classification dataset driven by a ``<split>.txt`` index file.

    Layout read from ``root_dir``::

        <root_dir>/<split>.txt        # one "<filename> <label>" pair per line
        <root_dir>/<split>/<filename>

    Args:
        root_dir: Folder holding the split index files and the image folders.
        split: Split name; selects both ``<split>.txt`` and ``<split>/``.
        transform: Callable applied to every loaded PIL image. When omitted,
            :meth:`default_transform` is used.
        apply_bg_removal: Stored on the instance; this class does not read it.
    """

    def __init__(self, root_dir, split='train', transform=None, apply_bg_removal=False):
        self.root_dir = root_dir
        self.split = split
        self.transform = transform or self.default_transform()
        self.apply_bg_removal = apply_bg_removal

        # Load metadata
        self.data = self.load_metadata()

    def load_metadata(self):
        """Read ``<split>.txt`` into a DataFrame with filename, label and filepath columns."""
        split_file = os.path.join(self.root_dir, f'{self.split}.txt')
        data = pd.read_csv(split_file, sep=' ', names=['filename', 'label'])
        data['filepath'] = data['filename'].apply(lambda x: os.path.join(self.root_dir, self.split, x))
        return data

    def __len__(self):
        """Return the number of samples listed in the split file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at position ``idx``.

        Raises:
            IndexError: If ``idx`` is out of range.
            RuntimeError: If the image file could not be loaded.
        """
        # Positional lookup: out-of-range indices raise IndexError, which is
        # what plain `for sample in dataset` iteration needs in order to stop,
        # and negative indices behave like they do on a list.
        img_name = self.data['filepath'].iloc[idx]
        image = self.load_image(img_name)
        if image is None:
            # load_image() has already printed the underlying error; fail here
            # with the offending path instead of an unrelated error from
            # inside the transform pipeline.
            raise RuntimeError(f"Could not load image for index {idx}: {img_name}")

        # Apply transformations to convert the image to a tensor
        image = self.transform(image)

        label = self.data['label'].iloc[idx]
        return image, label

    @staticmethod
    def load_image(image_path):
        """Open ``image_path`` as an RGB PIL image.

        Returns ``None`` (after printing the error) when the file cannot be
        opened or decoded.
        """
        try:
            return Image.open(image_path).convert('RGB')
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            return None

    def default_transform(self):
        """Return the fallback pipeline: resize to 224x224, to tensor, normalize."""
        return transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])



In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchvision import transforms, models
import torch.optim.lr_scheduler as lr_scheduler

# Define transformations
# Preprocessing settings shared by both pipelines, defined once so the train
# and test preprocessing cannot drift apart.
IMAGE_SIZE = (224, 224)
# Per-channel mean/std: the standard ImageNet statistics used with
# torchvision's pretrained weights.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training pipeline: same preprocessing as test plus a random horizontal flip
# as the only augmentation.
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

# Test pipeline: deterministic resize + normalize, no augmentation.
test_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])



In [19]:
# One dataset per split; both read from the same preprocessed folder on Drive
# and differ only in the split name and the transform pipeline.
train_dataset = CUB200Dataset(
    root_dir='drive/MyDrive/COS30082_preprocessed',
    split='train',
    transform=train_transforms,
)
test_dataset = CUB200Dataset(
    root_dir='drive/MyDrive/COS30082_preprocessed',
    split='test',
    transform=test_transforms,
)

# Training batches are reshuffled every epoch; test batches keep file order.
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4,
)

In [20]:


class BirdClassifier(nn.Module):
    """Pretrained torchvision backbone with its head swapped for a new classifier.

    The backbone is stored as ``self.model``; its final layer (``fc`` for the
    ResNet family, ``classifier`` for EfficientNet) is replaced by
    ``Dropout(0.5)`` followed by ``Linear(in_features, num_classes)``.

    Args:
        model_name: One of 'resnet50', 'resnext50_32x4d', 'efficientnet_b0',
            'efficientnet_b5'.
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """

    def __init__(self, model_name='efficientnet_b5', num_classes=200):
        super().__init__()

        # (model_name, torchvision constructor, weights enum, head attribute).
        # Names are resolved lazily so only the chosen backbone is touched.
        supported = (
            ('resnet50', 'resnet50', 'ResNet50_Weights', 'fc'),
            ('resnext50_32x4d', 'resnext50_32x4d', 'ResNeXt50_32X4D_Weights', 'fc'),
            ('efficientnet_b0', 'efficientnet_b0', 'EfficientNet_B0_Weights', 'classifier'),
            ('efficientnet_b5', 'efficientnet_b5', 'EfficientNet_B5_Weights', 'classifier'),
        )
        spec = next((entry for entry in supported if model_name == entry[0]), None)
        if spec is None:
            raise ValueError(f"Unsupported model_name: {model_name}. Choose from 'resnet50', 'resnext50_32x4d', 'efficientnet_b0', 'efficientnet_b5'.")

        _, builder_name, weights_name, head_attr = spec
        pretrained = getattr(models, weights_name).DEFAULT
        self.model = getattr(models, builder_name)(weights=pretrained)

        # ResNet-style heads are a single Linear; EfficientNet heads are a
        # Sequential whose Linear sits at index 1.
        old_head = getattr(self.model, head_attr)
        if head_attr == 'fc':
            in_features = old_head.in_features
        else:
            in_features = old_head[1].in_features

        new_head = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(in_features, num_classes)
        )
        setattr(self.model, head_attr, new_head)

    def forward(self, x):
        """Run ``x`` through the wrapped backbone and return its output."""
        return self.model(x)


# 5. Setup Model, Criterion, Optimizer, and Scheduler
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# To use EfficientNet B5
model = BirdClassifier(model_name='efficientnet_b5', num_classes=200)
# train() and evaluate() move every batch to `device`, so the weights must
# live there too; otherwise the first forward pass fails with a device
# mismatch on a GPU runtime. Done before the optimizer is built so the
# optimizer references the parameters on their final device.
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Multiply the learning rate by 0.1 every 5 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)




In [21]:
class EarlyStopping:
    """Signal when validation loss has stopped improving.

    Call the instance once per epoch with the validation loss, then check
    ``early_stop``.

    Args:
        patience: Number of consecutive non-improving calls after which
            ``early_stop`` is set to True.
        delta: A loss counts as an improvement only if it is at most
            ``best_loss - delta``.
    """

    def __init__(self, patience=5, delta=0):
        self.patience = patience  # How many epochs to wait after last improvement
        self.delta = delta  # Minimum change to qualify as an improvement
        self.best_loss = None  # Best validation loss seen so far
        self.counter = 0  # Tracks how long since the last improvement
        self.early_stop = False  # Flag to indicate whether training should stop

    def __call__(self, val_loss):
        """Record one epoch's validation loss and update ``early_stop``."""
        # NaN is the only value that is not equal to itself. A NaN loss (e.g.
        # a diverged run) must never count as an improvement: every comparison
        # against NaN is False, so storing it as best_loss would reset the
        # counter on all later epochs and training would never stop.
        is_nan = val_loss != val_loss

        improved = not is_nan and (
            self.best_loss is None or val_loss <= self.best_loss - self.delta
        )
        if improved:
            self.best_loss = val_loss
            self.counter = 0  # Reset counter if validation loss improves
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True


In [22]:
# 6. Training and Evaluation Functions
def train(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module to optimize; put into training mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy_percent)``, both averaged over the
        samples actually processed in this pass.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total_images = 0

    for images, labels in tqdm(loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch loss by batch size so a smaller final batch does
        # not skew the average (assumes criterion returns a per-batch mean).
        running_loss += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total_images += batch_size

    # Use the number of samples seen for both metrics. len(loader.dataset)
    # disagrees with it when the loader drops the last batch or samples only
    # a subset of the dataset.
    train_loss = running_loss / total_images
    train_accuracy = (correct / total_images) * 100
    return train_loss, train_accuracy

def evaluate(model, loader, criterion, device, num_classes=200):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Module to evaluate; put into eval mode here.
        loader: Iterable yielding ``(images, labels)`` batches; labels must be
            integer class indices in ``[0, num_classes)``.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.
        num_classes: Number of classes tracked for per-class accuracy.

    Returns:
        Tuple ``(mean_loss, overall_accuracy_percent, mean_class_accuracy_percent)``.
        The class average covers only classes that occur in ``loader``.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_top1 = 0
    total_images = 0

    # Per-class sample and hit counts, kept as integer tensors on `device`.
    class_correct = torch.zeros(num_classes, dtype=torch.long, device=device)
    class_total = torch.zeros(num_classes, dtype=torch.long, device=device)

    with torch.no_grad():
        for images, labels in tqdm(loader):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size

            predicted_top1 = outputs.argmax(dim=1)
            hits = predicted_top1 == labels
            correct_top1 += hits.sum().item()
            total_images += batch_size

            # Count per class with bincount instead of a per-sample Python
            # loop, which forced one host/device sync per sample.
            class_total += torch.bincount(labels, minlength=num_classes)
            class_correct += torch.bincount(labels[hits], minlength=num_classes)

    # Average only over classes that were seen; a class with zero samples
    # would otherwise contribute 0/0 = NaN and turn the whole mean into NaN.
    seen = class_total > 0
    per_class_accuracy = class_correct[seen].float() / class_total[seen].float()
    avg_class_accuracy = per_class_accuracy.mean().item() * 100
    overall_accuracy = (correct_top1 / total_images) * 100

    # Normalize by the samples actually evaluated so loss and accuracy share
    # the same denominator.
    return running_loss / total_images, overall_accuracy, avg_class_accuracy


In [23]:



# 7. Training Loop with Early Stopping and Class Accuracy
# Runs up to num_epochs epochs. Each epoch trains on train_loader, evaluates
# on test_loader, steps the LR scheduler, checkpoints the weights when the
# evaluation accuracy improves, and stops early once the evaluation loss has
# not improved for `patience` consecutive epochs.
# NOTE(review): test_loader doubles as the validation set here, so checkpoint
# selection and early stopping are both tuned on the test split — consider a
# separate validation split before reporting test accuracy.
num_epochs = 10
best_acc = 0.0  # highest evaluation accuracy seen so far (percent)
early_stopper = EarlyStopping(patience=5)

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc, avg_class_acc = evaluate(model, test_loader, criterion, device, num_classes=200)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print(f"Train Loss: {train_loss:.4f} | Validation Loss: {val_loss:.4f} | Train Accuracy: {train_acc:.2f}% | Validation Accuracy: {val_acc:.2f}%")
    print(f"Average Accuracy per Class: {avg_class_acc:.2f}%")

    # Save the best model (selected on accuracy; only the state_dict is written)
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        print("Model saved!")

    # Early stopping check (driven by loss, not by accuracy)
    early_stopper(val_loss)
    if early_stopper.early_stop:
        print("Early stopping triggered!")
        break


Epoch 1/10


 27%|██▋       | 82/302 [02:23<06:26,  1.76s/it]


KeyboardInterrupt: 

In [24]:
import torch

# Rebuild the EfficientNet-B5 classifier and load trained weights from a
# state_dict checkpoint. Requires BirdClassifier from an earlier cell.
checkpoint_path = 'best_model_b5_newdataset.pth'  # Replace with the actual path to your checkpoint

# Pick the device first so the checkpoint can be mapped onto it while loading.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The architecture must match the one the checkpoint was saved from.
model = BirdClassifier(model_name='efficientnet_b5', num_classes=200)

# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime;
# without it torch.load tries to restore tensors onto their original device.
# weights_only=True restricts unpickling to tensors and plain containers.
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
# The file holds a bare state_dict, so it is loaded directly.
model.load_state_dict(checkpoint)

model.to(device)

# Set the model to evaluation mode
model.eval()

# Initialize variables to track performance
total_correct = 0
total_samples = 0
all_predictions = []
all_labels = []



In [25]:
# Overall top-1 accuracy of `model` on the test split.
# The accumulators are reset here so that re-running this cell gives the same
# result instead of adding onto counts and lists left over from a previous run.
total_correct = 0
total_samples = 0
all_predictions = []
all_labels = []

# Make sure dropout is disabled regardless of what ran before.
model.eval()

# Disable gradient calculation for efficiency
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move inputs and labels to the appropriate device
        inputs, labels = inputs.to(device), labels.to(device)

        # Get model predictions
        outputs = model(inputs)
        _, predictions = torch.max(outputs, 1)

        # Track total correct predictions and total samples
        total_correct += (predictions == labels).sum().item()
        total_samples += labels.size(0)

        # Store predictions and labels for further analysis
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate overall accuracy (a fraction in [0, 1], not a percentage)
accuracy = total_correct / total_samples
print(f'Accuracy on test set: {accuracy:.4f}')

Accuracy on test set: 0.7849


In [27]:
import torch
from tqdm import tqdm  # For progress bar

# Detailed evaluation of `model` on the test split: overall accuracy,
# per-class accuracy and mean loss. Uses test_loader, model, criterion and
# device from earlier cells.
model.eval()
running_loss = 0.0
correct_top1 = 0
total_images = 0

num_classes = 200  # Replace with the actual number of classes

# Per-class hit and sample counts, indexed by class label
class_correct = torch.zeros(num_classes).to(device)
class_total = torch.zeros(num_classes).to(device)

# total_correct / total_samples are reset as before but not updated by this
# cell; the prediction and label lists are filled in the loop below.
total_correct = 0
total_samples = 0
all_predictions = []
all_labels = []

# Disable gradient calculation for evaluation
with torch.no_grad():
    for images, labels in tqdm(test_loader):
        images, labels = images.to(device), labels.to(device)

        # Forward pass to get outputs
        outputs = model(images)

        # Batch loss weighted by batch size, so the final average is per sample
        loss = criterion(outputs, labels)
        running_loss += loss.item() * images.size(0)

        # Get top-1 predictions
        _, predicted_top1 = torch.max(outputs, 1)
        hits = predicted_top1 == labels

        # Track correct predictions for overall accuracy
        correct_top1 += hits.sum().item()
        total_images += labels.size(0)

        # Per-class counts via bincount instead of a per-sample Python loop,
        # which forced one host/device sync per sample.
        class_total += torch.bincount(labels, minlength=num_classes).to(class_total.dtype)
        class_correct += torch.bincount(labels[hits], minlength=num_classes).to(class_correct.dtype)

        # Store predictions and labels for further analysis
        all_predictions.extend(predicted_top1.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate overall accuracy
overall_accuracy = (correct_top1 / total_images) * 100
print(f'Overall Accuracy on test set: {overall_accuracy:.4f}%')

# Per-class accuracy; a class with no test samples yields NaN (0 / 0)
class_accuracies = class_correct / class_total
# Average only over classes that occur in the test split, so one absent
# class cannot turn the mean into NaN.
avg_class_accuracy = class_accuracies[class_total > 0].mean().item() * 100
print(f'Average Class Accuracy: {avg_class_accuracy:.4f}%')

# Print per-class accuracy
for i in range(num_classes):
    print(f'Accuracy of class {i}: {class_accuracies[i] * 100:.2f}%')

# Calculate the average loss
avg_loss = running_loss / total_images
print(f'Average Loss: {avg_loss:.4f}')

100%|██████████| 76/76 [00:22<00:00,  3.40it/s]

Overall Accuracy on test set: 78.4884%
Average Class Accuracy: 78.0077%
Accuracy of class 0: 57.14%
Accuracy of class 1: 100.00%
Accuracy of class 2: 80.00%
Accuracy of class 3: 100.00%
Accuracy of class 4: 80.00%
Accuracy of class 5: 100.00%
Accuracy of class 6: 100.00%
Accuracy of class 7: 83.33%
Accuracy of class 8: 20.00%
Accuracy of class 9: 80.00%
Accuracy of class 10: 80.00%
Accuracy of class 11: 100.00%
Accuracy of class 12: 100.00%
Accuracy of class 13: 100.00%
Accuracy of class 14: 100.00%
Accuracy of class 15: 83.33%
Accuracy of class 16: 60.00%
Accuracy of class 17: 100.00%
Accuracy of class 18: 71.43%
Accuracy of class 19: 100.00%
Accuracy of class 20: 100.00%
Accuracy of class 21: 25.00%
Accuracy of class 22: 60.00%
Accuracy of class 23: 50.00%
Accuracy of class 24: 83.33%
Accuracy of class 25: 80.00%
Accuracy of class 26: 100.00%
Accuracy of class 27: 85.71%
Accuracy of class 28: 0.00%
Accuracy of class 29: 66.67%
Accuracy of class 30: 71.43%
Accuracy of class 31: 66.67%


