<a href="https://colab.research.google.com/github/Aravindh4404/FYPSeagullClassification01/blob/main/Karpathyimprove1710_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import numpy as np
import random

# Seed PyTorch, NumPy and Python's own RNG with the same value so that
# weight initialisation, the train/validation split and augmentation draws
# are repeatable from run to run.
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(42)

# Google Drive holds both the dataset and the saved checkpoints, so it has to
# be mounted before any path under /content/drive is touched.
from google.colab import drive
drive.mount('/content/drive')

# Directory (on Drive) that receives model checkpoints; created on first use.
checkpoint_folder = '/content/drive/My Drive/FYP/ModelCheckpoints/'
os.makedirs(checkpoint_folder, exist_ok=True)

# The pretrained ResNet50 weights were fit on ImageNet images normalised with
# these per-channel statistics; feeding raw [0, 1] tensors shifts every
# activation away from what the pretrained layers expect.
imagenet_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Data Augmentation for Training Set
transform_train = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.RandomHorizontalFlip(),  # Augmentation: Random horizontal flip
    transforms.RandomRotation(15),      # Augmentation: Random rotation of up to +/-15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # Augmentation: Color jitter
    transforms.ToTensor(),              # Convert to a float tensor in [0, 1]
    imagenet_normalize,                 # Match the statistics the backbone was pretrained with
])

# Deterministic preprocessing for validation and test sets: same resize and
# normalisation as training, but no random augmentation.
transform_val_test = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    imagenet_normalize,
])

# Data preparation: Training, Validation, and Test sets
data_path = '/content/drive/My Drive/FYP/Dataset/Original_Adult_In-flight/train'
test_data_path = '/content/drive/My Drive/FYP/Dataset/Original_Adult_In-flight/test'

# Load datasets. The training folder is opened twice so that the same images
# can be served with augmentation (training) or without it (validation).
full_train_dataset = datasets.ImageFolder(data_path, transform=transform_train)
full_val_dataset = datasets.ImageFolder(data_path, transform=transform_val_test)
test_dataset = datasets.ImageFolder(test_data_path, transform=transform_val_test)

# Split the dataset into 80% training and 20% validation
train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_split = random_split(full_train_dataset, [train_size, val_size])

# Re-point the validation indices at the un-augmented view of the folder.
# Using the random_split subset directly would apply the random flips,
# rotations and colour jitter to validation images too, making the accuracy
# that drives the scheduler and early stopping noisy.
val_dataset = torch.utils.data.Subset(full_val_dataset, val_split.indices)

# Create data loaders (only the training loader is shuffled)
batch_size = 16  # Use a larger batch size for better stability
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Use Pre-trained ResNet50 model and modify it for binary classification
class ResNet50Modified(nn.Module):
    """ImageNet-pretrained ResNet50 with its classifier head replaced.

    The original 1000-way fully connected layer is swapped for dropout
    followed by a linear layer, so the network emits one logit per target
    class. All backbone layers stay trainable.

    Args:
        num_classes: Number of output logits. Defaults to 2, the binary
            setup this notebook trains (the two gull classes found in the
            dataset folders).
        dropout: Dropout probability applied to the pooled features before
            the final linear layer. Defaults to 0.5.
    """

    def __init__(self, num_classes=2, dropout=0.5):
        super().__init__()
        # Newer torchvision releases select pretrained weights through the
        # ``weights`` enum and warn about ``pretrained=True``; older releases
        # only understand the boolean flag. IMAGENET1K_V1 is the checkpoint
        # that ``pretrained=True`` resolves to, so both paths load the same
        # weights.
        if hasattr(models, "ResNet50_Weights"):
            self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        else:
            self.resnet = models.resnet50(pretrained=True)

        # Replace the final fully connected layer. The Sequential layout
        # (dropout at index 0, linear at index 1) is kept so previously saved
        # state dicts still load.
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(num_ftrs, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        return self.resnet(x)

# Initialize the ResNet50 model
model = ResNet50Modified()

# Cross-entropy over the raw logits; Adam with weight decay, which adds an
# L2 penalty on the weights to every gradient step.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# Cut the learning rate by 10x once validation accuracy (mode='max') has not
# improved for 3 consecutive epochs. The deprecated `verbose` flag is no
# longer passed; read optimizer.param_groups[0]['lr'] to see the current rate.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=3)

# Early stopping class to monitor validation accuracy
class EarlyStopping:
    """Track validation accuracy, checkpoint improvements, and flag when to stop.

    Call the instance once per epoch with the latest validation accuracy.
    A strictly higher accuracy than the best seen so far saves the model and
    resets the patience counter; anything else (including a tie) counts as an
    epoch without improvement. After ``patience`` such epochs in a row,
    ``early_stop`` becomes True.

    Args:
        patience: Number of consecutive non-improving epochs tolerated
            before ``early_stop`` is set.
        verbose: If True, print a message when early stopping triggers.
            Checkpoint messages are printed regardless of this flag.
    """

    def __init__(self, patience=5, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive epochs without improvement
        self.best_acc = None      # best validation accuracy seen so far
        self.early_stop = False   # set once patience is exhausted

    def __call__(self, val_acc, model, folder_path):
        """Record ``val_acc`` and checkpoint ``model`` if it is a new best.

        Args:
            val_acc: Validation accuracy for the epoch that just finished.
            model: Model whose ``state_dict`` is saved on improvement.
            folder_path: Directory that receives ``best_model.pth``.
        """
        if self.best_acc is None or val_acc > self.best_acc:
            # First observation or a strict improvement: checkpoint it.
            first_call = self.best_acc is None
            self.best_acc = val_acc
            self.save_model(model, folder_path)
            if not first_call:
                self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            if self.verbose:
                print("Early stopping triggered!")
            self.early_stop = True

    def save_model(self, model, folder_path):
        """Write the model's ``state_dict`` to ``<folder_path>/best_model.pth``.

        The folder is created if missing so that a fresh or mistyped
        checkpoint directory does not abort training on the first save.
        """
        os.makedirs(folder_path, exist_ok=True)
        model_save_path = os.path.join(folder_path, "best_model.pth")
        torch.save(model.state_dict(), model_save_path)
        print(f"Best model saved with accuracy: {self.best_acc:.4f}% at {model_save_path}")

# Stop training once validation accuracy has gone five consecutive epochs
# without beating its best value; verbose=True announces when that happens.
early_stopping = EarlyStopping(
    verbose=True,
    patience=5,
)

# Training loop with early stopping and learning rate scheduler
def train(model, train_loader, val_loader, criterion, optimizer, scheduler, early_stopping, epochs=20,
          checkpoint_dir=None):
    """Train ``model`` with per-epoch validation, LR scheduling and early stopping.

    Each epoch runs one pass over ``train_loader``, prints the mean batch
    loss, measures validation accuracy with ``validate``, steps ``scheduler``
    on that accuracy and hands it to ``early_stopping``, which checkpoints
    improvements and may end training early.

    Args:
        model: Network to optimise (updated in place).
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimiser over ``model``'s parameters.
        scheduler: Scheduler whose ``step`` accepts the validation accuracy.
        early_stopping: Callable ``(val_acc, model, folder)`` exposing an
            ``early_stop`` flag.
        epochs: Maximum number of epochs to run.
        checkpoint_dir: Folder passed to ``early_stopping`` for checkpoints.
            Defaults to the module-level ``checkpoint_folder``.
    """
    if checkpoint_dir is None:
        checkpoint_dir = checkpoint_folder

    # Send batches to wherever the model's weights live, so the loop works
    # unchanged whether or not the caller moved the model to a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in train_loader:
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Print the mean loss per batch; max() avoids dividing by zero when
        # the loader yields nothing.
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/max(num_batches, 1):.6f}")

        # Validate the model and check accuracy
        val_acc = validate(model, val_loader, criterion)

        # Step the plateau scheduler and report any change it made, without
        # relying on the scheduler's own (deprecated) verbose output.
        lr_before = optimizer.param_groups[0]["lr"]
        scheduler.step(val_acc)
        lr_after = optimizer.param_groups[0]["lr"]
        if lr_after != lr_before:
            print(f"Learning rate changed from {lr_before:.2e} to {lr_after:.2e}")

        # Early stopping checks (also saves the best checkpoint so far)
        early_stopping(val_acc, model, checkpoint_dir)

        if early_stopping.early_stop:
            print("Early stopping - training stopped")
            break

# Validation loop returning accuracy
def validate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` and return its accuracy in percent.

    The model is switched to eval mode and gradients are disabled. The mean
    batch loss and the accuracy are printed.

    Args:
        model: Network to evaluate.
        loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.

    Returns:
        Accuracy as a percentage in [0, 100]. Returns 0.0 (after printing a
        notice) when ``loader`` yields no samples, instead of dividing by zero.
    """
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    # Evaluate on whichever device holds the model's weights.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    with torch.no_grad():
        for inputs, labels in loader:
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()

            # The predicted class is the index of the largest logit.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

    if total == 0:
        print("Validation skipped: loader produced no samples")
        return 0.0

    accuracy = 100 * correct / total
    print(f'Validation Loss: {val_loss/num_batches:.6f}, Accuracy: {accuracy:.2f}%')
    return accuracy  # Return accuracy to track the best model

# Test function to evaluate on the test set
def test(model, loader, criterion):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Loss: {test_loss/len(loader):.6f}, Test Accuracy: {accuracy:.2f}%')

# Run the full code sequence

# Step 1: Train the model with early stopping and learning rate scheduling
train(model, train_loader, val_loader, criterion, optimizer, scheduler, early_stopping, epochs=20)

# Step 2: Restore the best checkpoint written during training. When training
# ends, `model` holds the weights of the last epoch, which can be several
# non-improving epochs past the best validation accuracy.
best_model_path = os.path.join(checkpoint_folder, "best_model.pth")
if os.path.exists(best_model_path):
    # Load onto the CPU first; load_state_dict copies the values into the
    # model's existing parameters wherever they live.
    model.load_state_dict(torch.load(best_model_path, map_location="cpu"))
    print(f"Restored best checkpoint from {best_model_path}")

# Step 3: Evaluate the model on the test set
test(model, test_loader, criterion)


Mounted at /content/drive


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 128MB/s]


Epoch [1/20], Loss: 0.695752
Validation Loss: 27.411292, Accuracy: 66.67%
Best model saved with accuracy: 66.6667% at /content/drive/My Drive/FYP/ModelCheckpoints/best_model.pth
Epoch [2/20], Loss: 0.517261
Validation Loss: 0.973971, Accuracy: 52.94%
Epoch [3/20], Loss: 0.656729
Validation Loss: 5.368050, Accuracy: 43.14%
Epoch [4/20], Loss: 0.538634
Validation Loss: 2.464585, Accuracy: 50.98%
Epoch [5/20], Loss: 0.432616
Validation Loss: 1.253740, Accuracy: 49.02%
Epoch [6/20], Loss: 0.346550
Validation Loss: 0.327017, Accuracy: 82.35%
Best model saved with accuracy: 82.3529% at /content/drive/My Drive/FYP/ModelCheckpoints/best_model.pth
Epoch [7/20], Loss: 0.329089
Validation Loss: 0.400866, Accuracy: 90.20%
Best model saved with accuracy: 90.1961% at /content/drive/My Drive/FYP/ModelCheckpoints/best_model.pth
Epoch [8/20], Loss: 0.292999
Validation Loss: 0.336263, Accuracy: 88.24%
Epoch [9/20], Loss: 0.254280
Validation Loss: 0.443387, Accuracy: 74.51%
Epoch [10/20], Loss: 0.341440
