<a href="https://colab.research.google.com/github/Aravindh4404/FYPSeagullClassification01/blob/main/Resnet0812HQ2pretrain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import numpy as np
import random
from PIL import Image
import cv2
from datetime import datetime

# Seed every RNG the pipeline touches (PyTorch, NumPy, stdlib) with one value
# so repeated runs start from the same random state.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
print(f"Using device: {device}")

# Mount Google Drive so datasets can be read and checkpoints written under
# /content/drive. This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

# Checkpoints go into a per-day folder (YYYYMMDD of the current local date);
# the folder is created if missing and reused if it already exists.
date_str = datetime.now().strftime('%Y%m%d')
checkpoint_folder = f'/content/drive/My Drive/FYP/ResNetHQ2/{date_str}/'
os.makedirs(checkpoint_folder, exist_ok=True)

# Values shared by both pipelines: the 224x224 input size and the ImageNet
# per-channel mean / std used for normalization.
IMAGE_SIZE = (224, 224)
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, then random flip / rotation / colour jitter,
# then tensor conversion and normalization.
train_steps = [
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
transform_train = transforms.Compose(train_steps)

# Validation / test pipeline: deterministic resize, tensor conversion and
# normalization only (no augmentation).
eval_steps = [
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
transform_val_test = transforms.Compose(eval_steps)

# Load datasets
data_path = '/content/drive/My Drive/FYP/Dataset/HQ2/train'
test_data_path = '/content/drive/My Drive/FYP/Dataset/HQ2/test'
train_dataset = datasets.ImageFolder(data_path, transform=transform_train)
test_dataset = datasets.ImageFolder(test_data_path, transform=transform_val_test)

# A second view of the same training folder that applies the deterministic
# evaluation transform. Subsets produced by random_split delegate to their
# parent dataset, so splitting `train_dataset` alone would apply the random
# training augmentation to the validation images as well.
val_source = datasets.ImageFolder(data_path, transform=transform_val_test)

# Split the dataset into 80% training and 20% validation
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_split = random_split(train_dataset, [train_size, val_size])
# Re-use the validation indices chosen by the split, but read the samples
# through the non-augmenting dataset.
val_dataset = torch.utils.data.Subset(val_source, val_split.indices)

# Create data loaders (only the training loader is shuffled)
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# ImageNet-pretrained ResNet50 with its final layer replaced for this task
class PretrainedResNet50(nn.Module):
    """ResNet-50 backbone with a freshly initialised linear classification head.

    Args:
        num_classes: Number of output logits produced by the replaced final
            fully connected layer. Defaults to 2 (binary classification).
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the explicit weights
        # enum selects the same ImageNet (V1) checkpoint that flag loaded.
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

        # Swap the original classifier for one sized to this task.
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Return the raw class logits of the wrapped ResNet for input `x`."""
        return self.resnet(x)

# Build the network and move it onto the selected device
model = PretrainedResNet50()
model = model.to(device)

# Cross-entropy loss over the class logits
criterion = nn.CrossEntropyLoss()

# Adam with L2 regularization applied through weight decay
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=1e-4,
)

# Cut the learning rate by 10x once the monitored metric has stopped
# increasing ('max' mode) for more than 3 consecutive epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.1,
    patience=3,
    verbose=True,
)

# Fixed-length training loop (no early stopping)
def train(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=20):
    """Train `model` for `epochs` epochs, validating after each one.

    After every epoch the mean of the per-batch training losses is printed,
    the model is validated, the scheduler is stepped with the validation
    accuracy, and the weights are saved as 'best_model.pth' whenever that
    accuracy exceeds the best seen so far. The weights after the last epoch
    are saved as 'final_model.pth'.

    Relies on the module-level `device`, `checkpoint_folder`, `validate` and
    `save_model`.
    """
    top_accuracy = 0.0  # best validation accuracy observed so far

    for epoch_idx in range(epochs):
        model.train()
        batch_losses = []

        for batch_inputs, batch_labels in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()
            optimizer.step()

            batch_losses.append(batch_loss.item())

        # Mean of the per-batch losses for this epoch
        epoch_loss = sum(batch_losses) / len(train_loader)
        print(f"Epoch [{epoch_idx+1}/{epochs}], Loss: {epoch_loss:.6f}")

        # Validation accuracy drives both the LR schedule and checkpointing
        current_accuracy = validate(model, val_loader, criterion)
        scheduler.step(current_accuracy)

        if current_accuracy > top_accuracy:
            top_accuracy = current_accuracy
            save_model(model, checkpoint_folder, 'best_model.pth')

    # Always keep the weights from the last epoch as well
    save_model(model, checkpoint_folder, 'final_model.pth')

# Validation loop returning accuracy
def validate(model, loader, criterion):
    """Evaluate `model` on `loader` and return the accuracy in percent.

    Args:
        model: Network producing one row of class scores per sample.
        loader: Sized iterable of `(inputs, labels)` batches.
        criterion: Loss called as `criterion(outputs, labels)`.

    Returns:
        Accuracy as a percentage (0-100). The mean of the per-batch losses
        and the accuracy are also printed.

    Raises:
        ZeroDivisionError: If `loader` yields no samples.
    """
    model.eval()

    # Move batches to wherever the model's weights live instead of depending
    # on a module-level `device`; only a parameter-less model falls back to
    # that global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()

            # Predicted class = index of the highest score in each row
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Validation Loss: {val_loss/len(loader):.6f}, Accuracy: {accuracy:.2f}%')
    return accuracy  # Return accuracy to track the best model

# Test function to evaluate on the test set
def test(model, loader, criterion):
    """Evaluate `model` on `loader`, print loss and accuracy, return accuracy.

    Args:
        model: Network producing one row of class scores per sample.
        loader: Sized iterable of `(inputs, labels)` batches.
        criterion: Loss called as `criterion(outputs, labels)`.

    Returns:
        Accuracy as a percentage (0-100), mirroring `validate`, so callers
        can record the result instead of parsing the printed line.

    Raises:
        ZeroDivisionError: If `loader` yields no samples.
    """
    model.eval()

    # Move batches to wherever the model's weights live instead of depending
    # on a module-level `device`; only a parameter-less model falls back to
    # that global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            test_loss += criterion(outputs, labels).item()

            # Predicted class = index of the highest score in each row
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Loss: {test_loss/len(loader):.6f}, Test Accuracy: {accuracy:.2f}%')
    return accuracy

def save_model(model, folder_path, model_name):
    """Write the model's state dict to `folder_path/model_name` and report the path."""
    destination = os.path.join(folder_path, model_name)
    weights = model.state_dict()
    torch.save(weights, destination)
    print(f"Model saved at {destination}")

# Step 1: Train for a fixed 20 epochs (no early stopping). The scheduler passed
# here IS active: train() steps it with the validation accuracy every epoch.
train(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=20)

# Step 2: Evaluate the model on the test set.
# NOTE(review): `model` still holds the weights from the last epoch here, not
# the 'best_model.pth' checkpoint -- confirm that is the intended model to report.
test(model, test_loader, criterion)


Using device: cpu
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 134MB/s]


Epoch [1/20], Loss: 0.566284
Validation Loss: 2.258463, Accuracy: 64.29%
Model saved at /content/drive/My Drive/FYP/ResNetHQ2/20241208/best_model.pth
Epoch [2/20], Loss: 0.412436
Validation Loss: 0.537595, Accuracy: 85.06%
Model saved at /content/drive/My Drive/FYP/ResNetHQ2/20241208/best_model.pth
Epoch [3/20], Loss: 0.394654
Validation Loss: 1.691787, Accuracy: 58.44%
Epoch [4/20], Loss: 0.472845
Validation Loss: 0.716678, Accuracy: 83.77%
Epoch [5/20], Loss: 0.493657
Validation Loss: 0.465914, Accuracy: 83.12%
Epoch [6/20], Loss: 0.467142
Validation Loss: 0.477301, Accuracy: 82.47%
Epoch [7/20], Loss: 0.469355
Validation Loss: 0.454614, Accuracy: 83.77%
Epoch [8/20], Loss: 0.446069
Validation Loss: 0.443647, Accuracy: 83.12%
Epoch [9/20], Loss: 0.452889
Validation Loss: 0.434388, Accuracy: 82.47%
Epoch [10/20], Loss: 0.443092
Validation Loss: 0.434881, Accuracy: 83.12%
Epoch [11/20], Loss: 0.432903
Validation Loss: 0.418633, Accuracy: 82.47%
Epoch [12/20], Loss: 0.432166
Validation 