<a href="https://colab.research.google.com/github/Aravindh4404/FYPSeagullClassification01/blob/main/VIT3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the dataset and checkpoint paths used
# later in this notebook all live under '/content/drive/My Drive/FYP/'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install timm



In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import random
import matplotlib.pyplot as plt
from datetime import datetime
import timm
from timm.scheduler import CosineLRScheduler
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from torch.cuda.amp import autocast, GradScaler
from collections import Counter

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Mount Google Drive (prints an "already mounted" notice if a previous cell did it)
from google.colab import drive
drive.mount('/content/drive')

# Define checkpoint folder (one folder per calendar day, created if missing)
date_str = datetime.now().strftime('%Y%m%d')
checkpoint_folder = f'/content/drive/My Drive/FYP/VIT2_HQ2_{date_str}/'
os.makedirs(checkpoint_folder, exist_ok=True)

# Data Augmentation with ImageNet Normalization
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Random augmentation pipeline: used for training samples only.
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.GaussianBlur(kernel_size=3),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

# Deterministic pipeline: used for validation and test samples.
transform_val_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load datasets
data_path = '/content/drive/My Drive/FYP/Dataset/HQ2/train'
test_data_path = '/content/drive/My Drive/FYP/Dataset/HQ2/test'
train_dataset = datasets.ImageFolder(data_path, transform=transform_train)
# Second view over the SAME folder (same file order, hence same indices) that
# applies the deterministic eval transform instead of the training augmentation.
val_dataset = datasets.ImageFolder(data_path, transform=transform_val_test)
test_dataset = datasets.ImageFolder(test_data_path, transform=transform_val_test)

# Split dataset (80% train / 20% validation)
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])

# random_split returns Subsets of train_dataset, so the validation half would
# otherwise be randomly augmented on every pass. Keep the sampled indices but
# read those samples through val_dataset so validation metrics are deterministic.
from torch.utils.data import Subset
val_subset = Subset(val_dataset, val_subset.indices)

# Calculate class weights for imbalanced data
# ImageFolder already stores one label per sample in `.targets`; iterating the
# dataset itself would decode and augment every image just to read its label.
full_targets = train_dataset.targets
class_counts = Counter(full_targets)
total_samples = len(full_targets)
n_classes = 2
# "Balanced" weighting: total / (n_classes * count_c), so rarer classes weigh more.
class_weights = [
    total_samples / (n_classes * class_counts[class_idx])
    for class_idx in range(n_classes)
]
class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

# Data Loaders
batch_size = 64
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

# Enhanced ViT Model with Dropout
class ViTEnhanced(nn.Module):
    """timm Vision Transformer backbone followed by dropout and a linear head.

    Args:
        dropout_rate: Dropout probability applied to the backbone features
            before the classification layer.
        num_classes: Number of output logits produced by the head.
        model_name: timm model identifier used for the backbone.
        pretrained: Passed to ``timm.create_model``; set to ``False`` when the
            weights will be restored from a checkpoint anyway.

    The defaults reproduce the original fixed configuration
    (``vit_base_patch16_224``, pretrained, 2 classes), and the attribute names
    ``vit`` / ``dropout`` / ``fc`` are unchanged so existing state dicts load.
    """

    def __init__(self, dropout_rate=0.3, num_classes=2,
                 model_name='vit_base_patch16_224', pretrained=True):
        super().__init__()
        # num_classes=0 requests the backbone without timm's own classifier,
        # so its forward pass yields feature vectors of size `num_features`.
        self.vit = timm.create_model(model_name, pretrained=pretrained, num_classes=0)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(self.vit.num_features, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images `x`."""
        x = self.vit(x)
        x = self.dropout(x)
        return self.fc(x)

model = ViTEnhanced().to(device)

# Loss Function with Label Smoothing and Class Weights
criterion = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=0.1)

# Optimizer & Scheduler
# Peak learning rate lowered from 1e-3 to 5e-5. Every backbone weight is being
# fine-tuned, and the recorded run at 1e-3 was unstable: validation accuracy
# jumped 96.71% -> 16.45% -> 88.16% -> 26.32% between epochs and the training
# loss peaked as warm-up approached the full rate.
optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.05)
num_epochs = 30
# Linear warm-up from 1e-6 over the first 5 epochs, then cosine decay.
scheduler = CosineLRScheduler(optimizer, t_initial=num_epochs, warmup_t=5, warmup_lr_init=1e-6)

# Mixed Precision Scaler (None on CPU; the training loop checks for this)
scaler = GradScaler() if torch.cuda.is_available() else None

def validate(model, loader, criterion):
    """Evaluate `model` on every batch of `loader` with gradients disabled.

    Returns:
        A tuple ``(loss, accuracy, f1)`` where ``loss`` is the mean of the
        per-batch losses, ``accuracy`` is a percentage of
        ``len(loader.dataset)`` and ``f1`` is the binary F1 score of the
        arg-max predictions.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    predicted, expected = [], []

    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            loss_sum += criterion(logits, batch_y).item()

            # Predicted class = index of the largest logit in each row.
            batch_pred = logits.argmax(dim=1)
            n_correct += (batch_pred == batch_y).sum().item()
            predicted.extend(batch_pred.cpu().numpy())
            expected.extend(batch_y.cpu().numpy())

    accuracy = 100 * n_correct / len(loader.dataset)
    f1 = f1_score(expected, predicted, average='binary')
    mean_loss = loss_sum / len(loader)
    return mean_loss, accuracy, f1

def test_tta(model, loader, n_tta=5):
    """Predict class indices using test-time augmentation (TTA).

    Each batch from `loader` is un-normalised, converted back to PIL images,
    randomly re-augmented `n_tta` times and passed through `model`; the
    softmax outputs of the passes are averaged before taking the arg-max.

    Args:
        model: Classifier returning logits for a batch of images.
        loader: Yields ``(inputs, labels)`` batches whose inputs were
            normalised with ``imagenet_mean`` / ``imagenet_std``; labels are
            ignored.
        n_tta: Number of augmented passes per batch (must be >= 1).

    Returns:
        A list with one predicted class index per sample, in loader order.

    Raises:
        ValueError: If `n_tta` is smaller than 1.
    """
    if n_tta < 1:
        raise ValueError(f"n_tta must be >= 1, got {n_tta}")

    model.eval()
    all_preds = []
    tta_trans = transforms.Compose([
        transforms.Resize(256),
        # Mild crops only. RandomResizedCrop's default scale=(0.08, 1.0) may
        # keep as little as 8% of the image, which can crop the subject out;
        # (0.8, 1.0) is the range the training augmentation uses.
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std)
    ])

    # Inverse of Normalize(mean, std): x * std + mean, written as a Normalize.
    unnormalize = transforms.Normalize(
        mean=[-m/s for m, s in zip(imagenet_mean, imagenet_std)],
        std=[1/s for s in imagenet_std]
    )
    # Built once instead of once per image.
    to_pil = transforms.ToPILImage()

    with torch.no_grad():
        for inputs, _ in loader:
            # Float round-off can leave values marginally outside [0, 1];
            # clamp so the conversion to 8-bit PIL images stays in range.
            unnormalized = unnormalize(inputs.cpu()).clamp_(0.0, 1.0)
            batch_preds = []

            for _ in range(n_tta):
                augmented = torch.stack([tta_trans(to_pil(img)) for img in unnormalized])
                augmented = augmented.to(device)
                outputs = model(augmented)
                batch_preds.append(outputs.softmax(1))

            # Average class probabilities over the TTA passes, then pick the best.
            avg_preds = torch.mean(torch.stack(batch_preds), dim=0)
            all_preds.extend(avg_preds.argmax(1).cpu().numpy())

    return all_preds

def _train_one_epoch(model, train_loader, criterion, optimizer):
    """Run one optimisation pass over `train_loader`; return the mean batch loss."""
    model.train()
    running_loss = 0.0
    use_amp = torch.cuda.is_available()

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        # torch.autocast is the supported replacement for the deprecated
        # torch.cuda.amp.autocast (which emits a FutureWarning on every call).
        with torch.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        if scaler:
            scaler.scale(loss).backward()
            # Unscale before clipping so the norm is measured on real gradients.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

        running_loss += loss.item()

    return running_loss / len(train_loader)


def _plot_training_curves(train_losses, val_losses, val_accuracies, val_f1s):
    """Plot loss curves (left) and validation accuracy / F1 (right)."""
    _, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

    loss_ax.plot(train_losses, label='Train Loss')
    loss_ax.plot(val_losses, label='Val Loss')
    loss_ax.legend()

    acc_ax.plot(val_accuracies, label='Val Accuracy')
    acc_ax.set_ylabel('Accuracy (%)')
    # Accuracy is in percent while F1 lies in [0, 1]; give F1 its own y-axis
    # so its curve is not squashed flat against the bottom of the plot.
    f1_ax = acc_ax.twinx()
    f1_ax.plot(val_f1s, label='Val F1', color='tab:orange')
    f1_ax.set_ylabel('F1')
    curves = acc_ax.get_lines() + f1_ax.get_lines()
    acc_ax.legend(curves, [curve.get_label() for curve in curves])

    plt.show()


def train(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=30):
    """Train `model`, validating and checkpointing after every epoch.

    After each epoch the scheduler is stepped, validation metrics are
    computed, ``latest_model.pth`` is overwritten in ``checkpoint_folder`` and
    ``best_model.pth`` is overwritten whenever validation F1 improves. The
    metric curves are plotted once training finishes.

    Args:
        model: Network to optimise.
        train_loader: Batches used for optimisation.
        val_loader: Batches used for per-epoch validation.
        criterion: Loss function taking ``(logits, labels)``.
        optimizer: Optimizer updating `model`'s parameters.
        scheduler: Epoch-indexed scheduler, called as ``step(epoch + 1)``.
        epochs: Number of epochs to run.

    Returns:
        A dict with per-epoch lists under ``'train_loss'``, ``'val_loss'``,
        ``'val_acc'`` and ``'val_f1'``.
    """
    # Start below any reachable F1 so the first epoch always writes
    # best_model.pth, even when its F1 is exactly 0.
    best_f1 = float('-inf')
    train_losses, val_losses = [], []
    val_accuracies, val_f1s = [], []
    best_path = os.path.join(checkpoint_folder, 'best_model.pth')
    latest_path = os.path.join(checkpoint_folder, 'latest_model.pth')

    for epoch in range(epochs):
        train_loss = _train_one_epoch(model, train_loader, criterion, optimizer)
        scheduler.step(epoch + 1)
        train_losses.append(train_loss)

        # Validation
        val_loss, val_acc, val_f1 = validate(model, val_loader, criterion)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)
        val_f1s.append(val_f1)

        # Save Best Model
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), best_path)

        # Save Latest Model
        torch.save(model.state_dict(), latest_path)

        print(f'Epoch {epoch+1}/{epochs}')
        print(f'Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}')
        print(f'Val Acc: {val_acc:.2f}% | Val F1: {val_f1:.4f}\n')

    # Plot metrics
    _plot_training_curves(train_losses, val_losses, val_accuracies, val_f1s)

    return {
        'train_loss': train_losses,
        'val_loss': val_losses,
        'val_acc': val_accuracies,
        'val_f1': val_f1s,
    }

# Execute Training
train(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=num_epochs)

# Final Evaluation
# Training leaves the LAST epoch's weights in memory, which need not be the
# best ones. Restore the checkpoint with the highest validation F1 before
# scoring the test set; if no such file exists, the in-memory weights are used.
best_model_path = os.path.join(checkpoint_folder, 'best_model.pth')
if os.path.exists(best_model_path):
    model.load_state_dict(torch.load(best_model_path, map_location=device))

test_preds = test_tta(model, test_loader)
# ImageFolder.samples holds (path, class_index) pairs in the same order the
# unshuffled test loader iterates them.
test_labels = [label for _, label in test_dataset.samples]
test_acc = 100 * accuracy_score(test_labels, test_preds)
test_f1 = f1_score(test_labels, test_preds, average='binary')

print(f'Final Test Accuracy: {test_acc:.2f}% | F1 Score: {test_f1:.4f}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  with autocast(enabled=torch.cuda.is_available()):


Epoch 1/30
Train Loss: 0.8411 | Val Loss: 0.8240
Val Acc: 48.68% | Val F1: 0.6214



  with autocast(enabled=torch.cuda.is_available()):


Epoch 2/30
Train Loss: 0.8555 | Val Loss: 0.5918
Val Acc: 96.71% | Val F1: 0.9807



  with autocast(enabled=torch.cuda.is_available()):


Epoch 3/30
Train Loss: 1.0934 | Val Loss: 0.6759
Val Acc: 16.45% | Val F1: 0.0000



  with autocast(enabled=torch.cuda.is_available()):


Epoch 4/30
Train Loss: 0.7404 | Val Loss: 1.2194
Val Acc: 88.16% | Val F1: 0.9333



  with autocast(enabled=torch.cuda.is_available()):


Epoch 5/30
Train Loss: 1.5772 | Val Loss: 0.7157
Val Acc: 80.92% | Val F1: 0.8816



  with autocast(enabled=torch.cuda.is_available()):


Epoch 6/30
Train Loss: 1.1284 | Val Loss: 0.7066
Val Acc: 78.95% | Val F1: 0.8667



  with autocast(enabled=torch.cuda.is_available()):


Epoch 7/30
Train Loss: 0.7935 | Val Loss: 0.7756
Val Acc: 26.32% | Val F1: 0.2113



  with autocast(enabled=torch.cuda.is_available()):


Epoch 8/30
Train Loss: 0.8153 | Val Loss: 0.7308
Val Acc: 89.47% | Val F1: 0.9398



  with autocast(enabled=torch.cuda.is_available()):


Epoch 9/30
Train Loss: 0.7837 | Val Loss: 0.6537
Val Acc: 19.74% | Val F1: 0.0758



  with autocast(enabled=torch.cuda.is_available()):


Epoch 10/30
Train Loss: 0.6992 | Val Loss: 0.6065
Val Acc: 88.16% | Val F1: 0.9286



  with autocast(enabled=torch.cuda.is_available()):


Epoch 11/30
Train Loss: 0.6919 | Val Loss: 0.6646
Val Acc: 89.47% | Val F1: 0.9385



  with autocast(enabled=torch.cuda.is_available()):


Epoch 12/30
Train Loss: 0.6866 | Val Loss: 0.5586
Val Acc: 80.26% | Val F1: 0.8696



  with autocast(enabled=torch.cuda.is_available()):


Epoch 13/30
Train Loss: 0.6266 | Val Loss: 0.7912
Val Acc: 88.82% | Val F1: 0.9368



  with autocast(enabled=torch.cuda.is_available()):


Epoch 14/30
Train Loss: 0.6821 | Val Loss: 0.8497
Val Acc: 92.76% | Val F1: 0.9579



  with autocast(enabled=torch.cuda.is_available()):


Epoch 15/30
Train Loss: 0.7317 | Val Loss: 0.6529
Val Acc: 89.47% | Val F1: 0.9389



  with autocast(enabled=torch.cuda.is_available()):


Epoch 16/30
Train Loss: 0.6415 | Val Loss: 0.6705
Val Acc: 53.95% | Val F1: 0.6196



  with autocast(enabled=torch.cuda.is_available()):


Epoch 17/30
Train Loss: 0.6208 | Val Loss: 0.5564
Val Acc: 85.53% | Val F1: 0.9068



  with autocast(enabled=torch.cuda.is_available()):


Epoch 18/30
Train Loss: 0.6485 | Val Loss: 0.5515
Val Acc: 88.82% | Val F1: 0.9323



  with autocast(enabled=torch.cuda.is_available()):


Epoch 19/30
Train Loss: 0.6015 | Val Loss: 0.5243
Val Acc: 74.34% | Val F1: 0.8219



  with autocast(enabled=torch.cuda.is_available()):
