In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import confusion_matrix, accuracy_score
import numpy as np
import pandas as pd

In [None]:
# Device selection: run on CUDA when PyTorch can see a GPU, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Root directory of the image dataset (relative to the notebook's working directory).
data_dir = "./dataset/"

In [None]:
# Hyperparameters
batch_size = 32
learning_rate = 1e-4
num_epochs = 10
# torchvision's ViT-B/16 asserts that every input is exactly
# image_size x image_size (224 by default, which is also what the pretrained
# position embeddings are sized for). The previous (100, 200) setting is
# rejected in the first forward pass, and neither side is a multiple of the
# 16-pixel patch size.
img_size = (224, 224)

# Data Transformations
# Both pipelines end with the same tensor conversion and normalisation; only
# the spatial step differs (random crop for training, plain resize for
# validation).
# NOTE(review): the ImageNet-pretrained weights were presumably trained with
# ImageNet mean/std rather than 0.5/0.5 - confirm whether that matters here.
_to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]

data_transforms = {
    # Augmentation: random crop covering 80-100% of the image, resized to img_size.
    'train': transforms.Compose(
        [transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0))] + _to_normalized_tensor
    ),
    # Deterministic preprocessing for evaluation.
    'val': transforms.Compose(
        [transforms.Resize(img_size)] + _to_normalized_tensor
    ),
}

In [None]:
# Load Dataset
# Two ImageFolder views over the same directory, one per transform pipeline.
# Subset objects share their underlying dataset, so assigning
# `subset.dataset.transform` for both splits leaves every sample using
# whichever pipeline was assigned last (training would silently run without
# augmentation). Separate dataset objects keep the pipelines independent;
# ImageFolder builds its sample list in sorted order, so indices refer to the
# same files in both views.
dataset = datasets.ImageFolder(data_dir, transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(data_dir, transform=data_transforms['val'])
num_classes = len(dataset.classes)

# 2-Fold Cross Validation
folds = 2
split_seed = 42  # fixed so the fold assignment is reproducible across runs

if len(dataset) < folds:
    raise ValueError(
        f"Need at least {folds} images for {folds}-fold cross validation, "
        f"found {len(dataset)}."
    )

# ImageFolder lists samples class by class, so contiguous index ranges would
# put largely different classes into the training and validation folds.
# Shuffle once with a seeded generator, then cut the permutation into `folds`
# nearly equal chunks so every sample is validated exactly once.
split_generator = torch.Generator().manual_seed(split_seed)
shuffled_indices = torch.randperm(len(dataset), generator=split_generator).tolist()
fold_indices = [chunk.tolist() for chunk in np.array_split(shuffled_indices, folds)]

metrics = []

# Best validation accuracy seen in any epoch of any fold. Starts below zero so
# that `best_model` is always assigned, even if accuracy never exceeds 0.
best_overall_val_acc = -1.0

for fold in range(folds):
    # Split the dataset: chunk `fold` is held out, the remaining chunks train.
    val_indices = fold_indices[fold]
    train_indices = [
        idx
        for chunk_id, chunk in enumerate(fold_indices)
        if chunk_id != fold
        for idx in chunk
    ]

    train_set = torch.utils.data.Subset(dataset, train_indices)
    val_set = torch.utils.data.Subset(val_dataset, val_indices)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=4)

    # Load Vision Transformer
    # A fresh pretrained backbone per fold so folds do not leak into each
    # other; the classification head is replaced to emit `num_classes` logits.
    # NOTE(review): `pretrained=True` is the legacy torchvision spelling; newer
    # releases prefer the `weights=` argument - confirm the installed version
    # before switching.
    model = models.vit_b_16(pretrained=True)
    model.heads = nn.Sequential(
        nn.Linear(model.heads[0].in_features, num_classes)
    )
    model.to(device)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

    # Training and Validation Loop
    best_val_acc = -1.0  # best validation accuracy within this fold
    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        train_loss, train_correct = 0.0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # The criterion returns the batch mean; weight it by the batch
            # size so the epoch figure is a true per-sample average.
            train_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            train_correct += (preds == labels).sum().item()

        train_loss /= len(train_set)
        train_acc = train_correct / len(train_set)

        # Validation Phase
        model.eval()
        val_loss, val_correct = 0.0, 0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * images.size(0)
                preds = outputs.argmax(dim=1)
                val_correct += (preds == labels).sum().item()

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        val_loss /= len(val_set)
        val_acc = val_correct / len(val_set)

        # Store metrics
        if val_acc > best_val_acc:
            best_val_acc = val_acc

        if val_acc > best_overall_val_acc:
            best_overall_val_acc = val_acc
            # state_dict() hands back references to the live parameters, which
            # keep changing as training continues; clone them (onto the CPU)
            # so the snapshot really is the best-scoring weights.
            best_model = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }
            best_fold = fold

        print(f"Fold {fold+1}, Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # Calculate Confusion Matrix
    # Built from the final epoch's predictions. Passing the full label list
    # keeps the matrix num_classes x num_classes even when a class is missing
    # from this validation fold.
    conf_matrix = confusion_matrix(all_labels, all_preds, labels=list(range(num_classes)))
    metrics.append({
        "fold": fold + 1,
        "train_acc": train_acc,
        "val_acc": val_acc,
        "best_val_acc": best_val_acc,
        "train_loss": train_loss,
        "val_loss": val_loss,
        "conf_matrix": conf_matrix
    })

In [None]:
# Persist the state dict held in `best_model` (assigned by the training cell)
# next to the notebook.
checkpoint_path = "best_vit_model.pth"
torch.save(best_model, checkpoint_path)
print("Best model saved.")

In [None]:
# Display Metrics
# One summary line per fold followed by that fold's confusion matrix.
for fold_record in metrics:
    print(
        f"Fold {fold_record['fold']}: "
        f"Train Acc: {fold_record['train_acc']:.4f}, "
        f"Val Acc: {fold_record['val_acc']:.4f}, "
        f"Train Loss: {fold_record['train_loss']:.4f}, "
        f"Val Loss: {fold_record['val_loss']:.4f}"
    )
    print("Confusion Matrix:\n" + f"{fold_record['conf_matrix']}")

In [None]:
# Identify the Best Fold
# Pick the per-fold record with the highest recorded validation accuracy
# (`max` keeps the earliest record when several folds tie).
best_fold_metrics = max(metrics, key=lambda record: record['val_acc'])
print("Best Fold: {}".format(best_fold_metrics['fold']))