In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os 
import cv2 as cv 
import glob
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler,Subset
from torchvision import transforms
from IPython.display import HTML
from torchvision import transforms
from sklearn.model_selection import train_test_split
from torchvision  import datasets 
from sklearn.model_selection import KFold
import optuna
from optuna.trial import Trial
from tqdm import tqdm
import torch.optim as optim
import torch.nn as nn
from torchvision import models
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import Subset, DataLoader

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
combined_data = "/kaggle/input/deepfake-and-real-images/Dataset/Test"

In [5]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),         # Convert to Tensor
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # Normalize
])

In [6]:
combinedData = datasets.ImageFolder(combined_data, transform=transform)

In [7]:
combinedData

Dataset ImageFolder
    Number of datapoints: 10905
    Root location: /kaggle/input/deepfake-and-real-images/Dataset/Test
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
               Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
           )

In [8]:
kf = KFold(n_splits=3, shuffle=True, random_state=42) 

In [9]:
 b samples = len(combinedData)
samples

10905

In [10]:
folds = list(kf.split(np.arange(samples)))

In [11]:
folds

[(array([    1,     2,     4, ..., 10900, 10901, 10902]),
  array([    0,     3,     8, ..., 10899, 10903, 10904])),
 (array([    0,     1,     3, ..., 10902, 10903, 10904]),
  array([    2,    15,    18, ..., 10889, 10900, 10901])),
 (array([    0,     2,     3, ..., 10901, 10903, 10904]),
  array([    1,     4,     5, ..., 10896, 10897, 10902]))]

# Split our data into 3 Kfolds

In [12]:
for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(np.arange(samples))):
    print(f"Fold {fold_idx + 1}")
    
    train_size = int(0.75* len(train_val_idx)) 
    val_size = len(train_val_idx) - train_size #25
    
    train_idx = train_val_idx[:train_size]
    val_idx = train_val_idx[train_size:]
    
    # Create subsets
    train_set = Subset(combinedData, train_idx)
    val_set = Subset(combinedData, val_idx)
    test_set = Subset(combinedData, test_idx)
    
    train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=32, shuffle=False)
    
    print(f"Train size: {len(train_set)}")
    print(f"Validation size: {len(val_set)}")
    print(f"Test size: {len(test_set)}")

Fold 1
Train size: 5452
Validation size: 1818
Test size: 3635
Fold 2
Train size: 5452
Validation size: 1818
Test size: 3635
Fold 3
Train size: 5452
Validation size: 1818
Test size: 3635


## Applying Fine-tune

In [13]:
def get_model(name):
    if name == "vit_b_16":
        # Load the pretrained Vision Transformer model
        model = models.vit_b_16(pretrained=True)

        # Freeze all layers initially
        for param in model.parameters():
            param.requires_grad = False

        # Fine-tune the last transformer block
        for param in model.encoder.layers[-1].parameters():
            param.requires_grad = True

        # Modify the classifier for the desired number of output classes
        num_features = model.heads.head.in_features
        model.heads.head = nn.Linear(num_features, 2)

        # Ensure the classifier is trainable
        for param in model.heads.head.parameters():
            param.requires_grad = True

        # Print the number of trainable parameters
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"Total trainable parameters: {trainable_params:,}")

    else:
        raise ValueError("Model name must be 'vit_b_16'")

    return model

## Training Function 

In [14]:
def TrainingModels(model, train_loader, criterion, optimizer):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader, desc="Training", leave=False):
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)  #loss function used to compute the error between predictions and true labels
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1) # max score from the scores of two classes
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [15]:
def evaluate(model, loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return running_loss / total, correct / total

In [16]:
def LR_optimization(trial: Trial):
    # Use a smaller learning rate range for fine-tuning
    lr = trial.suggest_loguniform("lr", 1e-6, 1e-3)
    
    # Rest of the function remains the same
    fold_results = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(np.arange(len(combinedData)))):  # Fixed parenthesis
        print(f"\nFold {fold_idx + 1}")
        
        train_size = int(0.75 * len(train_val_idx))
        train_idx = train_val_idx[:train_size]
        val_idx = train_val_idx[train_size:]

        train_set = Subset(combinedData, train_idx)
        val_set = Subset(combinedData, val_idx)
        test_set = Subset(combinedData, test_idx)

        train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

        for model_name in ["vit_b_16"]:
            print(f"\nFine-tuning model: {model_name.upper()} on Fold {fold_idx + 1}")
            
            model = get_model(model_name).to(device)
            criterion = nn.CrossEntropyLoss()
            
            # Use a smaller weight decay for fine-tuning
            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
            
            # Add learning rate scheduler for fine-tuning
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='max', factor=0.5, patience=2, verbose=True
            )

            for epoch in range(5):
                current_lr = optimizer.param_groups[0]["lr"]
                print(f"\nEpoch {epoch + 1}/5 | Learning Rate: {current_lr:.6f}")
                
                train_loss, train_acc = TrainingModels(model, train_loader, criterion, optimizer)
                
                # Evaluate after training each epoch
                val_loss, val_acc = evaluate(model, val_loader, criterion)

                # Update learning rate based on validation accuracy
                scheduler.step(val_acc)

                print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
                print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
            
            fold_results.append(val_acc)

    mean_val_acc = sum(fold_results) / len(fold_results)
    return mean_val_acc


In [17]:
study = optuna.create_study(direction="maximize")
study.optimize(LR_optimization, n_trials=5)

best_trial = study.best_trial
print("\nBest Trial:")
print(f"  Value (Mean Validation Accuracy): {best_trial.value}")
print(f"  Params: {best_trial.params}")

[I 2024-12-28 15:54:41,237] A new study created in memory with name: no-name-1802b02a-e357-4435-8b52-85d6da53b3d0
  lr = trial.suggest_loguniform("lr", 1e-6, 1e-3)



Fold 1

Fine-tuning model: VIT_B_16 on Fold 1


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:01<00:00, 194MB/s]  


Total trainable parameters: 7,089,410





Epoch 1/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.5484, Train Acc: 0.7078
Val Loss: 0.8187, Val Acc: 0.5286

Epoch 2/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.4165, Train Acc: 0.8014
Val Loss: 1.1067, Val Acc: 0.4109

Epoch 3/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.3083, Train Acc: 0.8632
Val Loss: 0.5431, Val Acc: 0.7662

Epoch 4/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.2058, Train Acc: 0.9209
Val Loss: 0.8493, Val Acc: 0.6601

Epoch 5/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.1081, Train Acc: 0.9672
Val Loss: 1.2486, Val Acc: 0.5864

Fold 2

Fine-tuning model: VIT_B_16 on Fold 2
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.5358, Train Acc: 0.7196
Val Loss: 0.4907, Val Acc: 0.7816

Epoch 2/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.3960, Train Acc: 0.8144
Val Loss: 0.9563, Val Acc: 0.5077

Epoch 3/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.2867, Train Acc: 0.8755
Val Loss: 0.8327, Val Acc: 0.6238

Epoch 4/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.1827, Train Acc: 0.9303
Val Loss: 0.9627, Val Acc: 0.6177

Epoch 5/5 | Learning Rate: 0.000042


                                                           

Train Loss: 0.0802, Train Acc: 0.9796
Val Loss: 0.7183, Val Acc: 0.7343

Fold 3

Fine-tuning model: VIT_B_16 on Fold 3
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.5393, Train Acc: 0.7183
Val Loss: 0.6812, Val Acc: 0.6381

Epoch 2/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.3900, Train Acc: 0.8212
Val Loss: 0.5668, Val Acc: 0.7173

Epoch 3/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.2776, Train Acc: 0.8833
Val Loss: 0.6085, Val Acc: 0.7316

Epoch 4/5 | Learning Rate: 0.000084


                                                           

Train Loss: 0.1813, Train Acc: 0.9316
Val Loss: 1.0244, Val Acc: 0.6023

Epoch 5/5 | Learning Rate: 0.000084


[I 2024-12-28 16:30:36,963] Trial 0 finished with value: 0.6362302896956362 and parameters: {'lr': 8.350466916021824e-05}. Best is trial 0 with value: 0.6362302896956362.


Train Loss: 0.0968, Train Acc: 0.9707
Val Loss: 1.1847, Val Acc: 0.5880

Fold 1

Fine-tuning model: VIT_B_16 on Fold 1
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.5133, Train Acc: 0.7395
Val Loss: 1.2326, Val Acc: 0.2519

Epoch 2/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.3239, Train Acc: 0.8564
Val Loss: 0.2659, Val Acc: 0.8916

Epoch 3/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.1904, Train Acc: 0.9235
Val Loss: 0.4964, Val Acc: 0.7932

Epoch 4/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.1124, Train Acc: 0.9567
Val Loss: 0.7717, Val Acc: 0.7706

Epoch 5/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.0604, Train Acc: 0.9771
Val Loss: 0.6730, Val Acc: 0.8036

Fold 2

Fine-tuning model: VIT_B_16 on Fold 2
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.5082, Train Acc: 0.7443
Val Loss: 0.4291, Val Acc: 0.8053

Epoch 2/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.3348, Train Acc: 0.8512
Val Loss: 0.4364, Val Acc: 0.8141

Epoch 3/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.2054, Train Acc: 0.9175
Val Loss: 0.8432, Val Acc: 0.6298

Epoch 4/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.1210, Train Acc: 0.9525
Val Loss: 0.5706, Val Acc: 0.8042

Epoch 5/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.0654, Train Acc: 0.9765
Val Loss: 0.9333, Val Acc: 0.7723

Fold 3

Fine-tuning model: VIT_B_16 on Fold 3
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.5200, Train Acc: 0.7388
Val Loss: 0.5078, Val Acc: 0.7789

Epoch 2/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.3310, Train Acc: 0.8514
Val Loss: 0.9639, Val Acc: 0.5578

Epoch 3/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.2294, Train Acc: 0.9054
Val Loss: 0.7784, Val Acc: 0.6799

Epoch 4/5 | Learning Rate: 0.000513


                                                           

Train Loss: 0.1160, Train Acc: 0.9569
Val Loss: 0.5157, Val Acc: 0.8036

Epoch 5/5 | Learning Rate: 0.000513


[I 2024-12-28 17:06:03,226] Trial 1 finished with value: 0.759075907590759 and parameters: {'lr': 0.0005134706023131347}. Best is trial 1 with value: 0.759075907590759.


Train Loss: 0.0717, Train Acc: 0.9721
Val Loss: 1.0963, Val Acc: 0.7013

Fold 1

Fine-tuning model: VIT_B_16 on Fold 1
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.5436, Train Acc: 0.7230
Val Loss: 0.7501, Val Acc: 0.5957

Epoch 2/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.3376, Train Acc: 0.8487
Val Loss: 1.3913, Val Acc: 0.4098

Epoch 3/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.2321, Train Acc: 0.9041
Val Loss: 0.4206, Val Acc: 0.8399

Epoch 4/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.1635, Train Acc: 0.9387
Val Loss: 0.4226, Val Acc: 0.8223

Epoch 5/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.0833, Train Acc: 0.9688
Val Loss: 0.9924, Val Acc: 0.7596

Fold 2

Fine-tuning model: VIT_B_16 on Fold 2
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.5494, Train Acc: 0.7163
Val Loss: 0.5861, Val Acc: 0.7305

Epoch 2/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.3432, Train Acc: 0.8461
Val Loss: 0.4925, Val Acc: 0.7844

Epoch 3/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.2102, Train Acc: 0.9158
Val Loss: 0.7538, Val Acc: 0.6914

Epoch 4/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.1241, Train Acc: 0.9521
Val Loss: 0.8759, Val Acc: 0.7305

Epoch 5/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.0735, Train Acc: 0.9736
Val Loss: 1.3113, Val Acc: 0.6645

Fold 3

Fine-tuning model: VIT_B_16 on Fold 3
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.5173, Train Acc: 0.7438
Val Loss: 0.7817, Val Acc: 0.5633

Epoch 2/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.3122, Train Acc: 0.8637
Val Loss: 0.3710, Val Acc: 0.8421

Epoch 3/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.2002, Train Acc: 0.9182
Val Loss: 0.8981, Val Acc: 0.6452

Epoch 4/5 | Learning Rate: 0.000996


                                                           

Train Loss: 0.1293, Train Acc: 0.9501
Val Loss: 1.0009, Val Acc: 0.6958

Epoch 5/5 | Learning Rate: 0.000996


[I 2024-12-28 17:41:30,598] Trial 2 finished with value: 0.6987532086541988 and parameters: {'lr': 0.0009957911247166083}. Best is trial 1 with value: 0.759075907590759.


Train Loss: 0.0884, Train Acc: 0.9657
Val Loss: 0.9443, Val Acc: 0.6722

Fold 1

Fine-tuning model: VIT_B_16 on Fold 1
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.5453, Train Acc: 0.7093
Val Loss: 0.6409, Val Acc: 0.7129

Epoch 2/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.3683, Train Acc: 0.8292
Val Loss: 0.4905, Val Acc: 0.7783

Epoch 3/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.2223, Train Acc: 0.9098
Val Loss: 0.8035, Val Acc: 0.7332

Epoch 4/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.1404, Train Acc: 0.9470
Val Loss: 1.0448, Val Acc: 0.6436

Epoch 5/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.0659, Train Acc: 0.9758
Val Loss: 1.8934, Val Acc: 0.5545

Fold 2

Fine-tuning model: VIT_B_16 on Fold 2
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.5220, Train Acc: 0.7414
Val Loss: 0.3449, Val Acc: 0.8751

Epoch 2/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.3472, Train Acc: 0.8428
Val Loss: 0.3829, Val Acc: 0.8443

Epoch 3/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.2120, Train Acc: 0.9121
Val Loss: 0.6637, Val Acc: 0.7184

Epoch 4/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.1340, Train Acc: 0.9477
Val Loss: 1.6524, Val Acc: 0.5600

Epoch 5/5 | Learning Rate: 0.000291


                                                           

Train Loss: 0.0485, Train Acc: 0.9829
Val Loss: 1.3389, Val Acc: 0.7547

Fold 3

Fine-tuning model: VIT_B_16 on Fold 3
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.5183, Train Acc: 0.7467
Val Loss: 1.0021, Val Acc: 0.4378

Epoch 2/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.3254, Train Acc: 0.8590
Val Loss: 0.8301, Val Acc: 0.6078

Epoch 3/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.2035, Train Acc: 0.9202
Val Loss: 0.6162, Val Acc: 0.7492

Epoch 4/5 | Learning Rate: 0.000581


                                                           

Train Loss: 0.1200, Train Acc: 0.9552
Val Loss: 1.3979, Val Acc: 0.5594

Epoch 5/5 | Learning Rate: 0.000581


[I 2024-12-28 18:16:59,991] Trial 3 finished with value: 0.6971030436376972 and parameters: {'lr': 0.0005814965816558238}. Best is trial 1 with value: 0.759075907590759.


Train Loss: 0.0804, Train Acc: 0.9721
Val Loss: 0.7193, Val Acc: 0.7822

Fold 1

Fine-tuning model: VIT_B_16 on Fold 1
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.6554, Train Acc: 0.6337
Val Loss: 1.0649, Val Acc: 0.1067

Epoch 2/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.6186, Train Acc: 0.6599
Val Loss: 1.0095, Val Acc: 0.1579

Epoch 3/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5918, Train Acc: 0.6752
Val Loss: 1.0195, Val Acc: 0.1755

Epoch 4/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5706, Train Acc: 0.6904
Val Loss: 0.9468, Val Acc: 0.2679

Epoch 5/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5525, Train Acc: 0.6990
Val Loss: 0.8949, Val Acc: 0.3410

Fold 2

Fine-tuning model: VIT_B_16 on Fold 2
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.6305, Train Acc: 0.6526
Val Loss: 1.0166, Val Acc: 0.1700

Epoch 2/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5972, Train Acc: 0.6803
Val Loss: 0.9607, Val Acc: 0.2360

Epoch 3/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5753, Train Acc: 0.6891
Val Loss: 0.9715, Val Acc: 0.2376

Epoch 4/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5559, Train Acc: 0.7041
Val Loss: 0.9054, Val Acc: 0.3240

Epoch 5/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5384, Train Acc: 0.7135
Val Loss: 0.8971, Val Acc: 0.3509

Fold 3

Fine-tuning model: VIT_B_16 on Fold 3
Total trainable parameters: 7,089,410

Epoch 1/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.6234, Train Acc: 0.6680
Val Loss: 1.0332, Val Acc: 0.1199

Epoch 2/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5886, Train Acc: 0.6878
Val Loss: 1.0079, Val Acc: 0.1689

Epoch 3/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5663, Train Acc: 0.7008
Val Loss: 0.9541, Val Acc: 0.2519

Epoch 4/5 | Learning Rate: 0.000003


                                                           

Train Loss: 0.5479, Train Acc: 0.7117
Val Loss: 0.9499, Val Acc: 0.2827

Epoch 5/5 | Learning Rate: 0.000003


[I 2024-12-28 18:52:33,229] Trial 4 finished with value: 0.3384671800513384 and parameters: {'lr': 3.0948580585533394e-06}. Best is trial 1 with value: 0.759075907590759.


Train Loss: 0.5328, Train Acc: 0.7208
Val Loss: 0.9280, Val Acc: 0.3234

Best Trial:
  Value (Mean Validation Accuracy): 0.759075907590759
  Params: {'lr': 0.0005134706023131347}


# After the Optuna optimization process, we will use the best trial to set the learning rate

In [18]:
# Best learning rate from Optuna
best_lr = 0.0005134706023131347

In [19]:
final_results = {}

for model_name in ["vit_b_16"]:
    print(f"\nTraining model: {model_name.upper()} with best learning rate {best_lr}")

    model = get_model(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=best_lr)

    train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

    for epoch in range(10):
        print(f"\nEpoch {epoch + 1}/10")
        train_loss, train_acc = TrainingModels(model, train_loader, criterion, optimizer)
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")

    test_loss, test_acc = evaluate(model, test_loader, criterion)
    print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")

    final_results[model_name] = {
        "Test Loss": test_loss,
        "Test Accuracy": test_acc
    }

print("\nFinal Results:")
for model_name, metrics in final_results.items():
    print(f"{model_name.upper()}:")
    for metric, value in metrics.items():
        print(f"  {metric}: {value:.4f}")



Training model: VIT_B_16 with best learning rate 0.0005134706023131347
Total trainable parameters: 7,089,410

Epoch 1/10


                                                           

Train Loss: 0.5065, Train Acc: 0.7428

Epoch 2/10


                                                           

Train Loss: 0.3093, Train Acc: 0.8672

Epoch 3/10


                                                           

Train Loss: 0.1925, Train Acc: 0.9184

Epoch 4/10


                                                           

Train Loss: 0.1255, Train Acc: 0.9496

Epoch 5/10


                                                           

Train Loss: 0.0562, Train Acc: 0.9780

Epoch 6/10


                                                           

Train Loss: 0.0405, Train Acc: 0.9850

Epoch 7/10


                                                           

Train Loss: 0.0334, Train Acc: 0.9895

Epoch 8/10


                                                           

Train Loss: 0.0233, Train Acc: 0.9930

Epoch 9/10


                                                           

Train Loss: 0.0361, Train Acc: 0.9859

Epoch 10/10


                                                           

Train Loss: 0.0125, Train Acc: 0.9952
Test Loss: 1.0857, Test Acc: 0.8223

Final Results:
VIT_B_16:
  Test Loss: 1.0857
  Test Accuracy: 0.8223


In [21]:
# Function to evaluate the model and get performance metrics
def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return np.array(all_preds), np.array(all_labels)

# Initialize cross-validation metrics storage
confusion_matrices = []
accuracies = []
precisions = []
recalls = []
f1_scores = []

best_lr = 0.0005134706023131347

for model_name in ["vit_b_16"]:
    print(f"\nTraining model: {model_name.upper()} with best learning rate {best_lr}")
    
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(np.arange(samples))):
        print(f"Fold {fold_idx + 1}")

        train_size = int(0.75 * len(train_val_idx))
        train_idx = train_val_idx[:train_size]
        val_idx = train_val_idx[train_size:]

        train_set = Subset(combinedData, train_idx)
        val_set = Subset(combinedData, val_idx)
        test_set = Subset(combinedData, test_idx)

        train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_set, batch_size=32, shuffle=False)
        
        
        model = get_model(model_name).to(device)
        optimizer = optim.Adam(model.parameters(), lr=best_lr)
        criterion = nn.CrossEntropyLoss()

        # Train the model
        for epoch in range(10):
            model.train()
            running_loss = 0.0
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            print(f"Epoch {epoch+1}/10, Loss: {running_loss / len(train_loader):.4f}")

    
        preds, labels = evaluate_model(model, test_loader)

        
        accuracy = accuracy_score(labels, preds)
        precision = precision_score(labels, preds, average='weighted', zero_division=0)
        recall = recall_score(labels, preds, average='weighted', zero_division=0)
        f1 = f1_score(labels, preds, average='weighted', zero_division=0)
        cm = confusion_matrix(labels, preds)

        # Store metrics for the fold
        confusion_matrices.append(cm)
        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

        print(f"Fold {fold_idx + 1} - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")
        print(f"Confusion Matrix:\n{cm}\n")

    # Average metrics across folds
    avg_accuracy = np.mean(accuracies)
    avg_precision = np.mean(precisions)
    avg_recall = np.mean(recalls)
    avg_f1_score = np.mean(f1_scores)
    avg_confusion_matrix = np.mean(confusion_matrices, axis=0)

    # Print final average metrics for the model
    print(f"\nFinal Results for {model_name.upper()}:")
    print(f"Average Accuracy: {avg_accuracy:.4f}")
    print(f"Average Precision: {avg_precision:.4f}")
    print(f"Average Recall: {avg_recall:.4f}")
    print(f"Average F1-Score: {avg_f1_score:.4f}")
    print(f"Average Confusion Matrix:\n{avg_confusion_matrix}")


Training model: VIT_B_16 with best learning rate 0.0005134706023131347
Fold 1
Total trainable parameters: 7,089,410
Epoch 1/10, Loss: 0.5391
Epoch 2/10, Loss: 0.3486
Epoch 3/10, Loss: 0.2191
Epoch 4/10, Loss: 0.1267
Epoch 5/10, Loss: 0.0732
Epoch 6/10, Loss: 0.0671
Epoch 7/10, Loss: 0.0462
Epoch 8/10, Loss: 0.0317
Epoch 9/10, Loss: 0.0115
Epoch 10/10, Loss: 0.0363
Fold 1 - Accuracy: 0.8080, Precision: 0.8308, Recall: 0.8080, F1-Score: 0.8037
Confusion Matrix:
[[1759  105]
 [ 593 1178]]

Fold 2




Total trainable parameters: 7,089,410
Epoch 1/10, Loss: 0.5174
Epoch 2/10, Loss: 0.3144
Epoch 3/10, Loss: 0.1746
Epoch 4/10, Loss: 0.1094
Epoch 5/10, Loss: 0.0523
Epoch 6/10, Loss: 0.0500
Epoch 7/10, Loss: 0.0351
Epoch 8/10, Loss: 0.0321
Epoch 9/10, Loss: 0.0233
Epoch 10/10, Loss: 0.0203
Fold 2 - Accuracy: 0.8072, Precision: 0.8235, Recall: 0.8072, F1-Score: 0.8044
Confusion Matrix:
[[1690  144]
 [ 557 1244]]

Fold 3




Total trainable parameters: 7,089,410
Epoch 1/10, Loss: 0.5146
Epoch 2/10, Loss: 0.3499
Epoch 3/10, Loss: 0.2068
Epoch 4/10, Loss: 0.1364
Epoch 5/10, Loss: 0.0741
Epoch 6/10, Loss: 0.0643
Epoch 7/10, Loss: 0.0348
Epoch 8/10, Loss: 0.0235
Epoch 9/10, Loss: 0.0279
Epoch 10/10, Loss: 0.0164
Fold 3 - Accuracy: 0.8085, Precision: 0.8199, Recall: 0.8085, F1-Score: 0.8071
Confusion Matrix:
[[1614  180]
 [ 516 1325]]


Final Results for VIT_B_16:
Average Accuracy: 0.8079
Average Precision: 0.8247
Average Recall: 0.8079
Average F1-Score: 0.8051
Average Confusion Matrix:
[[1687.66666667  143.        ]
 [ 555.33333333 1249.        ]]
