EfficientNet V2 M

Import Necessary Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets, models
from torch.cuda.amp import autocast, GradScaler
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import seaborn as sns
import os

In [2]:
# Check if CUDA (GPU) is available.
# `device` is bound on both paths so that it exists after this cell even on a
# CPU-only machine (previously it was only assigned inside the CUDA branch).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print(f"CUDA is available! GPU: {torch.cuda.get_device_name(0)}")

    # Check if the GPU supports mixed precision (AMP).
    # get_device_capability returns a (major, minor) tuple; only the major
    # version is inspected below.
    compute_capability = torch.cuda.get_device_capability(device)
    print(f"Compute Capability: {compute_capability}")

    if compute_capability[0] >= 7:  # Tensor Cores require Compute Capability 7.0+
        print("✅ Your GPU supports Mixed Precision Training (AMP)!")
    else:
        print("❌ Your GPU does NOT fully support Mixed Precision Training.")
else:
    print("❌ CUDA is not available. Running on CPU.")

CUDA is available! GPU: NVIDIA GeForce RTX 3060 Laptop GPU
Compute Capability: (8, 6)
✅ Your GPU supports Mixed Precision Training (AMP)!


In [2]:
# Define custom dataset path and constants
data_dir = ''  # Replace with your dataset path
num_classes = 3  # Three classes because there are only three grades
batch_size = 16  # Images per batch; larger batches use more memory and give fewer optimizer updates per epoch
num_epochs = 50
learning_rate = 0.001  # Initial learning rate handed to the optimizer

# k used for top-k accuracy. It has to be smaller than num_classes: when k
# equals the number of classes every sample is counted as correct and the
# metric is always 1.0 (which is what the previous value of 3 produced with
# three classes).
top_k = 2
assert 1 <= top_k < num_classes, "top_k must satisfy 1 <= top_k < num_classes"

# Define checkpoint directory (created if it does not exist yet)
checkpoint_dir = 'checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

Dataset Management

In [None]:
# Data transformations
# All three splits use the same deterministic preprocessing: resize to a fixed
# size, convert to a tensor, then normalize each channel with the mean/std
# values below (the commonly used ImageNet channel statistics).
IMAGE_SIZE = (480, 480)
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]


def build_transforms():
    """Return a fresh resize -> tensor -> normalize pipeline."""
    return transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
    ])


# Separate names are kept so a split can later be given its own pipeline
# (for example training-time augmentation) without touching the others.
train_transforms = build_transforms()
val_transforms = build_transforms()
test_transforms = build_transforms()

# Load datasets
# os.path.join keeps the paths relative when data_dir is empty; the previous
# f'{data_dir}/train' form turned an empty data_dir into '/train' at the
# filesystem root.
train_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'train'), transform=train_transforms)
val_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'val'), transform=val_transforms)
test_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'test'), transform=test_transforms)

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Loading Model

In [None]:
# Load EfficientNet V2 (M variant) with its IMAGENET1K_V1 weights
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = models.efficientnet_v2_m(weights='IMAGENET1K_V1')

# Replace the final linear layer of the classifier with one that outputs
# num_classes logits, keeping the incoming feature width unchanged.
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(head_in_features, num_classes)
model = model.to(device)

# Enable Torch 2.0 compilation for faster training
model = torch.compile(model)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer: AdamW with weight decay
# (alternative kept for reference: optim.Adam(model.parameters(), lr=learning_rate))
optimizer = optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)

# Scheduler: cosine annealing over the full run, bottoming out at eta_min
# (alternative kept for reference: ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True))
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=num_epochs,
    eta_min=1e-6,
)

Initial Variables

In [6]:
# Training/validation bookkeeping.
# Best validation accuracy seen so far; starts at zero.
best_val_accuracy = 0.0
# Class names as exposed by the training dataset.
class_names = train_dataset.classes

# Per-epoch metric histories used later for plotting
train_accuracies, val_accuracies, top_k_accuracies = [], [], []

# Release cached GPU memory held by the allocator before training starts
torch.cuda.empty_cache()

Helper Functions

In [None]:
# Function to save checkpoint
def save_checkpoint(epoch, model, optimizer, scheduler, best_val_accuracy, filename='checkpoint.pth'):
    """Write the current training state to ``checkpoint_dir/filename``.

    Args:
        epoch: Epoch index to store under the ``'epoch'`` key.
        model: Module whose ``state_dict()`` is saved.
        optimizer: Optimizer whose ``state_dict()`` is saved.
        scheduler: LR scheduler whose ``state_dict()`` is saved.
        best_val_accuracy: Best validation accuracy so far; a Python number or
            a 0-dim tensor. It is stored as a plain float.
        filename: File name inside the module-level ``checkpoint_dir``.

    The module-level ``train_accuracies``, ``val_accuracies`` and
    ``top_k_accuracies`` lists are stored alongside the state dicts.
    """
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        # float() accepts both Python numbers and 0-dim tensors, so the saved
        # value does not depend on which device the accuracy was computed on.
        'best_val_accuracy': float(best_val_accuracy),
        'train_accuracies': train_accuracies,
        'val_accuracies': val_accuracies,
        'top_k_accuracies': top_k_accuracies
    }
    target_path = os.path.join(checkpoint_dir, filename)
    # Write to a temporary file first and then swap it into place, so an
    # interruption during the write cannot leave a truncated checkpoint under
    # the real name.
    temp_path = target_path + '.tmp'
    torch.save(checkpoint, temp_path)
    os.replace(temp_path, target_path)

# Function to load checkpoint
def load_checkpoint(model, optimizer, scheduler, filename='checkpoint.pth'):
    """Restore training state from ``checkpoint_dir/filename``.

    The state dicts stored in the file are loaded into ``model``,
    ``optimizer`` and ``scheduler`` in place.

    Args:
        model: Module that receives ``'model_state_dict'``.
        optimizer: Optimizer that receives ``'optimizer_state_dict'``.
        scheduler: LR scheduler that receives ``'scheduler_state_dict'``.
        filename: File name inside the module-level ``checkpoint_dir``.

    Returns:
        Tuple ``(epoch, best_val_accuracy, train_accuracies, val_accuracies,
        top_k_accuracies)`` read from the checkpoint; ``best_val_accuracy`` is
        returned as a float.
    """
    # map_location='cpu' lets a checkpoint written on a GPU machine be read on
    # a machine without CUDA; load_state_dict then copies the values onto
    # whatever device the model/optimizer parameters live on.
    checkpoint = torch.load(os.path.join(checkpoint_dir, filename), map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    epoch = checkpoint['epoch']
    # The stored value may be a 0-dim tensor or a number; normalize to float.
    best_val_accuracy = float(checkpoint['best_val_accuracy'])
    train_accuracies = checkpoint['train_accuracies']
    val_accuracies = checkpoint['val_accuracies']
    top_k_accuracies = checkpoint['top_k_accuracies']
    return epoch, best_val_accuracy, train_accuracies, val_accuracies, top_k_accuracies

def calculate_metrics(y_true, y_pred, class_names):
    """Compute the confusion matrix, accuracy and a text classification report.

    Args:
        y_true: Sequence of ground-truth class indices.
        y_pred: Sequence of predicted class indices.
        class_names: Display names, one per class; assumed to be ordered so
            that index ``i`` names class ``i`` — TODO confirm against the
            dataset's class-to-index mapping.

    Returns:
        Tuple ``(conf_matrix, accuracy, report)``.
    """
    # Pin the label set to every known class. Without this, a class that is
    # missing from both y_true and y_pred shrinks the confusion matrix and
    # makes classification_report fail because target_names no longer
    # matches the number of labels found.
    label_ids = list(range(len(class_names)))
    conf_matrix = confusion_matrix(y_true, y_pred, labels=label_ids)
    accuracy = accuracy_score(y_true, y_pred)
    report = classification_report(
        y_true,
        y_pred,
        labels=label_ids,
        target_names=class_names,
        zero_division=0,  # report 0 instead of warning for classes with no samples
    )
    return conf_matrix, accuracy, report

def top_k_accuracy(output, target, k=3):
    """Return the fraction of samples whose target is among the top-k scores.

    Args:
        output: 2-D tensor of scores; ``topk`` is taken along dim 1.
        target: 1-D tensor of class indices, one per row of ``output``.
        k: Number of highest-scoring classes to consider. Values larger than
            the number of columns in ``output`` are clamped to that number.

    Returns:
        A Python float in [0, 1]; 0.0 for an empty batch.
    """
    batch_size_seen = target.size(0)
    if batch_size_seen == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    # topk raises if k exceeds the size of the dimension, so clamp it.
    k = min(k, output.size(1))
    with torch.no_grad():
        _, pred = output.topk(k, dim=1)
        # Compare every one of the k predictions per row against that row's target.
        correct = pred.eq(target.view(-1, 1).expand_as(pred))
        return correct.sum().item() / batch_size_seen

Checkpointing

In [None]:
# Restore previous state, using exactly one source.
#
# Priority:
#   1. The full checkpoint (model + optimizer + scheduler + epoch + histories).
#   2. best_model.pth (weights only).
#   3. last_model.pth (weights only).
#
# Previously the checkpoint was loaded first and the model weights were then
# overwritten from best_model.pth, leaving optimizer/scheduler/epoch state from
# one model paired with the weights of another.
checkpoint_filename = 'checkpoint.pth'
checkpoint_path = os.path.join(checkpoint_dir, checkpoint_filename)
best_model_path = 'best_model.pth'
last_model_path = 'last_model.pth'

# Stays 0 unless a full checkpoint provides an epoch to resume from.
start_epoch = 0

if os.path.exists(checkpoint_path):
    print("Loading checkpoint...")
    # start_epoch is taken directly from the 'epoch' value stored in the checkpoint.
    start_epoch, best_val_accuracy, train_accuracies, val_accuracies, top_k_accuracies = load_checkpoint(model, optimizer, scheduler, checkpoint_filename)
    print(f"Resuming training from epoch {start_epoch + 1}")
elif os.path.exists(best_model_path):
    print("Loading best model...")
    # Weights only: optimizer, scheduler and epoch counter start fresh.
    # map_location lets weights saved on another device load onto `device`.
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    print("Resuming training from the best model.")
elif os.path.exists(last_model_path):
    print("Loading last model...")
    model.load_state_dict(torch.load(last_model_path, map_location=device))
    print("Resuming training from the last model.")
else:
    print("No checkpoint or model found. Starting training from scratch.")


Training and Validation Loops

In [None]:
# Enables CuDNN benchmarking, which optimizes GPU performance by selecting the fastest convolution algorithms for your model.
# Helps if input sizes don’t change much (like the fixed 480x480 images).
torch.backends.cudnn.benchmark = True

writer = SummaryWriter(log_dir="runs/efficientnet_v2_m")

# One gradient scaler for the whole run. The scaler adapts its scale factor
# from step to step; creating a new one every epoch discards that state.
scaler = GradScaler()

# Training and validation loop with checkpoint saving
for epoch in range(start_epoch, num_epochs):
    # ================= Training ==================
    model.train()
    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):

        # Mixed Precision Training
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        with autocast():  # Enable mixed precision
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()  # Scale the loss so small gradients do not underflow
        # Gradients are still multiplied by the scale factor here. Unscale them
        # first so that max_norm=1.0 applies to the true gradient norm.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Prevent exploding gradients
        scaler.step(optimizer)  # Update weights (skipped by the scaler if gradients contain inf/NaN)
        scaler.update()  # Adjust scaling factor

        # Update loss and accuracy tracking (loss is a batch mean, so weight it by batch size)
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += (preds == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects / len(train_dataset)
    train_accuracies.append(epoch_acc)

    print(f"Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_acc:.4f}")

    # Log training loss and accuracy to TensorBoard
    writer.add_scalar("Loss/train", epoch_loss, epoch)
    writer.add_scalar("Accuracy/train", epoch_acc, epoch)

    # ================= Validation ==================
    model.eval()
    val_loss = 0.0
    val_corrects = 0
    top_k_corrects = 0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)

            _, preds = torch.max(outputs, 1)
            val_corrects += (preds == labels).sum().item()
            # top_k_accuracy returns a per-batch fraction; convert back to a count
            top_k_corrects += top_k_accuracy(outputs, labels, k=top_k) * labels.size(0)

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    val_loss /= len(val_dataset)
    val_accuracy = val_corrects / len(val_dataset)
    val_top_k_accuracy = top_k_corrects / len(val_dataset)  # Divide by total validation samples
    val_accuracies.append(val_accuracy)
    top_k_accuracies.append(val_top_k_accuracy)

    # conf_matrix from the most recent epoch is reused by the plotting cells below
    conf_matrix, accuracy, report = calculate_metrics(all_labels, all_preds, class_names)

    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}, Top-{top_k} Accuracy: {val_top_k_accuracy:.4f}")
    print("Classification Report:\n", report)

    # Log validation loss and accuracy to TensorBoard
    writer.add_scalar("Loss/val", val_loss, epoch)
    writer.add_scalar("Accuracy/val", val_accuracy, epoch)

    if val_accuracy > best_val_accuracy:
        # Kept as a plain float so it can be saved and compared on any device
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), 'best_model.pth')
        print("Saved Best Model")

    writer.add_scalar("Learning Rate", optimizer.param_groups[0]['lr'], epoch)  # Log before LR update
    scheduler.step()

    # Save checkpoint at the end of each epoch, after the scheduler step.
    # The stored epoch is the NEXT epoch to run (epoch + 1), and the scheduler
    # state already reflects this epoch's step, so resuming with
    # range(start_epoch, num_epochs) continues with the following epoch instead
    # of repeating this one and appending duplicate metric entries.
    save_checkpoint(epoch + 1, model, optimizer, scheduler, best_val_accuracy, checkpoint_filename)
    print(f"Checkpoint saved at epoch {epoch + 1}")

    torch.save(model.state_dict(), 'last_model.pth')

writer.close()  # Closes TensorBoard writer

# To view the logs, open a terminal and run: tensorboard --logdir=runs

Accuracy Over Epochs Graph

In [None]:
# Training vs. validation accuracy per epoch.
# The x-axes are derived from the number of recorded values rather than from
# num_epochs, so the plot still works when training stopped early or was
# resumed and the histories are shorter or longer than num_epochs.
train_epochs = range(1, len(train_accuracies) + 1)
val_epochs = range(1, len(val_accuracies) + 1)
val_acc_array = np.asarray(val_accuracies, dtype=float)

plt.figure(figsize=(12, 6))
plt.plot(train_epochs, train_accuracies, label='Training Accuracy', marker='o')
plt.plot(val_epochs, val_accuracies, label='Validation Accuracy', marker='s')
# Fixed ±0.01 band around the validation curve. This is a visual aid only;
# it is not computed from any measured variance.
plt.fill_between(val_epochs,
                 val_acc_array - 0.01,
                 val_acc_array + 0.01,
                 color='b', alpha=0.1)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.grid()
plt.savefig('train-val-acc.png')
plt.show()

Top-K Accuracy

In [None]:
# Top-1 vs. top-k validation accuracy per epoch.
# The x-axes follow the number of recorded values instead of num_epochs, so a
# shortened or resumed run does not cause a length mismatch.
top1_epochs = range(1, len(val_accuracies) + 1)
topk_epochs = range(1, len(top_k_accuracies) + 1)

plt.figure(figsize=(12, 6))
plt.plot(top1_epochs, val_accuracies, label='Top-1 Validation Accuracy', marker='o')
plt.plot(topk_epochs, top_k_accuracies, label=f'Top-{top_k} Validation Accuracy', marker='s')
plt.title('Top-1 and Top-k Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.grid()
plt.savefig('top-k-accuracy.png')
plt.show()


Regular Confusion Matrix

In [None]:
# Confusion matrix with raw counts, drawn on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap=plt.cm.Blues,
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
fig.savefig('confusion-matrix.png')
plt.show()

Normalized Confusion Matrix


In [None]:
# Normalize each row of the confusion matrix so the entries in a row sum to 1.
row_sums = conf_matrix.sum(axis=1, keepdims=True)
row_sums[row_sums == 0] = 1  # Prevent division by zero for rows that have no samples
conf_matrix_normalized = conf_matrix.astype('float') / row_sums
# The zero-safe result above is used as-is. It was previously recomputed with
# a plain division, which brought back NaN rows for empty classes.

plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix_normalized, annot=True, fmt=".2f", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix (Normalized)")
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
# Saved under its own name so it does not overwrite the count-based
# 'confusion-matrix.png' written by the previous cell.
plt.savefig('confusion-matrix-normalized.png')
plt.show()


Test Set Evaluation Function

In [None]:
def evaluate_on_test_set(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader``, print metrics and plot a confusion matrix.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.

    Prints the average loss, accuracy and classification report, then saves
    the confusion matrix heatmap to 'test-confusion-matrix.png' and shows it.
    Uses the module-level ``device`` and ``class_names``. Returns nothing.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()
    test_loss = 0.0
    test_corrects = 0
    # Count the samples actually seen so the averages match the loader that
    # was passed in, instead of relying on the global test_dataset.
    num_samples = 0
    all_test_labels = []
    all_test_preds = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Evaluating on Test Set"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_count = inputs.size(0)
            # loss is weighted by batch size so the final division gives a per-sample average
            test_loss += loss.item() * batch_count
            _, preds = torch.max(outputs, 1)
            test_corrects += (preds == labels).sum().item()
            num_samples += batch_count

            all_test_labels.extend(labels.cpu().numpy())
            all_test_preds.extend(preds.cpu().numpy())

    if num_samples == 0:
        raise ValueError("test_loader produced no samples to evaluate")

    test_loss /= num_samples
    test_accuracy = test_corrects / num_samples

    conf_matrix, accuracy, report = calculate_metrics(all_test_labels, all_test_preds, class_names)

    print(f"\n✅ Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
    print("🔍 Classification Report:\n", report)

    # Plot confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="Blues", xticklabels=class_names, yticklabels=class_names)
    plt.title("Test Set Confusion Matrix")
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig('test-confusion-matrix.png')
    plt.show()

In [None]:
# Evaluate model on the test set after training.
# NOTE(review): `model` holds whatever weights are currently in memory; this
# cell does not reload 'best_model.pth' first — confirm that is the model
# intended for the reported test metrics.
evaluate_on_test_set(model, test_loader, criterion)