# Training and Optimizing VGG19

This notebook walks through fine-tuning a pretrained VGG19 convolutional neural network on the CIFAR10 dataset, then optimizing the resulting model with pruning.

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import random_split, DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10

## Import VGG19 (Pretrained)

In [2]:
# Load VGG19 with its ImageNet weights. The `weights=` enum replaces the
# deprecated `pretrained=True` flag (which now emits a warning);
# IMAGENET1K_V1 is the checkpoint that flag used to select.
model = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)



## Import and Pre-process Dataset

### Import datasets

In [3]:
# Per-channel normalization statistics (the standard ImageNet values).
normalize_mean = [0.485, 0.456, 0.406]
normalize_std = [0.229, 0.224, 0.225]

# Resize to 256, keep the central 224x224 crop, convert to a tensor and
# normalize each channel.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=normalize_mean, std=normalize_std),
    ]
)

In [4]:
# CIFAR10 training split, stored under ./CIFAR10-Train (downloaded if needed).
# Every image is passed through `preprocess`; labels are left untouched.
train_data = CIFAR10(root="./CIFAR10-Train", train=True, download=True, transform=preprocess)

In [5]:
# CIFAR10 held-out split, stored under ./CIFAR10-Test (downloaded if needed).
# Every image is passed through `preprocess`; labels are left untouched.
test_data = CIFAR10(root="./CIFAR10-Test", train=False, download=True, transform=preprocess)

### Split dataset and show stats

In [6]:
# Split the held-out data into two halves: one for testing, one for validation.
split_size = len(test_data) // 2
split_index = split_size  # same value; alias kept in case a later cell reads it

# A seeded generator makes the random split identical on every run, so
# validation metrics stay comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
test_set, val_set = random_split(
    test_data,
    [split_size, len(test_data) - split_size],
    generator=split_generator,
)

In [7]:
# Report the size and class count of each split. `test_set` and `val_set` come
# from `random_split`, so their class list is read from the wrapped `.dataset`.
print('------------------------')
print('Stats on CIFAR10 Dataset')  # header previously misspelled the dataset name
print('------------------------')
print(f'Train Dataset Size: {len(train_data)}')
print(f'Train - Number of Unique Targets: {len(train_data.classes)}')
print('')
print(f'Test Dataset Size: {len(test_set)}')
print(f'Test - Number of Unique Targets: {len(test_set.dataset.classes)}')
print('')
print(f'Val Dataset Size: {len(val_set)}')
print(f'Val - Number of Unique Targets: {len(val_set.dataset.classes)}')

------------------------
Stats on CIFA100 Dataset
------------------------
Train Dataset Size: 50000
Train - Number of Unique Targets: 10

Test Dataset Size: 5000
Test - Number of Unique Targets: 10

Val Dataset Size: 5000
Val - Number of Unique Targets: 10


### Pre-process and update model for classifier size

In [8]:
# Settings shared by both loaders: batches of 32 samples, 4 worker processes.
loader_kwargs = dict(batch_size=32, num_workers=4)

# Only the training data is shuffled; validation order stays fixed.
train_loader = DataLoader(train_data, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_set, shuffle=False, **loader_kwargs)

In [9]:
num_classes = len(train_data.classes)

# Replace the final classifier layer (index 6) with one that emits one logit
# per class in our dataset. The input width is read from the layer being
# replaced rather than hard-coded, so the cell keeps working if the backbone
# changes and stays correct when re-run.
head_in_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(head_in_features, num_classes)

## Setting up Model for Training

In [10]:
# Classification loss: cross-entropy.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter.
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
# Pick the fastest available backend: CUDA GPU, then Apple-silicon MPS, then CPU.
# `torch.backends.mps.is_available()` is the documented MPS check; the original
# `torch.mps.is_available()` is not present in every PyTorch release.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

model = model.to(device)

## Defining Helper Functions

### Metric Calculation (F1, Recall, Precision)

In [12]:

def calculate_metrics(classes, df: pd.DataFrame):
    """Compute micro-averaged precision, recall and F1 over ``classes``.

    True positives, false positives and false negatives are counted for each
    label in ``classes`` and summed across labels before the ratios are taken.

    Args:
        classes: Iterable of class labels to score.
        df: Frame with ``y_true`` and ``y_pred`` columns, one row per sample.
            It is only read, never modified.

    Returns:
        Tuple ``(precision, recall, f1)``. Any ratio whose denominator is zero
        is reported as 0.0 instead of raising ``ZeroDivisionError``.
    """
    y_true = df['y_true']
    y_pred = df['y_pred']

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for label in classes:
        # Build each boolean mask once and reuse it for all three counts.
        is_true = y_true == label
        is_pred = y_pred == label
        true_positives += int((is_true & is_pred).sum())
        false_negatives += int((is_true & ~is_pred).sum())
        false_positives += int((~is_true & is_pred).sum())

    def _safe_ratio(numerator, denominator):
        # A zero denominator means there was nothing to score for this ratio.
        return numerator / denominator if denominator else 0.0

    precision = _safe_ratio(true_positives, true_positives + false_positives)
    recall = _safe_ratio(true_positives, true_positives + false_negatives)
    f1 = _safe_ratio(2 * precision * recall, precision + recall)

    return precision, recall, f1

### Train Function

In [13]:
from tqdm import tqdm

def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimization pass over ``train_loader``.

    Args:
        model: Network to train; switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. It must
            yield at least one batch.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, precision, recall, f1)``.
        ``epoch_loss`` is the mean of the per-batch losses, ``epoch_acc`` is a
        percentage, and the last three values come from ``calculate_metrics``.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_classes = 0

    # Gather per-batch arrays and build the DataFrame once after the loop;
    # concatenating inside the loop re-copies every earlier row on each batch.
    true_batches = []
    pred_batches = []

    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Statistics
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        # One logit per class along dim 1, so its width is the class count.
        num_classes = outputs.size(1)

        # Move results back to the CPU for the metric computation.
        true_batches.append(labels.cpu().numpy())
        pred_batches.append(predicted.cpu().numpy())

    pred_v_true = pd.DataFrame({
        'y_true': np.concatenate(true_batches),
        'y_pred': np.concatenate(pred_batches),
    })

    # Predicted indices run from 0 to num_classes - 1. The previous
    # range(1, n + 1) skipped class 0 and scored a class that cannot occur.
    precision, recall, f1 = calculate_metrics(range(num_classes), pred_v_true)

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc, precision, recall, f1

### Validation Function

In [14]:
def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating its weights.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        val_loader: Iterable yielding ``(inputs, labels)`` batches. It must
            yield at least one batch.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, precision, recall, f1)``.
        ``epoch_loss`` is the mean of the per-batch losses, ``epoch_acc`` is a
        percentage, and the last three values come from ``calculate_metrics``.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    num_classes = 0

    # Gather per-batch arrays and build the DataFrame once after the loop;
    # concatenating inside the loop re-copies every earlier row on each batch.
    true_batches = []
    pred_batches = []

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # One logit per class along dim 1, so its width is the class count.
            num_classes = outputs.size(1)

            # Move results back to the CPU for the metric computation.
            true_batches.append(labels.cpu().numpy())
            pred_batches.append(predicted.cpu().numpy())

    pred_v_true = pd.DataFrame({
        'y_true': np.concatenate(true_batches),
        'y_pred': np.concatenate(pred_batches),
    })

    # Predicted indices run from 0 to num_classes - 1. The previous
    # range(1, n + 1) skipped class 0 and scored a class that cannot occur.
    precision, recall, f1 = calculate_metrics(range(num_classes), pred_v_true)

    epoch_loss = running_loss / len(val_loader)
    epoch_acc = 100. * correct / total

    return epoch_loss, epoch_acc, precision, recall, f1

## Model Training

In [16]:
num_epochs = 10
best_acc = 0.0

# Create the output folder before training starts. Without this, a missing
# directory only surfaces when results are written after the final epoch.
output_dir = Path('./Training_Outputs/CIFAR10')
output_dir.mkdir(parents=True, exist_ok=True)

# Per-epoch metrics, one list per column of the exported CSV
metric_names = ['loss', 'acc', 'precision', 'recall', 'f1']
training_history = {'epoch': []}
for phase in ('train', 'val'):
    for metric in metric_names:
        training_history[f'{phase}_{metric}'] = []

# Training loop
for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')
    print('-' * 10)

    # Training phase
    train_loss, train_acc, train_precision, train_recall, train_f1 = train_epoch(
        model, train_loader, criterion, optimizer, device
    )

    # Validation phase
    val_loss, val_acc, val_precision, val_recall, val_f1 = validate(
        model, val_loader, criterion, device
    )

    # Store this epoch's metrics in the history dictionary
    epoch_record = {
        'epoch': epoch + 1,
        'train_loss': train_loss,
        'train_acc': train_acc,
        'train_precision': train_precision,
        'train_recall': train_recall,
        'train_f1': train_f1,
        'val_loss': val_loss,
        'val_acc': val_acc,
        'val_precision': val_precision,
        'val_recall': val_recall,
        'val_f1': val_f1,
    }
    for key, value in epoch_record.items():
        training_history[key].append(value)

    # Print training metrics
    print(f'Train Loss: {train_loss:.4f} Acc: {train_acc:.2f}%')
    print(f'Train Precision: {train_precision:.4f} Recall: {train_recall:.4f} F1: {train_f1:.4f}')

    # Print validation metrics
    print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.2f}%')
    print(f'Val Precision: {val_precision:.4f} Recall: {val_recall:.4f} F1: {val_f1:.4f}')

    # Update best accuracy if current validation accuracy is better
    if val_acc > best_acc:
        best_acc = val_acc
        print(f'New best validation accuracy: {best_acc:.2f}%')

    print()

# Print final best accuracy
print(f'Training completed - Best validation accuracy: {best_acc:.2f}%')

# Create DataFrame from training history
training_df = pd.DataFrame(training_history)

# Export the metric history and the final-epoch weights
training_df.to_csv(output_dir / 'training_history_CIFAR10.csv', index=False)
torch.save(model.state_dict(), output_dir / 'CIFAR10_VGG19_Model.pth')

Epoch 1/10
----------


  0%|          | 0/1563 [00:00<?, ?it/s]