In [19]:
# Mount Google Drive under /content/drive so the project folder is reachable (Colab only)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Move into the project folder and list its contents
# NOTE(review): 'project/path/' looks like a placeholder — replace with the real project location
%cd 'project/path/'
%ls
import os
# The current working directory is used as the root for every derived path below
path = os.getcwd()
print('path: ' + path)
# Output holds the accuracy tables written by later cells; Models holds the saved weights
OutputPath = path + '/Output'
ModelPath = path + '/Models'

In [21]:
import numpy as np
import pandas as pd
import gzip
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

In [22]:
# FashionMNISTDataset is defined in DataLoader.py, located in the working folder
from DataLoader import FashionMNISTDataset

In [23]:
# Set to True to run the training cells of this notebook; when False the
# training cells are skipped and only previously saved models/results are loaded
trainMode = False

In [24]:
# Data transform: Normalize computes (x - 0.5) / 0.5 per channel
# (assumes the dataset already yields float tensors scaled to [0, 1] — TODO confirm in DataLoader.py)
transform = transforms.Compose([
    transforms.Normalize((0.5,), (0.5,))
])

data_path = path + '/FashionMNIST'

# Paths to the gzip-compressed IDX files
train_images_path = os.path.join(data_path, 'train-images-idx3-ubyte.gz')
train_labels_path = os.path.join(data_path, 'train-labels-idx1-ubyte.gz')
test_images_path = os.path.join(data_path, 't10k-images-idx3-ubyte.gz')
test_labels_path = os.path.join(data_path, 't10k-labels-idx1-ubyte.gz')

# Dataset objects
full_train_dataset = FashionMNISTDataset(train_images_path, train_labels_path, transform=transform)
test_dataset = FashionMNISTDataset(test_images_path, test_labels_path, transform=transform)

# Train and validation split (80% / 20%).
# A dedicated, seeded generator keeps the partition identical across kernel
# restarts, so validation samples never migrate into the training set between runs.
train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# DataLoader objects (only the training data is shuffled)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Sanity check: peek at one batch and report its shapes
dataiter = iter(train_loader)
images, labels = next(dataiter)
print(f'Batch of images has shape: {images.shape}')
print(f'Batch of labels has shape: {labels.shape}')


Batch of images has shape: torch.Size([64, 1, 28, 28])
Batch of labels has shape: torch.Size([64])


In [25]:
#Base Model LeNet5
class LeNet5(nn.Module):
    """LeNet-5 style CNN for single-channel 28x28 images and 10 classes.

    ``forward`` returns raw class scores (logits) of shape (batch, 10).
    The notebook trains with ``nn.CrossEntropyLoss``, which already applies
    log-softmax internally, so the network must not end in its own
    ``nn.Softmax`` layer: doing so applies softmax twice and flattens the
    gradients. Softmax is monotonic, so ``argmax`` predictions are unaffected,
    and because Softmax has no parameters the ``state_dict`` keys are unchanged.
    Apply ``torch.softmax(logits, dim=1)`` to the output when probabilities
    are needed.
    """

    def __init__(self):
        super().__init__()
        # 28x28 -> conv(pad=2) 28x28 -> pool 14x14 -> conv 10x10 -> pool 5x5
        self.features = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.Tanh(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        """Map a (batch, 1, 28, 28) tensor to (batch, 10) logits."""
        x = self.features(x)
        # Flatten every dimension except the batch dimension
        x = torch.flatten(x, 1)
        return self.classifier(x)

In [26]:
#LeNet5 with dropout
class LeNet5Dropout(nn.Module):
    """LeNet-5 variant with dropout (p=0.2) after each hidden fully connected layer.

    ``forward`` returns raw class scores (logits) of shape (batch, 10).
    The notebook trains with ``nn.CrossEntropyLoss``, which already applies
    log-softmax internally, so no ``nn.Softmax`` layer is placed at the end
    (it would apply softmax twice and flatten the gradients). ``argmax``
    predictions and ``state_dict`` keys are unaffected by its absence.
    Dropout is only active while the module is in training mode.
    """

    def __init__(self):
        super().__init__()
        # 28x28 -> conv(pad=2) 28x28 -> pool 14x14 -> conv 10x10 -> pool 5x5
        self.features = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.Tanh(),
            nn.Dropout(0.2),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        """Map a (batch, 1, 28, 28) tensor to (batch, 10) logits."""
        x = self.features(x)
        # Flatten every dimension except the batch dimension
        x = torch.flatten(x, 1)
        return self.classifier(x)

In [27]:
#LeNet5 with Batch Normalization
class LeNet5BatchNorm(nn.Module):
    """LeNet-5 variant with batch normalization before every ReLU activation.

    ``forward`` returns raw class scores (logits) of shape (batch, 10).
    The notebook trains with ``nn.CrossEntropyLoss``, which already applies
    log-softmax internally, so no ``nn.Softmax`` layer is placed at the end
    (it would apply softmax twice and flatten the gradients). ``argmax``
    predictions and ``state_dict`` keys are unaffected by its absence.
    Batch-norm layers use batch statistics in training mode and running
    statistics in eval mode, so evaluate with ``model.eval()``.
    """

    def __init__(self):
        super().__init__()
        # 28x28 -> conv(pad=2) 28x28 -> pool 14x14 -> conv 10x10 -> pool 5x5
        self.features = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.BatchNorm1d(120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.BatchNorm1d(84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        """Map a (batch, 1, 28, 28) tensor to (batch, 10) logits."""
        x = self.features(x)
        # Flatten every dimension except the batch dimension
        x = torch.flatten(x, 1)
        return self.classifier(x)

In [28]:
# Model test function
def test_model(model, data_loader, eval_dropout=False):
    if eval_dropout:
        model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in data_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    if eval_dropout:
        model.train()
    return accuracy


In [29]:
# Model train function + saving accuracies
def train_model(model, train_loader, test_loader, criterion, optimizer, n_epochs=10, eval_dropout=False):
    """Train ``model`` and record its accuracy on both loaders after every epoch.

    Args:
        model: ``nn.Module`` to optimise in place.
        train_loader: iterable of ``(inputs, labels)`` batches used for the
            gradient updates and for the per-epoch training accuracy.
        test_loader: iterable of ``(inputs, labels)`` batches used for the
            per-epoch held-out accuracy (printed as "Validation Acc").
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        n_epochs: number of passes over ``train_loader``.
        eval_dropout: forwarded to ``test_model`` for both measurements.

    Returns:
        ``(train_accuracies, test_accuracies)``: two lists of length
        ``n_epochs`` holding accuracies in percent.
    """
    train_accuracies = []
    test_accuracies = []

    for epoch in range(n_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Measure both accuracies in evaluation mode so dropout is off and
        # batch-norm statistics are neither batch-dependent nor updated.
        model.eval()
        train_acc = test_model(model, train_loader, eval_dropout)
        # Re-apply eval mode in case the previous call changed the model's mode
        model.eval()
        test_acc = test_model(model, test_loader, eval_dropout)
        # Leave the model in training mode, as it was after the training loop
        model.train()
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

        print(f'[Epoch {epoch + 1}] Train Acc: {train_acc:.2f}%, Validation Acc: {test_acc:.2f}%')

    print('Finished Training')
    return train_accuracies, test_accuracies


In [30]:
def save_model(model, name):
    """Save ``model``'s ``state_dict`` as file ``name`` inside ``ModelPath``.

    The ``ModelPath`` directory is created first if it does not exist, so a
    missing folder cannot make the save fail after training has finished.
    """
    os.makedirs(ModelPath, exist_ok=True)
    # Named checkpoint_path so the notebook-level `path` variable is not shadowed
    checkpoint_path = os.path.join(ModelPath, name)
    torch.save(model.state_dict(), checkpoint_path)
    print(f'Model saved to {checkpoint_path}')

def load_model(model, path):
    """Load the weights stored at ``path`` into ``model`` and switch it to eval mode.

    ``model`` is modified in place; nothing is returned.
    """
    stored_weights = torch.load(path)
    model.load_state_dict(stored_weights)
    # Loaded models are only used for inference in this notebook
    model.eval()
    print(f'Model loaded from {path}')

In [31]:
if trainMode:
    # Hyper-parameters shared by all four experiments
    learning_rate = 0.0002
    n_epochs = 50
    weight_decay = 0.0005

    # Every network is trained against the same cross-entropy objective
    criterion = nn.CrossEntropyLoss()

    # Networks under comparison; the weight-decay run reuses the base architecture
    model_base = LeNet5()
    model_dropout = LeNet5Dropout()
    model_batchnorm = LeNet5BatchNorm()
    model_base_wd = LeNet5()

    # One Adam optimizer per network; only the last one applies weight decay
    optimizer_base = optim.Adam(
        model_base.parameters(), lr=learning_rate)
    optimizer_dropout = optim.Adam(
        model_dropout.parameters(), lr=learning_rate)
    optimizer_batchnorm = optim.Adam(
        model_batchnorm.parameters(), lr=learning_rate)
    optimizer_base_wd = optim.Adam(
        model_base_wd.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [32]:
if trainMode:
    # Create the destination folders up front: without this, a missing
    # directory would only surface as an error after a model (or all four)
    # has already been trained, and the results would be lost.
    os.makedirs(ModelPath, exist_ok=True)
    os.makedirs(OutputPath, exist_ok=True)

    # Train and save the base model
    print("Training the base LeNet-5 model:")
    train_accuracies_base, val_accuracies_base = train_model(model_base, train_loader, val_loader, criterion, optimizer_base, n_epochs=n_epochs)
    save_model(model_base, 'lenet5_base.pth')

    # Train and save the model with dropout
    print("Training the LeNet-5 model with Dropout:")
    train_accuracies_dropout, val_accuracies_dropout = train_model(model_dropout, train_loader, val_loader, criterion, optimizer_dropout, n_epochs=n_epochs, eval_dropout=True)
    save_model(model_dropout, 'lenet5_dropout.pth')

    # Train and save the model with batch normalization
    print("Training the LeNet-5 model with Batch Normalization:")
    train_accuracies_batchnorm, val_accuracies_batchnorm = train_model(model_batchnorm, train_loader, val_loader, criterion, optimizer_batchnorm, n_epochs=n_epochs)
    save_model(model_batchnorm, 'lenet5_batchnorm.pth')

    # Train and save the base model with weight decay
    print("Training the base LeNet-5 model with Weight Decay:")
    train_accuracies_base_wd, val_accuracies_base_wd = train_model(model_base_wd, train_loader, val_loader, criterion, optimizer_base_wd, n_epochs=n_epochs)
    save_model(model_base_wd, 'lenet5_base_wd.pth')

    # Save all train and validation accuracies per epoch.
    # The "Validaiton" spelling in the column names is kept on purpose: the
    # evaluation code reads the CSV back using exactly these headers.
    data = {
        'Epoch': list(range(1, n_epochs + 1)),
        'Train_Accuracy_Base': train_accuracies_base,
        'Validaiton_Accuracy_Base': val_accuracies_base,
        'Train_Accuracy_Dropout': train_accuracies_dropout,
        'Validaiton_Accuracy_Dropout': val_accuracies_dropout,
        'Train_Accuracy_BatchNorm': train_accuracies_batchnorm,
        'Validaiton_Accuracy_BatchNorm': val_accuracies_batchnorm,
        'Train_Accuracy_Base_WD': train_accuracies_base_wd,
        'Validaiton_Accuracy_Base_WD': val_accuracies_base_wd,
    }

    df_accuracies = pd.DataFrame(data)
    df_accuracies.to_csv(os.path.join(OutputPath,'model_accuracies_per_epoch.csv'), index=False)
    print("Model accuracies per epoch saved to 'model_accuracies_per_epoch.csv'")


In [33]:
def plot_convergence_graphs(train_accuracies_base, val_accuracies_base,
                            train_accuracies_dropout, val_accuracies_dropout,
                            train_accuracies_batchnorm, val_accuracies_batchnorm,
                            train_accuracies_base_wd, val_accuracies_base_wd):
    """Draw a 2x2 grid of train/validation accuracy curves, one panel per model.

    Every argument is a per-epoch accuracy sequence (in percent). All curves
    are plotted against epoch numbers 1..len(train_accuracies_base).
    The figure is shown and then returned so the caller can save it.
    """
    epochs = range(1, len(train_accuracies_base) + 1)
    fig, axs = plt.subplots(2, 2, figsize=(12, 10))

    # (title, train curve, validation curve) in row-major panel order
    panels = (
        ('LeNet-5', train_accuracies_base, val_accuracies_base),
        ('LeNet-5 with Dropout', train_accuracies_dropout, val_accuracies_dropout),
        ('LeNet-5 with Batch Normalization', train_accuracies_batchnorm, val_accuracies_batchnorm),
        ('LeNet-5 with Weight Decay', train_accuracies_base_wd, val_accuracies_base_wd),
    )

    for ax, (title, train_curve, val_curve) in zip(axs.flat, panels):
        ax.plot(epochs, train_curve, label='Train Accuracy')
        ax.plot(epochs, val_curve, label='Validation Accuracy')
        ax.set_title(title)
        ax.set_xlabel('Epochs')
        ax.set_ylabel('Accuracy (%)')
        ax.grid(True)
        ax.legend()

    plt.tight_layout()
    plt.show()

    return fig

In [34]:
def load_models():
    """Instantiate the four architectures and load their saved weights from ``ModelPath``.

    Returns:
        Tuple ``(model_base, model_dropout, model_batchnorm, model_base_wd)``.
    """
    # Build fresh networks first; the weight-decay run shares the base architecture
    networks = (LeNet5(), LeNet5Dropout(), LeNet5BatchNorm(), LeNet5())
    checkpoint_files = (
        'lenet5_base.pth',
        'lenet5_dropout.pth',
        'lenet5_batchnorm.pth',
        'lenet5_base_wd.pth',
    )

    # Fill each network with its stored parameters
    for network, filename in zip(networks, checkpoint_files):
        load_model(network, os.path.join(ModelPath, filename))

    return networks

In [35]:
def load_and_evaluate_models():
    """Evaluate the saved models, tabulate their final accuracies and plot the training history.

    Loads the four stored models, measures their accuracy on ``test_loader``,
    reads the per-epoch accuracy CSV from ``OutputPath``, prints a summary
    table and writes it to ``summary_table.csv``, then draws the convergence
    graphs.

    Returns:
        The matplotlib figure produced by ``plot_convergence_graphs``.
    """
    # Restore the four trained networks and score each of them on the test set
    test_scores = [test_model(network, test_loader) for network in load_models()]

    # Per-epoch history; the "Validaiton" spelling matches the CSV header
    history = pd.read_csv(os.path.join(OutputPath,'model_accuracies_per_epoch.csv'))
    train_columns = (
        'Train_Accuracy_Base',
        'Train_Accuracy_Dropout',
        'Train_Accuracy_BatchNorm',
        'Train_Accuracy_Base_WD',
    )
    val_columns = (
        'Validaiton_Accuracy_Base',
        'Validaiton_Accuracy_Dropout',
        'Validaiton_Accuracy_BatchNorm',
        'Validaiton_Accuracy_Base_WD',
    )
    train_curves = [history[column] for column in train_columns]
    val_curves = [history[column] for column in val_columns]

    # Summary table: last-epoch train/validation accuracy plus test accuracy
    summary = pd.DataFrame({
        "Model": [
            "Base LeNet-5",
            "LeNet-5 with Dropout",
            "LeNet-5 with Batch Normalization",
            "Base LeNet-5 with Weight Decay"
        ],
        "Final Train Accuracy (%)": [curve.iloc[-1] for curve in train_curves],
        "Final Validation Accuracy (%)": [curve.iloc[-1] for curve in val_curves],
        "Final Test Accuracy (%)": test_scores,
    })
    print(summary)

    # Persist the summary table as CSV
    summary.to_csv(os.path.join(OutputPath,'summary_table.csv'))
    print("Summary table saved to 'summary_table.csv'")

    # Interleave as (train, val) per model, the order the plotting function expects
    ordered_curves = [curve for pair in zip(train_curves, val_curves) for curve in pair]
    return plot_convergence_graphs(*ordered_curves)

In [None]:
# Load the saved models, print and save the summary table, and plot the convergence curves
fig = load_and_evaluate_models()
# The relative file name means the image is written to the current working directory
fig.savefig('convergence_graphs.png')

In [37]:
# Un-comment the following lines to evaluate a model on a different test set stored in *.gz files.
# `model` must be one of the models returned by load_models().

#new_test_dataset = FashionMNISTDataset(test_images_path, test_labels_path, transform=transform)
#test_loader = DataLoader(new_test_dataset, batch_size=64, shuffle=False)
#test_accuracy = test_model(model, test_loader, eval_dropout=False)
#print(f'Test Acc: {test_accuracy:.2f}')