In [1]:
import os
import torch
from PIL import Image
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import numpy as np
from skimage import io, transform
import matplotlib.pyplot as plt
import time
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import torch.mps 
import itertools
import csv

# plt.ion()  # interactive mode is intentionally left disabled
# Make grayscale the default colormap for subsequent image plots.
# NOTE(review): this call appears to be what produces the empty
# "<Figure size 640x480 with 0 Axes>" output shown for this cell.
plt.gray()


<Figure size 640x480 with 0 Axes>

In [2]:
# %%time
# Pick the compute device: use Apple's Metal backend (MPS) when it is usable,
# otherwise explain why it is not and fall back to the CPU so that `device`
# is always defined after this cell has run.
if torch.backends.mps.is_available():
    print("MPS found")
    device = torch.device("mps")
else:
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")
    device = torch.device("cpu")
    

MPS found


In [3]:
class FERPlusDataset(Dataset):
    """FERPlus dataset backed by a label CSV and a directory of images.

    Every item is a dict ``{'image': ..., 'emotions': ...}``. The image file
    name is read from column 0 of the CSV and the emotion scores from column 2
    onward (returned as a ``float32`` NumPy array before any transform).
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.img_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        # Target device for tensors in returned samples; None means "leave as is".
        self.device = None

    def __len__(self):
        """Return the number of annotated rows in the CSV."""
        return len(self.img_frame)

    def __getitem__(self, idx):
        """Load the sample at ``idx`` and apply the optional transform.

        Arguments:
            idx (int or Tensor): Row index; a tensor index is converted first.

        Returns:
            dict: ``{'image': ..., 'emotions': ...}``, or whatever the
            transform returns for that dict.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Column 0 holds the file name relative to root_dir.
        img_name = os.path.join(self.root_dir, self.img_frame.iloc[idx, 0])
        image = io.imread(img_name)

        # Columns 2.. hold the per-emotion scores for this image.
        emotions = np.asarray(self.img_frame.iloc[idx, 2:]).astype('float32')

        sample = {'image': image, 'emotions': emotions}
        if self.transform:
            sample = self.transform(sample)

        # Honour a device requested through .to(); only tensors can be moved,
        # so NumPy arrays and other values are passed through untouched.
        device = getattr(self, 'device', None)
        if device is not None and isinstance(sample, dict):
            sample = {key: value.to(device) if torch.is_tensor(value) else value
                      for key, value in sample.items()}

        return sample

    def to(self, device):
        """Request that tensors in returned samples live on ``device``.

        The previous implementation called itself and always ended in a
        ``RecursionError``. The device is now recorded and applied lazily in
        ``__getitem__``.

        Arguments:
            device (str or torch.device): Target device.

        Returns:
            FERPlusDataset: ``self``, so the call can be chained.
        """
        self.device = torch.device(device)
        return self
        

In [4]:
# Sample-level transform: only the image is converted; the labels pass through as-is.
class ToTensor(object):
    """Turn the image of a sample dict into a tensor via ``transforms.ToTensor``."""

    def __call__(self, sample):
        """Return a new dict with the converted image and the unchanged emotions."""
        raw_image = sample['image']
        labels = sample['emotions']
        to_tensor = transforms.ToTensor()

        converted = {}
        converted['image'] = to_tensor(raw_image)
        converted['emotions'] = labels
        return converted

In [5]:
# Folders of the three dataset splits, relative to the working directory.
# Each folder is used both as the image root and as the location of its
# own label.csv when the datasets are constructed.
train_folder_path = './data/FER2013Train'
test_folder_path = './data/FER2013Test'
valid_folder_path = './data/FER2013Valid'

In [6]:
# Build one dataset per split; every split keeps its label.csv next to its images.
train_dataset, valid_dataset, test_dataset = (
    FERPlusDataset(os.path.join(split_dir, "label.csv"), split_dir, transform=ToTensor())
    for split_dir in (train_folder_path, valid_folder_path, test_folder_path)
)

In [7]:
# Base directory for everything this notebook writes: statistics.csv,
# trial_details/ and plots/ are all written underneath it.
outputs_path = 'stats/outputs3'

In [8]:
def write_to_table(epochs, optimizer, criterion, batch_size, learning_rate, activation_func, elapsed_time, train_accuracy, valid_accuracy, id, scheduler, device_name):
    """Append one summary row for a finished trial to ``{outputs_path}/statistics.csv``.

    No header row is written. The columns are, in order: trial id, batch size,
    epochs, activation function, loss function, initial learning rate,
    optimizer, scheduler, min/max/mean training accuracy, min/max/mean
    validation accuracy, total time (HH:MM:SS) and device name.

    Arguments:
        epochs, optimizer, criterion, batch_size, learning_rate,
        activation_func, id, scheduler, device_name: Values written verbatim
            into their columns.
        elapsed_time (float): Run time in seconds.
        train_accuracy (sequence of float): Per-epoch training accuracies.
        valid_accuracy (sequence of float): Per-epoch validation accuracies.
    """
    def summarize(values):
        # An empty history (e.g. zero epochs) yields blank cells instead of
        # crashing in min()/max() or dividing by zero.
        if len(values) == 0:
            return ['', '', '']
        return [round(min(values), 2),
                round(max(values), 2),
                round(sum(values) / len(values), 2)]

    # strftime('%H:%M:%S') on gmtime() wraps around after 24 hours, so the
    # duration is formatted by hand; shorter runs produce the same text as before.
    hours, remainder = divmod(int(elapsed_time), 3600)
    minutes, seconds = divmod(remainder, 60)
    total_time = f'{hours:02d}:{minutes:02d}:{seconds:02d}'

    # Make sure the target directory exists so results are not lost after a long run.
    os.makedirs(outputs_path, exist_ok=True)

    with open(f'{outputs_path}/statistics.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            [id, batch_size, epochs, activation_func, criterion, learning_rate, optimizer, scheduler]
            + summarize(train_accuracy)
            + summarize(valid_accuracy)
            + [total_time, device_name]
        )
        
        

In [9]:
def create_plot(epochs, train_accuracy, valid_accuracy, train_loss, valid_loss):
    """Draw loss (left) and accuracy (right) curves on a new 10x5 figure.

    Each panel shows the training and the validation series against the epoch
    number (1..epochs). The figure is left as pyplot's current figure; nothing
    is returned.
    """
    epoch_axis = range(1, epochs + 1)
    # (panel title / y-label, training series, validation series), left to right.
    panels = (
        ('Loss', train_loss, valid_loss),
        ('Accuracy', train_accuracy, valid_accuracy),
    )

    plt.figure(figsize=(10, 5))
    for position, (metric, train_series, valid_series) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, train_series, label='Training')
        plt.plot(epoch_axis, valid_series, label='Validation')
        plt.title(metric)
        plt.xlabel('Epoch')
        plt.ylabel(metric)
        plt.legend()

    plt.tight_layout()

In [10]:
def output_details_to_text(epochs, optimizer, scheduler, criterion, batch_size, learning_rate, id):
    """Write the hyper-parameters of a finished trial to a small text file.

    The file is ``{outputs_path}/trial_details/trial_{id+24}`` and is
    overwritten if it already exists.

    Arguments:
        epochs, optimizer, scheduler, criterion, batch_size, learning_rate:
            Values written into the report using their string form.
        id (int): Zero-based trial index; 24 is added to it for the file name.
    """
    details_dir = f'{outputs_path}/trial_details'
    # Create the folder up front so a missing directory cannot discard the report.
    os.makedirs(details_dir, exist_ok=True)

    # The +24 offset matches the numbering used for the plot file names in the
    # training loop. NOTE(review): presumably it continues from earlier trials.
    filename = f'{details_dir}/trial_{id+24}'

    report = (
        "Parameter Combination: \n"
        f"epochs: {epochs} \n"
        f"initial learning_rate: {learning_rate} \n"
        f"batch_size: {batch_size} \n"
        f"optimizer: {optimizer} \n"
        f"scheduler: {scheduler} \n"
        f"criterion: {criterion} \n"
        "\n"
        "Finished Training with this combination\n"
        + "#" * 70
    )

    # The context manager closes the file even if the write fails.
    with open(filename, "w") as output_file:
        output_file.write(report)

In [11]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one the model is evaluated under
    ``torch.inference_mode()``. Batches are dicts with ``'image'`` and
    ``'emotions'`` entries; accuracy compares the argmax of the model output
    with the argmax of the label vector.
    """
    training = optimizer is not None
    model.train(training)

    running_loss = 0.0
    correct = 0
    total = 0
    batches = 0

    # Gradients are only needed while training.
    with torch.enable_grad() if training else torch.inference_mode():
        for data in loader:
            labels = data['emotions'].to(device)
            inputs = data['image'].to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            batches += 1

            # The predicted / target class is the index of the largest entry.
            _, predicted = torch.max(outputs, 1)
            _, targets = torch.max(labels, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # Guard against empty loaders instead of dividing by zero.
    mean_loss = running_loss / batches if batches else 0.0
    accuracy = 100 * correct / total if total else 0.0
    return mean_loss, accuracy


def train_and_validate(epochs, optimizer, scheduler, criterion, model, trainloader, validloader, batch_size, learning_rate, activation_func, iteratior_id, device):
    """Train ``model``, validate after every epoch and record the results.

    After the last epoch one row is appended to the statistics table, the
    loss/accuracy curves are drawn on a new figure, and the trial details are
    written to a text file.

    Arguments:
        epochs (int): Number of training epochs.
        optimizer (type): Optimizer class; instantiated here with
            ``model.parameters()`` and ``lr=learning_rate``.
        scheduler: ``optim.lr_scheduler.ReduceLROnPlateau`` or
            ``optim.lr_scheduler.ExponentialLR`` (classes, instantiated here;
            the latter with ``gamma=0.9``), an already constructed scheduler
            instance, or ``None`` for a constant learning rate.
        criterion (callable): Loss function called as ``criterion(outputs, labels)``.
        model (nn.Module): Model to train; must already be on ``device``.
        trainloader, validloader: Iterables of batch dicts with ``'image'``
            and ``'emotions'`` tensors.
        batch_size, learning_rate: Recorded in the outputs; ``learning_rate``
            is also passed to the optimizer.
        activation_func (callable): Only its ``__name__`` is recorded.
        iteratior_id (int): Trial index used in the outputs.
        device (torch.device or str): Device the batches are moved to.

    Raises:
        ValueError: If ``scheduler`` is a class other than the two supported ones.
    """
    train_loss = []
    train_accuracy = []
    valid_loss = []
    valid_accuracy = []

    opt_name = optimizer.__name__
    optimizer = optimizer(model.parameters(), lr=learning_rate)

    if scheduler is optim.lr_scheduler.ReduceLROnPlateau:
        scheduler = scheduler(optimizer)
    elif scheduler is optim.lr_scheduler.ExponentialLR:
        scheduler = scheduler(optimizer, gamma=0.9)
    elif isinstance(scheduler, type):
        # Fail before training starts rather than at the first scheduler.step().
        raise ValueError(
            f"Unsupported scheduler class {scheduler.__name__}; pass ReduceLROnPlateau, "
            "ExponentialLR, a scheduler instance, or None."
        )

    st = time.time()

    # Training - validation loop
    for epoch in range(epochs):
        epoch_loss, epoch_accuracy = _run_epoch(model, trainloader, criterion, device, optimizer)
        train_loss.append(epoch_loss)
        train_accuracy.append(epoch_accuracy)

        epoch_loss, epoch_accuracy = _run_epoch(model, validloader, criterion, device)
        valid_loss.append(epoch_loss)
        valid_accuracy.append(epoch_accuracy)

        # ReduceLROnPlateau needs the metric it monitors. It used to receive the
        # summed training loss divided by the number of validation batches; it
        # now gets this epoch's mean validation loss.
        if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(valid_loss[-1])
        elif scheduler is not None:
            scheduler.step()

    elapsed_time = time.time() - st

    # Derive the label from the device type so that any device (and a plain
    # string such as "cpu") works: 'CPU', 'MPS', 'CUDA', ...
    device_name = torch.device(device).type.upper()
    scheduler_name = 'None' if scheduler is None else scheduler.__class__.__name__

    write_to_table(epochs, opt_name, criterion, batch_size, learning_rate, activation_func.__name__, elapsed_time, train_accuracy, valid_accuracy, iteratior_id, scheduler_name, device_name)

    create_plot(epochs, train_accuracy, valid_accuracy, train_loss, valid_loss)

    output_details_to_text(epochs, optimizer, scheduler_name, criterion, batch_size, learning_rate, iteratior_id)

In [12]:
# Define parameter grids: every list holds the candidate values for one
# hyper-parameter; the trials are the Cartesian product of all lists.
criterions = [nn.CrossEntropyLoss()]
optimizers = [optim.SGD, optim.Adam]
activations = [F.relu]
learning_rates = [0.01]
epochs = [50]
batch_size = [32]
schedulers = [optim.lr_scheduler.ExponentialLR, optim.lr_scheduler.ReduceLROnPlateau]
# Use MPS when this machine supports it; otherwise run the grid on the CPU
# instead of failing when the model is first moved to an unavailable device.
device = [torch.device("mps" if torch.backends.mps.is_available() else "cpu")]

In [13]:
# Create all possible parameter combinations.
# itertools.product returns a one-shot iterator; materialising it as a list
# lets the training cell be re-run without silently iterating over nothing.
parameter_grid = list(itertools.product(learning_rates, batch_size, epochs, schedulers, optimizers, activations, criterions, device))

In [14]:
import gc


class Net(nn.Module):
    """Small CNN: two conv/batch-norm/pool stages followed by three linear layers.

    With 5x5 convolutions (no padding) and 2x2 pooling, the ``16 * 9 * 9``
    input size of ``fc1`` corresponds to single-channel 48x48 images:
    48 -> 44 -> 22 -> 18 -> 9.

    Arguments:
        drop (float): Dropout probability applied to the outputs of fc1 and fc2.
        activation (callable): Non-linearity used after every hidden layer.
    """

    def __init__(self, drop=0.2, activation=F.relu):
        super().__init__()
        self.activation = activation

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(6)  # Batch normalization after conv1

        # The stride defaults to the kernel size, so each pooling halves H and W.
        self.pool = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(6, 16, 5)
        self.bn2 = nn.BatchNorm2d(16)  # Batch normalization after conv2

        self.fc1 = nn.Linear(16 * 9 * 9, 120)
        self.fc2 = nn.Linear(120, 84)
        # NOTE(review): 10 outputs presumably match the number of label
        # columns read from label.csv - confirm against the data.
        self.fc3 = nn.Linear(84, 10)

        self.dropout = nn.Dropout(p=drop)

    def forward(self, x):
        x = self.pool(self.activation(self.bn1(self.conv1(x))))  # 44*44*6 -> 22*22*6
        x = self.pool(self.activation(self.bn2(self.conv2(x))))  # 18*18*16 -> 9*9*16

        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = self.activation(self.dropout(self.fc1(x)))
        x = self.activation(self.dropout(self.fc2(x)))
        return self.fc3(x)


# Create the output folders before any training starts, so hours of work are
# not lost to a missing directory when the results are finally written.
os.makedirs(f"{outputs_path}/plots", exist_ok=True)
os.makedirs(f"{outputs_path}/trial_details", exist_ok=True)

for i, params in enumerate(parameter_grid):
    # Per-trial names differ from the grid lists (`batch_size`, `epochs`,
    # `device`) so that this cell does not overwrite them and the grid cells
    # stay re-runnable.
    learning_rate, bs, n_epochs, scheduler, optimizer, activation, criterion, run_device = params

    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=0)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=bs, shuffle=True, num_workers=0)

    # Re-seed before building the model so every trial starts from the same state.
    torch.manual_seed(42)
    # NOTE(review): torch.manual_seed is documented to seed all devices, so
    # this call is likely redundant; kept to leave the existing setup unchanged.
    torch.mps.manual_seed(42)

    model = Net(activation=activation)
    model.to(run_device)
    train_and_validate(n_epochs, optimizer, scheduler, criterion, model, trainloader, validloader, bs, learning_rate, activation, i, run_device)

    # train_and_validate leaves the curves on the current figure; save it.
    plt.savefig(f"{outputs_path}/plots/trial{i+24} act_func={activation.__name__}, loss_func={criterion.__class__.__name__}, opt={optimizer.__name__}, LRS={scheduler.__name__} .png")
    plt.close('all')

    # Drop every reference first: the allocator cache can only give back
    # memory that is no longer held by live tensors.
    del model
    del trainloader
    del validloader
    gc.collect()
    if run_device.type == "mps":
        torch.mps.empty_cache()
    


In [None]:
# import requests
# from pathlib import Path 

# # Download helper functions from Learn PyTorch repo (if not already downloaded)
# if Path("helper_functions.py").is_file():
#   print("helper_functions.py already exists, skipping download")
# else:
#   print("Downloading helper_functions.py")
#   request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py")
#   with open("helper_functions.py", "wb") as f:
#     f.write(request.content)

# from helper_functions import plot_predictions, plot_decision_boundary