In [1]:
import sys
print(sys.version)

3.11.9 | packaged by Anaconda, Inc. | (main, Apr 19 2024, 16:40:41) [MSC v.1916 64 bit (AMD64)]


# Model manager: contains code that is reused in several places across the project

### Imports

In [7]:
import time
from IPython.display import display, Javascript

import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import datasets
import torchvision.transforms as transforms
import nbimporter

In [8]:
import json
import numpy as np

In [9]:
import os
import stat

### Local imports

In [10]:
from MyModels import create_empty_model
from MyModels import create_loss_function
from MyModels import create_optimizer
import HelperFunctions as hf

In [11]:
class model_manager():
    """
    The model manager gathers everything needed for training and testing in one class:
    it builds the model, the loss function and the optimizer, and stores per-epoch statistics.
    To create a new model manager you need: model_class, loss_function_name, optimizer_name, optimizer_params.
    Optional arguments and their defaults: model_parameters = {}, hidden_layers = 0, layer_sizes = [],
    singel_outputs = False, stats = []
    """
    
    
    def __init__(self, model_class, loss_function_name ,optimizer_name,optimizer_params , model_parameters = None,  
                 hidden_layers = 0, layer_sizes = None, singel_outputs = False, stats = None):
        """ Needs: model_class, loss_function_name ,optimizer_name,optimizer_params. 
    Has default values for: model_parameters = {},  hidden_layers = 0, layer_sizes = [], singel_outputs = False,  stats = [] """
        # avoid issues with parameters being created at initialization by exlicitly making the None then create emrty mutables
        if model_parameters == None: model_parameters = {}
        if stats == None:            stats = []
        if layer_sizes == None:      layer_sizes = []
        
        
        # Set parameters
        self.device = get_device()
        self.model_class = model_class
        self.model_parameters = model_parameters
        self.loss_function_name = loss_function_name  
        self.optimizer_name = optimizer_name
        self.optimizer_params = optimizer_params
        self.hidden_layers = hidden_layers
        self.layer_sizes = layer_sizes
        self.singel_outputs = singel_outputs
        self.num_epochs = len(stats)
        self.stats = stats
        
        print("parameters stored")
         
        self.model = create_empty_model(self.model_class,  parameters = model_parameters , device = self.device)
        self.loss_function = create_loss_function(self.loss_function_name)
        self.optimizer = create_optimizer(self.model ,self.optimizer_name, self.optimizer_params)
        print(f"model type initialized: {self.model_class}  ") 
        print(f"Optimizer: {self.optimizer_name}") 
        print(f"loss function: {self.loss_function_name}") 
        print(self.model)


      

    def initiate_training(self, epochs,  train_dataloader, test_dataloader):
        """This is the main training function. Takes a number of epochs, training dataloader, and test dataloader and initiates the actual trainig.
    When training data is also collected about how the model performs. """
        
        for t in range(epochs): # train for epochs number of iterations
           
            epoch_stats = {"epoch": self.num_epochs +1}
            print(f"Epoch count {self.num_epochs +1}\n----------------------------------")
            start_time = time.time()  # Start timer
            self.train(train_dataloader)
            end_time = time.time()  # End timer
            result_time = end_time - start_time
            print(f"Execution time of epoch: {result_time:.2f} seconds")
            epoch_stats["time"] = result_time

            match self.model_class:
                case "SimpleAutoencoder":
                    epoch_stats["loss"], epoch_stats["test_originals"] , epoch_stats["test_created"] = self.test_autoencoder(test_dataloader)#output 
                case _:
                    epoch_stats["accuracy"] , epoch_stats["loss"] , epoch_stats["list_of_fails"]=  self.test(test_dataloader)
            

            self.num_epochs += 1 # With a succesful training add one more epoch to the total
            self.stats.append(epoch_stats) # Store metadata of the training in the stats member of the class
        print("Done!")
        self.task_complete_alert()
        
        return self.stats #If the person doing the training wants to access the stats they can easily use this return

    def initiate_training_shape(self, epochs,  train_dataloader, test_dataloader):
        """This is the main training function. Takes a number of epochs, training dataloader, and test dataloader and initiates the actual trainig.
    When training data is also collected about how the model performs. """
        
        for t in range(epochs): # train for epochs number of iterations
           
            epoch_stats = {"epoch": self.num_epochs +1}
            print(f"Epoch count {self.num_epochs +1}\n----------------------------------")
            start_time = time.time()  # Start timer
            self.train(train_dataloader)
            end_time = time.time()  # End timer
            result_time = end_time - start_time
            print(f"Execution time of epoch: {result_time:.2f} seconds")
            epoch_stats["time"] = result_time

            match self.model_class:
                case "SimpleAutoencoder":
                    epoch_stats["loss"], epoch_stats["test_originals"] , epoch_stats["test_created"] = self.test_autoencoder(test_dataloader)#output 
                case _:
                    epoch_stats["accuracy"] , epoch_stats["loss"] , epoch_stats["list_of_fails"]=  self.test(test_dataloader)
            

            self.num_epochs += 1 # With a succesful training add one more epoch to the total
            self.stats.append(epoch_stats) # Store metadata of the training in the stats member of the class
        print("Done!")
        self.task_complete_alert()
        
        return self.stats #If the person doing the training wants to access the stats they can easily use this return
    
    
    def train(self, train_dataloader):
        
        size = len(train_dataloader.dataset)
        self.model.train() # set model to trainig mode
        
        for batch, (X, y) in enumerate(train_dataloader):
            X, y = X.to(self.device), y.to(self.device) # set the data to the appropriate device

            # Compute prediction error
            pred = self.model(X)
            
            match self.model_class:
                case "SimpleAutoencoder":
                    loss = self.loss_function(pred, X)
                case _:
                    loss = self.loss_function(pred, y)

            # Backpropagation
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
        
            if batch % 100 == 0:
                loss_val, current = loss.item(), (batch + 1) * len(X)
                print(f"loss: {loss_val:>7f}  [{current:>5d}/{size:>5d}]")


    def test(self, test_dataloader):
        list_of_fails = []
        size = len(test_dataloader.dataset)
        num_batches = len(test_dataloader)
        self.model.eval() # set model to evaluation mode
        test_loss, correct = 0, 0
        with torch.no_grad():
            for batch, (X, y) in enumerate(test_dataloader):
                X, y = X.to(self.device), y.to(self.device)
                pred = self.model(X)
                test_loss += self.loss_function(pred, y).item()

                # Get predicted labels to create a mask of successes
                predicted_labels = pred.argmax(1)
                correct_mask = predicted_labels == y
                correct += correct_mask.type(torch.float).sum().item()

                #Produce a list of all fails
                indices_of_failed_pred = (~correct_mask).nonzero(as_tuple=True)[0]
                list_of_fails.extend([
                {
                    "index": batch * len(y) + j.item(),  # Unique index based on batch
                    "predic": predicted_labels[j].item(),
                    "actual": y[j].item(),
                }
                for j in indices_of_failed_pred
                ])
                
        test_loss /= num_batches
        correct /= size
        print(f"Test Error of epoch {self.num_epochs +1}: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
        return [correct, test_loss, list_of_fails]

    def test_autoencoder(self, test_dataloader):
        """Special test case for autoencoders. We have to compare the images to images, and can visually inspect how the model progresses."""
        size = len(test_dataloader.dataset)
        num_batches = len(test_dataloader)
        self.model.eval() # set model to evaluation mode
        test_loss = 0
        original_images, genereated_images = [] , []
        with torch.no_grad():
            for batch, (X, y) in enumerate(test_dataloader):
                X, y = X.to(self.device), y.to(self.device)
                pred = self.model(X)
                test_loss += self.loss_function(pred, X).item()
            X, y = next(iter(test_dataloader))  # Get the first batch
            X_samples = X[:10]
            for X in X_samples: #Display comparision of original and prediciton of the the first ten images
                pred = self.model(X)
                original_images.append(X) 
                genereated_images.append(pred) 
        
        test_dataloader = iter(test_dataloader) #Reset the dataloader, we want the same comarisons each time
        test_loss /= num_batches
        
        
        
        print(f"Test Error of epoch {self.num_epochs +1}: \n Avg loss: {test_loss:>8f} \n")
        display_comparissions_autoencoder(original_images, genereated_images)
        return [test_loss, original_images, genereated_images]
        
    
    def print_arcitecture(self):
        print (self.model)

    def task_complete_alert(self):
        """Browser notification 'Task completed!'"""
        
        display(Javascript('alert("Task completed!")'))
        return None

    def get_max_accuarcy(self):
        return max(self.stats, key=lambda x: x["accuracy"])


    def print_time_spent(self):
        """gives a description of time spent to console and returns the total time spent training"""
        time = sum(t["time"] for t in self.stats)
        epochs = len(self.stats)
        if (epochs == 0): 
            print("Model has not trained yet!") 
        else:
            minutes, seconds = divmod(time, 60)
            print(f"In {epochs} epochs you have trained for a total of {round(minutes)} minutes and {round(seconds)} seconds!\nAn average of {(time/epochs):>0.2f} seconds per epoch!")
        return time


    def return_time_spent(self):
        return sum(t["time"] for t in self.stats)