In [1]:
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T

import os
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler

from sklearn.preprocessing import MinMaxScaler

from collections import OrderedDict

import os.path

In [2]:
def train_model(input_PATH, output_PATH, hidden_layers_list, act_function, loss_type, n_epoch, batch_size, lr, accurate_loss_baseline, output_scaler, model_name):
    """Train a fully connected regression network on a pair of ``.npy`` arrays.

    The samples are shuffled with a fixed seed; the first 10% of the shuffled
    indices form the validation split, the next 10% are held out (not used in
    this function) and the remaining 80% are used for training. After every
    epoch the model is evaluated on the validation split, and its weights are
    written to ``1D_results/<model_name>.pth`` whenever the summed validation
    loss is the lowest (or tied lowest) seen so far. The per-epoch training and
    validation losses are written to ``1D_results/<model_name>_trainingData.txt``
    as two '/'-separated lines.

    Args:
        input_PATH: Path of the ``.npy`` file holding the input samples
            (indexed by sample along the first axis).
        output_PATH: Path of the ``.npy`` file holding the target samples.
        hidden_layers_list: Sizes of the hidden ``nn.Linear`` layers, in order.
        act_function: ``"ReLU"`` inserts ``nn.ReLU`` after every layer except
            the last one; any other value builds the network without
            activations.
        loss_type: ``'L1Loss'`` selects ``nn.L1Loss``; any other value selects
            ``nn.MSELoss``.
        n_epoch: Number of training epochs.
        batch_size: Batch size used by the training and validation loaders.
        lr: Learning rate of the Adam optimizer.
        accurate_loss_baseline: A validation sample counts as "accurate" when
            its own loss is less than or equal to this value.
        output_scaler: ``"MinMax"`` rescales the targets with a
            ``MinMaxScaler`` fitted on the complete target array; any other
            value leaves the targets unscaled.
        model_name: Base name of the checkpoint and loss-log files.

    Returns:
        None. Results are reported through stdout and the two files above.
    """
    results_dir = '1D_results'
    model_path = '1D_results/{}.pth'.format(model_name)
    log_path = '1D_results/{}_trainingData.txt'.format(model_name)

    # Older torch builds have no ``torch.backends.mps`` attribute at all.
    mps_backend = getattr(torch.backends, "mps", None)
    if torch.cuda.is_available():
        device = "cuda"
    elif mps_backend is not None and mps_backend.is_available():
        device = "mps"
    else:
        device = "cpu"

    print("Current device is ",device)

    # Load the targets once and rescale them if a MinMax scaler is requested.
    # NOTE(review): the scaler is fitted on every sample, including the
    # validation and held-out ones; kept as-is so results stay comparable.
    output_array = np.load(output_PATH)
    if output_scaler == "MinMax":
        scaler = MinMaxScaler()
        output_array = scaler.fit_transform(output_array)
        print("Fit the output range to [0,1] using MinMaxscaler")

    # Make results deterministic
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    class _ArrayPairDataset(Dataset):
        """In-memory dataset that yields (input, target) rows as float32."""

        def __init__(self, inputs, targets):
            # float32 matches the model parameters and is required on MPS,
            # which cannot hold float64 tensors.
            self.input = np.asarray(inputs, dtype=np.float32)
            self.output = np.asarray(targets, dtype=np.float32)

        def __len__(self):
            return len(self.input)

        def __getitem__(self, index):
            return self.input[index], self.output[index]

    testingAndValidation_split = 0.2
    validation_split = 0.1

    geoid_dataset = _ArrayPairDataset(np.load(input_PATH), output_array)

    # Creating data indices for the training and validation splits
    # Reference: https://stackoverflow.com/questions/50544730/how-do-i-split-a-custom-dataset-into-training-and-test-datasets
    geoid_dataset_size = len(geoid_dataset)
    geoid_indices = list(range(geoid_dataset_size))

    n_holdout = int(np.floor(testingAndValidation_split * geoid_dataset_size))
    n_validation = int(np.floor(validation_split * geoid_dataset_size))

    np.random.shuffle(geoid_indices)
    geoid_train_indices = geoid_indices[n_holdout:]
    geoid_val_indices = geoid_indices[:n_validation]
    # geoid_indices[n_validation:n_holdout] is the held-out test split; it is
    # deliberately left untouched by this function.

    geoid_train_loader = DataLoader(
        dataset=geoid_dataset,
        batch_size=batch_size,
        sampler=SubsetRandomSampler(geoid_train_indices),
    )

    geoid_validation_loader = DataLoader(
        dataset=geoid_dataset,
        batch_size=batch_size,
        sampler=SubsetRandomSampler(geoid_val_indices),
    )

    print("Dataset Loaded!\n")

    # Initialize Model and Optimizer. Layer names (fc1, relu1, ...) define the
    # state_dict keys, so they must stay stable for saved checkpoints.
    neurons_list = [len(geoid_dataset.input[0])] + list(hidden_layers_list) + [len(geoid_dataset.output[0])]
    last_layer = len(neurons_list) - 2
    layers_od = OrderedDict()
    for i in range(len(neurons_list) - 1):
        layers_od['fc' + str(i + 1)] = nn.Linear(neurons_list[i], neurons_list[i + 1])
        if act_function == "ReLU" and i != last_layer:
            layers_od['relu' + str(i + 1)] = nn.ReLU()

    model = nn.Sequential(layers_od).to(device)
    print(model)
    print()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    criterion = nn.L1Loss() if loss_type == 'L1Loss' else nn.MSELoss()
    # Same loss without reduction, used to score every validation sample.
    per_sample_criterion = type(criterion)(reduction='none')

    # Make sure the checkpoint/log directory exists before any training time
    # is spent.
    os.makedirs(results_dir, exist_ok=True)

    # Training
    print("Training Start!")

    minimum_validation_loss = float('inf')
    best_model_index = -1
    best_model_accuracy = None

    running_loss_list = []
    validation_loss_list = []

    for epoch in range(n_epoch):
        model.train()
        running_loss = 0.0

        for inputs, labels in geoid_train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad(set_to_none=True)

            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            # Sum of the per-batch mean losses over the epoch
            running_loss += loss.item()

        # Print some progress statistics
        if epoch+1 == n_epoch // 4:
            print("25% of the training has finished")

        if epoch+1 == n_epoch // 2:
            print("50% of the training has finished")

        if epoch+1 == 3 * n_epoch // 4:
            print("75% of the training has finished")

        running_loss_list.append(running_loss)

        # Validation
        model.eval()
        valid_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in geoid_validation_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                valid_loss += criterion(outputs, labels).item()

                # A sample is "accurate" when its own loss (mean over its
                # output elements) does not exceed the baseline.
                sample_losses = per_sample_criterion(outputs, labels).reshape(len(labels), -1).mean(dim=1)
                correct += int((sample_losses <= accurate_loss_baseline).sum().item())
                total += len(labels)

        validation_loss_list.append(valid_loss)

        # Keep the weights of the best (lowest validation loss) epoch on disk
        if valid_loss <= minimum_validation_loss:
            minimum_validation_loss = valid_loss
            torch.save(model.state_dict(), model_path)
            best_model_index = epoch
            best_model_accuracy = 100.0 * correct / total if total else None

    # Training finished, print the statistics for the best model
    print('Training Finished')
    if best_model_index >= 0:
        print("Best model has a validation loss of {} and it's in epoch {}".format(minimum_validation_loss, best_model_index+1))
        if best_model_accuracy is not None:
            print("Validation accuracy of the best model is {}%".format(best_model_accuracy))
        print("Best model saved to path "+model_path)
    else:
        # Happens when no epoch ran or every validation loss was NaN.
        print("No model was saved because no epoch produced a valid validation loss")

    # Save the training loss and validation loss to a file
    with open(log_path, "w") as text_file:
        text_file.write("/".join([str(elem) for elem in running_loss_list])+"\n")
        text_file.write("/".join([str(elem) for elem in validation_loss_list]))
    print("Training Data saved! The path is "+log_path)

In [3]:
# Train every model configuration listed in ModelList.txt.
# Each configuration line holds eleven comma-separated fields:
#   input path, output path, hidden layer sizes ('/'-separated), activation,
#   loss, epochs, batch size, learning rate, accurate-loss baseline,
#   output scaler, model name
# Blank lines and lines starting with '#' are ignored.
with open('ModelList.txt', 'r') as file1:
    Lines = file1.readlines()

count = 0
for line in Lines:
    config_line = line.strip()
    if not config_line or config_line.startswith('#'):
        continue

    fields = [field.strip() for field in config_line.split(',')]
    if len(fields) != 11:
        raise ValueError(
            "Expected 11 comma-separated fields in ModelList.txt but got {}: {!r}".format(len(fields), config_line)
        )

    ip, op, hidden_layers_string, act_function, loss_type, n_epoch, batch_size, lr, accurate_loss_baseline, output_scaler, model_name = fields
    hidden_layers_list = [int(x) for x in hidden_layers_string.split('/')]

    count += 1
    print("Model {}: ".format(count))
    print("Input Path: ", ip)
    print("Output Path: ", op)
    print("Hidden Layers Nuerons: ", hidden_layers_string)
    print("Activation Function: ", act_function)
    print("Loss Function: ", loss_type)
    print("Total epochs: ", n_epoch)
    print("Batch size: ", batch_size)
    print("Optimizer Learning rate: ", lr)
    print("Accurate Loss Baseline: ", accurate_loss_baseline)
    print("Scaler for the output data: ", output_scaler)
    print("Name for this model: ", model_name)
    print()

    # The result files are keyed by the whole configuration line (with '/'
    # replaced by ':'), not by the model_name field, so editing any field of
    # a line makes it count as untrained.
    # NOTE(review): ':' is not a valid filename character on Windows - confirm
    # the target platforms before relying on this naming.
    run_key = config_line.replace('/',':')
    model_path = '1D_results/{}.pth'.format(run_key)

    # Only train a model when it is not already trained
    if os.path.isfile(model_path):
        print("Model already trained for this commit!")
    else:
        train_model(input_PATH=ip,
                    output_PATH=op,
                    hidden_layers_list=hidden_layers_list,
                    act_function=act_function,
                    loss_type=loss_type,
                    n_epoch=int(n_epoch),
                    batch_size=int(batch_size),
                    lr=float(lr),
                    accurate_loss_baseline=float(accurate_loss_baseline),
                    output_scaler=output_scaler,
                    model_name=run_key)

    print()

Model 1: 
Input Path:  Data/Reduced/small_8_1k-inv.npy
Output Path:  Data/Reduced/small_8_1k-geoid.npy
Hidden Layers Nuerons:  20/30
Activation Function:  ReLU
Loss Function:  MSELoss
Total epochs:  200
Batch size:  16
Optimizer Learning rate:  0.0001
Accurate Loss Baseline:  0.01
Scaler for the output data:  MinMax
Name for this model:  invGeoid

Model already trained for this commit!

Model 2: 
Input Path:  Data/Geoid/new_results_1k_zero/zero_1k-inv.npy
Output Path:  Data/Geoid/new_results_1k_zero/zero_1k-geoid.npy
Hidden Layers Nuerons:  200/160/120/80
Activation Function:  ReLU
Loss Function:  MSELoss
Total epochs:  200
Batch size:  16
Optimizer Learning rate:  0.0001
Accurate Loss Baseline:  0.01
Scaler for the output data:  MinMax
Name for this model:  1k_zero

Model already trained for this commit!

