In [None]:
from __future__ import print_function
from torch import nn, optim
import numpy as np
from random import randint
import copy                                                # to keep track of the evolution of the weights of the network
import os   
# from sklearn import manifold                               # for the t-SNE
# import matplotlib                                          # for the t-SNE
# from itertools import cycle, islice                        # for the t-SNE
from matplotlib import pyplot as plt 
from collections import OrderedDict
import pandas as pd
import datetime
import time
import torch                       
import torch.utils.data
from torch.nn import functional as F
# from torchvision import transforms
from datasets import YeastDataset # dataloader loading the yeast dataset in the correct format, replace it with one suitable for your problem
from data_elaboration_utilities import *
print("Libraries loaded")

In [None]:
# Recover the hyper-parameters of a previous search run and build the datasets.

# CSV file with one row per experiment; it must contain a "validation_loss"
# column plus the hyper-parameter columns read below. Set this before running.
records_path = ""

records = pd.read_csv(records_path, encoding="utf-8")

# The ten lowest validation losses, best first. NaN entries are dropped because
# sorting a sequence that contains NaN gives an unreliable order.
best = sorted(records["validation_loss"].dropna())[:10]
if not best:
    raise ValueError("no usable 'validation_loss' values found in " + repr(records_path))

# Rank (0 = lowest validation loss) of the configuration to re-train.
# NOTE(review): rank 0 is assumed here; choose another rank to re-train a
# different candidate among the ten best.
best_rank = 0
# .copy() so that the assignments below modify an independent Series instead
# of a view of `records`.
parameters = records.loc[records["validation_loss"] == best[best_rank]].iloc[0].copy()

no_cuda = False               # SHOULD BE FALSE
seed = 1
log_interval = 10
percent_train = 0.7
percent_validation = 0.2

# Dataset file used by the selected experiment.
dataset_path = parameters["dataset_path"]
dataset_paths = dataset_path  # alias kept for code that still reads `dataset_paths`

# NOTE(review): the training/validation/test partitions below are all derived
# from the single file at `dataset_path`, so both paths are recorded as that
# file - confirm this matches how the experiment was produced.
training_path = dataset_path
test_path = dataset_path
parameters["training_path"] = training_path
parameters["test_path"] = test_path

input_size = 19

reconstruction_weight = [input_size]          # default

num_neurons = parameters["num_neurons"]

z_size = parameters["z_size"]

cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)

device = torch.device("cuda" if cuda else "cpu")

# Extra DataLoader options that only make sense when batches are moved to a GPU.
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

partition, labels = splitTrainingValidationTestSet(dataset_path, percent_train, percent_validation)

training_set = YeastDataset(partition['training'], labels)
validation_set = YeastDataset(partition['validation'], labels)
test_set = YeastDataset(partition['test'], labels)

torch.set_printoptions(precision=9)                                 # to print more digits for the loss

print("Device: ", device)                           # simple check

In [None]:
# Sanity check: show the dataset path that the configuration cell is expected
# to have stored in `dataset_path` (the same name is passed to the split helper).
print(dataset_path)

In [6]:
class trialRegressor(nn.Module):
    """Fully connected regressor with two hidden ReLU layers and one output unit.

    Dropout is applied to the activations of both hidden layers before they
    are fed to the next linear layer.

    Args:
        num_neurons: width of the first hidden layer.
        num_neurons2: width of the second hidden layer.
        dropout: probability passed to ``nn.Dropout``.
        in_features: number of input features. When ``None`` (the default) the
            notebook-level ``input_size`` variable is used, which keeps
            existing three-argument calls working unchanged.
    """

    def __init__(self, num_neurons, num_neurons2, dropout, in_features=None):
        super(trialRegressor, self).__init__()
        # Only touch the notebook-level global when the caller gave no width,
        # so the class can also be built without that hidden state.
        self.input_size = input_size if in_features is None else in_features
        self.num_neurons = num_neurons
        self.num_neurons2 = num_neurons2
        self.dropout = dropout

        self.fc1 = nn.Linear(self.input_size, self.num_neurons)
        self.fc2 = nn.Linear(self.num_neurons, self.num_neurons2)
        self.fc3 = nn.Linear(self.num_neurons2, 1)
        self.dropt = nn.Dropout(self.dropout)

    def forward(self, x):
        """Return the prediction for ``x``; the last dimension has size 1.

        ``x`` is cast to float32 first, so integer or float64 inputs are accepted.
        """
        h = F.relu(self.fc1(x.float()))
        h1 = F.relu(self.fc2(self.dropt(h)))
        return self.fc3(self.dropt(h1))

In [7]:
def loss_function(real, predict, loss):
    """Compute the regression loss selected by name.

    Args:
        real: target values.
        predict: model outputs.
        loss: "L1_loss" or "MSE_loss"; any other value selects the smooth L1 loss.

    Returns:
        The loss tensor returned by the chosen ``torch.nn.functional`` loss,
        called as ``criterion(predict, real)`` with its default arguments.
    """
    if loss == "L1_loss":
        criterion = F.l1_loss
    elif loss == "MSE_loss":
        criterion = F.mse_loss
    else:
        # Fallback for every unrecognised name.
        criterion = F.smooth_l1_loss
    return criterion(predict, real)

In [8]:
def optimizer_function(optimizer, model, lr):
    """Build the optimizer selected by name for ``model``'s parameters.

    Args:
        optimizer: "Adam", "SGD" or "RMSprop"; any other value selects Adadelta.
        model: module whose ``parameters()`` are optimised.
        lr: learning rate passed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer. SGD is created with a
        weight decay of 0.1; the others use their default settings.
    """
    if optimizer == "Adam":
        factory, extra = optim.Adam, {}
    elif optimizer == "SGD":
        factory, extra = optim.SGD, {"weight_decay": 0.1}
    elif optimizer == "RMSprop":
        factory, extra = optim.RMSprop, {}
    else:
        # Fallback for every unrecognised name.
        factory, extra = optim.Adadelta, {}
    return factory(model.parameters(), lr=lr, **extra)

In [9]:
def train(epoch, lo):
    """Run one training epoch over ``train_loader`` and record its mean loss.

    Uses the notebook-level ``model``, ``optmz``, ``train_loader``, ``device``,
    ``loss_function`` and ``loss_evolution``.

    Args:
        epoch: epoch number, only used in the printed message.
        lo: loss name forwarded to ``loss_function``.

    Side effects:
        Updates the model weights, appends the epoch's mean training loss
        (a Python float, NaN if the loader produced no batch) to
        ``loss_evolution`` and prints it.
    """
    model.train()
    train_loss = 0.0
    num_samples = 0
    for data, labels in train_loader:
        data = data.to(device)
        labels = labels.to(device)
        optmz.zero_grad()
        output = model(data)
        # NOTE(review): assumes labels and output have the same shape; a (batch,)
        # target against a (batch, 1) output would broadcast - confirm in the dataset.
        loss = loss_function(labels.float(), output, lo)
        loss.backward()
        optmz.step()
        # Weight each batch by its size so that a smaller final batch does not
        # distort the epoch mean (assumes loss_function returns a per-batch mean).
        batch_size = data.size(0)
        train_loss += loss.item() * batch_size
        num_samples += batch_size

    mean_loss = train_loss / num_samples if num_samples else float("nan")
    loss_evolution.append(mean_loss)

    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, mean_loss))


def test(epoch, lo):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        batch_stop = (len(test_loader.dataset) // parameters["batch_size"]) \
        if (len(test_loader.dataset) % parameters["batch_size"]) else \
        (len(test_loader.dataset) // parameters["batch_size"] -1)
        for batch_idx, (data, labels) in enumerate(test_loader):
            data = data.to(device)
            labels = labels.to(device)
            output = model(data)
            loss = loss_function(labels.float(), output, lo)
            test_loss += loss.item()
            if epoch == parameters["epochs"] and batch_idx == batch_stop:
                REC_targets.append(labels.cpu().numpy().reshape(1, -1))
                REC_predictions.append(output.cpu().reshape(1, -1))
        test_loss_evolution.append(loss.cpu().numpy()) 
    # valid_loss /= len(validation_loader.dataset)                              
    # print('====> Validation set loss: {:.4f}'.format(valid_loss))

In [None]:
# Train the selected configuration, evaluate it after every epoch and save the
# weights, the loss curve, the last-batch targets/predictions and the parameters.

# Output directory: <results root>\TEST\<timestamp>. The root is a placeholder
# that must be replaced with a real location.
directory_path = os.path.join(
    "path to the directory where to save the final results for the dataset\\TEST",
    datetime.datetime.now().strftime("%d_%m_%Y-%H.%M"))
# Short experiment label; the offset strips the leading part of directory_path.
experiment_name = directory_path[68:]  # change according to your directory_path
# Shared log collecting the final test losses of every run (placeholder path).
losses_log_path = "where tp save all the losses \\Losses.txt"

# Create the output directory up front so that an invalid path fails before
# training starts rather than after it.
if not os.path.exists(directory_path):
    os.makedirs(directory_path)

model = trialRegressor(parameters["num_neurons"], parameters["num_neurons2"], parameters["dropout"]).to(device)

optmz = optimizer_function(parameters["optimizer"], model, parameters["learning_rate"])
# Decay the learning rate once, halfway through training. StepLR needs a
# step size of at least 1, which epochs // 2 does not give for a single epoch.
step_size = max(1, parameters["epochs"] // 2)
gamma = parameters["gamma"]
scheduler = optim.lr_scheduler.StepLR(optmz, step_size=step_size, gamma=gamma)

train_loader = torch.utils.data.DataLoader(training_set, shuffle=True, batch_size=parameters["batch_size"], **kwargs)
# NOTE(review): the test loader is shuffled, so the "last batch" stored on the
# final epoch holds different samples from run to run - confirm this is intended.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=parameters["batch_size"], shuffle=True, **kwargs)

REC_targets = list()
REC_predictions = list()
loss_evolution = list()              # superfluous
test_loss_evolution = list()

for epoch in range(1, parameters["epochs"] + 1):
    train(epoch, parameters["loss"])
    test(epoch, parameters["loss"])
    # Step the scheduler after the epoch's optimizer updates; stepping it first
    # skips the first value of the learning-rate schedule.
    scheduler.step()

# save the losses for comparison (superfluous given the saves in the csv file)
with open(losses_log_path, "a") as losses_log:
    losses_log.write("\n" + experiment_name + "  " + str(test_loss_evolution[-10:]))

torch.save(model.state_dict(), os.path.join(directory_path, "weights.pt"))

visualizeLossesOverEpochs(test_loss_evolution, 0, 0, "Test loss", "0", "0", 50, 100, 'o-', False)

plt.savefig(fname=os.path.join(directory_path, "test_loss.png"), bbox_inches="tight")

# Save the full test loss history and the last batch's targets and predictions.
for file_name, payload in (("test_loss.txt", test_loss_evolution),
                           ("last_batch_targets.txt", REC_targets),
                           ("last_batch_predictions.txt", REC_predictions)):
    with open(os.path.join(directory_path, file_name), "w") as out_file:
        out_file.write(str(payload) + "\n")

parameters["training_loss"] = loss_evolution[-1]
parameters["test_loss_evolution"] = test_loss_evolution[-1]
parameters["training_path"] = str(training_path)
parameters["test_path"] = str(test_path)
parameters["experiment"] = experiment_name

with open(os.path.join(directory_path, "Parameters.txt"), "w") as parameters_file:
    parameters_file.write("Parameters used: \n\n")
    for key, value in parameters.items():
        parameters_file.write(key + " = " + str(value) + "\n")
    parameters_file.write("\n Dataset path: " + str(dataset_path))
    parameters_file.write("\n Training set path: " + str(training_path))
    parameters_file.write("\n Test set path: " + str(test_path))