# Jupyter magic commands

Use %lsmagic to see all magic commands.

Use %command to use it.

Use %%command to apply it to the whole cell.

Use ?command to find out what that command does.

# Import libraries

In [None]:
import numpy as np
import matplotlib as plt
%matplotlib inline
import sklearn as sk
import torch
from torch import nn
import pandas as pd

# Preparing the dataset

## Replace missing data by the mean of the column 

In [None]:
# Method for pandas
# Optional diagnostic: number of missing values in each column.
empty = data.isnull().sum()
# Impute the missing entries of one column with that column's mean.
# The result is assigned back instead of calling fillna(..., inplace=True) on
# the selected column: the chained in-place form is deprecated and does not
# modify `data` when pandas Copy-on-Write is enabled.
# The mean only exists for numeric columns; for a categorical column use
# data['Column'].mode()[0] (the most frequent value) as the fill value instead.
data['Column'] = data['Column'].fillna(data['Column'].mean())

In [None]:
# Method for Numpy arrays
# --> TODO: I have yet to come across this case / develop a method for it

## Divide into training and test partitions

In [None]:
from sklearn.model_selection import train_test_split

# X holds all the samples (one row per sample, one column per feature) and
# Y holds the label of each sample, with shape (n_samples,).
# Samples and labels are passed together, so both are partitioned.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    random_state=0,
    test_size=0.4,
)

## Data normalization

In [None]:
from sklearn.preprocessing import StandardScaler

# Create the scaler and let it learn the mean and std from the training data;
# fit() does not transform anything and returns the fitted scaler itself.
transformer = StandardScaler().fit(X_train)
# Both partitions are transformed with the statistics learned above.
X_test_norm = transformer.transform(X_test)
X_train_norm = transformer.transform(X_train)

**IMPORTANT**: both test and training data are normalized with the same mean and std, that of the training data, so they are normalized in the same way.

## Model evaluation
- **Training MSE**: $$MSE_{train} = \frac{1}{N_{train}} \sum_{i=1}^{N_{train}} \left(y^{(i)}-f({\bf x_{train}}^{(i)})\right)^2$$

- **Test MSE**: \begin{align}
MSE_{test} =  \frac{1}{N_{test}}\sum_{i=1}^{N_{test}} \left(y^{(i)}-f({\bf x_{test}}^{(i)})\right)^2
\end{align}

Note that we are interested in evaluating how well our model **generalizes to data it has never seen**. Therefore, **the test set should NEVER be used** at any stage of the training, nor during the selection of the hyperparameters.

## Data cross validation
In cross-validation, the training data is repeatedly split into training and validation subsets, each time using a different subset for validation. The results are then averaged.

Also, this is done over different (hyper)parameters values in order to find their optimal values.

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor

# Grid of hyperparameter values to explore: n_neighbors = 1, 2, ..., 39
parameters_dictionary = {'n_neighbors': np.arange(1, 40)}
# Model whose hyperparameters we want to optimize. The key of the grid above
# must be the name of an argument of this estimator.
model = KNeighborsRegressor()
# cv = 10 means that a 10 fold cross validation is performed
# That is, training data is divided in 10 subsets and each is used as validation once, over 10 different trials
# GridSearchCV keeps the candidate with the HIGHEST score, which is why the
# MSE is used with its sign flipped ('neg_mean_squared_error').
# The old `iid` argument is not passed: it was removed from scikit-learn and
# supplying it raises a TypeError.
cross_val = GridSearchCV(model, parameters_dictionary, cv=10, scoring='neg_mean_squared_error')
# Before, we just defined it. With .fit, it iterates over the data
cross_val.fit(X_train, Y_train)  # this executes the cross-validation
# The results are the following:
optimal_estimator = cross_val.best_estimator_  # model already trained with the optimal parameters
dict_parameters = cross_val.best_params_  # dictionary containing the optimal values of the parameters

## Create a KNN regressor

In [None]:
from sklearn.neighbors import KNeighborsRegressor

# Regressor that uses 5 neighbours; fit() returns the fitted estimator itself,
# so construction and training can be chained.
neigh = KNeighborsRegressor(n_neighbors=5).fit(X_train, Y_train)
# Predictions of the fitted regressor for the test samples.
y_test_predicted = neigh.predict(X_test)

# Training a Neural Network


### One-layer NN (logistic regressor)

In [None]:
# First, it is needed to define our NN as a class, inheriting from nn.Module
from torch import nn
from torch import optim

class LR(nn.Module):
    """One-layer network (logistic regressor): sigmoid(x @ weights + bias).

    Args:
        dimx: number of input features, i.e. the number of rows of the
            (dimx, 1) weight matrix.
    """

    def __init__(self, dimx):
        super().__init__()  # mandatory when inheriting from nn.Module
        # Values to be optimized. Wrapping a tensor in nn.Parameter registers
        # it in the module; requires_grad is True by default.
        self.weights = nn.Parameter(torch.randn(dimx, 1))
        self.bias = nn.Parameter(torch.randn(1, 1))

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the affine map of `x`, one output column per row."""
        logits = torch.matmul(x, self.weights) + self.bias
        return self.sigmoid(logits)
    
my_classifier = LR(x.shape[1])  # Instantiate the NN.
# Remember that the __init__ method requires dimx, the dimension (number of features) of the data
criterion = nn.BCELoss()  # define a binary cross entropy as loss function
# The parameters of LR are created with torch.randn (float32 by default), so the
# inputs are converted to float32 as well: a float64 NumPy array would otherwise
# make the matrix product fail with a dtype mismatch.
# NOTE(review): assumes x is an array-like of shape (n_samples, n_features) - confirm.
inputs = torch.as_tensor(x, dtype=torch.float32)
# BCELoss needs float targets with the same shape as the output, (n_samples, 1).
# NOTE(review): assumes y holds one 0/1 label per sample - confirm.
targets = torch.as_tensor(y, dtype=torch.float32).reshape(-1, 1)
output = my_classifier(inputs)  # calling the module runs its forward method
loss = criterion(output, targets)  # it is a scalar value
# But contains the information to compute the gradients of the operations and parameters that led to such value
loss.backward()  # Through the backward operator, the gradient of each parameter is computed and stored in its .grad attribute
# If we were to perform .backward again, gradients get added (not overwritten)
# So it is necessary to set them to zero before using it again:
my_classifier.zero_grad()

# Now, it would be needed to iterate and optimize with respect to the gradient
# That has to be done with the specific optimizer library we are using, with a .step() function or sth similar
# Of course, it has to be iterated in a for loop

### Multilayer dense NN (including training and evaluation methods)

In [None]:
class myNNvalidation(nn.Module):
  """Dense classifier with built-in training, validation and evaluation loops.

  The network maps `dim_input` features to `nlabels` log-probabilities through
  three hidden layers of 256, 128 and 64 units with ReLU activations. It owns
  its Adam optimizer and NLL loss, and records per-epoch losses, accuracies and
  elapsed times in the `*_during_training` and `epoch_time` lists.

  Args:
    dim_input: number of input features of each (flattened) sample.
    nlabels: number of output classes.
    epochs: number of passes over the training loader made by `trainloop`.
    learning_rate: learning rate given to the Adam optimizer.
  """

  def __init__(self,dim_input,nlabels,epochs=10,learning_rate=0.001):
    super().__init__()

    self.dimx = dim_input
    self.nlabels = nlabels
    self.epochs = epochs
    self.lr = learning_rate

    self.layer1 = nn.Linear(dim_input,256)
    self.layer2 = nn.Linear(256,128)
    self.layer3 = nn.Linear(128,64)
    self.layer4 = nn.Linear(64,nlabels)

    # Created after the layers so that self.parameters() already contains them
    self.optim = optim.Adam(self.parameters(), self.lr)
    # NLLLoss is applied to the log-probabilities produced by LogSoftmax
    self.criterion = nn.NLLLoss()
    self.relu = nn.ReLU()
    self.logsoftmax = nn.LogSoftmax(dim=1)

    # One entry per epoch, appended by trainloop
    self.train_loss_during_training = []
    self.valid_loss_during_training = []
    self.valid_acc_during_training = []
    self.train_acc_during_training = []

    self.epoch_time = []

  def forward(self, x):
    """Return the log-probability of each label for every row of `x`."""
    x = self.relu(self.layer1(x))
    x = self.relu(self.layer2(x))
    x = self.relu(self.layer3(x))
    return self.logsoftmax(self.layer4(x))

  def _count_correct(self, out, labels):
    """Return, as a float tensor, how many rows of `out` score the true label highest."""
    # topk(1) gives the value and the index (i.e. the label) of the most probable class
    top_pos, top_class = out.topk(1,dim=1)
    equals = (top_class == labels.view(out.shape[0],1))
    return torch.sum(equals.type(torch.FloatTensor))

  def trainloop(self, trainloader, validloader):
    """Train for `self.epochs` epochs, validating after each one.

    Every batch is flattened to (batch_size, -1) before the forward pass. After
    each epoch the mean batch loss, the accuracy and the elapsed time are
    appended to the history lists and a summary line is printed.

    Args:
      trainloader: iterable of (images, labels) batches used for optimization.
      validloader: iterable of (images, labels) batches used only for monitoring.
    """
    # Imported here because the notebook never imports `time` at the top level.
    import time

    for e in range(int(self.epochs)):
      t_start = time.time()
      epoch_error = 0.
      acc = 0.
      for images, labels in trainloader:
        self.optim.zero_grad()  # gradients accumulate, so reset them every step
        out = self(images.view(images.shape[0], -1))
        loss = self.criterion(out,labels)
        loss.backward()
        self.optim.step()
        epoch_error += loss.item()
        acc += self._count_correct(out, labels)
      self.train_loss_during_training.append(epoch_error / len(trainloader))
      # len(dataset) instead of len(dataset.data): works for any sized dataset,
      # not only for those that expose a `.data` attribute
      self.train_acc_during_training.append(acc / len(trainloader.dataset))

      # Accuracy and loss over the validation dataset (no gradients needed)
      running_loss = 0.
      acc = 0.
      with torch.no_grad():
        for images,labels in validloader:
          out = self(images.view(images.shape[0], -1))
          running_loss += self.criterion(out,labels).item()
          acc += self._count_correct(out, labels)
      self.valid_loss_during_training.append(running_loss / len(validloader))
      self.valid_acc_during_training.append(acc / len(validloader.dataset))
      self.epoch_time.append(time.time()-t_start)

      print("Epoch %d: Training loss is %f and validation loss is %f. Elapsed time: %f" %(e,self.train_loss_during_training[-1],self.valid_loss_during_training[-1],self.epoch_time[-1]))

  def evaluation(self,dataloader):
    """Return the fraction of samples of `dataloader` that are classified correctly."""
    acc = 0
    with torch.no_grad():
      for images,labels in dataloader:
        # output holds one log-probability per possible label, per sample
        output = self(images.view(images.shape[0],-1))
        acc += self._count_correct(output, labels)
      return acc / len(dataloader.dataset)

### Multilayer dense NN including dropout and saving method

In [None]:
class myNN_dropout(myNNvalidation_save):
  """Dense classifier with dropout after each hidden layer and per-epoch checkpoints.

  Extends `myNNvalidation_save` (defined elsewhere; it is expected to provide the
  layers, optimizer, criterion, history lists and `self.savepath` used below).

  Args:
    dim_input: number of input features of each (flattened) sample.
    nlabels: number of output classes.
    dropout_prob: probability of zeroing an activation in the dropout layers.
    epochs: number of training epochs.
    learning_rate: learning rate forwarded to the parent class.
    savepath: directory where checkpoints are written.
  """

  def __init__(self,dim_input,nlabels, dropout_prob=0.5, epochs=10,learning_rate=0.0005, savepath='/content/drive/My Drive/checkpoints'):
    super().__init__(dim_input,nlabels,epochs,learning_rate, savepath)
    self.dropout_prob = dropout_prob
    self.dropout = nn.Dropout(p=self.dropout_prob) # Dropout module

  # Redefine the forward method to include dropout
  def forward(self, x):
    """Return log-probabilities; dropout follows every hidden ReLU."""
    x = self.dropout(self.relu(self.layer1(x)))
    x = self.dropout(self.relu(self.layer2(x)))
    x = self.dropout(self.relu(self.layer3(x)))
    return self.logsoftmax(self.layer4(x))

  def _count_correct(self, out, labels):
    """Return, as a float tensor, how many rows of `out` score the true label highest."""
    # topk(1) gives the value and the index (i.e. the label) of the most probable class
    top_pos, top_class = out.topk(1,dim=1)
    equals = (top_class == labels.view(out.shape[0],1))
    return torch.sum(equals.type(torch.FloatTensor))

  # Redefine the training loop to set evaluation mode on and off
  def trainloop(self, trainloader, validloader):
    """Train for `self.epochs` epochs; validate and save a checkpoint after each one.

    Dropout is active while training (train mode) and disabled while computing
    the validation metrics (eval mode).
    """
    # Imported here because the notebook never imports `time` at the top level.
    import time

    for e in range(int(self.epochs)):
      self.train()  # set training mode: dropout is active
      t_start = time.time()
      epoch_error = 0.
      acc = 0.
      for images, labels in trainloader:
        self.optim.zero_grad()  # gradients accumulate, so reset them every step
        out = self(images.view(images.shape[0], -1))
        loss = self.criterion(out,labels)
        loss.backward()
        self.optim.step()
        epoch_error += loss.item()
        acc += self._count_correct(out, labels)
      self.train_loss_during_training.append(epoch_error / len(trainloader))
      # len(dataset) instead of len(dataset.data): works for any sized dataset,
      # not only for those that expose a `.data` attribute
      self.train_acc_during_training.append(acc / len(trainloader.dataset))

      # Accuracy and loss over the validation dataset
      self.eval()  # set evaluation mode: dropout is disabled
      running_loss = 0.
      acc = 0.
      with torch.no_grad():
        for images,labels in validloader:
          out = self(images.view(images.shape[0], -1))
          running_loss += self.criterion(out,labels).item()
          acc += self._count_correct(out, labels)
      self.valid_loss_during_training.append(running_loss / len(validloader))
      self.valid_acc_during_training.append(acc / len(validloader.dataset))
      self.train()  # set back in training mode

      # Automatic saving in all iterations (the saving time counts towards the epoch time)
      self.save_during_training(path = self.savepath, epoch_number = e, valid_loss = self.valid_loss_during_training[-1])
      t_end = time.time()
      self.epoch_time.append(t_end-t_start)
      print("Epoch %d: Training loss is %f and validation loss is %f. Elapsed time: %f" %(e,self.train_loss_during_training[-1],self.valid_loss_during_training[-1],self.epoch_time[-1]))

  # Redefine the evaluation method to set evaluation mode
  def evaluation(self,dataloader):
    """Return the fraction of samples of `dataloader` classified correctly.

    Runs in eval mode (dropout disabled) and then restores whichever mode the
    model was in before the call, so evaluating a model that was already in
    eval mode does not silently re-enable dropout.
    """
    acc = 0.
    was_training = self.training
    self.eval()
    try:
      with torch.no_grad():
        for images,labels in dataloader:
          # output holds one log-probability per possible label, per sample
          output = self(images.view(images.shape[0],-1))
          acc += self._count_correct(output, labels)
    finally:
      self.train(was_training)
    return acc / len(dataloader.dataset)

  def save_during_training(self, path, epoch_number, valid_loss = 0.000):
    """Save the weights (state_dict) to a timestamped checkpoint file inside `path`.

    The file name contains the network name, the current date and time, the
    epoch number and the validation loss with four decimals.
    """
    import datetime
    currentDT = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    nn_name = 'myNNdropout'
    savename = '{0}/{1}_checkpoint_{2}_epoch{3:d}_validloss{4:.4f}.pth'.format(path,nn_name,currentDT,epoch_number,valid_loss)
    torch.save(self.state_dict(), savename)

### Include GPU acceleration

In [None]:
# Skeleton, not runnable as written: only the lines that differ from a CPU-only
# model are shown. Every run of '#' characters stands for code that was left out
# (constructor arguments, the body of the training / validation / evaluation
# steps) and has to be filled in.
# NOTE(review): presumably the omitted code is the same as in the CPU classes of
# this notebook - confirm against Lenet5_extended, which is not shown here.
class Lenet5_extended_GPU(Lenet5_extended):
    def __init__(self,####):
        super().__init__(####)  

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Selected device: CUDA when it is available, otherwise the CPU

        self.to(self.device)
        # Move the whole model to the selected device
            
    def trainloop(self,trainloader,validloader):
        for e in range(int(self.epochs)):
            #####################
            for images, labels in trainloader:
                
              # Move input and label tensors to the default device
              images, labels = images.to(self.device), labels.to(self.device) 
              # The batch must be on the same device as the model before the forward pass

              ###########################################
              ###########################################
                        
            # Turn off gradients for validation, saves memory and computations
            with torch.no_grad():

              for images,labels in validloader:
            
                # Move input and label tensors to the default device
                images, labels = images.to(self.device), labels.to(self.device)               
                
                ###########################################
                ###########################################

            # The history lists read here are expected to be filled by the omitted code above
            print("Epoch %d: Training loss is %f and validation loss is %f. Elapsed time: %f" %(e,self.train_loss_during_training[-1],self.valid_loss_during_training[-1],self.epoch_time[-1]))

    def evaluation(self,dataloader):
      with torch.no_grad():
            
        for images,labels in dataloader:
          # Move input and label tensors to the default device
          images, labels = images.to(self.device), labels.to(self.device) 
            
          ##########################################################
    
        # eval_loss and acc are not defined in the lines shown: they have to be
        # accumulated by the omitted code. Both are divided by the number of batches.
        return (eval_loss/len(dataloader) , acc / len(dataloader) )
    

### Auto-save and load methods

In [None]:
def save_during_training(self):  # save weights (state_dict)
    """Save a checkpoint of the weights plus the training history of `self`.

    Two files are written inside `self.savepath`:
      * a timestamped '.pth' file with `self.state_dict()`, whose name includes
        `self.current_epoch` and the last validation loss;
      * 'training_information', a pickle with the current epoch and the loss,
        accuracy and epoch-time lists (overwritten on every call).

    Written as a method: `self` must provide `savepath`, `current_epoch`,
    `state_dict()` and the history lists used below.
    """
    # Local imports: the notebook does not import these modules at the top level.
    import datetime
    import pickle

    currentDT = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    nn_name = 'Lenet5_densedropout'
    savename = '{0}/{1}_checkpoint_{2}_epoch{3:d}_validloss{4:.4f}.pth'.format(self.savepath,nn_name,currentDT,self.current_epoch,self.valid_loss_during_training[-1])
    torch.save(self.state_dict(), savename)
    # save training data (acc and loss over training)
    filename = self.savepath + '/training_information'
    # The order of this list is the order in which `load` unpacks it
    my_vars = [self.current_epoch, self.train_loss_during_training, self.valid_loss_during_training, self.valid_acc_during_training, self.train_acc_during_training, self.epoch_time]
    with open(filename,'wb') as f:
        pickle.dump(my_vars, f)
    
def load(self, path, filename):
    """Restore the weights and the training history saved in `path`.

    Args:
        path: directory that contains the checkpoint and 'training_information'.
        filename: checkpoint file name WITHOUT the '.pth' extension.

    Returns:
        `self`, with its parameters loaded and `current_epoch`, the loss and
        accuracy histories and `epoch_time` replaced by the stored values.
    """
    # Local import: the notebook does not import pickle at the top level.
    import pickle

    # map_location='cpu' lets a checkpoint written on a GPU be read on a machine
    # without one; load_state_dict then copies the values into the existing
    # parameters, which stay on whatever device the model is on.
    state_dict = torch.load(path+'/'+filename+'.pth', map_location='cpu')
    self.load_state_dict(state_dict)
    # SECURITY: unpickling can execute arbitrary code - only load
    # 'training_information' files that you created yourself.
    with open(path+'/training_information','rb') as f:
        self.current_epoch, self.train_loss_during_training, self.valid_loss_during_training, self.valid_acc_during_training, self.train_acc_during_training, self.epoch_time = pickle.load(f)
    return self