# 10.5 Training of different models and save weights

This notebook contains the training of different models to generate their final weights. Deeper analysis of each architecture is provided in notebooks 7.x where different parameters are studied in each case. The comparison of the models in terms of accuracy and training time can be performed exercuting the script `training_models.py`. Here we use those models that have been more promising or are interesting for the sake of comparison. 

In [8]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import numpy as np
import pandas as pd
import random
import tqdm
import time

import matplotlib.pyplot as plt

from torch import optim
from models import Net_Final, Net_Final_BIG, ImagesP2, ImagesP4
import training_models 

### General configurations

In [30]:
random.seed(285558)
global MAX_V, mini_batch_size, lr, inp_size
MAX_V = 30
mini_batch_size = 50

tables = list(range(1,98))
random.shuffle(tables)

lr = 0.0001
inp_size = 4 + MAX_V*6

train_tables, test_tables, validation_tables = \
tables[:int(len(tables)*0.6)], tables[int(len(tables)*0.6):int(len(tables)*0.9)], tables[int(len(tables)*0.9):]

In [33]:
print(sorted(test_tables))

[2, 13, 14, 16, 17, 20, 23, 26, 28, 36, 38, 40, 41, 42, 45, 49, 51, 52, 53, 54, 57, 62, 67, 68, 69, 78, 83, 86, 96]


In [3]:
%run training_models


CASE 4. Double Input Double Output MODEL 3. IMAGE SIZE = 30

Epoch 15. Train Loss: 5.264 Accuracy: 0.565 Test Loss: 4.629 Accuracy: 0.243
Average time per epoch 11.628s +- 0.510
Max accuracy of 0.244 achieved at epoch 12
Epoch 15. Train Loss: 5.071 Accuracy: 0.578 Test Loss: 4.709 Accuracy: 0.243
Average time per epoch 11.867s +- 0.153
Max accuracy of 0.244 achieved at epoch 12
Epoch 15. Train Loss: 4.097 Accuracy: 0.567 Test Loss: 4.867 Accuracy: 0.236
Average time per epoch 11.819s +- 0.122
Max accuracy of 0.236 achieved at epoch 14
Epoch 15. Train Loss: 4.686 Accuracy: 0.574 Test Loss: 5.478 Accuracy: 0.249
Average time per epoch 11.860s +- 0.092
Max accuracy of 0.249 achieved at epoch 14
Average accuracy 0.243 +- 0.005. Av loss 4.273
 -------------
Num parameters: 435740	 Num Trainable parameters: 435740

CASE 5. Double Input Double Output MODEL 3. IMAGE SIZE = 50

Epoch 15. Train Loss: 6.285 Accuracy: 0.610 Test Loss: 6.179 Accuracy: 0.252
Average time per epoch 29.298s +- 0.143


### Training functions

In [35]:
def epoch_train_single_inp_single_out(model, train_table, test_tables, optimizer, criterion1, \
                e, im_size, clipped):
    

    sum_loss = 0
    acc = []
    idx_failures = []
    dist = 1/(im_size-1)

    # train data
    for k, TABLE in enumerate(train_tables):
        
        data = np.load('./minmax_data/data_vector_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        data_y = np.load('./minmax_data/data_vector_y_{}.npy'.format(TABLE), allow_pickle=True).tolist()

        idx = list(data.keys())
        nv = len(data[idx[0]]) - 1
        
        

        for b in range(0, len(idx), mini_batch_size):
            # idx of clients to analyse
            t_idx = idx[b:b+mini_batch_size]
            
            x = np.zeros((len(t_idx), nv+1, im_size, im_size))
            loc_y = np.zeros((len(t_idx), 2))
           
            
            for k, cl in enumerate(t_idx):
                loc = int(data_y[cl])
                loc_y[k] = [data[cl][loc+1][2], data[cl][loc+1][3]]
                
                x[k][0][int(data[cl][0][0]//dist)][int(data[cl][0][1]//dist)] = 1
                x[k][0][int(data[cl][0][2]//dist)][int(data[cl][0][3]//dist)] = -1

                if clipped:
                    for i in range(1,31):
                        x[k][1][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
                else:
                    for i in range(1,31):
                        x[k][i][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
            
            
            y = np.hstack([data_y[i] for i in t_idx])
           
            train_x = torch.tensor(x).type(torch.FloatTensor)
            train_y = torch.tensor(y).type(torch.LongTensor)
            
            # set gradient to zero
            optimizer.zero_grad()
            
            # compute output
            
            output2 = model(train_x)
            batch_loss = criterion1(output2, train_y)
            
            batch_loss.backward()
            optimizer.step()

            sum_loss = sum_loss + batch_loss.item()
            _, a = torch.max(output2,1)
            acc.append(float((train_y == a).sum())/len(train_y))
            
    
    
    test_loss = 0
    test_acc = []
    
    #model.eval()
    for k,TABLE in enumerate(test_tables):
        data = np.load('./minmax_data/data_vector_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        data_y = np.load('./minmax_data/data_vector_y_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        
        idx = list(data.keys())
        #random.shuffle(idx)
        
        for b in range(0, len(idx), mini_batch_size):

            t_idx = idx[b:b+mini_batch_size]
            
            x = np.zeros((len(t_idx), nv+1, im_size, im_size))
            loc_y = np.zeros((len(t_idx), 2))
           
            
            for k, cl in enumerate(t_idx):
                loc = int(data_y[cl])
                loc_y[k] = [data[cl][loc+1][2], data[cl][loc+1][3]]
                
                x[k][0][int(data[cl][0][0]//dist)][int(data[cl][0][1]//dist)] = 1
                x[k][0][int(data[cl][0][2]//dist)][int(data[cl][0][3]//dist)] = -1

                if clipped:
                    for i in range(1,31):
                        x[k][1][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
                else:
                    for i in range(1,31):
                        x[k][i][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
            
            y = np.hstack([data_y[i] for i in t_idx])
            
            
            test_x = torch.tensor(x).type(torch.FloatTensor)
            test_y = torch.tensor(y).type(torch.LongTensor)
            
            output2 = model(test_x)
            batch_loss = criterion1(output2, test_y)

            test_loss += batch_loss.item()
            _, a = torch.max(output2,1)
            
            test_acc.append(float((test_y == a).sum())/len(test_y))
            idx_failures += [t_idx[i] for i in np.where(test_y != a)[0]]
            
            
    print('\rEpoch {}. Train Loss: {:.3f} Accuracy: {:.3f} Test Loss: {:.3f} Accuracy: {:.3f}'.format(e+1, sum_loss, np.mean(acc), test_loss,np.mean(test_acc)), end="")
    return sum_loss, np.sum(acc)/len(acc), test_loss, np.sum(test_acc)/len(test_acc), idx_failures
    

In [36]:
def epoch_train_single_input_double_output(model, train_table, test_tables, optimizer, criterion1, criterion2, \
                e, im_size, weighted, clipped):
    

    sum_loss = 0
    acc = []
    idx_failures = []
    dist = 1/(im_size-1)

    # train data
    for k, TABLE in enumerate(train_tables):
        
        data = np.load('./minmax_data/data_vector_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        data_y = np.load('./minmax_data/data_vector_y_{}.npy'.format(TABLE), allow_pickle=True).tolist()

        idx = list(data.keys())
        nv = len(data[idx[0]]) - 1
        
        

        for b in range(0, len(idx), mini_batch_size):
            # idx of clients to analyse
            t_idx = idx[b:b+mini_batch_size]
            
            x = np.zeros((len(t_idx), nv+1, im_size, im_size))
            loc_y = np.zeros((len(t_idx), 2))
            
            for k, cl in enumerate(t_idx):
                loc = int(data_y[cl])
                loc_y[k] = [data[cl][loc+1][2], data[cl][loc+1][3]]
                
                x[k][0][int(data[cl][0][0]//dist)][int(data[cl][0][1]//dist)] = 1
                x[k][0][int(data[cl][0][2]//dist)][int(data[cl][0][3]//dist)] = -1

                if clipped:
                    for i in range(1,31):
                        x[k][1][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
                else:
                    for i in range(1,31):
                        x[k][i][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
            
            
            y = np.hstack([data_y[i] for i in t_idx])
           
            train_x = torch.tensor(x).type(torch.FloatTensor)
            train_y = torch.tensor(y).type(torch.LongTensor)
            train_y_aux = torch.tensor(loc_y).type(torch.FloatTensor)
            
            # set gradient to zero
            optimizer.zero_grad()
            
            # compute output
            output1, output2 = model(train_x)
            batch_loss = 100*weighted*criterion1(output1, train_y_aux) + (1- weighted)*criterion2(output2, train_y)
                
            batch_loss.backward()
            optimizer.step()

            sum_loss = sum_loss + batch_loss.item()
            _, a = torch.max(output2,1)
            acc.append(float((train_y == a).sum())/len(train_y))
            
    
    
    test_loss = 0
    test_acc = []
    
    #model.eval()
    for k,TABLE in enumerate(test_tables):
        data = np.load('./minmax_data/data_vector_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        data_y = np.load('./minmax_data/data_vector_y_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        
        idx = list(data.keys())
        #random.shuffle(idx)
        
        for b in range(0, len(idx), mini_batch_size):

            t_idx = idx[b:b+mini_batch_size]
            
            x = np.zeros((len(t_idx), nv+1, im_size, im_size))
            loc_y = np.zeros((len(t_idx), 2))
            
            for k, cl in enumerate(t_idx):
                loc = int(data_y[cl])
                loc_y[k] = [data[cl][loc+1][2], data[cl][loc+1][3]]
                
                x[k][0][int(data[cl][0][0]//dist)][int(data[cl][0][1]//dist)] = 1
                x[k][0][int(data[cl][0][2]//dist)][int(data[cl][0][3]//dist)] = -1

                if clipped:
                    for i in range(1,31):
                        x[k][1][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
                else:
                    for i in range(1,31):
                        x[k][i][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
            
            y = np.hstack([data_y[i] for i in t_idx])
            
            
            test_x = torch.tensor(x).type(torch.FloatTensor)
            test_y = torch.tensor(y).type(torch.LongTensor)
            test_y_aux = torch.tensor(loc_y).type(torch.FloatTensor)
            
            output1, output2 = model(test_x)
            batch_loss = 100*weighted*criterion1(output1, test_y_aux) + (1- weighted)*criterion2(output2, test_y)

            test_loss += batch_loss.item()
            _, a = torch.max(output2,1)
            
            test_acc.append(float((test_y == a).sum())/len(test_y))
            idx_failures += [t_idx[i] for i in np.where(test_y != a)[0]]
            
            
    print('\rEpoch {}. Train Loss: {:.3f} Accuracy: {:.3f} Test Loss: {:.3f} Accuracy: {:.3f}'.format(e+1, sum_loss, np.mean(acc), test_loss,np.mean(test_acc)), end="")
    return sum_loss, np.sum(acc)/len(acc), test_loss, np.sum(test_acc)/len(test_acc), idx_failures

In [37]:
def epoch_train(model, train_table, test_tables, optimizer, criterion1, criterion2, \
                e, im_size, weighted, clipped):
    

    sum_loss = 0
    acc = []
    idx_failures = []
    dist = 1/(im_size-1)

    # train data
    for k, TABLE in enumerate(train_tables):
        
        data = np.load('./minmax_data/data_vector_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        data_y = np.load('./minmax_data/data_vector_y_{}.npy'.format(TABLE), allow_pickle=True).tolist()

        idx = list(data.keys())
        nv = len(data[idx[0]]) - 1
        
        

        for b in range(0, len(idx), mini_batch_size):
            # idx of clients to analyse
            t_idx = idx[b:b+mini_batch_size]
            if clipped:
                x = np.zeros((len(t_idx), 2, im_size, im_size))
            else:
                x = np.zeros((len(t_idx), nv+1, im_size, im_size))
            loc_y = np.zeros((len(t_idx), 2))
            x_aux = []
            
            for k, cl in enumerate(t_idx):
                loc = int(data_y[cl])
                loc_y[k] = [data[cl][loc+1][2], data[cl][loc+1][3]]
                
                x_aux.append(torch.tensor(np.asarray(data[cl][1:])).type(torch.FloatTensor))
                x[k][0][int(data[cl][0][0]//dist)][int(data[cl][0][1]//dist)] = 1
                x[k][0][int(data[cl][0][2]//dist)][int(data[cl][0][3]//dist)] = -1

                if clipped:
                    for i in range(1,31):
                        x[k][1][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
                else:
                    for i in range(1,31):
                        x[k][i][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
            
            
            y = np.hstack([data_y[i] for i in t_idx])
           
            train_x = torch.tensor(x).type(torch.FloatTensor)
            train_x_aux = torch.stack(x_aux).type(torch.FloatTensor)
            train_y = torch.tensor(y).type(torch.LongTensor)
            train_y_aux = torch.tensor(loc_y).type(torch.FloatTensor)
            
            # set gradient to zero
            optimizer.zero_grad()
            
            # compute output
            output1, output2 = model(train_x, train_x_aux)
            batch_loss = 100*weighted*criterion1(output1, train_y_aux) + (1- weighted)*criterion2(output2, train_y)

            batch_loss.backward()
            optimizer.step()

            sum_loss = sum_loss + batch_loss.item()
            _, a = torch.max(output2,1)
            acc.append(float((train_y == a).sum())/len(train_y))
            
    
    
    test_loss = 0
    test_acc = []
    
    #model.eval()
    for k,TABLE in enumerate(test_tables):
        data = np.load('./minmax_data/data_vector_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        data_y = np.load('./minmax_data/data_vector_y_{}.npy'.format(TABLE), allow_pickle=True).tolist()
        
        idx = list(data.keys())
        #random.shuffle(idx)
        
        for b in range(0, len(idx), mini_batch_size):

            t_idx = idx[b:b+mini_batch_size]
            
            if clipped:
                x = np.zeros((len(t_idx), 2, im_size, im_size))
            else:
                x = np.zeros((len(t_idx), nv+1, im_size, im_size))
            loc_y = np.zeros((len(t_idx), 2))
            x_aux = []
            
            for k, cl in enumerate(t_idx):
                loc = int(data_y[cl])
                loc_y[k] = [data[cl][loc+1][2], data[cl][loc+1][3]]
                
                x_aux.append(torch.tensor(np.asarray(data[cl][1:])).type(torch.FloatTensor))
                x[k][0][int(data[cl][0][0]//dist)][int(data[cl][0][1]//dist)] = 1
                x[k][0][int(data[cl][0][2]//dist)][int(data[cl][0][3]//dist)] = -1

                if clipped:
                    for i in range(1,31):
                        x[k][1][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
                else:
                    for i in range(1,31):
                        x[k][i][int(data[cl][i][2]//dist)][int(data[cl][i][3]//dist)] = 1
            
            y = np.hstack([data_y[i] for i in t_idx])
            
            
            test_x = torch.tensor(x).type(torch.FloatTensor)
            test_x_aux = torch.stack(x_aux).type(torch.FloatTensor)
            test_y = torch.tensor(y).type(torch.LongTensor)
            test_y_aux = torch.tensor(loc_y).type(torch.FloatTensor)
            
            output1, output2 = model(test_x, test_x_aux)
            batch_loss = 100*weighted*criterion1(output1, test_y_aux) + (1-weighted)*criterion2(output2, test_y)


            test_loss += batch_loss.item()
            _, a = torch.max(output2,1)
            
            test_acc.append(float((test_y == a).sum())/len(test_y))
            idx_failures += [t_idx[i] for i in np.where(test_y != a)[0]]
            
            
    print('\rEpoch {}. Train Loss: {:.3f} Accuracy: {:.3f} Test Loss: {:.3f} Accuracy: {:.3f}'.format(e+1, sum_loss, np.mean(acc), test_loss,np.mean(test_acc)), end="")
    return sum_loss, np.sum(acc)/len(acc), test_loss, np.sum(test_acc)/len(test_acc), idx_failures
    

In [38]:
def evaluating_model(model, im_size, n_epochs, simple=False, weighted=0.5, clipped=False, single_inp=False):
    
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion2 = nn.CrossEntropyLoss() 
    criterion1 = nn.MSELoss()
        
    
    loss, acc, test_loss, test_acc, idx_f, times = [], [], [], [], [], []

    for epoch in range(n_epochs):
        current_t = time.time()
        if single_inp and simple:
            train_l, accuracy, test_l, test_a, idx_failures = \
            epoch_train_single_inp_single_out(model, train_tables, test_tables, optimizer, criterion2, \
                epoch, im_size, clipped)
        elif single_inp and not simple:
            train_l, accuracy, test_l, test_a, idx_failures = \
            epoch_train_single_input_double_output(model, train_tables, test_tables, optimizer, criterion1, criterion2, \
                epoch, im_size, weighted, clipped)
        else:
            train_l, accuracy, test_l, test_a, idx_failures = \
            epoch_train(model, train_tables, test_tables, optimizer, criterion1, criterion2, \
                    epoch, im_size, weighted, clipped)
            
        
        times.append(time.time() - current_t)
        loss.append(train_l)
        test_loss.append(test_l)
        acc.append(accuracy)
        test_acc.append(test_a)
        idx_f.append(idx_failures)

    print('\nAverage time per epoch {:.3f}s +- {:.3f}'.format(np.mean(times), 2*np.std(times)))

    max_acc = np.max(test_acc)
    iter_max = np.where(test_acc ==  max_acc)

    print('Max accuracy of {:.3f} achieved at epoch {}'.format(max_acc, iter_max[0][0]))
    
    return loss, acc, test_loss, test_acc, idx_f, times

# Model 1. Single Input - Single Output

In [39]:
n_epochs = 10
im_size = 30

model1 = ImagesP2(31, MAX_V, im_size)
loss, acc, test_loss, test_acc, idx_f, times = \
    evaluating_model(model1, im_size, n_epochs, simple=True, clipped=False, single_inp=True)
torch.save(model1, 'model_weights2/model1_evaluation.pt')

Epoch 10. Train Loss: 210.501 Accuracy: 0.377 Test Loss: 148.023 Accuracy: 0.129
Average time per epoch 11.233s +- 1.699
Max accuracy of 0.131 achieved at epoch 4


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [40]:
n_epochs = 20
im_size = 30

model2 = ImagesP4(31, MAX_V, im_size)
loss, acc, test_loss, test_acc, idx_f, times = \
    evaluating_model(model2, im_size, n_epochs, weighted=0.999, clipped=False, single_inp=True)
torch.save(model2, 'model_weights2/model2_evaluation.pt')

Epoch 20. Train Loss: 593.330 Accuracy: 0.225 Test Loss: 350.094 Accuracy: 0.121
Average time per epoch 11.433s +- 1.128
Max accuracy of 0.122 achieved at epoch 17


  "type " + obj.__name__ + ". It won't be checked "


In [41]:
n_epochs = 10
im_size = 30

model3 = ImagesP4(31, MAX_V, im_size)
loss, acc, test_loss, test_acc, idx_f, times = \
    evaluating_model(model2, im_size, n_epochs, weighted=0.5, clipped=False, single_inp=True)
torch.save(model3, 'model_weights2/model3_evaluation.pt')

Epoch 10. Train Loss: 386.647 Accuracy: 0.323 Test Loss: 268.233 Accuracy: 0.135
Average time per epoch 11.042s +- 0.114
Max accuracy of 0.135 achieved at epoch 7


In [42]:
n_epochs = 10
im_size = 30

model4 = Net_Final(31, MAX_V, im_size, 3)
loss, acc, test_loss, test_acc, idx_f, times = \
    evaluating_model(model4, im_size, n_epochs, weighted=0.999, clipped=False)
torch.save(model4, 'model_weights2/model4_evaluation.pt')

Epoch 10. Train Loss: 442.326 Accuracy: 0.421 Test Loss: 388.499 Accuracy: 0.195
Average time per epoch 10.356s +- 0.452
Max accuracy of 0.198 achieved at epoch 8


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [43]:
n_epochs = 10
im_size = 50

model5 = Net_Final(31, MAX_V, im_size, 3)
loss, acc, test_loss, test_acc, idx_f, times = \
    evaluating_model(model5, im_size, n_epochs, weighted=0.999, clipped=False)
torch.save(model5, 'model_weights2/model5_evaluation.pt')

Epoch 10. Train Loss: 472.155 Accuracy: 0.428 Test Loss: 364.644 Accuracy: 0.202
Average time per epoch 25.312s +- 1.017
Max accuracy of 0.204 achieved at epoch 8


In [44]:
n_epochs = 10
im_size = 30

model6 = Net_Final(2, MAX_V, im_size, 3)
loss, acc, test_loss, test_acc, idx_f, times = \
    evaluating_model(model6, im_size, n_epochs, weighted=0.999, clipped=True)
torch.save(model6, 'model_weights2/model6_evaluation.pt')

Epoch 10. Train Loss: 535.794 Accuracy: 0.424 Test Loss: 386.773 Accuracy: 0.205
Average time per epoch 6.341s +- 0.250
Max accuracy of 0.205 achieved at epoch 7


In [45]:
n_epochs = 10
im_size = 50

model7 = Net_Final(2, MAX_V, im_size, 3)
loss, acc, test_loss, test_acc, idx_f, times = \
    evaluating_model(model7, im_size, n_epochs, weighted=0.999, clipped=True)
torch.save(model7, 'model_weights2/model7_evaluation.pt')

Epoch 10. Train Loss: 470.826 Accuracy: 0.427 Test Loss: 500.189 Accuracy: 0.204
Average time per epoch 13.930s +- 0.535
Max accuracy of 0.207 achieved at epoch 8
