<a href="https://colab.research.google.com/github/GonMazzini/Loads_Surrogate_Transferability/blob/main/TuningFramework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Thits notebook is the MASTER notebook for running hyper-parameter tuning.


> Section 1


1.   Read data
2.   Train-Val-Test split
3.   Scale data




In [1]:
!pip install parameter-sherpa

Collecting parameter-sherpa
  Downloading parameter-sherpa-1.0.6.tar.gz (513 kB)
[?25l[K     |▋                               | 10 kB 19.7 MB/s eta 0:00:01[K     |█▎                              | 20 kB 15.2 MB/s eta 0:00:01[K     |██                              | 30 kB 7.5 MB/s eta 0:00:01[K     |██▌                             | 40 kB 6.7 MB/s eta 0:00:01[K     |███▏                            | 51 kB 3.8 MB/s eta 0:00:01[K     |███▉                            | 61 kB 4.4 MB/s eta 0:00:01[K     |████▌                           | 71 kB 4.6 MB/s eta 0:00:01[K     |█████                           | 81 kB 4.8 MB/s eta 0:00:01[K     |█████▊                          | 92 kB 5.3 MB/s eta 0:00:01[K     |██████▍                         | 102 kB 4.4 MB/s eta 0:00:01[K     |███████                         | 112 kB 4.4 MB/s eta 0:00:01[K     |███████▋                        | 122 kB 4.4 MB/s eta 0:00:01[K     |████████▎                       | 133 kB 4.4 MB/s eta 0:00

In [2]:
# Import all required libraries

from __future__ import print_function
import sherpa
from sherpa.algorithms import Genetic
import time
import pandas as pd 
import numpy as np
import math
from random import shuffle

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("GPU")
else:
    device = torch.device("cpu")
    print("CPU")

GPU


---
---
---
---
# Section 1:

Section 1.1: Read data




In [3]:
df = pd.read_excel('LoadsDataBase_6D_Set123_FiltMinMaxCrit.xlsx') # Average the values from Set1,Set2 and Set3.
df.head(2)
# 0 : TT_Mx_avg# 1 : TT_My_avg# 2 : TB_Mx_avg# 3 : TB_My_avg# 4 : MS_Mz_avg# 5 : BR_Mx_avg# 6 : BR_My_avg# 7 : TT-Mz_avg

Unnamed: 0.1,Unnamed: 0,pointno,U,SigmaU,Alpha,MannL,MannGamma,VeerDeltaPhi,TT_Mx_avg,TT_My_avg,TB_Mx_avg,TB_My_avg,TT_Mz_avg,MS_Mz_avg,BR_Mx_avg,BR_My_avg
0,0,1,4.0,0.1,-0.65,7.5,1.0,-22.25,747.561872,200.666288,6708.717789,8861.885588,819.209904,63.457528,4253.317748,15006.72686
1,1,2,10.150758,1.208656,-0.139692,48.470634,1.363636,-4.771217,3556.031457,676.339081,16692.647572,6329.099515,3746.460605,1354.995442,10409.290476,16289.414152


---
Section 1.2: Train-Val-Test split.

In [4]:
X = df.iloc[:,2:8]
y = df.iloc[:,8:]

In [5]:
# Test split:
X, X_test, y, y_test = train_test_split(X,y, test_size = 0.2, shuffle = True,  random_state = 101)

print(f'The filtered data set consits on: {len(df)} entries.')
print(f'A total of {len(X)} will be used for training and validation.')
print(f'A total of {len(X_test)} will be used for testing the final model.')


The filtered data set consits on: 7664 entries.
A total of 6131 will be used for training and validation.
A total of 1533 will be used for testing the final model.


---
Section 1.3: Feature scaling

In [6]:
feature_range = (0, 1)
scaler_x = preprocessing.MinMaxScaler(feature_range=feature_range).fit(X)
X_scaled = scaler_x.transform(X)

### Separte between train and validation

In [7]:
X_train, X_val, y_train, y_val = train_test_split(X_scaled,y.values, test_size = 0.2, shuffle = True,  random_state = 101)

In [8]:
# printing number of samples for train-validation-test
print(f'A total of {y_train.shape[0]} for training, {round(100*y_train.shape[0]/len(df),1)} % of total data')
print(f'A total of {y_val.shape[0]} for validation, {round(100*y_val.shape[0]/len(df),1)} % of total data')
print(f'A total of {y_test.shape[0]} for testing, {round(100*y_test.shape[0]/len(df),1)} % of total data')

A total of 4904 for training, 64.0 % of total data
A total of 1227 for validation, 16.0 % of total data
A total of 1533 for testing, 20.0 % of total data


---
---
---
---
# Section 2: Model Selection



> Select the model according to the hyper-parameter to be tuned. The following classes are available:



>> *BaseModel* (**same hidden units per layer**)


*   2 hidden layers with same number of hidden units.
*   Weights initialized with Normal Kaimin (=He)
*   ReLu act_fn

>> *VariableLayers* (**just for number of hidden units**)


*   Variable number of Hidden Layers
*   Weights initialized with Normal Kaimin (=He)
*   ReLu act_fn

>> *VariableUnits* (**different units per layer**)







In [9]:
input_size = 6             # np.shape(X_train)[1]
output_channels = 8        # np.shape(y_train)[1]
hidden_size = 50

In [10]:
class BaseModel(nn.Module):
    
    def __init__(self, hidden_size):   
        super(BaseModel,self).__init__()  # inherit from the superclass Module
        self.hidden_size = hidden_size
        
        self.fc1 = nn.Linear(in_features= input_size,
                             out_features= self.hidden_size,                             
                            bias = True)  
        nn.init.kaiming_normal_(self.fc1.weight)

        self.fc2 = nn.Linear(in_features = self.hidden_size, 
                             out_features = self.hidden_size,
                            bias = True)
        nn.init.kaiming_normal_(self.fc1.weight)

        self.fc3 = nn.Linear(in_features = self.hidden_size, 
                             out_features = output_channels,
                            bias = True)
        nn.init.kaiming_normal_(self.fc1.weight)
        
    def forward(self,x):
        
        out = self.fc1(x)  
        out = F.relu(out)
        out = self.fc2(out)
        out = F.relu(out)
        out = self.fc3(out)                       #  torch.tanh(self.fc3(out))
        
        return out  

In [11]:
# TODO :: Add HE intializing (how to acces each module?)

class VariableLayers(nn.Module):

    """ A feedforward network designed for tuning number of layers and hidden units.
    By @GonMazzini"""

    def __init__(self, input_dim, output_dim, n_hidLayers, hidden_size):
        super(VariableLayers, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_size = hidden_size
        self.n_hidLayers = n_hidLayers
        current_dim = input_dim
        self.layers = nn.ModuleList()
        
        for hdim in [self.hidden_size]*self.n_hidLayers:
            self.layers.append(nn.Linear(current_dim, hdim))
            current_dim = hdim
        self.layers.append(nn.Linear(current_dim, output_dim))

    def forward(self, x):
        for layer in self.layers[:-1]:
            x = F.relu(layer(x))
        out = F.relu(self.layers[-1](x))
        return out 

In [41]:
class VariableUnits(nn.Module):

  def __init__(self, hidden_size1, hidden_size2):
    super(VariableUnits, self).__init__()
    self.hidden_size1 = hidden_size1
    self.hidden_size2 = hidden_size2

    self.fc1 = nn.Linear(in_features = input_size,
                         out_features = self.hidden_size1,
                         bias = True)
    nn.init.kaiming_normal_(self.fc1.weight)

    self.fc2 = nn.Linear(in_features = self.hidden_size1,
                         out_features = self.hidden_size2,
                         bias = True)
    
    nn.init.kaiming_normal_(self.fc2.weight)

    self.fc3 = nn.Linear(in_features = self.hidden_size2,
                      out_features = output_channels,
                      bias = True)

    nn.init.kaiming_normal_(self.fc3.weight)

  def forward(self,x):
        
    out = self.fc1(x)  
    out = F.relu(out)
    out = self.fc2(out)
    out = F.relu(out)
    out = self.fc3(out)                       #  torch.tanh(self.fc3(out))
    
    return out  

---
---
---
---
# Section 3: DataLoader 


>  Use the PyTorch DataLoader and Dataset utils.

- DataLoader class combines a dataset and a sampler, and provides an iterable over the given dataset for training the model
- Dataset: just an abstract class representing a :class:`Dataset`

In [35]:
class FatigueLoads_TrainSet(Dataset):

    def __init__(self):
        self.n_samples = X_train.shape[0]
        self.x_data = torch.from_numpy(X_train) # size [n_samples, n_features]
        self.y_data = torch.from_numpy(y_train) # size [n_samples, 1]

    # support indexing such that dataset[i] can be used to get i-th sample
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    # we can call len(dataset) to return the size
    def __len__(self):
        return self.n_samples
    
class FatigueLoads_ValidationSet(Dataset):

    def __init__(self):
        self.n_samples = X_val.shape[0]
        self.x_data = torch.from_numpy(X_val) # size [n_samples, n_features]
        self.y_data = torch.from_numpy(y_val) # size [n_samples, 1]

    # support indexing such that dataset[i] can be used to get i-th sample
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    # we can call len(dataset) to return the size
    def __len__(self):
        return self.n_samples

train_dataset = FatigueLoads_TrainSet()
valid_dataset = FatigueLoads_ValidationSet()

### Get first sample and unpack. 
# Note that the enviromental inputs are normalized using MinMaxScaler

first_data = train_dataset[0]
features, loads = first_data
print(features, loads)

tensor([0.9213, 0.5395, 0.2971, 0.1574, 0.7369, 0.3075], dtype=torch.float64) tensor([ 7116.1376,   808.7441, 21966.9989, 16383.7768,  7347.3186,   807.4632,
        18567.6736, 16935.8620], dtype=torch.float64)


---
---
---
---
# Section 4: Select algorithm and parameters.

> Options


1.   RandomSearch  :  [ lr , hu1   , hu2 ] 
2.   RandomSearch  :  [ lr ,n_lays , hu12 ]
3.   GridSearch    : 





In [42]:
# Option 1:RandomSearch
# To be used with Model "VariableUnits"
algorithm = sherpa.algorithms.RandomSearch(max_num_trials = 32)
parameters = [sherpa.Ordinal('hidden_size1', [10,25,50,100]),
              sherpa.Ordinal('hidden_size2', [10,25,50,100]),
              sherpa.Ordinal('lr',[0.001,0.005,0.01,0.05,0.1])]

In [37]:
# Option 2: RandomSearch
# To be used with "VariableLayers" 
# TODO
algorithm = sherpa.algorithms.GridSearch(num_grid_points=3)
parameters = [sherpa.Discrete('n_hidLayers', [2, 4]),
              sherpa.Discrete('hidden_size', [16, 64])]

In [38]:
# Option 3: GridSearch
# To be used with "VariableLayers

In [None]:
# Option 4: Bayesian Optimization
# TODO

---
---
---
---
# Section 5: Define the training parameters and sherpa study.

In [43]:
loss = nn.MSELoss()

# list to store results
train_losses , val_losses= [],[]

batch_size = 128
num_epochs = 3

num_batches_train = X_train.shape[0] // batch_size
num_batches_test = X_val.shape[0] // batch_size

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

In [44]:
study = sherpa.Study(parameters= parameters,
                     algorithm=  algorithm,
                     lower_is_better=True)

INFO:sherpa.core:
-------------------------------------------------------
SHERPA Dashboard running. Access via
http://172.28.0.2:8884 if on a cluster or
http://localhost:8884 if running locally.
-------------------------------------------------------


 * Serving Flask app "sherpa.app.app" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


---
---
---
---
# Section 6: Select number of models to train for each paramter configuration. 

# TODO: add to.(device) to enable GPU.

1.   Use first case for training just one model.
2.   Use second case for training 3 models. 
3.   Add checkpoints to save results as df. 



In [None]:
# 1: To be used when Model = VariableUnits.
# Just one model trained.
for trial in study:

  print("Trial {}:\t{}".format(trial.id, trial.parameters))
  model = VariableUnits(trial.parameters['hidden_size1'],
                        trial.parameters['hidden_size2'])
  
  optimizer = optim.Adam(model.parameters(), lr = trial.parameters['lr'])
  
  for epoch in range(num_epochs):
    
    model.train()
    
    for i, (inputs, loads) in enumerate(train_loader):
        #print(features, loads)
        
        optimizer.zero_grad()                      # zeroize accumulated gradients in parameters             
        
        output = model(inputs.float())             # forwards pass       
        batch_loss = loss(output, loads.float())   # compute loss for current batch
        
        batch_loss.backward()                      # compute the gradient of the loss wrt. model parameters
        optimizer.step()                           # update weights according to the comptued gradients
        
    
    epoch_loss_train = 0
    epoch_loss_test = 0
    model.eval()
    
    ##### Evaluate training
    for i, (inputs, loads) in enumerate(train_loader):
        
        output = model(inputs.float())
        
        batch_loss_train = loss(output, loads.float())  # compute loss for the current batch
        epoch_loss_train += batch_loss_train            # accumulate loss for the current epoch
        
        #print(f'Epoch: {epoch+1}/{num_epochs}  | Step {i+1}/{n_iterations}')
    
    ##### Evaluate validation    
    for i, (inputs, loads) in enumerate(valid_loader):
        
        output = model(inputs.float())
        
        batch_loss_test = loss(output, loads.float())  # compute loss for the current batch
        epoch_loss_test += batch_loss_test     # accumulate loss for the current epoch
        
        #print(f'Epoch: {epoch+1}/{num_epochs}  | Step {i+1}/{n_iterations}')
    
    if epoch % 1 == 0: 
        print(f'Epoch: {epoch+1}/{num_epochs} | Train loss: {epoch_loss_train/num_batches_train}       | Val loss {epoch_loss_test/num_batches_test}')

        study.add_observation(trial=trial,
                              iteration=epoch,
                              objective=epoch_loss_test.detach().numpy())
    
    if study.should_trial_stop(trial):
        break 
    # store in list for plotting the loss per epoch    
    val_losses.append(epoch_loss_test/num_batches_test)  
    train_losses.append(epoch_loss_train/num_batches_train)  
  #study.finalize(trial)  

In [47]:
study.results

Unnamed: 0,Trial-ID,Status,Iteration,hidden_size1,hidden_size2,lr,Objective
0,1,INTERMEDIATE,0,100,10,0.100,349758000.0
1,1,INTERMEDIATE,1,100,10,0.100,236138190.0
2,1,INTERMEDIATE,2,100,10,0.100,179342300.0
3,2,INTERMEDIATE,0,10,50,0.050,761814500.0
4,2,INTERMEDIATE,1,10,50,0.050,291997250.0
...,...,...,...,...,...,...,...
91,31,INTERMEDIATE,1,50,50,0.005,1699453200.0
92,31,INTERMEDIATE,2,50,50,0.005,1233039500.0
93,32,INTERMEDIATE,0,100,10,0.010,1823856400.0
94,32,INTERMEDIATE,1,100,10,0.010,1384841200.0
