# Experiment Notebook Gridsearch

In [1]:
import numpy as np
import math
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import os
from data.load_data import load_monk, load_MLCup
from src.MetricFunctions import get_metric_instance
from src.MLP import MLP
from src.GridSearch import GridSearch, RandomGridsearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src.EarlyStopping import EarlyStopping
from tqdm import tqdm

# Suppress every Python warning for the rest of the session.
# NOTE(review): this blanket "ignore" filter also hides deprecation warnings
# raised by the libraries imported above; consider narrowing it by category.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# The ML-CUP22 CSV files are resolved relative to the current working directory.
path = os.getcwd()
file_train = '/data/ML-CUP22-TR.csv'
file_test = '/data/ML-CUP22-TS.csv'

# Column labels: x1..x9 for both files; the training file additionally lists
# y1 and y2.
labels_test = [f'x{idx}' for idx in range(1, 10)]
labels_train = labels_test + ['y1', 'y2']

# Load the training data, then hold out 10% of it (fixed seed 42) as a
# validation split. X_train / y_train are rebound to the remaining 90%.
X_train, y_train = load_MLCup(path + file_train, labels_train)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)

# The test file is loaded with the x-columns only.
X_test = load_MLCup(path + file_test, labels_test)

## 1. Dre

In [3]:
# Section 1, run 1: grid search for a ReLU network built from [40, 20].
# Search space: 3 * 2 * 3 * 3 * 2 * 2 * 2 * 2 = 864 combinations; entries that
# list a single value are held fixed.
params_grid = {
    "step": [0.005, 0.001, 0.0005],
    "momentum": [0.5, 0.75],
    "alpha_l2": [0.01, 0.005, 0],
    "alpha_l1": [0.01, 0.005, 0],
    "Nesterov": [True, False],
    "rprop": [True, False],
    "adaptive_gradient": [True, False],
    "batch_size": [-1, 500],
    "weights_initialization": ["he"],
    "regularization": ["elastic"],
    "n_epochs": [2000],
    "tolerance": [1e-5],
    "patience": [50],
}

# The positional arguments 9 and 2 presumably are the input/output sizes
# (9 x-columns, 2 y-columns in the loaded data) — TODO confirm against MLP.
mlp = MLP([40, 20], 9, 2, task="regression", activation_function="relu")

# Search with n_folds=5 on the training split, parallel mode on, verbose off.
grid_40_20_dre1 = GridSearch(mlp)
grid_40_20_dre1.fit(
    X_train, y_train, params_grid, n_folds=5, parallel=True, verbose=False
)

# Evaluate the selected model on the held-out validation split with 'mee'.
print(grid_40_20_dre1.best_model.evaluate_model(X_val, y_val, "mee"))

Grid search of combinations: 864
Parallelisation activated


Best parameters: {'step': 0.005, 'momentum': 0.75, 'alpha_l2': 0.005, 'alpha_l1': 0, 'Nesterov': True, 'rprop': False, 'adaptive_gradient': False, 'batch_size': 500, 'weights_initialization': 'he', 'regularization': 'elastic', 'n_epochs': 2000, 'tolerance': 1e-05, 'patience': 50}
Best score: 1.5174841768721512


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (864, 3) + inhomogeneous part.

In [4]:
# Section 1, run 2: same [40, 20] layout as the previous run, with a different
# activation and larger step sizes.
# Search space: 3 * 2 * 3 * 3 * 2 * 2 * 2 * 2 = 864 combinations; entries that
# list a single value are held fixed.
params_grid = {
    "step": [0.03, 0.015, 0.0075],
    "momentum": [0.5, 0.75],
    "alpha_l2": [0.01, 0.005, 0],
    "alpha_l1": [0.01, 0.005, 0],
    "Nesterov": [True, False],
    "rprop": [True, False],
    "adaptive_gradient": [True, False],
    "batch_size": [-1, 500],
    "weights_initialization": ["he"],
    "regularization": ["elastic"],
    "n_epochs": [2000],
    "tolerance": [1e-5],
    "patience": [50],
}

# NOTE(review): the activation is spelled "sigm" here, while the section-3 cell
# passes "sigmoid" — confirm that MLP accepts both spellings.
mlp = MLP([40, 20], 9, 2, task="regression", activation_function="sigm")

# Search with n_folds=5 on the training split, parallel mode on, verbose off.
grid_40_20_dre2 = GridSearch(mlp)
grid_40_20_dre2.fit(
    X_train, y_train, params_grid, n_folds=5, parallel=True, verbose=False
)

# Evaluate the selected model on the held-out validation split; no metric name
# is passed, so evaluate_model's default applies.
print(grid_40_20_dre2.best_model.evaluate_model(X_val, y_val))



Grid search of combinations: 864
Parallelisation activated


Best parameters: {'step': 0.015, 'momentum': 0.75, 'alpha_l2': 0, 'alpha_l1': 0, 'Nesterov': False, 'rprop': False, 'adaptive_gradient': False, 'batch_size': 500, 'weights_initialization': 'he', 'regularization': 'elastic', 'n_epochs': 2000, 'tolerance': 1e-05, 'patience': 50}
Best score: 1.4557360612266517


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (864, 3) + inhomogeneous part.

In [5]:
# Section 1, run 3: grid search for a ReLU network built from [40, 20, 15, 10].
# Search space: 3 * 2 * 3 * 3 * 2 * 2 * 2 = 432 combinations (batch_size is
# fixed to -1 in this run); entries that list a single value are held fixed.
params_grid = {
    "step": [0.005, 0.001, 0.0005],
    "momentum": [0.5, 0.75],
    "alpha_l2": [0.01, 0.005, 0],
    "alpha_l1": [0.01, 0.005, 0],
    "Nesterov": [True, False],
    "rprop": [True, False],
    "adaptive_gradient": [True, False],
    "batch_size": [-1],
    "weights_initialization": ["he"],
    "regularization": ["elastic"],
    "n_epochs": [2000],
    "tolerance": [1e-5],
    "patience": [50],
}

mlp = MLP([40, 20, 15, 10], 9, 2, task="regression", activation_function="relu")

# Search with n_folds=5 on the training split, parallel mode on, verbose off.
grid_40_20_15_10 = GridSearch(mlp)
grid_40_20_15_10.fit(
    X_train, y_train, params_grid, n_folds=5, parallel=True, verbose=False
)

# Evaluate the selected model on the held-out validation split; no metric name
# is passed, so evaluate_model's default applies.
print(grid_40_20_15_10.best_model.evaluate_model(X_val, y_val))

Grid search of combinations: 432
Parallelisation activated


Best parameters: {'step': 0.005, 'momentum': 0.5, 'alpha_l2': 0.005, 'alpha_l1': 0.005, 'Nesterov': False, 'rprop': False, 'adaptive_gradient': False, 'batch_size': -1, 'weights_initialization': 'he', 'regularization': 'elastic', 'n_epochs': 2000, 'tolerance': 1e-05, 'patience': 50}
Best score: 1.540510943639297


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (432, 3) + inhomogeneous part.

## 2. Bianchy

In [None]:
# Section 2: run one grid search per tanh topology and persist each fitted
# GridSearch object under results/ so it can be reloaded by a later cell.
topologies = [[40, 20], [80, 30], [60, 10]]

# The search space is the same for every topology, so it is built once outside
# the loop: 4 * 3 * 5 * 5 * 2 = 600 combinations per topology; entries that
# list a single value are held fixed.
params_grid = {
    "step": [0.01, 0.015, 0.02, 0.03],
    "momentum": [0, 0.5, 0.75],
    "regularization": ["elastic"],
    "alpha_l1": [0, 0.001, 0.002, 0.004, 0.008],
    "alpha_l2": [0, 0.001, 0.002, 0.004, 0.008],
    "Nesterov": [True, False],
    "adaptive_gradient": [False],
    "rprop": [False],
    "batch_size": [-1],
    "n_epochs": [2000],
    "patience": [50],
    "tolerance": [1e-4],
}

# joblib.dump does not create missing directories. Create the output directory
# up front so a missing folder cannot discard the result of a finished search.
os.makedirs("results", exist_ok=True)

for topology in topologies:
    mlp = MLP(topology, 9, 2, task="regression", activation_function="tanh")
    grid = GridSearch(mlp)
    grid.fit(X_train, y_train, params_grid, n_folds=5, parallel=True)

    # File name encodes the topology, e.g. results/GridSearch_40-20.
    topology_tag = "-".join(str(x) for x in topology)
    joblib.dump(grid, f"results/GridSearch_{topology_tag}")
    

In [None]:
# Reload the grid searches persisted by the previous cell (relies on the
# `topologies` list defined there) and print what each one recorded.
for topology in topologies:
    topology_tag = "-".join(map(str, topology))
    grid = joblib.load(f"results/GridSearch_{topology_tag}")

    print(topology)
    print("best score")
    print(grid.results)
    print(grid.best_parameters)

## 3. Aku

In [None]:
# Section 3: grid search over "sigmoid" networks with "scaled" initialisation.
# Only the [40, 20] topology is active; the wider list is kept for reference.
topologies = [[40, 20]]
# topologies = [[40, 20], [60, 10], [80, 30]]

# Active search space: 2 * 2 = 4 combinations (step x momentum); every other
# entry lists a single value and is held fixed.
#
# Wider alternatives for the first five entries, currently disabled:
#     "step" : [0.01, 0.02, 0.03],
#     "momentum" : [0, 0.5, 0.75],
#     "alpha_l1" : [0, 0.001, 0.002, 0.004],
#     "alpha_l2" : [0, 0.001, 0.002, 0.004],
#     "Nesterov" : [True, False],
params_grid = {
    "step": [0.02, 0.03],
    "momentum": [0.5, 0.75],
    "alpha_l1": [0.004],
    "alpha_l2": [0.004],
    "Nesterov": [False],
    "rprop": [False],
    "adaptive_gradient": [False],
    "batch_size": [-1],
    "weights_initialization": ["scaled"],
    "regularization": ["elastic"],
    "n_epochs": [2000],
    "tolerance": [1e-4],
    "patience": [50],
}

# For each topology: search with n_folds=5, then evaluate the selected model on
# the held-out validation split (evaluate_model's default metric).
for top in topologies:
    mlp = MLP(top, 9, 2, task="regression", activation_function="sigmoid")
    grid = GridSearch(mlp)
    grid.fit(X_train, y_train, params_grid, n_folds=5, parallel=True)
    b = grid.best_model.evaluate_model(X_val, y_val)
    print(f"best model of topology {top}: {b}")
    

## 4. Duro

In [None]:
# Halving sequence 1/2**5 ... 1/2**9 followed by 0 (presumably candidate L2
# coefficients; no visible cell consumes this list — TODO confirm its use).
elle2 = [1 / (2 ** exponent) for exponent in range(5, 10)] + [0]
elle2

In [None]:
# Section 4, topology [40, 20]: ReLU grid search over step, momentum, the two
# alpha coefficients and Nesterov.
# Search space: 3 * 3 * 5 * 5 * 2 = 450 combinations; entries that list a
# single value are held fixed.
# NOTE(review): the output recorded under this cell reports 6 combinations and
# its best parameters contain no momentum/alpha keys, so it was produced by a
# different grid than the one defined here — re-run before relying on it.
params_grid = {
    "step": [0.015, 0.007, 0.003],
    "momentum": [0.5, 0.7, 0.8],
    "alpha_l2": [0.015, 0.008, 0.004, 0.002, 0],
    "alpha_l1": [0.015, 0.008, 0.004, 0.002, 0],
    "Nesterov": [False, True],
    "rprop": [False],
    "adaptive_gradient": [False],
    "batch_size": [-1],
    "weights_initialization": ["he"],
    "regularization": ["elastic"],
    "n_epochs": [2000],
    "tolerance": [1e-5],
    "patience": [50],
}

mlp = MLP([40, 20], 9, 2, task="regression", activation_function="relu")

# Search with n_folds=5 on the training split, parallel mode on.
grid_40_20 = GridSearch(mlp)
grid_40_20.fit(X_train, y_train, params_grid, n_folds=5, parallel=True)

# Evaluate the selected model on the held-out validation split.
print(grid_40_20.best_model.evaluate_model(X_val, y_val))


Grid search of combinations: 6
Parallelisation activated


Best parameters: {'step': 0.015, 'Nesterov': True, 'rprop': False, 'adaptive_gradient': False, 'batch_size': -1, 'weights_initialization': 'he', 'regularization': 'elastic', 'n_epochs': 2000, 'tolerance': 1e-05, 'patience': 50}
Best score: 1.7010252002256778
2.070938046528258


In [None]:
# Section 4, topology [80, 30]: ReLU grid search over step, momentum, the two
# alpha coefficients and Nesterov.
# Search space: 3 * 3 * 5 * 5 * 2 = 450 combinations; entries that list a
# single value are held fixed.
params_grid = {
    "step": [0.015, 0.007, 0.003],
    "momentum": [0.5, 0.7, 0.8],
    "alpha_l2": [0.015, 0.008, 0.004, 0.002, 0],
    "alpha_l1": [0.015, 0.008, 0.004, 0.002, 0],
    "Nesterov": [False, True],
    "rprop": [False],
    "adaptive_gradient": [False],
    "batch_size": [-1],
    "weights_initialization": ["he"],
    "regularization": ["elastic"],
    "n_epochs": [2000],
    "tolerance": [1e-5],
    "patience": [50],
}

mlp = MLP([80, 30], 9, 2, task="regression", activation_function="relu")

# Search with n_folds=5 on the training split, parallel mode on.
grid_80_30 = GridSearch(mlp)
grid_80_30.fit(X_train, y_train, params_grid, n_folds=5, parallel=True)

# Evaluate the selected model on the held-out validation split.
print(grid_80_30.best_model.evaluate_model(X_val, y_val))

In [None]:
# Section 4, topology [60, 10]: ReLU grid search over step, momentum, the two
# alpha coefficients and Nesterov.
# Search space: 3 * 3 * 5 * 5 * 2 = 450 combinations; entries that list a
# single value are held fixed.
params_grid = {
    "step": [0.015, 0.007, 0.003],
    "momentum": [0.5, 0.7, 0.8],
    "alpha_l2": [0.015, 0.008, 0.004, 0.002, 0],
    "alpha_l1": [0.015, 0.008, 0.004, 0.002, 0],
    "Nesterov": [False, True],
    "rprop": [False],
    "adaptive_gradient": [False],
    "batch_size": [-1],
    "weights_initialization": ["he"],
    "regularization": ["elastic"],
    "n_epochs": [2000],
    "tolerance": [1e-5],
    "patience": [50],
}

mlp = MLP([60, 10], 9, 2, task="regression", activation_function="relu")

# Search with n_folds=5 on the training split, parallel mode on.
grid_60_10 = GridSearch(mlp)
grid_60_10.fit(X_train, y_train, params_grid, n_folds=5, parallel=True)

# Evaluate the selected model on the held-out validation split.
print(grid_60_10.best_model.evaluate_model(X_val, y_val))