# Experiment Notebook Gridsearch

In [2]:
import numpy as np
import math
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import os
from data.load_data import load_monk, load_MLCup
from src.MetricFunctions import get_metric_instance
from src.MLP import MLP
from src.GridSearch import GridSearch, RandomGridsearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src.EarlyStopping import EarlyStopping
from tqdm import tqdm

In [3]:
# Build the CSV locations from the current working directory, so the notebook
# has to be launched from the directory that contains `data/`.
path = os.getcwd()
file_train = '/data/ML-CUP22-TR.csv'
file_test = '/data/ML-CUP22-TS.csv'

# Column labels handed to the loader: the test labels are x1..x9 only, the
# training labels are the same nine followed by y1 and y2.
labels_test = ['x1','x2','x3','x4','x5','x6','x7','x8','x9']
labels_train = labels_test + ['y1','y2']

# Load the training file, then hold out 10% of it (fixed seed) as X_val / y_val.
X_train, y_train = load_MLCup(path + file_train, labels_train)
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size = 0.1,
    random_state = 42,
)

# Load the test file using the x-only labels.
X_test = load_MLCup(path + file_test, labels_test)

## 1. Dre

In [None]:
# Grid search for a ReLU regression MLP created from the [40, 20] layer spec.
# NOTE(review): 9 and 2 look like the input/output dimensions — confirm in src.MLP.
mlp = MLP([40, 20], 9, 2, task="regression", activation_function="relu")

# Candidate values per hyper-parameter. If GridSearch expands the full
# Cartesian product this is 7*3*6*6*2*2*2*3 = 18,144 configurations,
# each fitted with n_folds = 5 below.
params_grid = dict(
    step=[0.015, 0.0075, 0.004, 0.002, 0.001, 0.0005, 0.0001],
    momentum=[0.5, 0.7, 0.8],
    alpha_l2=[0.01, 0.005, 0.002, 0.00075, 0.0002, 0],
    alpha_l1=[0.01, 0.005, 0.002, 0.00075, 0.0002, 0],
    Nesterov=[False, True],
    rprop=[False, True],
    adaptive_gradient=[False, True],
    batch_size=[-1, 200, 500],  # presumably -1 means full batch — TODO confirm
    weights_initialization=['he'],
    regularization=['elastic'],
    n_epochs=[2000],
    tolerance=[1e-5],
    patience=[50],
)

grid_40_20_dre1 = GridSearch(mlp)
grid_40_20_dre1.fit(
    X_train,
    y_train,
    params_grid,
    n_folds=5,
    parallel=True,
)

# Score the selected model on the held-out validation split.
print(grid_40_20_dre1.best_model.evaluate_model(X_val, y_val))

In [None]:
# Same [40, 20] layer spec as the ReLU search, but with the "sigm" activation
# and a step list that reaches larger values (up to 0.2).
mlp = MLP([40, 20], 9, 2, task="regression", activation_function="sigm")

# Candidate values per hyper-parameter. If GridSearch expands the full
# Cartesian product this is 7*3*6*6*2*2*2*3 = 18,144 configurations.
params_grid = dict(
    step=[0.2, 0.05, 0.01, 0.0075, 0.001, 0.0005, 0.0001],
    momentum=[0.5, 0.7, 0.8],
    alpha_l2=[0.01, 0.005, 0.002, 0.00075, 0.0002, 0],
    alpha_l1=[0.01, 0.005, 0.002, 0.00075, 0.0002, 0],
    Nesterov=[False, True],
    rprop=[False, True],
    adaptive_gradient=[False, True],
    batch_size=[-1, 200, 500],  # presumably -1 means full batch — TODO confirm
    weights_initialization=['he'],
    regularization=['elastic'],
    n_epochs=[2000],
    tolerance=[1e-5],
    patience=[50],
)

grid_40_20_dre2 = GridSearch(mlp)
grid_40_20_dre2.fit(
    X_train,
    y_train,
    params_grid,
    n_folds=5,
    parallel=True,
)

# Score the selected model on the held-out validation split.
print(grid_40_20_dre2.best_model.evaluate_model(X_val, y_val))

In [None]:
# Deeper ReLU variant created from the [40, 20, 15, 10] layer spec.
mlp = MLP([40, 20, 15, 10], 9, 2, task="regression", activation_function="relu")

# Same candidate values as the [40, 20] ReLU search except that batch_size is
# pinned to -1. If GridSearch expands the full Cartesian product this is
# 7*3*6*6*2*2*2 = 6,048 configurations.
params_grid = dict(
    step=[0.015, 0.0075, 0.004, 0.002, 0.001, 0.0005, 0.0001],
    momentum=[0.5, 0.7, 0.8],
    alpha_l2=[0.01, 0.005, 0.002, 0.00075, 0.0002, 0],
    alpha_l1=[0.01, 0.005, 0.002, 0.00075, 0.0002, 0],
    Nesterov=[False, True],
    rprop=[False, True],
    adaptive_gradient=[False, True],
    batch_size=[-1],  # presumably -1 means full batch — TODO confirm
    weights_initialization=['he'],
    regularization=['elastic'],
    n_epochs=[2000],
    tolerance=[1e-5],
    patience=[50],
)

grid_40_20_15_10 = GridSearch(mlp)
grid_40_20_15_10.fit(
    X_train,
    y_train,
    params_grid,
    n_folds=5,
    parallel=True,
)

# Score the selected model on the held-out validation split.
print(grid_40_20_15_10.best_model.evaluate_model(X_val, y_val))

## 2. Bianchy

In [None]:
# Run one grid search per tanh topology and pickle each fitted GridSearch
# object under results/, named after the topology (e.g. GridSearch_40-20).
topologies = [[40,20], [80,30], [60,10]]

# Create the output directory before any fitting starts: joblib.dump does not
# create missing directories, so without this a missing results/ folder would
# only raise after the first (expensive) grid search had already completed.
os.makedirs("results", exist_ok = True)

for topology in topologies:
    mlp = MLP(topology, 9, 2, task = "regression", activation_function = 'tanh')
    grid = GridSearch(mlp)

    # Candidate values per hyper-parameter; identical for every topology.
    # If GridSearch expands the full Cartesian product this is
    # 3*3*4*4*2 = 288 configurations per topology.
    params_grid = {
        "step" : [0.01, 0.02, 0.03],
        "momentum" : [0, 0.5, 0.75],
        "regularization" : ["elastic"],
        "alpha_l1" : [0, 0.001, 0.002, 0.004],
        "alpha_l2" : [0, 0.001, 0.002, 0.004],
        "Nesterov" : [True, False],
        "adaptive_gradient" : [False],
        "rprop" : [False],
        "batch_size" : [-1],
        "n_epochs" : [2000],
        "patience" : [50],
        "tolerance" : [1e-4]
    }

    grid.fit(X_train, y_train, params_grid, n_folds = 5, parallel = True)
    joblib.dump(grid, f"results/GridSearch_{'-'.join([str(x) for x in topology])}")
    

In [None]:
# Reload every saved search and report its best score, the list of best
# scores, and the winning parameters.
# NOTE: joblib.load unpickles the file — only load results produced locally.
for topology in topologies:
    topology_tag = '-'.join(map(str, topology))
    grid = joblib.load(f"results/GridSearch_{topology_tag}")
    print(
        topology,
        "best score",
        grid.best_score,
        grid.best_scores_list,
        grid.best_parameters,
        sep="\n",
    )

## 3. Aku

In [None]:
# Run one grid search per ReLU topology and keep every fitted search in a
# dict so the results can be compared afterwards.
topologies = [[40, 20], [60, 10], [80, 30]]

# TODO: the hyper-parameter grid is still empty — fill it in before running.
params_grid = {
    
}

# Keyed by tuple(topology): a list is unhashable and cannot be a dict key
# (grid[[40, 20]] raises TypeError), so look results up as grid[(40, 20)].
grid = {}
for top in topologies:
    key = tuple(top)
    mlp = MLP(top, 9, 2, task = "regression", activation_function = "relu")
    grid[key] = GridSearch(mlp)
    grid[key].fit(X_train, y_train, params_grid, n_folds = 5, parallel = True)
    # A bare expression inside a loop is not displayed by the notebook, so
    # print the validation score explicitly.
    print(top)
    print(grid[key].best_model.evaluate_model(X_val, y_val))

## 4. Duro

In [None]:
# Grid search for a ReLU regression MLP created from the [40, 20] layer spec,
# with plain (non-rprop, non-adaptive) updates and batch_size fixed to -1.
mlp = MLP([40, 20], 9, 2, task = "regression", activation_function = "relu")

# Candidate values per hyper-parameter. If GridSearch expands the full
# Cartesian product this is 6*3*7*7*2 = 1,764 configurations.
params_grid = {
    # A step of 0 was dropped from this list: a zero learning rate never
    # updates the weights, so those configurations only wasted compute.
    "step": [0.015, 0.007, 0.004, 0.002, 0.001, 0.00048828125],
    "momentum": [0.5 , 0.7, 0.8],
    'alpha_l2' : [0.015625, 0.0078125, 0.00390625, 0.001953125, 0.0009765625, 0.00048828125, 0],
    'alpha_l1' : [0.015625, 0.0078125, 0.00390625, 0.001953125, 0.0009765625, 0.00048828125, 0],
    'Nesterov' : [False, True],
    # Key was misspelled 'rprp'; every other grid in this notebook uses 'rprop'.
    'rprop' : [False],
    'adaptive_gradient' : [False],
    "batch_size" : [-1],
    'weights_initialization' : ['he'],
    'regularization' : ['elastic'],
    'n_epochs' : [2000],
    'tolerance' : [1e-5],
    'patience': [50]
}

grid_40_20 = GridSearch(mlp)
grid_40_20.fit(X_train, y_train, params_grid, n_folds = 5, parallel = True)

# Score the selected model on the held-out validation split.
print(grid_40_20.best_model.evaluate_model(X_val, y_val))


In [None]:
# Grid search for a ReLU regression MLP created from the [80, 30] layer spec,
# with plain (non-rprop, non-adaptive) updates and batch_size fixed to -1.
mlp = MLP([80, 30], 9, 2, task="regression", activation_function="relu")

# Candidate values per hyper-parameter. If GridSearch expands the full
# Cartesian product this is 4*3*5*5*2 = 600 configurations.
params_grid = dict(
    step=[0.03, 0.015, 0.008, 0.002],
    momentum=[0.75, 0.5, 0],
    alpha_l2=[0.01, 0.005, 0.002, 0.001, 0],
    alpha_l1=[0.01, 0.005, 0.002, 0.001, 0],
    Nesterov=[False, True],
    rprop=[False],
    adaptive_gradient=[False],
    batch_size=[-1],
    weights_initialization=['he'],
    regularization=['elastic'],
    n_epochs=[2000],
    tolerance=[1e-5],
    patience=[50],
)

grid_80_30_duro = GridSearch(mlp)
grid_80_30_duro.fit(
    X_train,
    y_train,
    params_grid,
    n_folds=5,
    parallel=True,
)

# Score the selected model on the held-out validation split.
print(grid_80_30_duro.best_model.evaluate_model(X_val, y_val))

In [None]:
# Grid search for a ReLU regression MLP created from the [60, 10] layer spec,
# with plain (non-rprop, non-adaptive) updates and batch_size fixed to -1.
mlp = MLP([60, 10], 9, 2, task="regression", activation_function="relu")

# Candidate values per hyper-parameter. If GridSearch expands the full
# Cartesian product this is 4*3*5*5*2 = 600 configurations.
params_grid = dict(
    step=[0.03, 0.015, 0.008, 0.002],
    momentum=[0.75, 0.5, 0],
    alpha_l2=[0.01, 0.005, 0.002, 0.001, 0],
    alpha_l1=[0.01, 0.005, 0.002, 0.001, 0],
    Nesterov=[False, True],
    rprop=[False],
    adaptive_gradient=[False],
    batch_size=[-1],
    weights_initialization=['he'],
    regularization=['elastic'],
    n_epochs=[2000],
    tolerance=[1e-5],
    patience=[50],
)

grid_60_10_duro = GridSearch(mlp)
grid_60_10_duro.fit(
    X_train,
    y_train,
    params_grid,
    n_folds=5,
    parallel=True,
)

# Score the selected model on the held-out validation split.
print(grid_60_10_duro.best_model.evaluate_model(X_val, y_val))