# Experiments

In [1]:
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import os
from data.load_data import load_monk, load_MLCup
from src.MetricFunctions import get_metric_instance
from src.MLP import MLP
from src.GridSearch import GridSearch, RandomGridsearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src.EarlyStopping import EarlyStopping

## Task 1: Regression

### Preparation: Loading MLCup Dataset

In [2]:
# Locate the ML-CUP22 CSV files relative to the current working directory.
path = os.getcwd()
file_train = '/data/ML-CUP22-TR.csv'
file_test = '/data/ML-CUP22-TS.csv'

# Column names: nine inputs x1..x9; the training file additionally carries y1, y2.
labels_test = [f'x{k}' for k in range(1, 10)]
labels_train = labels_test + ['y1', 'y2']

# With the training labels the result is unpacked into (inputs, targets);
# with the test labels a single object (inputs only) is kept.
X_train, y_train = load_MLCup(f'{path}{file_train}', labels_train)
X_test = load_MLCup(f'{path}{file_test}', labels_test)

# One shape per line: training inputs, training targets, test inputs.
print(X_train.shape, y_train.shape, X_test.shape, sep='\n')



(1492, 9)
(1492, 2)
(529, 9)


## Task 2: Classification

### Preparation: Loading Monk Datasets

In [3]:
path = os.getcwd()
# NOTE(review): file_train is reassigned here but not read anywhere in this cell.
file_train = '/data/monks-1.train'

# Indexing convention: TR_sets[i] / TS_sets[i] hold Monk problem i (i = 1..3);
# within a set, index 0 is the input variables and index 1 the targets.
# Slot 0 is an empty placeholder so that the list index equals the Monk problem number.
TR_sets = [[]]
TS_sets = [[]]
for i in range(3):
    TR_sets.append(load_monk(f'{path}/data/monks-{i + 1}.train'))
    TS_sets.append(load_monk(f'{path}/data/monks-{i + 1}.test'))

### Experiment 2.1 - Sigmoid Activation, Sigmoid Output

In [4]:

# Baseline run: train one MLP per Monk problem with fixed hyperparameters and
# report its score on the matching test set.
# NOTE(review): the section heading says "Sigmoid Activation", but this cell passes
# activation_function='relu' — confirm which one is intended.
for i in range(1, 4):
    print("**************************************")
    print(f"Monk Dataset {i}")

    # Input width is read from the first training pattern of set i.
    input_size = TR_sets[i][0][0].shape[0]
    mlp = MLP(
        [5],  # presumably the hidden-layer sizes — confirm against src.MLP
        input_size=input_size,
        output_size=1,
        task="classification",
        activation_function='relu',
    )

    # L2 regularization (regularization='l2', alpha_l2=0.001) was tried and is left disabled.
    learning_curve = mlp.fit(
        TR_sets[i][0],
        TR_sets[i][1],
        n_epochs=500,
        batch_size=10,
        error="NLL",
        verbose=False,
    )

    print("\n", "\n")
    print(f"Accuracy on test set {i}: {mlp.evaluate_model(TS_sets[i][0], TS_sets[i][1])}")
    print("\n", "\n")



**************************************
Monk Dataset 1

 

Accuracy on test set 1: 0.7268518518518519

 

**************************************
Monk Dataset 2

 

Accuracy on test set 2: 0.6550925925925926

 

**************************************
Monk Dataset 3

 

Accuracy on test set 3: 0.8101851851851852

 



In [5]:

# Hyperparameter grid handed to GridSearch.fit. Only n_epochs, batch_size and step
# take several values (2 * 2 * 3 = 12 combinations); every other key is a
# single-value list.
grid_parameters = {
    'n_epochs': [2000, 5000],
    'batch_size': [5, 10],
    'step': [0.01, 0.1, 1],
    'momentum': [0.5],
    'error': ["NLL"],
    'verbose': [False],
    'regularization': ['l2'],
    'alpha_l1': [0],
    # Was `e-10`, which references an undefined name `e` and raises NameError on a
    # fresh kernel; the float literal 1e-10 is the intended tiny L2 coefficient.
    'alpha_l2': [1e-10],
}


# Run the grid search independently on each Monk problem and score the selected
# model on the corresponding test set.
for i in range(1, 4):

    print("**************************************")
    print(f"Monk Dataset {i}")

    # Input width is read from the first training pattern of set i.
    input_size = TR_sets[i][0][0].shape[0]
    mlp = MLP([10], input_size = input_size, output_size = 1, task = "classification", activation_function = 'sigmoid')

    # test_size = 0.1 is forwarded to GridSearch.fit — presumably the fraction of the
    # training data held out for validation; confirm against src.GridSearch.
    grid = GridSearch(mlp)
    grid.fit(X = TR_sets[i][0], y = TR_sets[i][1], parameters_grid = grid_parameters, verbose = True, test_size = 0.1)

    best_model = grid.best_model

    print("\n", "\n")
    print(f"Accuracy on test set {i}: {best_model.evaluate_model(TS_sets[i][0], TS_sets[i][1])}")
    print("\n", "\n")

**************************************
Monk Dataset 1
Grid search of combinations: 12
Parallelisation not active
-----------------------------------
Combination 1/12
Parameters: {'n_epochs': 2000, 'batch_size': 5, 'step': 0.01, 'momentum': 0.5, 'error': 'NLL', 'verbose': False}
Validation score: 0.7692307692307693
-----------------------------------
Combination 2/12
Parameters: {'n_epochs': 2000, 'batch_size': 5, 'step': 0.1, 'momentum': 0.5, 'error': 'NLL', 'verbose': False}
Validation score: 1.0
-----------------------------------
Combination 3/12
Parameters: {'n_epochs': 2000, 'batch_size': 5, 'step': 1, 'momentum': 0.5, 'error': 'NLL', 'verbose': False}
Validation score: 1.0
-----------------------------------
Combination 4/12
Parameters: {'n_epochs': 2000, 'batch_size': 10, 'step': 0.01, 'momentum': 0.5, 'error': 'NLL', 'verbose': False}
Validation score: 0.7692307692307693
-----------------------------------
Combination 5/12
Parameters: {'n_epochs': 2000, 'batch_size': 10, 'step'