# Experiments

In [None]:
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import os
from data.load_data import load_monk, load_MLCup
from src.MetricFunctions import get_metric_instance
from src.MLP import MLP
from src.GridSearch import GridSearch, RandomGridsearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src.EarlyStopping import EarlyStopping
from src.BestModelSearch import BestModelSearch

## Task 1: Regression

### Preparation: Loading the MLCup Dataset

In [None]:
# Locate the ML-CUP22 CSV files relative to the current working directory.
# The relative paths start with '/', so they are appended to `path` by plain
# string concatenation.
path = os.getcwd()
file_train = '/data/ML-CUP22-TR.csv'
file_test = '/data/ML-CUP22-TS.csv'

# Column labels: nine inputs x1..x9; the training file additionally carries
# the two target columns y1 and y2.
labels_test = [f'x{idx}' for idx in range(1, 10)]
labels_train = labels_test + ['y1', 'y2']

# With the training labels the loader returns an (inputs, targets) pair;
# with the test labels it returns a single object.
X_train, y_train = load_MLCup(f'{path}{file_train}', labels_train)
X_test = load_MLCup(f'{path}{file_test}', labels_test)

# Sanity check: show the shape of everything that was loaded.
for loaded in (X_train, y_train, X_test):
    print(loaded.shape)



In [None]:
# Sweep over several values of the `step` hyper-parameter and overlay the
# resulting validation curves in a single figure.
n_epochs = 500  # the value printed below is the curve entry at index n_epochs - 1
fig, ax = plt.subplots()
for step in (0.02, 0.03, 0.04, 0.05):
    # A new network is built for every step value.
    mlp = MLP([60, 40, 20], 9, 2, task="regression", activation_function='sigm')
    mlp.fit(
        X_train,
        y_train,
        n_epochs=n_epochs,
        batch_size=256,
        momentum=0.8,
        Nesterov=False,
        step=step,
        patience=1000,  # larger than n_epochs
    )
    ax.plot(mlp.validation_curve, label=str(step))
    print(mlp.validation_curve[n_epochs - 1])
ax.legend()

In [None]:
# Compare training with alpha_l2 = 0 against alpha_l2 = 0.05; each setting
# gets its own figure showing the training and validation curves.
for alpha_l2 in (0, 0.05):
    mlp = MLP([60, 20], 9, 2, task="regression", activation_function='sigm')
    fit_options = dict(
        n_epochs=500,
        batch_size=256,
        step=0.02,
        momentum=0.8,
        Nesterov=True,
        patience=1000,
        regularization="l2",
        alpha_l2=alpha_l2,
    )
    mlp.fit(X_train, y_train, **fit_options)

    fig, ax = plt.subplots()
    labelled_curves = (
        (mlp.learning_curve, "training MSE"),
        (mlp.validation_curve, "validation MSE"),
    )
    for curve, caption in labelled_curves:
        ax.plot(curve, label=caption)
    ax.legend()

    # Validation curve entry at index 499 (n_epochs is 500).
    print(mlp.validation_curve[499])
    

## Task 2: Classification

### Preparation: Loading Monk Datasets

In [None]:
# Load the train/test splits of the three MONK's problems from ./data.
path = os.getcwd()
file_train = '/data/monks-1.train'

# Column labels: the class label first, then the six attributes.
labels = ['class', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6']

# Index 0 holds an empty placeholder so that TR_sets[n] / TS_sets[n] refer to
# MONK's problem number n (1, 2 or 3).
TR_sets = [[]]
TS_sets = [[]]
for i in (1, 2, 3):
    monk_prefix = f'{path}/data/monks-{i}'
    TR_sets.append(load_monk(monk_prefix + '.train', labels))
    TS_sets.append(load_monk(monk_prefix + '.test', labels))


### Experiment 2.1 - Tanh

In [None]:
# Learning curves collected during the experiment; one entry per experiment.
curves = []

# Keyword arguments shared by every `fit` call of this experiment.
# Per-run entries (batch size, regularization) are added later.
params = dict(
    n_epochs=1500,
    error="mse",
    verbose=False,
    step=1,
    momentum=0.5,
    weights_scale=0.01,
    weights_initialization='xavier',
    early_stopping=False,
    eval_metric='mse',
)



def name(i):
    """Map an experiment index to the MONK's dataset number it uses.

    Indices 0, 1 and 2 map to datasets 1, 2 and 3; any other value is
    returned unchanged, so index 3 maps to dataset 3 again.
    """
    if i not in (0, 1, 2):
        return i
    return i + 1

# Record statistics for 10 runs.
#
# Each run trains four models: experiment indices 0, 1, 2 use MONK's 1, 2, 3
# without regularization, and index 3 uses MONK's 3 again with L2
# regularization. The learning curves of the first run are stored in `curves`
# (one entry per experiment, in index order).

for k in range(0, 10):

    for i in range(0, 4):
        # Dataset number (1..3) used by experiment index i.
        monk_id = name(i)
        X_tr, y_tr = TR_sets[monk_id][0], TR_sets[monk_id][1]
        X_ts, y_ts = TS_sets[monk_id][0], TS_sets[monk_id][1]

        print("**************************************")
        print(f"Monk Dataset {monk_id}, Random Run {k + 1}")
        input_size = X_tr[0].shape[0]
        n_samples = X_tr.shape[0]
        # Batch size equal to the number of training samples.
        params["batch_size"] = n_samples

        mlp = MLP([4], input_size = input_size, output_size = 1, task = "classification", activation_function = 'tanh')

        # Curves with MSE/Accuracy
        if i < 3:
            # All without regularization. Drop the coefficient left in the
            # shared dict by the regularized experiment of the previous run,
            # so these runs are configured identically in every repetition.
            params["regularization"] = 'no'
            params.pop("alpha_l2", None)
        else:
            # Monk 3 with regularization
            params["regularization"] = 'l2'
            params["alpha_l2"] = 0.00075

        mlp.fit(X = X_tr, y_true = y_tr, X_test = X_ts, y_test = y_ts, **params)

        # Keep the curves of the first repetition only, i.e. the one
        # printed above as "Random Run 1".
        if k == 0:
            curves.append([[mlp.learning_curve, mlp.learning_accuracy_curve], [mlp.validation_curve, mlp.test_accuracy_curve]])

        # Report the dataset number (not the raw experiment index), in line
        # with the header printed at the start of the experiment.
        print(f"Accuracy on test set {monk_id}, Random Run {k + 1}: {mlp.evaluate_model(X_ts, y_ts, metric = 'accuracy')}")
        print()
    





# Plot the learning curves: one row per experiment, with the MSE curves in
# the left column and the accuracy curves in the right column.
figure, axis = plt.subplots(4, 2, figsize=(20, 32))

for j in range(4):
    # Each entry of `curves` is laid out as
    # [[train MSE, train accuracy], [test MSE, test accuracy]].
    (train_mse, train_acc), (test_mse, test_acc) = curves[j]
    panels = (
        (axis[j, 0], 'MSE', train_mse, test_mse),
        (axis[j, 1], 'Accuracy', train_acc, test_acc),
    )

    for panel, metric_label, train_curve, test_curve in panels:
        # Training and test curves share one panel, indexed by epoch.
        panel.plot(range(len(train_curve)), train_curve,
                   linewidth=1, label=f'Training {metric_label}')
        panel.plot(range(len(test_curve)), test_curve,
                   linewidth=1, label=f'Test {metric_label}')
        panel.set_title(f"MONK's-{name(j)} results ({metric_label}) - Random Run 1")
        panel.set_xlabel('Epoch')
        panel.set_ylabel(metric_label)
        panel.legend()

plt.show()
