In [1]:
from utils import *
from NeuralNetwork import *
import pandas as pd
import numpy as np
import warnings
# to ignore pandas warning
warnings.simplefilter(action='ignore', category=FutureWarning)
import matplotlib.pyplot as plt

# Training
The number of neurons and the output layer's activation function were chosen by running some tests on the dataset, whereas **tanh** was chosen as the hidden layer's activation function because it performs very well with Xavier (Glorot) weight initialization, as stated in the paper at **https://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf**

## Monk 1


#### Variance and mean of MSE and Accuracy over 10 training instances of the model **{eta = 0.15, mb = 1, alpha (momentum) = 0.85}** 
The search for the hyperparameters started from a grid search and was then refined with further tests, selecting a set that lets the model reach 100% accuracy with low variance of the MSE (test set) across different training instances.

In [None]:
%%time
col_names = ["Class", "a1", "a2", "a3", "a4", "a5", "a6", "ID"]

tr_data = pd.read_csv("../monk+s+problems/monks-1.train", sep =" ",  names = col_names)
test_data = pd.read_csv("../monk+s+problems/monks-1.test", sep =" ",  names = col_names)


epochs=300
params={
    "eta" : 0.15,
    "mb" : 1,
    "momentum" : 0.85,
    "epochs" : epochs,
    "clip_value" : None,
    "hid_act_fun" : "tanh",
    "out_act_fun" : "sigmoid",
    "cost_fun" : "mse",
    "ridge_lambda":None,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": None
}
n=10
test_errors=np.zeros((n,epochs))
training_errors=np.zeros((n,epochs))
acc_test_errors=np.zeros((n,epochs))
acc_train_errors=np.zeros((n,epochs))
for i in range(n):
    tr = process_monk_data(tr_data)
    test = process_monk_data(test_data)
    net = NeuralNetwork()

    n_inputs = tr.shape[1] - 2

    net.add_input_layer(n_inputs)

    net.add_hidden_layer(n_inputs, 4)

    net.add_output_layer(4, 1)
    test_error,training_error,acc_test,acc_train=net.train(tr,params,test_data=test,type="monk",outFun2="accuracy")
    test_errors[i]=test_error
    training_errors[i]=training_error
    acc_test_errors[i]=acc_test
    acc_train_errors[i]=acc_train
    print(acc_test[-1])

print("Train")
print(f"MSE: mean = {np.mean(training_errors[:,-1])}, Var = {np.var(training_errors[:,-1])}")
print(f"Accuracy: mean = {np.mean(acc_train_errors[:,-1])}, Var = {np.var(acc_train_errors[:,-1])}")
print("Test")
print(f"MSE: mean = {np.mean(test_errors[:,-1])}, Var = {np.var(test_errors[:,-1])}")
print(f"Accuracy: mean = {np.mean(acc_test_errors[:,-1])}, Var = {np.var(acc_test_errors[:,-1])}")

#### Hyperparameters = {eta = 0.15, mb = 1, alpha (momentum) = 0.85}

In [None]:
%%time
params={
    "eta" : 0.15,
    "mb" : 1,
    "momentum" : 0.85,
    "epochs" : 300,
    "clip_value" : None,
    "hid_act_fun" : "tanh",
    "out_act_fun" : "sigmoid",
    "cost_fun" : "mse",
    "ridge_lambda":None,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": None
}

fig,axs = plt.subplots(1,2,figsize=(10,5))
col_names = ["Class", "a1", "a2", "a3", "a4", "a5", "a6", "ID"]

tr_data = pd.read_csv("../monk+s+problems/monks-1.train", sep =" ",  names = col_names)
test_data = pd.read_csv("../monk+s+problems/monks-1.test", sep =" ",  names = col_names)


#creazione rete
tr = process_monk_data(tr_data)
test = process_monk_data(test_data)
net = NeuralNetwork()

n_inputs = tr.shape[1] - 2
net.add_input_layer(n_inputs)
net.add_hidden_layer(n_inputs, 4)
net.add_output_layer(4, 1)

#training 
test_error,training_error,acc_test,acc_train=net.train(tr,params,test_data=test,type="monk",outFun2="accuracy")
print(f"MSE(tr) = {training_error[-1]}, MSE(test) = {test_error[-1]}")
print(f"Accuracy(tr) = {acc_train[-1]}, Accuracy(test) = {acc_test[-1]}")

plot_loss_Monk(training_error,"mse",axs[0],test_losses=test_error)
plot_loss_Monk(acc_train,"acc",axs[1],test_losses=acc_test)


fig.tight_layout(pad=2.0)
plt.show()

## Monk 2

#### Variance and mean of MSE and Accuracy over 10 training instances of the model **{eta = 0.2, mb = 1, alpha (momentum) = 0.8}** 

In [None]:
%%time
col_names = ["Class", "a1", "a2", "a3", "a4", "a5", "a6", "ID"]

tr_data = pd.read_csv("../monk+s+problems/monks-2.train", sep =" ",  names = col_names)
test_data = pd.read_csv("../monk+s+problems/monks-2.test", sep =" ",  names = col_names)


epochs=300
params={
    "eta" : 0.2,
    "mb" : 1,
    "momentum" : 0.8,
    "epochs" : epochs,
    "clip_value" : None,
    "hid_act_fun" : "tanh",
    "out_act_fun" : "sigmoid",
    "cost_fun" : "mse",
    "ridge_lambda":None,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": None
}
n=10
test_errors=np.zeros((n,epochs))
training_errors=np.zeros((n,epochs))
acc_test_errors=np.zeros((n,epochs))
acc_train_errors=np.zeros((n,epochs))
for i in range(n):
    tr = process_monk_data(tr_data)
    test = process_monk_data(test_data)
    net = NeuralNetwork()

    n_inputs = tr.shape[1] - 2

    net.add_input_layer(n_inputs)

    net.add_hidden_layer(n_inputs, 4)

    net.add_output_layer(4, 1)
    test_error,training_error,acc_test,acc_train=net.train(tr,params,test_data=test,type="monk",outFun2="accuracy")
    test_errors[i]=test_error
    training_errors[i]=training_error
    acc_test_errors[i]=acc_test
    acc_train_errors[i]=acc_train
    print(acc_test[-1])


print("Train")
print(f"MSE: mean = {np.mean(training_errors[:,-1])}, Var = {np.var(training_errors[:,-1])}")
print(f"Accuracy: mean = {np.mean(acc_train_errors[:,-1])}, Var = {np.var(acc_train_errors[:,-1])}")
print("Test")
print(f"MSE: mean = {np.mean(test_errors[:,-1])}, Var = {np.var(test_errors[:,-1])}")
print(f"Accuracy: mean = {np.mean(acc_test_errors[:,-1])}, Var = {np.var(acc_test_errors[:,-1])}")


#### Hyperparameters = {eta = 0.2, mb = 1, alpha (momentum) = 0.8}

In [None]:
%%time
params={
    "eta" : 0.2,
    "mb" : 1,
    "momentum" : 0.8,
    "epochs" : 300,
    "clip_value" : None,
    "hid_act_fun" : "tanh",
    "out_act_fun" : "sigmoid",
    "cost_fun" : "mse",
    "ridge_lambda":None,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": None
}

fig,axs = plt.subplots(1,2,figsize=(10,5))
col_names = ["Class", "a1", "a2", "a3", "a4", "a5", "a6", "ID"]

tr_data = pd.read_csv("../monk+s+problems/monks-2.train", sep =" ",  names = col_names)
test_data = pd.read_csv("../monk+s+problems/monks-2.test", sep =" ",  names = col_names)


#creazione rete
tr = process_monk_data(tr_data)
test = process_monk_data(test_data)
net = NeuralNetwork()

n_inputs = tr.shape[1] - 2
net.add_input_layer(n_inputs)
net.add_hidden_layer(n_inputs, 4)
net.add_output_layer(4, 1)

#training 
test_error,training_error,acc_test,acc_train=net.train(tr,params,test_data=test,outFun2="accuracy",type="monk")
print(f"MSE(tr) = {training_error[-1]}, MSE(test) = {test_error[-1]}")
print(f"Accuracy(tr) = {acc_train[-1]}, Accuracy(test) = {acc_test[-1]}")

plot_loss_Monk(training_error,"mse",axs[0],test_losses=test_error)
plot_loss_Monk(acc_train,"acc",axs[1],test_losses=acc_test)


fig.tight_layout(pad=2.0)
plt.show()

## Monk 3 no reg

#### Variance and mean of MSE and Accuracy over 10 training instances of the model **{eta = 0.003, mb = 1, alpha (momentum) = 0.7}**


In [None]:
%%time
col_names = ["Class", "a1", "a2", "a3", "a4", "a5", "a6", "ID"]

tr_data = pd.read_csv("../monk+s+problems/monks-3.train", sep =" ",  names = col_names)
test_data = pd.read_csv("../monk+s+problems/monks-3.test", sep =" ",  names = col_names)


epochs=300
params={
    "eta" : 0.003,
    "mb" : 1,
    "momentum" : 0.7,
    "epochs" : epochs,
    "clip_value" : None,
    "hid_act_fun" : "tanh",
    "out_act_fun" : "sigmoid",
    "cost_fun" : "mse",
    "ridge_lambda":None,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": None
}
n=10
test_errors=np.zeros((n,epochs))
training_errors=np.zeros((n,epochs))
acc_test_errors=np.zeros((n,epochs))
acc_train_errors=np.zeros((n,epochs))
for i in range(n):
    tr = process_monk_data(tr_data)
    test = process_monk_data(test_data)
    net = NeuralNetwork()

    n_inputs = tr.shape[1] - 2

    net.add_input_layer(n_inputs)

    net.add_hidden_layer(n_inputs, 4)

    net.add_output_layer(4, 1)
    test_error,training_error,acc_test,acc_train=net.train(tr,params,test_data=test,type="monk",outFun2="accuracy")
    test_errors[i]=test_error
    training_errors[i]=training_error
    acc_test_errors[i]=acc_test
    acc_train_errors[i]=acc_train

print("Train")
print(f"MSE: mean = {np.mean(training_errors[:,-1])}, Var = {np.var(training_errors[:,-1])}")
print(f"Accuracy: mean = {np.mean(acc_train_errors[:,-1])}, Var = {np.var(acc_train_errors[:,-1])}")
print("Test")
print(f"MSE: mean = {np.mean(test_errors[:,-1])}, Var = {np.var(test_errors[:,-1])}")
print(f"Accuracy: mean = {np.mean(acc_test_errors[:,-1])}, Var = {np.var(acc_test_errors[:,-1])}")

#### Hyperparameters = {eta = 0.003, mb = 1, alpha (momentum) = 0.7}

In [None]:
%%time
params={
    "eta" : 0.003,
    "mb" : 1,
    "momentum" : 0.7,
    "epochs" : 300,
    "clip_value" : None,
    "hid_act_fun" : "tanh",
    "out_act_fun" : "sigmoid",
    "cost_fun" : "mse",
    "ridge_lambda":None,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": None
}

fig,axs = plt.subplots(1,2,figsize=(10,5))
col_names = ["Class", "a1", "a2", "a3", "a4", "a5", "a6", "ID"]

tr_data = pd.read_csv("../monk+s+problems/monks-3.train", sep =" ",  names = col_names)
test_data = pd.read_csv("../monk+s+problems/monks-3.test", sep =" ",  names = col_names)


#creazione rete
tr = process_monk_data(tr_data)
test = process_monk_data(test_data)
net = NeuralNetwork()

n_inputs = tr.shape[1] - 2
net.add_input_layer(n_inputs)
net.add_hidden_layer(n_inputs, 4)
net.add_output_layer(4, 1)

#training 
test_error,training_error,acc_test,acc_train=net.train(tr,params,test_data=test,outFun2="accuracy",type="monk")
print(f"MSE(tr) = {training_error[-1]}, MSE(test) = {test_error[-1]}")
print(f"Accuracy(tr) = {acc_train[-1]}, Accuracy(test) = {acc_test[-1]}")

plot_loss_Monk(training_error,"mse",axs[0],test_losses=test_error)
plot_loss_Monk(acc_train,"acc",axs[1],test_losses=acc_test)


fig.tight_layout(pad=2.0)
plt.show()

## Monk 3 Regularized

#### Variance and mean of MSE and Accuracy over 10 training instances of the model:
#### **{eta = 0.001, mb = 1, alpha (momentum) = 0.9, lambda (L2) = 1 * 10^-5}**


In [None]:
%%time
col_names = ["Class", "a1", "a2", "a3", "a4", "a5", "a6", "ID"]

tr_data = pd.read_csv("../monk+s+problems/monks-3.train", sep =" ",  names = col_names)
test_data = pd.read_csv("../monk+s+problems/monks-3.test", sep =" ",  names = col_names)


epochs=300
params={
    "eta" : 0.001,
    "mb" : 1,
    "momentum" : 0.9,
    "epochs" : epochs,
    "clip_value" : None,
    "hid_act_fun" : "tanh",
    "out_act_fun" : "sigmoid",
    "cost_fun" : "mse",
    "ridge_lambda":0.00001,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": None
}
n=10
test_errors=np.zeros((n,epochs))
training_errors=np.zeros((n,epochs))
acc_test_errors=np.zeros((n,epochs))
acc_train_errors=np.zeros((n,epochs))
for i in range(n):
    tr = process_monk_data(tr_data)
    test = process_monk_data(test_data)
    net = NeuralNetwork()

    n_inputs = tr.shape[1] - 2

    net.add_input_layer(n_inputs)

    net.add_hidden_layer(n_inputs, 4)

    net.add_output_layer(4, 1)
    test_error,training_error,acc_test,acc_train=net.train(tr,params,test_data=test,type="monk",outFun2="accuracy")
    test_errors[i]=test_error
    training_errors[i]=training_error
    acc_test_errors[i]=acc_test
    acc_train_errors[i]=acc_train

print("Train")
print(f"MSE: mean = {np.mean(training_errors[:,-1])}, Var = {np.var(training_errors[:,-1])}")
print(f"Accuracy: mean = {np.mean(acc_train_errors[:,-1])}, Var = {np.var(acc_train_errors[:,-1])}")
print("Test")
print(f"MSE: mean = {np.mean(test_errors[:,-1])}, Var = {np.var(test_errors[:,-1])}")
print(f"Accuracy: mean = {np.mean(acc_test_errors[:,-1])}, Var = {np.var(acc_test_errors[:,-1])}")

#### Hyperparameters = {eta = 0.001, mb = 1, alpha (momentum) = 0.9, lambda (L2) = 1 * 10^-5}

In [None]:
%%time
params={
    "eta" : 0.001,
    "mb" : 1,
    "momentum" : 0.9,
    "epochs" : 300,
    "clip_value" : None,
    "hid_act_fun" : "tanh",
    "out_act_fun" : "sigmoid",
    "cost_fun" : "mse",
    "ridge_lambda":0.00001,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": None
}

fig,axs = plt.subplots(1,2,figsize=(10,5))
col_names = ["Class", "a1", "a2", "a3", "a4", "a5", "a6", "ID"]

tr_data = pd.read_csv("../monk+s+problems/monks-3.train", sep =" ",  names = col_names)
test_data = pd.read_csv("../monk+s+problems/monks-3.test", sep =" ",  names = col_names)


#creazione rete
tr = process_monk_data(tr_data)
test = process_monk_data(test_data)
net = NeuralNetwork()

n_inputs = tr.shape[1] - 2
net.add_input_layer(n_inputs)
net.add_hidden_layer(n_inputs, 4)
net.add_output_layer(4, 1)

#training 
test_error,training_error,acc_test,acc_train=net.train(tr,params,test_data=test,outFun2="accuracy",type="monk")
print(f"MSE(tr) = {training_error[-1]}, MSE(test) = {test_error[-1]}")
print(f"Accuracy(tr) = {acc_train[-1]}, Accuracy(test) = {acc_test[-1]}")

plot_loss_Monk(training_error,"mse",axs[0],test_losses=test_error)
plot_loss_Monk(acc_train,"acc",axs[1],test_losses=acc_test)

fig.tight_layout(pad=2.0)
plt.show()