In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import wandb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import seaborn as sns
import math

In [2]:
df = pd.read_csv('Data/HousingData.csv')
df = df.fillna(df.mean())
df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.200000,4.0900,1,296,15.3,396.90,4.980000,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.900000,4.9671,2,242,17.8,396.90,9.140000,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.100000,4.9671,2,242,17.8,392.83,4.030000,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.800000,6.0622,3,222,18.7,394.63,2.940000,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.200000,6.0622,3,222,18.7,396.90,12.715432,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.100000,2.4786,1,273,21.0,391.99,12.715432,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.700000,2.2875,1,273,21.0,396.90,9.080000,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.000000,2.1675,1,273,21.0,396.90,5.640000,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.300000,2.3889,1,273,21.0,393.45,6.480000,22.0


In [3]:
x_train, x_rest, y_train, y_rest = train_test_split(df.iloc[:,:-1], df['MEDV'].values.reshape(-1,1), test_size=0.2, random_state=42)
x_test, x_val, y_test, y_val = train_test_split(x_rest, y_rest, test_size=0.5, random_state=42)
scalar = StandardScaler()
scalar.fit(x_train)

In [4]:
x_training = scalar.transform(x_train)
x_validation = scalar.transform(x_val)
x_testing = scalar.transform(x_test)

# # Replacing Nan with 0
# x_training[np.isnan(x_training)] = 1
# x_validation[np.isnan(x_validation)] = 1
# x_testing[np.isnan(x_testing)] = 1

In [5]:
def mse(y_true,y_pred):
    return np.square(y_pred - y_true).mean()

def tanh(y):
#     return (np.exp(y)-np.exp(-y)) / (np.exp(y)+np.exp(-y))
    return 2 / (1 + np.exp(-2 * y)) - 1

def relu(y):
    temp = np.where(y >= 0,y,0.0)
    return temp
#     return np.where(temp > 10,10.0,temp)

def sigmoid(y):
    return 1 / (1 + np.exp(-y))

class MLP:
    def __init__(self,input_size,output_size,num_layers,layer_sizes,activations,optimiser = 'batch',lr = 0.01):
        self.lr = lr
        self.input_size = input_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.layer_sizes = layer_sizes
        self.activations = activations
        self.optimiser = optimiser
#         Initialising weights and biases
        self.w_and_b = []
        prev_size = input_size
        for layer in range(num_layers):
            self.w_and_b.append(0.1*np.random.rand(layer_sizes[layer],prev_size+1))
            prev_size = layer_sizes[layer]
        self.w_and_b.append(np.random.rand(output_size,prev_size+1))
                            
        self.layer_inputs = []    
        self.layer_outputs = []
        self.out = np.zeros(output_size)
    
    def tanh_grad(self,z):
        return 1 - tanh(z)**2
    
    def relu_grad(self,z):
        return np.where(z > 0,1.0,0.0)
                            
    def sigmoid_grad(self,z):
        return sigmoid(z)*(1 - sigmoid(z))
    
    def mseGrad(self,y_true,y_pred):
        return (2/len(y_true))*(y_pred - y_true)                    
        
    def forward(self,x):
        inp = np.append(x,np.ones((x.shape[0],1)),axis=1)
        for layer in range(self.num_layers):    
            self.layer_inputs.append(inp) # (914,12)
            z = inp @ self.w_and_b[layer].T
            y = (self.activations[layer])(z)
            self.layer_outputs.append(z)
            inp = np.append(y,np.ones((y.shape[0],1)),axis=1)

        self.layer_inputs.append(inp) # (914,12)
        z = inp @ self.w_and_b[self.num_layers].T
        self.layer_outputs.append(z)
        self.out = z
        return z
    
    def backward(self,y_true):
        w_and_b_gradients = []
        grad_y_out = self.mseGrad(y_true,self.out) # (914,1)
        grad_w_and_b = grad_y_out.T @ self.layer_inputs[-1]
        w_and_b_gradients.append(grad_w_and_b)
        grad_y = grad_y_out @ self.w_and_b[-1][:,:-1]
        for layer in range(self.num_layers-1,-1,-1):
            grad_z = []
            if(self.activations[layer] == tanh):
                grad_z = grad_y*self.tanh_grad(self.layer_outputs[layer])
            elif(self.activations[layer] == relu):
                grad_z = grad_y*self.relu_grad(self.layer_outputs[layer])
            elif(self.activations[layer] == sigmoid):
                grad_z = grad_y*self.sigmoid_grad(self.layer_outputs[layer])
            
            grad_w_and_b = grad_z.T @ self.layer_inputs[layer] 
            w_and_b_gradients.append(grad_w_and_b)
            grad_y = grad_z@self.w_and_b[layer][:,:-1]
            
        for layer in range(self.num_layers,-1,-1):
            self.w_and_b[layer] -= self.lr*w_and_b_gradients[self.num_layers-layer]
#             print(f"{layer} => w and b : {self.w_and_b[layer]}")

    def training(self,x,y,epochs):
        num_samples = len(x)
        batch_size = 0
        prev_5_best = 0
        best_epoch = 0
        if(self.optimiser == 'batch'):
            batch_size = x.shape[0]
        elif(self.optimiser == 'sgd'):
            batch_size = 1
        else:
            batch_size = 64
        for epoch in range(epochs):
            permutation = np.random.permutation(len(x))
            x_shuffled = x[permutation]
            y_shuffled = y[permutation]
            total_batches = math.ceil(num_samples / batch_size)
            for i in range(0,total_batches,batch_size):
                out = self.forward(x_shuffled[i:batch_size+i])
#                 print(f"Epoch [{epoch}], batch [{i+1}] Training => Loss : {loss:.4f}")
                self.backward(y_shuffled[i:batch_size+i])
            
#             loss = mse(y_shuffled[i:batch_size+1],out)
#             if(loss < prev_5_best):
#                 prev_5_best = loss
#                 best_epoch = epoch
#             else:
#                 best_epoch +=1
#             if(best_epoch > 5):
#                 break
        

In [6]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkhushi1703[0m ([33msmai-khushi[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [7]:
input_size = x_training.shape[1]
output_size = 1
num_layers = 1

In [10]:
epochs = [4000,6000,8000]
layer_sizes = [[8],[25],[64]]
activations =  [[tanh],[sigmoid],[relu]]
activation_names = ['tanh','sigmoid','relu']
optimiser = 'batch'

table = []
lrs = [0.001,0.002,0.003]

#  batch
for idx,activation in enumerate(activations):
    wandb.init(project = "Multilayer Regression Perceptron")
    for lr in lrs:
        for epoch in epochs:
            for size in layer_sizes:
                regressor = MLP(input_size,output_size,num_layers,size,activation,'batch',lr)
                size = size[0]
        #       Training
                regressor.training(x_training,y_train,epoch)
        #       Training metrics
                out = regressor.forward(x_training)
                mse_train = mse(y_train,out)
                print("TRAINING")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Training => MSE : {mse_train:.4f}")

        #       Validation
                out = regressor.forward(x_validation)
                mse_val = mean_squared_error(y_val, out)
                rmse_val = mean_squared_error(y_val, out, squared=False)
                r2_val = r2_score(y_val, out)
                print("VALIDATION")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => MSE : {mse_val:.4f}")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => RMSE : {rmse_val:.4f}")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => R2 score : {r2_val:.4f}")

                wandb.log({
                    "learning_rate": lr,
                    "epochs": epoch,
                    "layer_size": size,
                    "Validation MSE": mse_val,
                    "Training MSE": mse_train,
                })
                entry = [lr,epoch,idx,size,mse_val,rmse_val,r2_val]
                table.append(entry)
    wandb.finish()

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011167471289324264, max=1.0…

TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [8] Training => MSE : 60.6164
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => MSE : 54.1406
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => RMSE : 7.3580
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => R2 score : 0.3473
TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [25] Training => MSE : 85.1451
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => MSE : 78.0648
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => RMSE : 8.8354
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => R2 score : 0.0589
TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [64] Training => MSE : 57.8083
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [64] Validation => MSE : 37.5556
Epoch [4000], lr [0.001], activation [tanh], size [64] Validation => RMSE : 6.1283
Epoch [4000], lr [0.001], act

TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [8] Training => MSE : 69.9975
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => MSE : 67.5209
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => RMSE : 8.2171
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => R2 score : 0.1860
TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [25] Training => MSE : 64.0548
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => MSE : 55.2665
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => RMSE : 7.4341
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => R2 score : 0.3338
TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [64] Training => MSE : 61.9943
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [64] Validation => MSE : 47.4273
Epoch [8000], lr [0.003], activation [tanh], size [64] Validation => RMSE : 6.8867
Epoch [8000], lr [0.003], act



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Training MSE,▃▆▂█▁▅▆▃▁▄▁▂▃▄▂▅▃▃▅▃▂▂▃▂▄▃▃
Validation MSE,▃▅▂█▂▆▆▃▁▅▃▂▃▄▃▆▄▄▅▃▃▃▄▄▅▄▃
epochs,▁▁▁▅▅▅███▁▁▁▅▅▅███▁▁▁▅▅▅███
layer_size,▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█
learning_rate,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅█████████

0,1
Training MSE,61.99427
Validation MSE,47.42726
epochs,8000.0
layer_size,64.0
learning_rate,0.003


TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Training => MSE : 94.4824
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => MSE : 97.0451
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => RMSE : 9.8511
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => R2 score : -0.1699
TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Training => MSE : 91.2050
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => MSE : 90.6555
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => RMSE : 9.5213
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => R2 score : -0.0928
TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Training => MSE : 69.5707
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Validation => MSE : 60.6679
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Validation => RMSE : 7

TRAINING
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Training => MSE : 59.7371
VALIDATION
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => MSE : 46.5272
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => RMSE : 6.8211
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => R2 score : 0.4391
TRAINING
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Training => MSE : 76.5960
VALIDATION
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => MSE : 72.1920
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => RMSE : 8.4966
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => R2 score : 0.1297
TRAINING
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Training => MSE : 73.2133
VALIDATION
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Validation => MSE : 47.4427
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Validation => RMSE : 6.8



VBox(children=(Label(value='0.001 MB of 0.013 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.071443…

0,1
Training MSE,▇▆▃▆▁▂▆▆▇▆▄▄▇▁▃▆▄▁▃▃█▁▆▂▄▄▂
Validation MSE,▇▆▃▆▁▂▆▅▇▆▄▃▆▁▃▆▄▂▂▂█▁▅▂▄▂▁
epochs,▁▁▁▅▅▅███▁▁▁▅▅▅███▁▁▁▅▅▅███
layer_size,▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█
learning_rate,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅█████████

0,1
Training MSE,62.60246
Validation MSE,42.42052
epochs,8000.0
layer_size,64.0
learning_rate,0.003


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011168251855350617, max=1.0…

TRAINING
Epoch [4000], lr [0.001], activation [relu], size [8] Training => MSE : 56.4164
VALIDATION
Epoch [4000], lr [0.001], activation [relu], size [8] Validation => MSE : 48.0662
Epoch [4000], lr [0.001], activation [relu], size [8] Validation => RMSE : 6.9330
Epoch [4000], lr [0.001], activation [relu], size [8] Validation => R2 score : 0.4206
TRAINING
Epoch [4000], lr [0.001], activation [relu], size [25] Training => MSE : 94.1703
VALIDATION
Epoch [4000], lr [0.001], activation [relu], size [25] Validation => MSE : 69.5160
Epoch [4000], lr [0.001], activation [relu], size [25] Validation => RMSE : 8.3376
Epoch [4000], lr [0.001], activation [relu], size [25] Validation => R2 score : 0.1620
TRAINING
Epoch [4000], lr [0.001], activation [relu], size [64] Training => MSE : 48.1975
VALIDATION
Epoch [4000], lr [0.001], activation [relu], size [64] Validation => MSE : 24.3381
Epoch [4000], lr [0.001], activation [relu], size [64] Validation => RMSE : 4.9334
Epoch [4000], lr [0.001], act

TRAINING
Epoch [8000], lr [0.003], activation [relu], size [8] Training => MSE : 43.4886
VALIDATION
Epoch [8000], lr [0.003], activation [relu], size [8] Validation => MSE : 25.7092
Epoch [8000], lr [0.003], activation [relu], size [8] Validation => RMSE : 5.0704
Epoch [8000], lr [0.003], activation [relu], size [8] Validation => R2 score : 0.6901
TRAINING
Epoch [8000], lr [0.003], activation [relu], size [25] Training => MSE : 26.7311
VALIDATION
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => MSE : 21.4425
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => RMSE : 4.6306
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => R2 score : 0.7415
TRAINING
Epoch [8000], lr [0.003], activation [relu], size [64] Training => MSE : 31.5134
VALIDATION
Epoch [8000], lr [0.003], activation [relu], size [64] Validation => MSE : 19.0435
Epoch [8000], lr [0.003], activation [relu], size [64] Validation => RMSE : 4.3639
Epoch [8000], lr [0.003], act



VBox(children=(Label(value='0.001 MB of 0.013 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.073202…

0,1
Training MSE,▄█▃▃▃▃▃▂▄▂▇▂▃▁▁▄▂▂▄▃▂▃▇▂▃▁▁
Validation MSE,▅▇▂▂▂▃▃▂▃▂█▂▂▁▁▃▂▂▃▃▃▃▆▂▂▂▁
epochs,▁▁▁▅▅▅███▁▁▁▅▅▅███▁▁▁▅▅▅███
layer_size,▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█
learning_rate,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅█████████

0,1
Training MSE,31.51342
Validation MSE,19.04349
epochs,8000.0
layer_size,64.0
learning_rate,0.003


In [11]:
table_batch = table

In [12]:
table = pd.DataFrame(table_batch,columns = ['LR','Epochs','Activation','layer size','MSE','RMSE','R2'])
table

Unnamed: 0,LR,Epochs,Activation,layer size,MSE,RMSE,R2
0,0.001,4000,0,8,54.140618,7.358031,0.347340
1,0.001,4000,0,25,78.064834,8.835431,0.058936
2,0.001,4000,0,64,37.555593,6.128262,0.547271
3,0.001,6000,0,8,108.313991,10.407401,-0.305714
4,0.001,6000,0,25,41.793350,6.464778,0.496185
...,...,...,...,...,...,...,...
76,0.003,6000,2,25,57.305965,7.570070,0.309182
77,0.003,6000,2,64,25.976757,5.096740,0.686853
78,0.003,8000,2,8,25.709159,5.070420,0.690079
79,0.003,8000,2,25,21.442516,4.630606,0.741513


In [13]:
print("Batch tanh Metrics")
df_batch_tanh = table[table['Activation'] == 0].drop('Activation',axis = 1)
df_batch_tanh = df_batch_tanh.sort_values(by='R2', ascending=False)
df_batch_tanh

Batch tanh Metrics


Unnamed: 0,LR,Epochs,layer size,MSE,RMSE,R2
8,0.001,8000,64,24.06311,4.905416,0.709922
2,0.001,4000,64,37.555593,6.128262,0.547271
11,0.002,4000,64,40.318944,6.34972,0.513959
4,0.001,6000,25,41.79335,6.464778,0.496185
10,0.002,4000,25,43.657368,6.607372,0.473715
26,0.003,8000,64,47.427263,6.886745,0.428269
14,0.002,6000,64,50.285465,7.091224,0.393814
20,0.003,4000,64,50.692137,7.119841,0.388912
21,0.003,6000,8,52.4081,7.239344,0.368226
12,0.002,6000,8,52.51541,7.246752,0.366932


In [14]:
print("Batch sigmoid Metrics")
df_batch_sigmoid = table[table['Activation'] == 1].drop('Activation',axis = 1)
df_batch_sigmoid = df_batch_sigmoid.sort_values(by='R2', ascending=False)
df_batch_sigmoid

Batch sigmoid Metrics


Unnamed: 0,LR,Epochs,layer size,MSE,RMSE,R2
31,0.001,6000,25,39.822357,6.310496,0.519946
40,0.002,6000,25,40.669739,6.377283,0.50973
53,0.003,8000,64,42.420519,6.513104,0.488625
48,0.003,6000,8,43.825565,6.620088,0.471687
50,0.003,6000,64,46.527223,6.821087,0.439119
52,0.003,8000,25,47.442704,6.887866,0.428083
45,0.003,4000,8,48.008758,6.928835,0.421259
32,0.001,6000,64,48.639124,6.974176,0.41366
46,0.003,4000,25,52.348756,7.235244,0.368941
44,0.002,8000,64,52.3652,7.23638,0.368743


In [15]:
print("Batch ReLU Metrics")
df_batch_relu = table[table['Activation'] == 2].drop('Activation',axis = 1)
df_batch_relu = df_batch_relu.sort_values(by='R2', ascending=False)
df_batch_relu

Batch ReLU Metrics


Unnamed: 0,LR,Epochs,layer size,MSE,RMSE,R2
67,0.002,6000,25,14.581198,3.818534,0.824225
68,0.002,6000,64,17.732855,4.21104,0.786232
80,0.003,8000,64,19.043485,4.363884,0.770433
63,0.002,4000,8,19.74477,4.443509,0.761979
71,0.002,8000,64,20.711379,4.550976,0.750326
79,0.003,8000,25,21.442516,4.630606,0.741513
70,0.002,8000,25,23.32011,4.82909,0.718878
56,0.001,4000,64,24.338104,4.933366,0.706607
65,0.002,4000,64,25.285458,5.028465,0.695186
78,0.003,8000,8,25.709159,5.07042,0.690079


In [16]:
epochs = [4000,6000,8000]
layer_sizes = [[8],[25],[64]]
activations =  [[tanh],[sigmoid],[relu]]
activation_names = ['tanh','sigmoid','relu']

table = []
lrs = [0.001,0.002,0.003]

#  minibatch
for idx,activation in enumerate(activations):
    wandb.init(project = "Multilayer Regression Perceptron")
    for lr in lrs:
        for epoch in epochs:
            for size in layer_sizes:
                regressor = MLP(input_size,output_size,num_layers,size,activation,'minibatch',lr)
                size = size[0]
        #       Training
                regressor.training(x_training,y_train,epoch)
        #       Training metrics
                out = regressor.forward(x_training)
                mse_train = mse(y_train,out)
                print("TRAINING")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Training => MSE : {mse_train:.4f}")

        #       Validation
                out = regressor.forward(x_validation)
                mse_val = mean_squared_error(y_val, out)
                rmse_val = mean_squared_error(y_val, out, squared=False)
                r2_val = r2_score(y_val, out)
                print("VALIDATION")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => MSE : {mse_val:.4f}")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => RMSE : {rmse_val:.4f}")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => R2 score : {r2_val:.4f}")

                wandb.log({
                    "learning_rate": lr,
                    "epochs": epoch,
                    "layer_size": size,
                    "Validation MSE": mse_val,
                    "Training MSE": mse_train,
                })
                entry = [lr,epoch,idx,size,mse_val,rmse_val,r2_val]
                table.append(entry)
    wandb.finish()

TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [8] Training => MSE : 108.2059
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => MSE : 114.6147
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => RMSE : 10.7058
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => R2 score : -0.3817
TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [25] Training => MSE : 76.6624
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => MSE : 76.1439
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => RMSE : 8.7260
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => R2 score : 0.0821
TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [64] Training => MSE : 53.7286
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [64] Validation => MSE : 48.8311
Epoch [4000], lr [0.001], activation [tanh], size [64] Validation => RMSE : 6.9879
Epoch [4000], lr [0.001],

TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [8] Training => MSE : 84.8342
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => MSE : 85.7884
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => RMSE : 9.2622
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => R2 score : -0.0342
TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [25] Training => MSE : 54.5475
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => MSE : 51.4656
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => RMSE : 7.1740
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => R2 score : 0.3796
TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [64] Training => MSE : 49.6539
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [64] Validation => MSE : 45.0205
Epoch [8000], lr [0.003], activation [tanh], size [64] Validation => RMSE : 6.7097
Epoch [8000], lr [0.003], ac



0,1
Training MSE,█▅▂▅▄▂▄▄▁▇▄▄▃▄▃▆▄▃▄▃▃▁▂▁▆▂▂
Validation MSE,█▅▃▄▄▁▃▄▂▅▄▃▂▄▃▅▄▃▄▃▂▁▁▂▆▃▂
epochs,▁▁▁▅▅▅███▁▁▁▅▅▅███▁▁▁▅▅▅███
layer_size,▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█
learning_rate,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅█████████

0,1
Training MSE,49.65393
Validation MSE,45.02049
epochs,8000.0
layer_size,64.0
learning_rate,0.003


TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Training => MSE : 100.9590
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => MSE : 100.8371
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => RMSE : 10.0418
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => R2 score : -0.2156
TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Training => MSE : 88.3322
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => MSE : 75.7635
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => RMSE : 8.7042
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => R2 score : 0.0867
TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Training => MSE : 109.6399
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Validation => MSE : 106.2258
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Validation => RMSE

TRAINING
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Training => MSE : 84.5844
VALIDATION
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => MSE : 85.3882
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => RMSE : 9.2406
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => R2 score : -0.0293
TRAINING
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Training => MSE : 58.4641
VALIDATION
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => MSE : 71.1373
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => RMSE : 8.4343
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => R2 score : 0.1424
TRAINING
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Training => MSE : 55.0061
VALIDATION
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Validation => MSE : 46.8388
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Validation => RMSE : 6.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Training MSE,▇▆█▆▂▂▆▃▅▇▆▁▅▂▃▄▂▃▆▂▂▄▂▅▃▂▁
Validation MSE,▇▅█▆▂▂▆▂▆█▆▁▅▂▂▄▂▁▆▃▁▃▂▆▅▂▁
epochs,▁▁▁▅▅▅███▁▁▁▅▅▅███▁▁▁▅▅▅███
layer_size,▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█
learning_rate,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅█████████

0,1
Training MSE,40.2671
Validation MSE,32.6496
epochs,8000.0
layer_size,64.0
learning_rate,0.003


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01116806017752323, max=1.0)…

TRAINING
Epoch [4000], lr [0.001], activation [relu], size [8] Training => MSE : 56.6523
VALIDATION
Epoch [4000], lr [0.001], activation [relu], size [8] Validation => MSE : 44.2571
Epoch [4000], lr [0.001], activation [relu], size [8] Validation => RMSE : 6.6526
Epoch [4000], lr [0.001], activation [relu], size [8] Validation => R2 score : 0.4665
TRAINING
Epoch [4000], lr [0.001], activation [relu], size [25] Training => MSE : 54.3862
VALIDATION
Epoch [4000], lr [0.001], activation [relu], size [25] Validation => MSE : 50.7540
Epoch [4000], lr [0.001], activation [relu], size [25] Validation => RMSE : 7.1242
Epoch [4000], lr [0.001], activation [relu], size [25] Validation => R2 score : 0.3882
TRAINING
Epoch [4000], lr [0.001], activation [relu], size [64] Training => MSE : 95.8152
VALIDATION
Epoch [4000], lr [0.001], activation [relu], size [64] Validation => MSE : 91.5873
Epoch [4000], lr [0.001], activation [relu], size [64] Validation => RMSE : 9.5701
Epoch [4000], lr [0.001], act

TRAINING
Epoch [8000], lr [0.003], activation [relu], size [8] Training => MSE : 37.9988
VALIDATION
Epoch [8000], lr [0.003], activation [relu], size [8] Validation => MSE : 31.1140
Epoch [8000], lr [0.003], activation [relu], size [8] Validation => RMSE : 5.5780
Epoch [8000], lr [0.003], activation [relu], size [8] Validation => R2 score : 0.6249
TRAINING
Epoch [8000], lr [0.003], activation [relu], size [25] Training => MSE : 44.3221
VALIDATION
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => MSE : 33.7956
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => RMSE : 5.8134
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => R2 score : 0.5926
TRAINING
Epoch [8000], lr [0.003], activation [relu], size [64] Training => MSE : 28.5089
VALIDATION
Epoch [8000], lr [0.003], activation [relu], size [64] Validation => MSE : 20.4732
Epoch [8000], lr [0.003], activation [relu], size [64] Validation => RMSE : 4.5247
Epoch [8000], lr [0.003], act



0,1
Training MSE,▄▄█▄▆▁▁▃▅▃▄▃▅▂▂▄▃▃▃▂▃▄▁▅▂▃▁
Validation MSE,▄▄█▃▆▂▁▂▃▃▄▂▅▂▂▄▂▂▃▂▃▃▁▄▃▃▂
epochs,▁▁▁▅▅▅███▁▁▁▅▅▅███▁▁▁▅▅▅███
layer_size,▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█
learning_rate,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅█████████

0,1
Training MSE,28.50888
Validation MSE,20.47324
epochs,8000.0
layer_size,64.0
learning_rate,0.003


In [17]:
table_minibatch = table

In [18]:
table = pd.DataFrame(table_minibatch,columns = ['LR','Epochs','Activation','layer size','MSE','RMSE','R2'])
table

Unnamed: 0,LR,Epochs,Activation,layer size,MSE,RMSE,R2
0,0.001,4000,0,8,114.614665,10.705824,-0.381668
1,0.001,4000,0,25,76.143859,8.726045,0.082094
2,0.001,4000,0,64,48.831065,6.987923,0.411347
3,0.001,6000,0,8,62.682193,7.917209,0.244373
4,0.001,6000,0,25,63.692995,7.980789,0.232188
...,...,...,...,...,...,...,...
76,0.003,6000,2,25,15.882950,3.985342,0.808533
77,0.003,6000,2,64,46.445119,6.815066,0.440109
78,0.003,8000,2,8,31.114032,5.577995,0.624924
79,0.003,8000,2,25,33.795624,5.813400,0.592597


In [20]:
print("miniBatch ReLU Metrics")
df_minibatch_relu = table[table['Activation'] == 2].drop('Activation',axis = 1)
df_minibatch_relu = df_minibatch_relu.sort_values(by='R2', ascending=False)
df_minibatch_relu

miniBatch ReLU Metrics


Unnamed: 0,LR,Epochs,layer size,MSE,RMSE,R2
60,0.001,8000,8,14.538418,3.812928,0.824741
76,0.003,6000,25,15.88295,3.985342,0.808533
80,0.003,8000,64,20.473239,4.524736,0.753197
73,0.003,4000,25,21.621275,4.649868,0.739358
68,0.002,6000,64,23.786021,4.877091,0.713262
65,0.002,4000,64,25.260437,5.025976,0.695488
59,0.001,6000,64,25.322234,5.03212,0.694743
70,0.002,8000,25,25.504503,5.050198,0.692546
61,0.001,8000,25,28.088019,5.299813,0.661402
71,0.002,8000,64,29.766591,5.455877,0.641167


In [21]:
print("MiniBatch tanh Metrics")
df_minibatch_tanh = table[table['Activation'] == 0].drop('Activation',axis = 1)
df_minibatch_tanh = df_minibatch_tanh.sort_values(by='R2', ascending=False)
df_minibatch_tanh

MiniBatch tanh Metrics


Unnamed: 0,LR,Epochs,layer size,MSE,RMSE,R2
5,0.001,6000,64,30.75116,5.545373,0.629298
21,0.003,6000,8,30.936983,5.562102,0.627058
22,0.003,6000,25,34.782239,5.897647,0.580704
23,0.003,6000,64,37.997556,6.164216,0.541943
8,0.001,8000,64,38.666604,6.218248,0.533878
12,0.002,6000,8,39.791286,6.308033,0.52032
20,0.003,4000,64,43.055877,6.561698,0.480966
26,0.003,8000,64,45.020491,6.709731,0.457283
2,0.001,4000,64,48.831065,6.987923,0.411347
17,0.002,8000,64,49.125667,7.00897,0.407795


In [22]:
print("MiniBatch Sigmoid Metrics")
df_minibatch_sigmoid = table[table['Activation'] == 1].drop('Activation',axis = 1)
df_minibatch_sigmoid = df_minibatch_sigmoid.sort_values(by='R2', ascending=False)
df_minibatch_sigmoid

MiniBatch Sigmoid Metrics


Unnamed: 0,LR,Epochs,layer size,MSE,RMSE,R2
53,0.003,8000,64,32.649604,5.713983,0.606412
38,0.002,4000,64,33.358381,5.775671,0.597868
47,0.003,4000,64,34.056296,5.835777,0.589455
44,0.002,8000,64,35.749799,5.979114,0.56904
31,0.001,6000,25,38.351227,6.192837,0.53768
41,0.002,6000,64,41.488192,6.441133,0.499864
43,0.002,8000,25,41.947328,6.476676,0.494329
32,0.001,6000,64,42.559938,6.523798,0.486944
49,0.003,6000,25,42.832264,6.544636,0.483661
40,0.002,6000,25,45.249849,6.726801,0.454518


In [23]:
epochs = [4000,6000,8000]
layer_sizes = [[8],[25],[64]]
activations =  [[tanh],[sigmoid],[relu]]
activation_names = ['tanh','sigmoid','relu']

table = []
lrs = [0.001,0.002,0.003]

#  sgd
for idx,activation in enumerate(activations):
    wandb.init(project = "Multilayer Regression Perceptron")
    for lr in lrs:
        for epoch in epochs:
            for size in layer_sizes:
                regressor = MLP(input_size,output_size,num_layers,size,activation,'sgd',lr)
                size = size[0]
        #       Training
                regressor.training(x_training,y_train,epoch)
        #       Training metrics
                out = regressor.forward(x_training)
                mse_train = mse(y_train,out)
                print("TRAINING")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Training => MSE : {mse_train:.4f}")

        #       Validation
                out = regressor.forward(x_validation)
                mse_val = mean_squared_error(y_val, out)
                rmse_val = mean_squared_error(y_val, out, squared=False)
                r2_val = r2_score(y_val, out)
                print("VALIDATION")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => MSE : {mse_val:.4f}")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => RMSE : {rmse_val:.4f}")
                print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => R2 score : {r2_val:.4f}")

                wandb.log({
                    "learning_rate": lr,
                    "epochs": epoch,
                    "layer_size": size,
                    "Validation MSE": mse_val,
                    "Training MSE": mse_train,
                })
                entry = [lr,epoch,idx,size,mse_val,rmse_val,r2_val]
                table.append(entry)
    wandb.finish()

TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [8] Training => MSE : 72.7614
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => MSE : 49.3259
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => RMSE : 7.0232
Epoch [4000], lr [0.001], activation [tanh], size [8] Validation => R2 score : 0.4054
TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [25] Training => MSE : 91.5097
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => MSE : 103.1017
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => RMSE : 10.1539
Epoch [4000], lr [0.001], activation [tanh], size [25] Validation => R2 score : -0.2429
TRAINING
Epoch [4000], lr [0.001], activation [tanh], size [64] Training => MSE : 82.7127
VALIDATION
Epoch [4000], lr [0.001], activation [tanh], size [64] Validation => MSE : 85.3875
Epoch [4000], lr [0.001], activation [tanh], size [64] Validation => RMSE : 9.2405
Epoch [4000], lr [0.001], 

  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.001], activation [tanh], size [8] Training => MSE : 54.4297
VALIDATION
Epoch [6000], lr [0.001], activation [tanh], size [8] Validation => MSE : 50.3490
Epoch [6000], lr [0.001], activation [tanh], size [8] Validation => RMSE : 7.0957
Epoch [6000], lr [0.001], activation [tanh], size [8] Validation => R2 score : 0.3930


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.001], activation [tanh], size [25] Training => MSE : 64.8484
VALIDATION
Epoch [6000], lr [0.001], activation [tanh], size [25] Validation => MSE : 59.5903
Epoch [6000], lr [0.001], activation [tanh], size [25] Validation => RMSE : 7.7195
Epoch [6000], lr [0.001], activation [tanh], size [25] Validation => R2 score : 0.2816


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.001], activation [tanh], size [64] Training => MSE : 70.3014
VALIDATION
Epoch [6000], lr [0.001], activation [tanh], size [64] Validation => MSE : 65.1209
Epoch [6000], lr [0.001], activation [tanh], size [64] Validation => RMSE : 8.0698
Epoch [6000], lr [0.001], activation [tanh], size [64] Validation => R2 score : 0.2150
TRAINING
Epoch [8000], lr [0.001], activation [tanh], size [8] Training => MSE : 85.7932
VALIDATION
Epoch [8000], lr [0.001], activation [tanh], size [8] Validation => MSE : 83.6950
Epoch [8000], lr [0.001], activation [tanh], size [8] Validation => RMSE : 9.1485
Epoch [8000], lr [0.001], activation [tanh], size [8] Validation => R2 score : -0.0089
TRAINING
Epoch [8000], lr [0.001], activation [tanh], size [25] Training => MSE : 66.1992
VALIDATION
Epoch [8000], lr [0.001], activation [tanh], size [25] Validation => MSE : 59.9467
Epoch [8000], lr [0.001], activation [tanh], size [25] Validation => RMSE : 7.7425
Epoch [8000], lr [0.001], ac

  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [4000], lr [0.002], activation [tanh], size [8] Training => MSE : 77.8413
VALIDATION
Epoch [4000], lr [0.002], activation [tanh], size [8] Validation => MSE : 71.0085
Epoch [4000], lr [0.002], activation [tanh], size [8] Validation => RMSE : 8.4267
Epoch [4000], lr [0.002], activation [tanh], size [8] Validation => R2 score : 0.1440


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [4000], lr [0.002], activation [tanh], size [25] Training => MSE : 69.5810
VALIDATION
Epoch [4000], lr [0.002], activation [tanh], size [25] Validation => MSE : 52.9426
Epoch [4000], lr [0.002], activation [tanh], size [25] Validation => RMSE : 7.2762
Epoch [4000], lr [0.002], activation [tanh], size [25] Validation => R2 score : 0.3618


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [4000], lr [0.002], activation [tanh], size [64] Training => MSE : 70.3536
VALIDATION
Epoch [4000], lr [0.002], activation [tanh], size [64] Validation => MSE : 57.4037
Epoch [4000], lr [0.002], activation [tanh], size [64] Validation => RMSE : 7.5765
Epoch [4000], lr [0.002], activation [tanh], size [64] Validation => R2 score : 0.3080


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.002], activation [tanh], size [8] Training => MSE : 63.0050
VALIDATION
Epoch [6000], lr [0.002], activation [tanh], size [8] Validation => MSE : 55.8488
Epoch [6000], lr [0.002], activation [tanh], size [8] Validation => RMSE : 7.4732
Epoch [6000], lr [0.002], activation [tanh], size [8] Validation => R2 score : 0.3267


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.002], activation [tanh], size [25] Training => MSE : 66.2525
VALIDATION
Epoch [6000], lr [0.002], activation [tanh], size [25] Validation => MSE : 56.7593
Epoch [6000], lr [0.002], activation [tanh], size [25] Validation => RMSE : 7.5339
Epoch [6000], lr [0.002], activation [tanh], size [25] Validation => R2 score : 0.3158


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.002], activation [tanh], size [64] Training => MSE : 58.1557
VALIDATION
Epoch [6000], lr [0.002], activation [tanh], size [64] Validation => MSE : 53.9230
Epoch [6000], lr [0.002], activation [tanh], size [64] Validation => RMSE : 7.3432
Epoch [6000], lr [0.002], activation [tanh], size [64] Validation => R2 score : 0.3500


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [8000], lr [0.002], activation [tanh], size [8] Training => MSE : 79.5713
VALIDATION
Epoch [8000], lr [0.002], activation [tanh], size [8] Validation => MSE : 70.2873
Epoch [8000], lr [0.002], activation [tanh], size [8] Validation => RMSE : 8.3837
Epoch [8000], lr [0.002], activation [tanh], size [8] Validation => R2 score : 0.1527
TRAINING
Epoch [8000], lr [0.002], activation [tanh], size [25] Training => MSE : 81.8924
VALIDATION
Epoch [8000], lr [0.002], activation [tanh], size [25] Validation => MSE : 78.0110
Epoch [8000], lr [0.002], activation [tanh], size [25] Validation => RMSE : 8.8324
Epoch [8000], lr [0.002], activation [tanh], size [25] Validation => R2 score : 0.0596


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [8000], lr [0.002], activation [tanh], size [64] Training => MSE : 78.9576
VALIDATION
Epoch [8000], lr [0.002], activation [tanh], size [64] Validation => MSE : 74.6062
Epoch [8000], lr [0.002], activation [tanh], size [64] Validation => RMSE : 8.6375
Epoch [8000], lr [0.002], activation [tanh], size [64] Validation => R2 score : 0.1006


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [4000], lr [0.003], activation [tanh], size [8] Training => MSE : 61.2904
VALIDATION
Epoch [4000], lr [0.003], activation [tanh], size [8] Validation => MSE : 57.9802
Epoch [4000], lr [0.003], activation [tanh], size [8] Validation => RMSE : 7.6145
Epoch [4000], lr [0.003], activation [tanh], size [8] Validation => R2 score : 0.3011


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [4000], lr [0.003], activation [tanh], size [25] Training => MSE : 72.2839
VALIDATION
Epoch [4000], lr [0.003], activation [tanh], size [25] Validation => MSE : 65.0247
Epoch [4000], lr [0.003], activation [tanh], size [25] Validation => RMSE : 8.0638
Epoch [4000], lr [0.003], activation [tanh], size [25] Validation => R2 score : 0.2161


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [4000], lr [0.003], activation [tanh], size [64] Training => MSE : 111.2568
VALIDATION
Epoch [4000], lr [0.003], activation [tanh], size [64] Validation => MSE : 91.8784
Epoch [4000], lr [0.003], activation [tanh], size [64] Validation => RMSE : 9.5853
Epoch [4000], lr [0.003], activation [tanh], size [64] Validation => R2 score : -0.1076


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.003], activation [tanh], size [8] Training => MSE : 77.9380
VALIDATION
Epoch [6000], lr [0.003], activation [tanh], size [8] Validation => MSE : 65.5542
Epoch [6000], lr [0.003], activation [tanh], size [8] Validation => RMSE : 8.0966
Epoch [6000], lr [0.003], activation [tanh], size [8] Validation => R2 score : 0.2098


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.003], activation [tanh], size [25] Training => MSE : 68.6231
VALIDATION
Epoch [6000], lr [0.003], activation [tanh], size [25] Validation => MSE : 57.5881
Epoch [6000], lr [0.003], activation [tanh], size [25] Validation => RMSE : 7.5887
Epoch [6000], lr [0.003], activation [tanh], size [25] Validation => R2 score : 0.3058


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [6000], lr [0.003], activation [tanh], size [64] Training => MSE : 68.1864
VALIDATION
Epoch [6000], lr [0.003], activation [tanh], size [64] Validation => MSE : 62.8273
Epoch [6000], lr [0.003], activation [tanh], size [64] Validation => RMSE : 7.9264
Epoch [6000], lr [0.003], activation [tanh], size [64] Validation => R2 score : 0.2426


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [8] Training => MSE : 67.1780
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => MSE : 55.6723
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => RMSE : 7.4614
Epoch [8000], lr [0.003], activation [tanh], size [8] Validation => R2 score : 0.3289


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [25] Training => MSE : 64.5443
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => MSE : 55.1383
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => RMSE : 7.4255
Epoch [8000], lr [0.003], activation [tanh], size [25] Validation => R2 score : 0.3353


  return 2 / (1 + np.exp(-2 * y)) - 1


TRAINING
Epoch [8000], lr [0.003], activation [tanh], size [64] Training => MSE : 53.5870
VALIDATION
Epoch [8000], lr [0.003], activation [tanh], size [64] Validation => MSE : 48.7607
Epoch [8000], lr [0.003], activation [tanh], size [64] Validation => RMSE : 6.9829
Epoch [8000], lr [0.003], activation [tanh], size [64] Validation => R2 score : 0.4122




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Training MSE,▃▆▅▁▂▃▅▃▁▄▃▃▂▃▂▄▄▄▂▃█▄▃▃▃▂▁
Validation MSE,▁█▆▁▂▃▆▂▂▄▂▂▂▂▂▄▅▄▂▃▇▃▂▃▂▂▁
epochs,▁▁▁▅▅▅███▁▁▁▅▅▅███▁▁▁▅▅▅███
layer_size,▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█
learning_rate,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅█████████

0,1
Training MSE,53.58696
Validation MSE,48.76073
epochs,8000.0
layer_size,64.0
learning_rate,0.003


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Training => MSE : 74.4689
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => MSE : 63.7888
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => RMSE : 7.9868
Epoch [4000], lr [0.001], activation [sigmoid], size [8] Validation => R2 score : 0.2310


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Training => MSE : 58.7965
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => MSE : 53.4427
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => RMSE : 7.3104
Epoch [4000], lr [0.001], activation [sigmoid], size [25] Validation => R2 score : 0.3558


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Training => MSE : 64.8285
VALIDATION
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Validation => MSE : 56.7670
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Validation => RMSE : 7.5344
Epoch [4000], lr [0.001], activation [sigmoid], size [64] Validation => R2 score : 0.3157


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.001], activation [sigmoid], size [8] Training => MSE : 50.9096
VALIDATION
Epoch [6000], lr [0.001], activation [sigmoid], size [8] Validation => MSE : 44.3352
Epoch [6000], lr [0.001], activation [sigmoid], size [8] Validation => RMSE : 6.6585
Epoch [6000], lr [0.001], activation [sigmoid], size [8] Validation => R2 score : 0.4655


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.001], activation [sigmoid], size [25] Training => MSE : 62.8144
VALIDATION
Epoch [6000], lr [0.001], activation [sigmoid], size [25] Validation => MSE : 55.4612
Epoch [6000], lr [0.001], activation [sigmoid], size [25] Validation => RMSE : 7.4472
Epoch [6000], lr [0.001], activation [sigmoid], size [25] Validation => R2 score : 0.3314


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.001], activation [sigmoid], size [64] Training => MSE : 92.0632
VALIDATION
Epoch [6000], lr [0.001], activation [sigmoid], size [64] Validation => MSE : 82.2557
Epoch [6000], lr [0.001], activation [sigmoid], size [64] Validation => RMSE : 9.0695
Epoch [6000], lr [0.001], activation [sigmoid], size [64] Validation => R2 score : 0.0084


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.001], activation [sigmoid], size [8] Training => MSE : 58.1505
VALIDATION
Epoch [8000], lr [0.001], activation [sigmoid], size [8] Validation => MSE : 49.7422
Epoch [8000], lr [0.001], activation [sigmoid], size [8] Validation => RMSE : 7.0528
Epoch [8000], lr [0.001], activation [sigmoid], size [8] Validation => R2 score : 0.4004


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.001], activation [sigmoid], size [25] Training => MSE : 57.9634
VALIDATION
Epoch [8000], lr [0.001], activation [sigmoid], size [25] Validation => MSE : 54.6234
Epoch [8000], lr [0.001], activation [sigmoid], size [25] Validation => RMSE : 7.3908
Epoch [8000], lr [0.001], activation [sigmoid], size [25] Validation => R2 score : 0.3415


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.001], activation [sigmoid], size [64] Training => MSE : 68.6838
VALIDATION
Epoch [8000], lr [0.001], activation [sigmoid], size [64] Validation => MSE : 58.6680
Epoch [8000], lr [0.001], activation [sigmoid], size [64] Validation => RMSE : 7.6595
Epoch [8000], lr [0.001], activation [sigmoid], size [64] Validation => R2 score : 0.2928


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.002], activation [sigmoid], size [8] Training => MSE : 73.6526
VALIDATION
Epoch [4000], lr [0.002], activation [sigmoid], size [8] Validation => MSE : 62.3244
Epoch [4000], lr [0.002], activation [sigmoid], size [8] Validation => RMSE : 7.8946
Epoch [4000], lr [0.002], activation [sigmoid], size [8] Validation => R2 score : 0.2487


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.002], activation [sigmoid], size [25] Training => MSE : 65.7596
VALIDATION
Epoch [4000], lr [0.002], activation [sigmoid], size [25] Validation => MSE : 56.8799
Epoch [4000], lr [0.002], activation [sigmoid], size [25] Validation => RMSE : 7.5419
Epoch [4000], lr [0.002], activation [sigmoid], size [25] Validation => R2 score : 0.3143


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.002], activation [sigmoid], size [64] Training => MSE : 65.6154
VALIDATION
Epoch [4000], lr [0.002], activation [sigmoid], size [64] Validation => MSE : 59.4375
Epoch [4000], lr [0.002], activation [sigmoid], size [64] Validation => RMSE : 7.7096
Epoch [4000], lr [0.002], activation [sigmoid], size [64] Validation => R2 score : 0.2835


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.002], activation [sigmoid], size [8] Training => MSE : 75.8664
VALIDATION
Epoch [6000], lr [0.002], activation [sigmoid], size [8] Validation => MSE : 59.8034
Epoch [6000], lr [0.002], activation [sigmoid], size [8] Validation => RMSE : 7.7333
Epoch [6000], lr [0.002], activation [sigmoid], size [8] Validation => R2 score : 0.2791


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.002], activation [sigmoid], size [25] Training => MSE : 69.3238
VALIDATION
Epoch [6000], lr [0.002], activation [sigmoid], size [25] Validation => MSE : 60.1565
Epoch [6000], lr [0.002], activation [sigmoid], size [25] Validation => RMSE : 7.7561
Epoch [6000], lr [0.002], activation [sigmoid], size [25] Validation => R2 score : 0.2748


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.002], activation [sigmoid], size [64] Training => MSE : 66.6132
VALIDATION
Epoch [6000], lr [0.002], activation [sigmoid], size [64] Validation => MSE : 63.5916
Epoch [6000], lr [0.002], activation [sigmoid], size [64] Validation => RMSE : 7.9744
Epoch [6000], lr [0.002], activation [sigmoid], size [64] Validation => R2 score : 0.2334


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.002], activation [sigmoid], size [8] Training => MSE : 69.2973
VALIDATION
Epoch [8000], lr [0.002], activation [sigmoid], size [8] Validation => MSE : 63.5109
Epoch [8000], lr [0.002], activation [sigmoid], size [8] Validation => RMSE : 7.9694
Epoch [8000], lr [0.002], activation [sigmoid], size [8] Validation => R2 score : 0.2344


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.002], activation [sigmoid], size [25] Training => MSE : 66.6844
VALIDATION
Epoch [8000], lr [0.002], activation [sigmoid], size [25] Validation => MSE : 63.5509
Epoch [8000], lr [0.002], activation [sigmoid], size [25] Validation => RMSE : 7.9719
Epoch [8000], lr [0.002], activation [sigmoid], size [25] Validation => R2 score : 0.2339


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.002], activation [sigmoid], size [64] Training => MSE : 61.1348
VALIDATION
Epoch [8000], lr [0.002], activation [sigmoid], size [64] Validation => MSE : 53.7911
Epoch [8000], lr [0.002], activation [sigmoid], size [64] Validation => RMSE : 7.3342
Epoch [8000], lr [0.002], activation [sigmoid], size [64] Validation => R2 score : 0.3516


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.003], activation [sigmoid], size [8] Training => MSE : 71.2432
VALIDATION
Epoch [4000], lr [0.003], activation [sigmoid], size [8] Validation => MSE : 58.4000
Epoch [4000], lr [0.003], activation [sigmoid], size [8] Validation => RMSE : 7.6420
Epoch [4000], lr [0.003], activation [sigmoid], size [8] Validation => R2 score : 0.2960


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.003], activation [sigmoid], size [25] Training => MSE : 72.4950
VALIDATION
Epoch [4000], lr [0.003], activation [sigmoid], size [25] Validation => MSE : 60.8795
Epoch [4000], lr [0.003], activation [sigmoid], size [25] Validation => RMSE : 7.8025
Epoch [4000], lr [0.003], activation [sigmoid], size [25] Validation => R2 score : 0.2661


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [4000], lr [0.003], activation [sigmoid], size [64] Training => MSE : 112.4938
VALIDATION
Epoch [4000], lr [0.003], activation [sigmoid], size [64] Validation => MSE : 130.4418
Epoch [4000], lr [0.003], activation [sigmoid], size [64] Validation => RMSE : 11.4211
Epoch [4000], lr [0.003], activation [sigmoid], size [64] Validation => R2 score : -0.5725


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.003], activation [sigmoid], size [8] Training => MSE : 76.1540
VALIDATION
Epoch [6000], lr [0.003], activation [sigmoid], size [8] Validation => MSE : 61.9012
Epoch [6000], lr [0.003], activation [sigmoid], size [8] Validation => RMSE : 7.8677
Epoch [6000], lr [0.003], activation [sigmoid], size [8] Validation => R2 score : 0.2538


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.003], activation [sigmoid], size [25] Training => MSE : 60.6841
VALIDATION
Epoch [6000], lr [0.003], activation [sigmoid], size [25] Validation => MSE : 54.9392
Epoch [6000], lr [0.003], activation [sigmoid], size [25] Validation => RMSE : 7.4121
Epoch [6000], lr [0.003], activation [sigmoid], size [25] Validation => R2 score : 0.3377


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Training => MSE : 74.7648
VALIDATION
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => MSE : 55.8322
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => RMSE : 7.4721
Epoch [6000], lr [0.003], activation [sigmoid], size [64] Validation => R2 score : 0.3269


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Training => MSE : 69.3430
VALIDATION
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => MSE : 63.2328
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => RMSE : 7.9519
Epoch [8000], lr [0.003], activation [sigmoid], size [8] Validation => R2 score : 0.2377


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Training => MSE : 67.8015
VALIDATION
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Validation => MSE : 56.0400
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Validation => RMSE : 7.4860
Epoch [8000], lr [0.003], activation [sigmoid], size [25] Validation => R2 score : 0.3244


  return 1 / (1 + np.exp(-y))


TRAINING
Epoch [8000], lr [0.003], activation [sigmoid], size [64] Training => MSE : 86.1753
VALIDATION
Epoch [8000], lr [0.003], activation [sigmoid], size [64] Validation => MSE : 82.1335
Epoch [8000], lr [0.003], activation [sigmoid], size [64] Validation => RMSE : 9.0628
Epoch [8000], lr [0.003], activation [sigmoid], size [64] Validation => R2 score : 0.0099




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Training MSE,▄▂▃▁▂▆▂▂▃▄▃▃▄▃▃▃▃▂▃▃█▄▂▄▃▃▅
Validation MSE,▃▂▂▁▂▄▁▂▂▂▂▂▂▂▃▃▃▂▂▂█▂▂▂▃▂▄
epochs,▁▁▁▅▅▅███▁▁▁▅▅▅███▁▁▁▅▅▅███
layer_size,▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█▁▃█
learning_rate,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅█████████

0,1
Training MSE,86.17529
Validation MSE,82.13354
epochs,8000.0
layer_size,64.0
learning_rate,0.003


  z = inp @ self.w_and_b[self.num_layers].T
  grad_w_and_b = grad_y_out.T @ self.layer_inputs[-1]
  grad_z = grad_y*self.relu_grad(self.layer_outputs[layer])
  z = inp @ self.w_and_b[layer].T
  z = inp @ self.w_and_b[self.num_layers].T


TRAINING
Epoch [4000], lr [0.001], activation [relu], size [8] Training => MSE : nan


ValueError: Input contains NaN.

In [24]:
table_sgd = table

In [25]:
table = pd.DataFrame(table_sgd,columns = ['LR','Epochs','Activation','layer size','MSE','RMSE','R2'])
table

Unnamed: 0,LR,Epochs,Activation,layer size,MSE,RMSE,R2
0,0.001,4000,0,8,49.325946,7.023243,0.405381
1,0.001,4000,0,25,103.101655,10.153899,-0.24288
2,0.001,4000,0,64,85.387466,9.240534,-0.029337
3,0.001,6000,0,8,50.349019,7.095704,0.393048
4,0.001,6000,0,25,59.590267,7.719473,0.281645
5,0.001,6000,0,64,65.120898,8.069752,0.214974
6,0.001,8000,0,8,83.694952,9.148495,-0.008934
7,0.001,8000,0,25,59.946684,7.742524,0.277349
8,0.001,8000,0,64,52.737748,7.262076,0.364252
9,0.002,4000,0,8,71.008493,8.426654,0.144


In [26]:
print("sgd Sigmoid Metrics")
df_sgd_sigmoid = table[table['Activation'] == 1].drop('Activation',axis = 1)
df_sgd_sigmoid = df_sgd_sigmoid.sort_values(by='R2', ascending=False)
df_sgd_sigmoid

sgd Sigmoid Metrics


Unnamed: 0,LR,Epochs,layer size,MSE,RMSE,R2
30,0.001,6000,8,44.335175,6.658466,0.465544
33,0.001,8000,8,49.742186,7.052814,0.400363
28,0.001,4000,25,53.442658,7.310449,0.355754
44,0.002,8000,64,53.791109,7.334242,0.351554
34,0.001,8000,25,54.623442,7.390767,0.34152
49,0.003,6000,25,54.939164,7.412096,0.337714
31,0.001,6000,25,55.46121,7.447228,0.331421
50,0.003,6000,64,55.832222,7.472096,0.326948
52,0.003,8000,25,56.040012,7.485988,0.324443
29,0.001,4000,64,56.767009,7.534388,0.31568


In [27]:
print("sgd tanh Metrics")
df_sgd_tanh = table[table['Activation'] == 0].drop('Activation',axis = 1)
df_sgd_tanh = df_sgd_tanh.sort_values(by='R2', ascending=False)
df_sgd_tanh

sgd tanh Metrics


Unnamed: 0,LR,Epochs,layer size,MSE,RMSE,R2
26,0.003,8000,64,48.760725,6.982888,0.412194
0,0.001,4000,8,49.325946,7.023243,0.405381
3,0.001,6000,8,50.349019,7.095704,0.393048
8,0.001,8000,64,52.737748,7.262076,0.364252
10,0.002,4000,25,52.942552,7.276163,0.361783
14,0.002,6000,64,53.922991,7.343228,0.349964
25,0.003,8000,25,55.138299,7.425517,0.335313
24,0.003,8000,8,55.672277,7.461386,0.328876
12,0.002,6000,8,55.848812,7.473206,0.326748
13,0.002,6000,25,56.759333,7.533879,0.315772


# best Architecture
### Minibatch with layer size 25, lr = 0.003, activation = relu and trained for epochs 8000

In [85]:
size = 25
lr = 0.003
epoch = 8000
activation = [relu]

regressor = MLP(input_size,output_size,num_layers,[size],activation,'minibatch',lr)
#       Training
regressor.training(x_training,y_train,epoch)
#       Training metrics
out = regressor.forward(x_training)
mse_train = mse(y_train,out)
print("TRAINING")
print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Training => MSE : {mse_train:.4f}")

#       Validation
out = regressor.forward(x_validation)
mse_val = mean_squared_error(y_val, out)
rmse_val = mean_squared_error(y_val, out, squared=False)
r2_val = r2_score(y_val, out)
print("VALIDATION")
print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => MSE : {mse_val:.4f}")
print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => RMSE : {rmse_val:.4f}")
print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => R2 score : {r2_val:.4f}")

TRAINING
Epoch [8000], lr [0.003], activation [relu], size [25] Training => MSE : 25.6852
VALIDATION
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => MSE : 14.581198
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => RMSE : 3.818534
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => R2 score : 0.824225


In [83]:
#       testing
out = regressor.forward(x_testing)
mse_val = mean_squared_error(y_test, out)
rmse_val = mean_squared_error(y_test, out, squared=False)
r2_val = r2_score(y_test, out)
print("Testing")
print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => MSE : {mse_val:.4f}")
print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => RMSE : {rmse_val:.4f}")
print(f"Epoch [{epoch}], lr [{lr}], activation [{activation_names[idx]}], size [{size}] Validation => R2 score : {r2_val:.4f}")

Testing
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => MSE : 20.473239
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => RMSE : 4.524736
Epoch [8000], lr [0.003], activation [relu], size [25] Validation => R2 score : 0.753197
