In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import wandb
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report

In [2]:
wine_dataset = pd.read_csv('WineQT.csv')
wine_dataset.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,2
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,3
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,4


In [3]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33marjundosajh100[0m ([33marjundosajh[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
X = wine_dataset.drop('quality', axis=1)
y = wine_dataset['quality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=52)

print(f"Size of training set = {len(X_train)}\nSize of validation set = {len(X_val)}\nSize of test set = {len(X_test)}")

Size of training set = 925
Size of validation set = 103
Size of test set = 115


In [5]:
X_train = X_train.to_numpy()
y_train = y_train.to_numpy()
y_train -= 3 # To make the range of values from 0 to 5
print(y_train[:100])

X_val = X_val.to_numpy()
y_val = y_val.to_numpy()
y_val -= 3 # To make the range of values from 0 to 5

X_test = X_test.to_numpy()
y_test = y_test.to_numpy()
y_test -= 3 # To make the range of values from 0 to 5

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.fit_transform(X_val)
X_test = scaler.fit_transform(X_test)

[3 3 4 3 2 2 3 3 3 3 2 0 4 2 3 2 4 2 3 1 2 3 3 3 2 3 3 3 2 2 3 2 3 2 2 4 1
 3 2 3 3 2 2 3 3 3 2 3 3 2 2 4 2 3 3 2 2 2 3 3 3 3 2 4 3 3 2 2 2 2 3 2 4 2
 2 3 2 2 4 2 3 2 4 3 2 3 2 3 1 3 2 2 4 3 2 3 3 2 3 1]


In [6]:
X_train.shape, y_train.shape

((925, 12), (925,))

In [7]:
class MultiLayerPerceptronClassifier():
    def __init__(self, X_train, y_train, X_val, y_val, hidden_layers, activation_function="sigmoid", learning_rate=0.01, optimizer="sgd", log_to_wandb=False):
        self.log_to_wandb = log_to_wandb
        self.X_train = X_train
        self.y_train = self.one_hot_encode(y_train)
        self.X_val = X_val
        self.y_val = self.one_hot_encode(y_val)

        self.input_size = X_train.shape[1]
        self.output_size = self.y_train.shape[1]
        self.learning_rate = learning_rate
        self.layers = [self.input_size] + hidden_layers + [self.output_size]
        self.weights = [np.random.randn(self.layers[i], self.layers[i+1]) for i in range(len(self.layers) - 1)]
        self.biases = [np.random.randn(1, self.layers[i+1]) for i in range(len(self.layers) - 1)]
        
        if activation_function == "linear":
            self.activation = self.linear
            self.activation_derivative = self.linear_derivative
        elif activation_function == "sigmoid":
            self.activation = self.sigmoid
            self.activation_derivative = self.sigmoid_derivative
        elif activation_function == "tanh":
            self.activation = self.tanh
            self.activation_derivative = self.tanh_derivative
        elif activation_function == "relu":
            self.activation = self.relu
            self.activation_derivative = self.relu_derivative
        
        self.optimizer = optimizer

    def linear(self, x):
        return x
    
    def linear_derivative(self, x):
        return 1

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def tanh(self, x):
        return np.tanh(x)

    def tanh_derivative(self, x):
        return 1.0 - np.tanh(x)**2

    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        return np.where(x > 0, 1, 0)

    def one_hot_encode(self, y):
        n_values = int(np.max(y)) + 1
        return np.eye(n_values)[y]
    
    def predict(self, X):
        outputs = self.forward(X)
        # Get the index of the maximum value in each output (this corresponds to the predicted class)
        predictions = np.argmax(outputs, axis=1)
        return predictions

    def accuracy(self, X, y_true):
        y_pred = self.predict(X)
        correct_predictions = np.sum(y_pred == y_true)
        total_samples = len(y_true)
        return (correct_predictions / total_samples) * 100 # returns percentage accuracy
    
    def compute_loss(self, y_true, y_pred):
        # I have uesd the MSE loss function
        m = y_true.shape[0]
        loss = (1 / (2 * m)) * np.sum(np.square(y_pred - y_true))
        return loss

    def forward(self, input_data):
        input_data = np.atleast_2d(input_data) # to make sure that input data has atleast 2 dimensions
        
        self.a_values = [input_data]
        self.z_values = []

        for i in range(len(self.weights)):
            z = np.dot(self.a_values[-1], self.weights[i]) + self.biases[i]
            a = self.activation(z)
            self.z_values.append(z)
            self.a_values.append(a)

        return self.a_values[-1]

    def backpropagation(self, X, y):
        m = X.shape[0]
        self.dz_values = [self.a_values[-1] - y]
        self.dw_values = [np.dot(self.a_values[-2].T, self.dz_values[0]) / m]
        self.db_values = [np.sum(self.dz_values[0], axis=0, keepdims=True) / m]

        for i in range(len(self.weights) - 1, 0, -1):
            dz = np.dot(self.dz_values[0], self.weights[i].T) * self.activation_derivative(self.a_values[i])
            dw = np.dot(self.a_values[i-1].T, dz) / m
            db = np.sum(dz, axis=0, keepdims=True) / m
            self.dz_values.insert(0, dz)
            self.dw_values.insert(0, dw)
            self.db_values.insert(0, db)

    def update_weights(self):
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * self.dw_values[i]
            self.biases[i] -= self.learning_rate * self.db_values[i]

    def train(self, epochs=100, batch_size=None):
        if self.optimizer == "sgd":
            for epoch in range(epochs):
                for x, target in zip(self.X_train, self.y_train):
                    self.forward(x)
                    self.backpropagation(x.reshape(1, -1), target.reshape(1, -1))
                    self.update_weights()
                self.print_epoch_stats(epoch, epochs)
        
        elif self.optimizer == "bgd":
            for epoch in range(epochs):
                self.forward(self.X_train)
                self.backpropagation(self.X_train, self.y_train)
                self.update_weights()
                self.print_epoch_stats(epoch, epochs)
        
        elif self.optimizer == "mbgd":
            n_samples = self.X_train.shape[0]
            n_batches = n_samples // batch_size

            for epoch in range(epochs):
                for i in range(0, n_samples, batch_size):
                    end = i + batch_size if i + batch_size <= n_samples else n_samples
                    batch_X, batch_y = self.X_train[i:end], self.y_train[i:end]
                    self.forward(batch_X)
                    self.backpropagation(batch_X, batch_y)
                    self.update_weights()
                self.print_epoch_stats(epoch, epochs)

    def print_epoch_stats(self, epoch, epochs):
        train_predictions = self.forward(self.X_train)
        train_loss = self.compute_loss(self.y_train, train_predictions)
        
        train_pred_labels = np.argmax(train_predictions, axis=1) # one hot encoded lables to class labels
        train_true_labels = np.argmax(self.y_train, axis=1)
        
        val_predictions = self.forward(self.X_val)
        val_loss = self.compute_loss(self.y_val, val_predictions)
        
        val_pred_labels = np.argmax(val_predictions, axis=1)
        val_true_labels = np.argmax(self.y_val, axis=1)

        # Computing various metrics using sklearn.metrics
        train_accuracy = accuracy_score(train_true_labels, train_pred_labels)
        train_precision = precision_score(train_true_labels, train_pred_labels, average='macro', zero_division=1)
        train_recall = recall_score(train_true_labels, train_pred_labels, average='macro')
        train_f1 = f1_score(train_true_labels, train_pred_labels, average='macro')
        
        val_accuracy = accuracy_score(val_true_labels, val_pred_labels)
        val_precision = precision_score(val_true_labels, val_pred_labels, average='macro', zero_division=1)
        val_recall = recall_score(val_true_labels, val_pred_labels, average='macro')
        val_f1 = f1_score(val_true_labels, val_pred_labels, average='macro')

        # Log the metrics to wandb
        if self.log_to_wandb:
            metrics = {
                "train_loss": train_loss, 
                "train_accuracy": train_accuracy, 
                "train_precision": train_precision, 
                "train_recall": train_recall, 
                "train_f1": train_f1,
                "val_loss": val_loss, 
                "val_accuracy": val_accuracy, 
                "val_precision": val_precision, 
                "val_recall": val_recall, 
                "val_f1": val_f1,
                "epoch": epoch + 1
            }
            wandb.log(metrics)

        # print the metrics
        else:
            print(f"Epoch {epoch + 1}/{epochs} - Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%")

In [8]:
mlp = MultiLayerPerceptronClassifier(X_train, y_train, X_val, y_val, hidden_layers=[6, 6], activation_function="tanh", learning_rate=0.01, optimizer="sgd")
mlp.train(epochs=100)

Epoch 1/100 - Training Loss: 0.3371, Training Accuracy: 0.57%
Epoch 2/100 - Training Loss: 0.3128, Training Accuracy: 0.56%
Epoch 3/100 - Training Loss: 0.3054, Training Accuracy: 0.57%
Epoch 4/100 - Training Loss: 0.3010, Training Accuracy: 0.58%
Epoch 5/100 - Training Loss: 0.2975, Training Accuracy: 0.58%
Epoch 6/100 - Training Loss: 0.2945, Training Accuracy: 0.58%
Epoch 7/100 - Training Loss: 0.2919, Training Accuracy: 0.58%
Epoch 8/100 - Training Loss: 0.2895, Training Accuracy: 0.59%
Epoch 9/100 - Training Loss: 0.2872, Training Accuracy: 0.60%
Epoch 10/100 - Training Loss: 0.2851, Training Accuracy: 0.60%
Epoch 11/100 - Training Loss: 0.2835, Training Accuracy: 0.60%
Epoch 12/100 - Training Loss: 0.2822, Training Accuracy: 0.60%
Epoch 13/100 - Training Loss: 0.2811, Training Accuracy: 0.60%
Epoch 14/100 - Training Loss: 0.2801, Training Accuracy: 0.60%
Epoch 15/100 - Training Loss: 0.2792, Training Accuracy: 0.60%
Epoch 16/100 - Training Loss: 0.2785, Training Accuracy: 0.60%
E

## Hyperparameter Tuning - Trying out different activation functions and optimizers

In [9]:
activation_functions = ['relu', 'sigmoid', 'tanh']
optimizers = ['sgd', 'bgd', 'mbgd']
hidden_layers = [6, 6]
batch_size = 16
num_epochs = 1000
learning_rate = 0.01

# Running the model for all possible configurations of hyperparameters
for activation_function in activation_functions:
    for optimizer in optimizers:
            wandb.init(project='SMAI-Assignment-3-Task-2.2.1', name=f'MLP_Classifier_{activation_function}_{optimizer}_{learning_rate}', config={
                "model": "MLP Classifier",
                "activation_function": activation_function,
                "optimizer": optimizer,
                "learning_rate": learning_rate,
            })
            mlp = MultiLayerPerceptronClassifier(X_train, y_train, X_val, y_val, hidden_layers=hidden_layers, activation_function=activation_function, learning_rate=learning_rate, optimizer=optimizer, log_to_wandb=True)
            mlp.train(epochs=num_epochs, batch_size=batch_size)



0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▂▃▄▅▄▆▆▇▇▇██▆▇▇▇█▇██▇▆▇██▇█▇█▇██▇██▆▇▇▇
train_f1,▁▂▂▄▅▅▆▆▇▇▇██▇▇▇██████▇███▇██████▇██▇█▇▇
train_loss,█▇▇▆▆▆▅▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▃▁▁▁▂▁▁▁▂▁▁▂▁▁▃▁▂▂
train_precision,█▄▄▁▃▂▂▃▆▅▅▆▇▆▅▇▇▇███▆▅▆█▆▅█▆▇▆█▇▅▇▇▄▇▄▅
train_recall,▁▁▂▄▅▄▆▆▇▇▇██▇████████▇█████████████▇███
val_accuracy,▁▃▆▆█▆█▆▆▆▃▆██▆▆█▃▆▆▆▃▃▃▆▃▃▆▃▆▆▆▃▃▃▃▆▃▃▃
val_f1,▁▂▃▃▆▅█▇▇▇▄▇██▇▇█▄▅▅▅▄▄▄▅▄▄▅▄▅▅▅▄▄▄▄▅▄▄▄
val_loss,▆▅▄▃▃▄▁▂▂▃▄▃▃▂▂▃▄▆▅▅▅▅▇▆▅▅▄▄▆▆▅▄▅▆▅▅█▆▇█
val_precision,█▆▇▂▆▆▆▆▆▆▁▆▅▇▆▆▇▅▆▆▆▅▆▅▆▅▅▆▅▆▆▆▅▅▅▅▆▅▅▅

0,1
epoch,1000.0
train_accuracy,0.47135
train_f1,0.17998
train_loss,0.3049
train_precision,0.90213
train_recall,0.21366
val_accuracy,0.42718
val_f1,0.12404
val_loss,0.31855
val_precision,0.79389




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▁▁▂▃▄▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████
train_f1,▁▁▂▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
train_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,█▆▁▄▄▄▅▅▅▅▅▄▄▇▇▇▇▇▇▇▇▇▇▇▇▇▄▄▄▅▅▅▅▅▅▅▅▅▅▅
train_recall,██▅▅▂▁▄▅▃▄▄▄▄▄▅▅▅▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇
val_accuracy,▁▁▁▂▃▅▅▅▆▆▆▆▇▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████
val_f1,▁▁▂▃▃▄▄▄▅▆▅▆▇▆▆▆▇█▇▇▇▇▇▇▇▇▇▇███████▇▇█▇▇
val_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▆▆▇▇▆▆▆▆▆▂▁▅▅▄▄▄▄▅▄▄▄███████████████████

0,1
epoch,1000.0
train_accuracy,0.44649
train_f1,0.14896
train_loss,0.32191
train_precision,0.54329
train_recall,0.1786
val_accuracy,0.40777
val_f1,0.13662
val_loss,0.32314
val_precision,0.81107




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▂▂▂▂▃▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇█████████
train_f1,▁▂▂▂▃▃▅▆▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▇█████████████
train_loss,█▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▂▁▂▂▂▂▂▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▇▇▇▇▇▇▇██████
train_recall,▁▁▂▂▂▃▅▆▅▅▅▅▄▄▄▄▄▄▄▄▅▅▅▅▅▅▇▇▇███████████
val_accuracy,▁▁▁▁▁▁▂▃▃▃▃▃▃▄▄▄▄▄▄▅▆▅▆▆▆▆▇▇▆▆▇█████████
val_f1,▁▁▁▁▁▁▄▆▆▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇█████████
val_loss,█▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▃███▃▅▂▃▃▁▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▂▂▂▂▂▂▂▂▂▂▂▂

0,1
epoch,1000.0
train_accuracy,0.60216
train_f1,0.28408
train_loss,0.26861
train_precision,0.83634
train_recall,0.28036
val_accuracy,0.58252
val_f1,0.20782
val_loss,0.29398
val_precision,0.69653




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇██████████████████████
train_f1,▁▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████████████
train_loss,█▆▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▁▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████████████
train_recall,▁▆▆▆▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇█████████████████████
val_accuracy,▃▆▇▇▆▅▅▄▄▄▅▅▆▇██▇▅▅▅▅▄▅▄▄▂▂▂▂▂▂▂▂▁▁▂▂▂▂▃
val_f1,▁▆▆▇▆▆▆▆▆▆▆▇▇▇███▇▇▇▇▆▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆
val_loss,▅▂▁▁▁▁▂▃▃▄▄▃▃▃▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇██████████
val_precision,█▃▃▄▃▃▃▂▂▃▃▃▄▄▅▅▄▃▃▃▃▂▃▂▂▂▂▂▂▂▂▁▁▁▁▂▂▂▂▂

0,1
epoch,1000.0
train_accuracy,0.71135
train_f1,0.34495
train_loss,0.19606
train_precision,0.83342
train_recall,0.36635
val_accuracy,0.57282
val_f1,0.28499
val_loss,0.3035
val_precision,0.77243




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▁▁▂▄▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████
train_f1,▁▁▁▂▆▇▇█▇▇▇▇▇▇▇▇▇▇▇█████████████████████
train_loss,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▅▅▁▁▂▂▂▂▅███████████████████████████████
train_recall,█▆▆▃▄▁▂▂▂▂▁▁▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃
val_accuracy,▁▁▁▂▃▅▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
val_f1,▁▁▁▃▄▆▇▇▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
val_loss,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▅▅▄▂▁▁▁▁▄▇▇▇████████████████████████████

0,1
epoch,1000.0
train_accuracy,0.46486
train_f1,0.16986
train_loss,0.32418
train_precision,0.82135
train_recall,0.18836
val_accuracy,0.45631
val_f1,0.16476
val_loss,0.32535
val_precision,0.81866




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████
train_f1,▁▃▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▇▇▇████████████
train_loss,█▅▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▂▅▆▇▇▇▇█▇▇█████████████▁▂▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂
train_recall,▁▃▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▇▇▇▇███████████
val_accuracy,▁▅▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▆▇▇▇▇▇▇██████████
val_f1,▁▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▅▇██▇▇███████████
val_loss,█▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▅▇▇██▇▇▇▇▇▇▇▇▇▇▇▇██████▁▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄

0,1
epoch,1000.0
train_accuracy,0.61622
train_f1,0.28941
train_loss,0.25177
train_precision,0.79809
train_recall,0.28969
val_accuracy,0.63107
val_f1,0.26837
val_loss,0.25809
val_precision,0.78071




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▁▂▄▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇█████████▇████▇
train_f1,▁▁▂▄▅▆▇▆▇▆▆▆▇▇▇▇▇▇▇▇▇▇██████████████████
train_loss,█▅▅▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▁▆▆▇▇▇█▇█▇██████████████████████████████
train_recall,▁▂▃▄▅▆▇▇▇▇▇▇███▇▇██████████▇██████▇████▇
val_accuracy,▃▁▂▄▅█▇▇▅▅▄▅▅▅▅▅▅▅▇▆▅▅▅▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄
val_f1,▄▁▂▂▄███▆▅▄▄▅▅▅▅▅▆▆▆▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▅▅▅▄
val_loss,█▅▄▃▂▁▁▂▂▂▂▂▃▄▄▄▄▄▄▃▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▂▂▃
val_precision,█▁▂▂▄▇▆▆▅▄▃▄▄▄▄▄▄▅▆▅▄▄▅▄▄▅▄▄▅▅▅▅▅▅▅▅▄▅▄▄

0,1
epoch,1000.0
train_accuracy,0.68865
train_f1,0.34049
train_loss,0.22818
train_precision,0.82952
train_recall,0.3529
val_accuracy,0.63107
val_f1,0.29713
val_loss,0.26278
val_precision,0.78949




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇▇███
train_f1,▆▆▅▆▁▂▄▅▅▇███████▇▆▆▆▆▆▅▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇
train_loss,█▆▅▅▄▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▂▂▂▃▁▁▂▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█████████████████
train_recall,█▆▅▅▁▁▃▃▄▆▇▇▇▇▇▇▇▆▅▆▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁▃▄▄▄▅▅▆▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████▇▇▇▇
val_f1,▁▂▅██▅▅▇▆▇██▇███▇▆▆▆▅▅▅▄▄▅▅▅▅▅▄▄▄▄▄▄▄▄▄▄
val_loss,█▆▅▄▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▁▁▄▇▅▅▄▄▄▄▄▄▇████▇██▇▇▇▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅

0,1
epoch,1000.0
train_accuracy,0.47892
train_f1,0.18465
train_loss,0.35994
train_precision,0.51592
train_recall,0.1969
val_accuracy,0.50485
val_f1,0.18439
val_loss,0.36109
val_precision,0.50751


In [10]:
api = wandb.Api()

In [11]:
runs = api.runs(f"arjundosajh/SMAI-Assignment-3-Task-2.2.1")

# Extract metrics from each run
train_accuracies = []
train_precisions = []
train_recalls = []
train_f1_scores = []

val_accuracies = []
val_precisions = []
val_recalls = []
val_f1_scores = []

for run in runs:
    train_accuracies.append(run.summary.get('train_accuracy', None))
    train_precisions.append(run.summary.get('train_precision', None))
    train_recalls.append(run.summary.get('train_recall', None))
    train_f1_scores.append(run.summary.get('train_f1', None))

    val_accuracies.append(run.summary.get('val_accuracy', None))
    val_precisions.append(run.summary.get('val_precision', None))
    val_recalls.append(run.summary.get('val_recall', None))
    val_f1_scores.append(run.summary.get('val_f1', None))

# Create a pandas dataframe with columns accuracy, precision, recall, f1_score 
# and rows as the values of each of these metrics
df = pd.DataFrame(list(zip(train_accuracies, train_precisions, train_recalls, train_f1_scores, val_accuracies, val_precisions, val_recalls, val_f1_scores)), 
                  columns=['train_accuracy', 'train_precision', 'train_recall', 'train_f1', 'val_accuracy', 'val_precision', 'val_recall', 'val_f1'])

sorted_df = df.sort_values(by='val_f1', ascending=False)
sorted_df

Unnamed: 0,train_accuracy,train_precision,train_recall,train_f1,val_accuracy,val_precision,val_recall,val_f1
2,0.688649,0.829522,0.352904,0.340487,0.631068,0.789486,0.305761,0.297128
0,0.631351,0.814071,0.306717,0.30576,0.650485,0.790215,0.291226,0.285148
5,0.711351,0.833417,0.366348,0.344949,0.572816,0.772433,0.305585,0.284988
3,0.616216,0.798088,0.289687,0.289408,0.631068,0.780713,0.271934,0.268372
6,0.602162,0.836341,0.28036,0.284085,0.582524,0.696535,0.23062,0.207819
1,0.478919,0.515922,0.196901,0.184649,0.504854,0.50751,0.198908,0.184389
4,0.464865,0.821346,0.188365,0.169859,0.456311,0.818657,0.180233,0.164764
7,0.446486,0.543287,0.178596,0.14896,0.407767,0.811072,0.16191,0.136615
8,0.471351,0.902125,0.213658,0.179983,0.427184,0.793889,0.181818,0.124043


## Hyperparameter Tuning - Tuning epochs, learning rate and number of hidden layers and the neurons in them

In [17]:
learning_rates = [0.1, 0.01, 0.001]
hidden_layers = [[4, 4], [4, 4, 4], [6, 6], [8, 8]]
num_epochs = [500, 1000]

# Running the model for all possible configurations of hyperparameters
for learning_rate in learning_rates:
    for hidden_layer in hidden_layers:
        for num_epoch in num_epochs:
            wandb.init(project='SMAI-Assignment-3-Task-2.2.4', name=f'MLP_Classifier_{learning_rate}_{hidden_layer}_{num_epoch}', config={
                "model": "MLP Classifier",
                "optimizer": "sgd",
                "acitvation_function": "sigmoid",
                "learning_rate": learning_rate,
                "hidden_layer": hidden_layer,
                "num_epoch": num_epoch,
            })
            mlp = MultiLayerPerceptronClassifier(X_train, y_train, X_val, y_val, hidden_layers=hidden_layer, activation_function="sigmoid", learning_rate=learning_rate, optimizer="sgd", log_to_wandb=True)
            mlp.train(epochs=num_epoch)

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▃▅▅▅▆▆▆▇▇▇▇▇▆▆▆▆▆▇▇█▇█▇█▇█▇██████▇██▇█▇
train_f1,▁▃▄▅▅▅▆▆▇▇▇▆▇▆▆▆▇▆▇▇█▇█▇█▇█▇██████▇██▇█▇
train_loss,█▅▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▂▁▂▁▁▁▁▁▁▁▁▁▂▁▁
train_precision,▁▂▅▄▅▅▅▅▇██▇▆▅▆▆▆▅▇▇▇▇▇▇▇▆█▇█▇████▇█▇▇█▇
train_recall,▁▂▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇███▇█▇█▇████████▇██▇█▇
val_accuracy,▇▂▄▂▃▁▃▄▄▄▄▄▂▄▄▄▅█▄▄▃▅▄▄▆▄▇▆▄▅▅▄▅▅▅▅▅▅▅▅
val_f1,▄▁▁▂▂▁▄▄▄▄▅▄▄▅▆▅▇█▆▆▅▇▆▆▇▆█▇▆▇▇▆▇▇▇▇▇▇▇▇
val_loss,▃▃▄▇▆▇█▆▆▆▄▅▅█▇▇▅▁▆▃▄▃▄▃▄▃▁▂▂▂▂▂▂▂▂▃▂▂▃▂
val_precision,▆▂▂▂▂▁▄▄▅▆▇▆▄▅▆▅▇█▆▆▅▇▆▆▇▆█▇▆▇▇▆▇▇▇▇▇▇▇▇

0,1
epoch,774.0
train_accuracy,0.71568
train_f1,0.35898
train_loss,0.19591
train_precision,0.8469
train_recall,0.37693
val_accuracy,0.6699
val_f1,0.33333
val_loss,0.24002
val_precision,0.82695




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▅▆▆▆▇▇▇▆▇▇▆▇▇▇▇▇▇▇▇██▇▇█████▇████▇▇▇██
train_f1,▁▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████████████
train_loss,█▅▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▂▁▁▁▁▂▁▁▁▁
train_precision,█▁▂▃▃▃▄▄▄▄▄▄▃▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄▅▅▅▅
train_recall,▁▅▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███▇███████████████████
val_accuracy,█▇▅▂▁▃▂▁▄▅▄▅▄▂▃▄▄▄▄▃▄▃▃▃▃▄▄▄▄▄▅▄▄▄▄▄▄▄▄▃
val_f1,▁█▅▃▃▄▄▃▅▆▆▆▅▄▅▅▅▆▅▅▆▅▅▅▅▆▆▆▅▆▇▆▆▅▆▆▅▅▅▄
val_loss,▂▁▃▅▆▆▆▅▄▄▄▄▇██▇▇███▇█▇███▇▇▇█▅▇█▇▆▅▆▇▇▇
val_precision,█▃▂▁▁▂▁▁▂▂▂▂▂▂▂▂▂▃▂▂▃▂▂▂▂▃▂▂▂▃▃▂▃▂▂▂▂▂▂▂

0,1
epoch,500.0
train_accuracy,0.69838
train_f1,0.34926
train_loss,0.22112
train_precision,0.84122
train_recall,0.36093
val_accuracy,0.57282
val_f1,0.26208
val_loss,0.28352
val_precision,0.75643




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▃▄▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████████████████████
train_f1,▁▃▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇█████████████████████
train_loss,█▆▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▁▃▄▅▆▅▆▆▆▆▆▇▇▇▇▇▇▇▇█████████████████████
train_recall,▁▃▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████████████████████
val_accuracy,▁▇██▇▇▇▇████████▇▇███▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
val_f1,▁▅██▇▇▇▇████████▅▅▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
val_loss,█▄▁▄▅▄▄▄▄▄▄▄▄▅▅▄▄▅▄▄▄▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
val_precision,▁▄▇▇▆▆▆▆▇▇██████▅▅▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅

0,1
epoch,1000.0
train_accuracy,0.71459
train_f1,0.35864
train_loss,0.22018
train_precision,0.84763
train_recall,0.37057
val_accuracy,0.65049
val_f1,0.30443
val_loss,0.25785
val_precision,0.79677




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▂▆▆▅▅▆▇▇▇▇█▇▇▇█▆▇▇█▇█▆▇▇▇█▇▇▇▇█▆▅▅▆▆▅▆▅
train_f1,▁▁▇▇▇▇██████████▇▇████▇█████████▇▇▇▇▇▇▇▇
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▂▁▂▁▂▂▂▂▂▂▂▃
train_precision,▇█▂▂▂▂▂▃▃▃▃▃▃▃▃▃▂▂▃▃▂▃▂▃▃▃▃▃▂▃▂▃▂▂▂▂▁▁▂▁
train_recall,▁▁▆▇▇▇▇█████████▆▇▇▇▇▇▇▇▇▇█▇█▇▇██▇██▇▇█▆
val_accuracy,█▅▄▅▂▇▆▇▆▇▅▆▅▅▆▆▃▃▆▃▆▃▄▅▃▅▄▆▃▄▁▅▅▄▆▅▃▅▃▅
val_f1,▃▁▆▆▄█▆▇▇▇▆▆▆▆▇▇▅▆▆▅▆▅▅▆▅▅▅▆▅▅▄▅▇▆█▇▅▇▆▆
val_loss,▁▂▄▃▃▂▄▅▅▅▆▆▅▆▆▆▆▆▅▇▆▇▇▆▇▇▇▅▆▇█▆▃▆▃▄▇▆▇▇
val_precision,█▇▃▂▁▄▃▃▃▃▃▃▃▂▃▃▂▂▃▂▃▂▂▂▂▂▂▃▂▂▁▂▃▃▃▃▂▃▂▃

0,1
epoch,500.0
train_accuracy,0.63676
train_f1,0.31183
train_loss,0.24589
train_precision,0.79967
train_recall,0.33107
val_accuracy,0.54369
val_f1,0.26422
val_loss,0.29654
val_precision,0.75531




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▅▆▆▇▆▆▆▅▇▅▇▇▆▆▆▆▇▇█▅▆▇▅▇▆▆▆▆▆▇▅▇▆▅▇▇█▅
train_f1,▁▇▇▇██▇▇▇▇█▇███▇▇▇███▇██▇██▇█▇██▇█▇▇███▇
train_loss,█▄▃▃▃▃▂▂▃▃▂▃▂▂▂▂▃▂▂▁▁▂▂▂▂▁▂▂▁▂▂▂▃▁▂▃▂▁▁▃
train_precision,█▁▂▂▃▃▃▂▂▂▃▂▃▃▃▃▃▃▃▃▄▃▃▃▂▃▃▃▃▃▃▃▂▃▃▂▃▄▄▂
train_recall,▁▆▇▇█████▇█▇███▇█████▇██▇██▇█▇██▇██▇███▇
val_accuracy,█▂▄▄▄▅▃▂▂▃▁▃▂▂▁▂▄▂▁▄▁▄▂▄▄▁▂▁▁▃▂▃▅▃▁▅▄▂▃▅
val_f1,▁▃▅▆▅▇▅▄▄▆▃▅▄▃▃▄▆▄▂▅▃▆▄▆▅▃▄▃▃▅▅▄█▆▃█▆▄▆▇
val_loss,▁▃▅▄▅▆▆▅▇▇▇▅▇▇█▇▆▇▇▆█▆▆▅▅█▆██▆▇▆▄▆█▅▅▇▇▅
val_precision,█▁▂▂▂▃▂▂▁▂▁▂▂▂▁▁▂▂▁▂▁▂▂▂▂▁▂▁▁▂▂▂▃▂▁▃▂▂▂▃

0,1
epoch,1000.0
train_accuracy,0.67243
train_f1,0.33823
train_loss,0.22527
train_precision,0.82763
train_recall,0.35721
val_accuracy,0.53398
val_f1,0.25571
val_loss,0.31841
val_precision,0.74943




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▂▃▄▅▅▅▆▅▆▆▆▇▇▆▇▇▇▇▇██████████▇██▇▇▇▇▇▇▇
train_f1,▁▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▇███████████▇███▇▇▇█▇█
train_loss,█▆▅▅▄▄▄▃▄▄▂▂▂▂▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁
train_precision,▁▂▃▄▄▅▅▅▅▆▆▆▇▇▆▇▇▇▇▇███████▇██▇██▇▇▇▇▇▇▇
train_recall,▁▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇███████▇██▇██▇▇▇▇█▇█
val_accuracy,█▇█▅▅▆▅▄▃▂▂▂▂▂▁▂▂▂▂▂▁▂▂▂▂▂▂▂▃▃▂▂▃▂▂▂▁▂▁▂
val_f1,▄▆█▅▅▆▆▄▄▃▂▂▂▃▂▃▂▃▃▁▁▃▃▁▃▃▃▂▃▃▃▁▃▃▂▂▂▃▂▂
val_loss,▂▁▂▂▃▂▃▃▅▆▆▆▆▆▇▇▇███████████▇▇▇▇▇███████
val_precision,▇▇█▇▇▇▇▇▇▆▆▆▆▆▆▆▆▇▆▆▆▆▆▆▁▁▁▁▇▁▆▆▁▇▆▆▆▆▆▆

0,1
epoch,500.0
train_accuracy,0.69405
train_f1,0.37002
train_loss,0.21085
train_precision,0.83313
train_recall,0.37994
val_accuracy,0.5534
val_f1,0.27792
val_loss,0.31024
val_precision,0.76468




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▅▅▅▅▆▇▆▆▆▆▆▆▆▆▅▆▆▆▆▇▇▇▇▇█▇██▇█▇▇████▇█
train_f1,▁▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▅▆▆▆▆▇▇▇▆▇▇▇▇▇▇█▇▇█▇▇███
train_loss,█▆▅▅▄▄▃▃▃▃▃▃▃▃▃▃▄▃▃▃▃▃▂▂▃▂▂▂▂▂▂▁▂▂▁▁▁▁▂▁
train_precision,▁▅▅▅▅▅▆▇▇▆▇▇▆▆▆▆▄▆▆▆▆▇▇▇▇▇█▇▇▇▇█▇▇█▇▇█▇█
train_recall,▁▅▅▆▅▅▆▆▆▆▆▆▆▆▆▆▅▆▆▅▆▆▇▇▆▆▇▇▇▇▇▇▇▇▇▇▇███
val_accuracy,█▇▅▇▆▄▅▅▅▅▅▅▁▆▄▄▄▁▂▄▃▄▃▅▄▅▂▅▄▄▅▄▄▄▂▃▂▁▂▃
val_f1,▇█▆█▇▄▅▅▅▅▅▄▁▆▄▄▄▂▂▄▄▄▄▅▅▆▃▅▅▄▅▄▄▄▃▃▄▃▃▄
val_loss,▁▁▂▁▃▄▄▄▄▅▅▅▆▅▅▅▅▆▇▆▅▅▆▆▆▆▇▆▆▇▆▆▆▆▆▆▇▇██
val_precision,█▇▆██▄▅▅▅▅▅▄▁▇▄▄▄▂▂▄▄▄▃▄▅▆▃▅▄▄▅▄▄▄▃▃▃▂▃▄

0,1
epoch,1000.0
train_accuracy,0.70919
train_f1,0.35542
train_loss,0.20482
train_precision,0.84174
train_recall,0.37755
val_accuracy,0.54369
val_f1,0.27781
val_loss,0.33897
val_precision,0.76667




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▃▅▆▆▆▇▇▇▇▇▇▆▆▇▆▆▇▇▇▇█████▇███████▇█████
train_f1,▁▃▄▄▄▅▆▅▅▅▆▆▇▆▇▇▇▇▇▇▇█▇▇▇▇▇█▇▆▇▇▇▇▆▇▇▇▇▇
train_loss,█▆▅▄▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁
train_precision,▆▇▇▇█████████▆▄▄▄▇▇▇▇▃▇▇▇▇▇▃▇▆▁▇▇▇▆▇▇▇▇▇
train_recall,▁▄▅▅▅▅▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇█▇▇█▇▇██▇▇▇▇▇▇▇▇▇██
val_accuracy,▅█▆▇▃▄▄▃▃▂▄▅▅▂▃▁▃▁▄▃▁▄▂▃▁▃▃▄▅▄▄▂▂▄▄▂▄▂▂▃
val_f1,▂▇██▃▄▃▂▃▄▄▅▅▃▃▂▃▁▄▃▁▄▁▃▂▃▃▄▆▂▄▁▃▅▅▃▅▃▃▄
val_loss,▃▁▃▃▅▅▆▆██▇▆▆▇▇▇▇█▇▇▇▇█▇▇█▆▇▆▇▇▇▇▇▇█▇██▇
val_precision,████▇█▇▇▇▇▇▄▁▄▄▄▇▄▄▄▄▄▄▄▄▄▄▁▄▄▄▄▄▄▄▄▄▄▄▄

0,1
epoch,500.0
train_accuracy,0.75568
train_f1,0.40748
train_loss,0.17537
train_precision,0.82823
train_recall,0.40157
val_accuracy,0.61165
val_f1,0.29247
val_loss,0.30444
val_precision,0.62111




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▅▅▆▆▇▇▇▇▇▇▇█▇▇▇█▇▇▇█▇███████▇████████▇
train_f1,▁▄▄▄▅▅▆████▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▆▆▆▆▆▆▆▆▆▆▆
train_loss,█▅▅▄▃▃▃▃▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁
train_precision,▅▇▇▇▇▇▁▃▄▃▃█████████████████████████████
train_recall,▁▄▄▅▅▆▆████▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇██▇▇▇█▇▇▇▇▇▇▇
val_accuracy,▆▃▃▄▄▄▆▇▇▆▆▆▆▆▆▆▅▃▁▂▃▃▅▅▄▃▆▄▃▃▃▅▃▅▅▅▅██▃
val_f1,▃▁▁▃▃▃▄▅▅▅▅▆▆▅▆▆▅▃▂▃▄▃▆▅▅▄▆▅▄▃▄▅▄▄▅▅▅▇█▄
val_loss,▁▃▃▄▄▃▄▄▄▄▄▄▅▅▅▅▅▇█▆▇▇▆▆▅▅▅▅▆▆▆▆▇▇▆▆▆▅▅▆
val_precision,▄▁▁▄▄▄▅▆▆▇▆▆▆▆▆▇▅▃▂▃▄▄▇▆▅▅▆▅▅▄▄▅▅▅▆▆▄▇█▄

0,1
epoch,1000.0
train_accuracy,0.77946
train_f1,0.3934
train_loss,0.16018
train_precision,0.88004
train_recall,0.40928
val_accuracy,0.63107
val_f1,0.29898
val_loss,0.30304
val_precision,0.79313




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▅▅▆▆▆▆▆▇▇▇▇▇▇██████████████████████████
train_f1,▁▃▄▄▆▇▇▇▇▇██████████████████████████████
train_loss,█▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▃▇██▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
train_recall,▁▃▃▃▅▆▇▇▇▇▇█████████████████████████████
val_accuracy,▁▆▆▆█▇███▇▆▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▄▅▅▅▅▅▅▅▅▅
val_f1,▁▄▄▄█▇███▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▅▅▅▅▅▆▆▆▆▆▆▆▆▆
val_loss,█▃▂▂▂▁▁▁▁▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅
val_precision,▅███▆▄▅▅▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▁

0,1
epoch,500.0
train_accuracy,0.66162
train_f1,0.32603
train_loss,0.22887
train_precision,0.81888
train_recall,0.34174
val_accuracy,0.57282
val_f1,0.2632
val_loss,0.28613
val_precision,0.75876




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▅▆▆▆▆▇▇▇▇▇▇███▇▇▇▇█████████████████████
train_f1,▁▆▇▇▇▇▇▇▇███████████████████████████████
train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,█▁▂▃▃▃▃▄▄▅▆▆▇▇▆▆▆▆▆▇▇▇▇▇▇████████▇▇▇▇▇▇▇
train_recall,▁▅▇▇▇▇▇█████████████████████████████████
val_accuracy,▁▅▅▆▆▇█▇▇▇▇▆▇▇██▇▇▇▇▆▆▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆
val_f1,▁▅▆▇▇██▇▇▇▇▇▇▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄
val_precision,█▁▃▃▄▅▆▄▅▅▅▄▅▅▅▅▅▄▅▅▅▅▇▇▇▇▅▅▅▅▅▅▅▅▅▅▅▅▅▅

0,1
epoch,1000.0
train_accuracy,0.68541
train_f1,0.3397
train_loss,0.22461
train_precision,0.83394
train_recall,0.34602
val_accuracy,0.63107
val_f1,0.29656
val_loss,0.26988
val_precision,0.80756




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█▇▇▇██████████████████
train_f1,▁▄▄▄▄▄▄▆▇▇▇▇▇▇▇▇████████████████████████
train_loss,█▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,█▅▅▆▆▆▆▂▂▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
train_recall,▁▄▄▄▄▄▄▅▆▆▆▇▇▇▇▇▇███████████████████████
val_accuracy,▁▇▇▇▇▇▆▇█▇▇▆▅▆▆▇▇▇▇▇▇▇█████▇▇▇▇▇▇▇██████
val_f1,▁▆▅▅▅▅▅▇█▇▇▇▇▇▇███▇▇████████████████████
val_loss,█▃▂▂▂▂▂▂▂▂▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,█▇▇▇▆▆▆▃▄▂▁▂▁▁▂▂▂▂▂▂▂▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂

0,1
epoch,500.0
train_accuracy,0.69405
train_f1,0.34521
train_loss,0.2242
train_precision,0.83257
train_recall,0.36091
val_accuracy,0.65049
val_f1,0.29167
val_loss,0.25112
val_precision,0.78611




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████████
train_f1,▁▄▄▄▆▇▇▇▇▇▇▇████████████████████████████
train_loss,█▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,█▅▅▅▁▁▁▁▂▁▂▂▂▂▂▂▂▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▄▃▃▃▃▃▃▃
train_recall,▁▃▄▄▅▆▇▇▇▇▇▇▇▇██████████████████████████
val_accuracy,▁███▇██▇██▇▆▇▇████████▇█████████████████
val_f1,▁▅▅▅▅▇▇▇▇▇▇▇▇▇▇▇██▇▇▇▇▇▇▇████▇▇▇▇▇▇▇████
val_loss,█▃▁▁▁▁▁▂▃▃▄▄▄▄▄▅▅▅▅▅▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅
val_precision,█▇▇▇▁▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▃▃▃▃▃▃▃▄▄▄▄

0,1
epoch,1000.0
train_accuracy,0.70378
train_f1,0.34804
train_loss,0.22055
train_precision,0.83759
train_recall,0.35953
val_accuracy,0.58252
val_f1,0.27298
val_loss,0.29917
val_precision,0.77266




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████
train_f1,▁▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████
train_loss,█▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▁▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████▇▇█████████
train_recall,▁▄▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
val_accuracy,▁▅▆▇▆▇▇▇▇▇█▇▇▇▇▇▇▇▇▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▅▆▆▆▆▅
val_f1,▁▆▆▇▇█▇█▇▇█▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▆▆▆▆▆▆
val_loss,█▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅
val_precision,▁▇▇█▇██████▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▆▆▆▆▆▆

0,1
epoch,500.0
train_accuracy,0.69946
train_f1,0.34917
train_loss,0.21723
train_precision,0.83615
train_recall,0.36499
val_accuracy,0.58252
val_f1,0.26922
val_loss,0.27398
val_precision,0.7635




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████
train_f1,▁▄▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████
train_loss,█▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,█▃▄▅▆▆▇▇▇▇▇▇▇▇▇▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▃▃▄▄▄▄▄▄▄
train_recall,▁▄▅▅▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
val_accuracy,▂█▅▅▅▄▄▃▄▄▁▂▂▂▃▄▄▄▃▃▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
val_f1,▁█▇▇▆▆▆▅▆▆▄▄▄▄▅▆▆▆▅▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
val_loss,▆▁▂▃▄▅▅▅▆▆▇▇▇▇▇▇████████████▇▇▇▇▇▇▇▇▇▇▇▇
val_precision,█▇▆▆▆▆▆▅▆▆▅▅▅▅▅▅▅▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂

0,1
epoch,1000.0
train_accuracy,0.73189
train_f1,0.40654
train_loss,0.19447
train_precision,0.80087
train_recall,0.40037
val_accuracy,0.63107
val_f1,0.29081
val_loss,0.28884
val_precision,0.6212




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█▇████████████████
train_f1,▁▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██▇██████████████████
train_loss,█▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▅▁▁▂▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
train_recall,▁▄▅▆▆▆▆▆▆▇▇▇▇███████████████████████████
val_accuracy,▁▆▁▂▂▄▃▄█▇▆▆█▇▇▇▇▅▄▅▅▅▅▄▄▄▅▄▅▅▇▇▇▇▆▆▆▆▄▄
val_f1,▁▅▄▅▅▆▆▇█▇▇▇█████▇▇▇▇▇▇▇▇▇▇▆▇▇████████▆▆
val_loss,█▄▄▅▄▄▃▂▂▂▁▁▁▁▁▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
val_precision,█▄▁▂▂▃▂▃▄▄▄▄▅▅▅▅▅▄▄▄▄▄▄▄▄▃▄▃▄▄▄▄▄▄▄▄▄▄▃▃

0,1
epoch,500.0
train_accuracy,0.73622
train_f1,0.37114
train_loss,0.18887
train_precision,0.86901
train_recall,0.3748
val_accuracy,0.60194
val_f1,0.28808
val_loss,0.28257
val_precision,0.78246




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▂▃▃▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇█████████
train_f1,▁▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█
train_loss,█▇▆▆▅▅▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
train_precision,▆▁▁▂▃▄▄▅▅▅▆▆▆▆▆▆▆▇▇▆▆▇▇▇▇▇▇█████████████
train_recall,▁▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██
val_accuracy,▄▃▄▄▅▆▇▆▇███▇▇▆▆▆▆▆▆▆▅▅▅▅▅▄▃▃▃▂▃▃▃▃▂▂▂▁▁
val_f1,▁▄▅▅▆▇▇▆▇███▇▇▇▇▇▆▆▆▆▅▅▅▅▅▅▄▄▄▃▄▄▄▄▄▃▃▃▃
val_loss,▂▃▂▂▂▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇▇▇▇█████
val_precision,█▂▃▃▃▄▄▄▄▅▅▅▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▁▂▂▂▂▁▁▁▁▁

0,1
epoch,1000.0
train_accuracy,0.77514
train_f1,0.41479
train_loss,0.1738
train_precision,0.8808
train_recall,0.41979
val_accuracy,0.57282
val_f1,0.25959
val_loss,0.31055
val_precision,0.75158




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▃▄▅▅▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
train_f1,▁▃▄▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██
train_loss,█▆▆▆▅▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▆▆▇▇▇▇████████████████████▁▁▃▃▅▅▄▄▄▄▄▄▄▄
train_recall,▁▂▃▄▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█
val_accuracy,▁▂▂▂▄▅▆▆▆▇█▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████
val_f1,▁▃▃▃▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇██████
val_loss,█▆▆▆▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,█▃▃▃▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▁▁▁▃▃▃▃▃▃

0,1
epoch,500.0
train_accuracy,0.60649
train_f1,0.2501
train_loss,0.25762
train_precision,0.77359
train_recall,0.25865
val_accuracy,0.62136
val_f1,0.26774
val_loss,0.25708
val_precision,0.81755




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▂▃▄▄▅▆▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
train_f1,▁▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▆▆▇▇▇▇▇▇▇▇▇▇▇█████████
train_loss,█▇▇▆▅▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▄▄▅▆▇▇█▇▇▇██████▁▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃
train_recall,▁▂▂▂▃▃▄▃▃▃▄▄▄▄▄▄▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇███████
val_accuracy,▁▁▂▄▅▄▅▅▅▅▅▆▆▆▆▇█▇████▇▇▆▆▆▆▆▆▆▆▆▅▅▅▅▆▆▆
val_f1,▁▁▂▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▇▇██▇▇▇▆▇███▇▇▇▇▇▇▇█▇▇
val_loss,█▇▆▅▄▃▃▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▆▅▆▆▇▇▇▇▇▇▇▇▇▇▇██▁▅▅▆▆▆▃▃▃▄▄▄▄▄▃▃▃▃▃▃▃▃▃

0,1
epoch,1000.0
train_accuracy,0.64973
train_f1,0.3196
train_loss,0.24236
train_precision,0.81324
train_recall,0.32658
val_accuracy,0.60194
val_f1,0.27637
val_loss,0.2564
val_precision,0.77108




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▄▄▄▅▅▅▅▆▆▇▇▇▇▇████████
train_f1,▁▂▂▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▅▅▅▆▆▆▆▇▇▇████████████
train_loss,█▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
train_precision,█▂▃█████████▂▁▁▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅
train_recall,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▃▃▄▄▄▅▅▆▆▇▇▇▇▇████████
val_accuracy,▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▃▃▂▂▄▄▄▄▅▅▆▇▇▇▇▇▇▇▇██████
val_f1,▁▂▁▁▁▁▁▁▁▁▁▁▁▁▂▃▄▄▄▅▆▆▆▆▆▇▇▇▇▇██████████
val_loss,█▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_precision,█▅██████████▁▁▄▆▅▅▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆

0,1
epoch,500.0
train_accuracy,0.5773
train_f1,0.21019
train_loss,0.26813
train_precision,0.8579
train_recall,0.23335
val_accuracy,0.59223
val_f1,0.21296
val_loss,0.27051
val_precision,0.86321




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▄▅▆▆▇▇█████████████████
train_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▄▅▆▇▇▇██████████████████
train_loss,█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▅▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
train_precision,███████████████▁▁▂▃▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
train_recall,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▄▅▆▇▇▇█████████████████
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▄▅▆▆▇██▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇
val_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▅▆▆▇▇██▇▇▇▇█████████████
val_loss,█▇▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▅▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,███████████████▁▄▃▄▅▅▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅

0,1
epoch,1000.0
train_accuracy,0.59459
train_f1,0.21757
train_loss,0.2601
train_precision,0.86528
train_recall,0.24088
val_accuracy,0.60194
val_f1,0.2171
val_loss,0.26754
val_precision,0.86692




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇██████████████
train_f1,▁▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▆▆▇▇▇▇▇▇███████████████
train_loss,█▆▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▁▇▇▇▇▇██████████▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
train_recall,▁▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▆▆▆▆▇▇▇▇▇▇█████████████
val_accuracy,▁▃▃▄▄▅▅▆▆▆▅▅▆▆▆▆▆▇▇█▇▇▇▇▇▇▇█████████████
val_f1,▁▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▅▆▆▇▆▇▇▇▇▇▇█████████████
val_loss,█▆▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▃▅▅▆▇▇▇██▇▇▇████▂▃▃▃▁▂▂▃▃▃▃▄▄▄▄▄▄▄▄▃▃▃▃▃

0,1
epoch,500.0
train_accuracy,0.64216
train_f1,0.31651
train_loss,0.24647
train_precision,0.81636
train_recall,0.31886
val_accuracy,0.64078
val_f1,0.30499
val_loss,0.2544
val_precision,0.80164




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▃▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████████
train_f1,▁▂▄▅▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇███████████████
train_loss,█▆▅▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
train_precision,▁▃▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
train_recall,▁▂▄▄▄▄▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇███████████████
val_accuracy,▁▄▆▆▆▆▆▆▆▆▇▇▇█▇▇▇███████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
val_f1,▁▃▄▅▄▄▄▄▄▄▅▅▅▆▆▆▆███████████████████████
val_loss,█▆▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▁▃▄▄▄▄▄▄▄▄▄▄▅▆▆▆▇███████████████████████

0,1
epoch,1000.0
train_accuracy,0.6573
train_f1,0.32338
train_loss,0.23378
train_precision,0.82202
train_recall,0.32675
val_accuracy,0.66019
val_f1,0.31032
val_loss,0.24422
val_precision,0.80481




0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_accuracy,▁▄▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███▇████████████████
train_f1,▁▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇▇█████████████
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▅▇▇████████▁▃▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
train_recall,▁▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇█████████████
val_accuracy,▁▄▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████▇▇▇
val_f1,▁▂▃▄▄▄▄▄▄▄▄▄▄▅▆▆▇▇▇███████████████████▇▇
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_precision,▆▇▇█████████▁▃▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅

0,1
epoch,500.0
train_accuracy,0.64108
train_f1,0.3053
train_loss,0.24221
train_precision,0.8043
train_recall,0.30867
val_accuracy,0.61165
val_f1,0.28505
val_loss,0.24932
val_precision,0.7899


## Printing Classification Report on the test set

In [14]:
# print the classification report on the test set
mlp_model = MultiLayerPerceptronClassifier(X_train, y_train, X_val, y_val, hidden_layers=[6, 6], activation_function="tanh", learning_rate=0.01, optimizer="sgd", log_to_wandb=False)
mlp_model.train(epochs=1000)
y_pred = mlp_model.predict(X_test)

Epoch 1/1000 - Training Loss: 0.3457, Training Accuracy: 0.48%
Epoch 2/1000 - Training Loss: 0.3169, Training Accuracy: 0.54%
Epoch 3/1000 - Training Loss: 0.3051, Training Accuracy: 0.56%
Epoch 4/1000 - Training Loss: 0.2965, Training Accuracy: 0.57%
Epoch 5/1000 - Training Loss: 0.2898, Training Accuracy: 0.58%
Epoch 6/1000 - Training Loss: 0.2846, Training Accuracy: 0.59%
Epoch 7/1000 - Training Loss: 0.2807, Training Accuracy: 0.59%
Epoch 8/1000 - Training Loss: 0.2778, Training Accuracy: 0.59%
Epoch 9/1000 - Training Loss: 0.2755, Training Accuracy: 0.59%
Epoch 10/1000 - Training Loss: 0.2737, Training Accuracy: 0.59%
Epoch 11/1000 - Training Loss: 0.2722, Training Accuracy: 0.59%
Epoch 12/1000 - Training Loss: 0.2709, Training Accuracy: 0.60%
Epoch 13/1000 - Training Loss: 0.2698, Training Accuracy: 0.60%
Epoch 14/1000 - Training Loss: 0.2688, Training Accuracy: 0.60%
Epoch 15/1000 - Training Loss: 0.2680, Training Accuracy: 0.60%
Epoch 16/1000 - Training Loss: 0.2674, Training A

In [15]:
print("Classification Report on Test Set")
print(classification_report(y_test, y_pred, zero_division=1))

Classification Report on Test Set
              precision    recall  f1-score   support

           1       1.00      0.00      0.00         2
           2       0.67      0.77      0.71        47
           3       0.65      0.58      0.61        52
           4       0.40      0.46      0.43        13
           5       1.00      0.00      0.00         1

    accuracy                           0.63       115
   macro avg       0.74      0.36      0.35       115
weighted avg       0.64      0.63      0.62       115



## Comparing MLP Classifier and Logistic Regression

### Using Logistic Regression:
Best Accuracy on the val set: 61.17%
Best Accuracy on the train set: 57.3%
### Using MLP Classifier:
Best Accuracy on the val set: 66%
Best Accuracy on the train set: 77.5%

## Advertisement Dataset

In [18]:
advertisement_dataset = pd.read_csv('advertisement.csv')
advertisement_dataset.head()

Unnamed: 0,age,gender,income,education,married,children,city,occupation,purchase_amount,most bought item,labels
0,45,Male,61271.953359,Master,False,3,Lake Sheila,Doctor,87.697118,monitor,electronics clothing sports
1,24,Female,53229.101074,High School,False,1,Crystalburgh,Businessman,115.135586,lipstick,furniture beauty
2,45,Female,30066.046684,Bachelor,True,3,Margaretburgh,Engineer,101.694559,biscuits,clothing electronics food sports
3,19,Male,48950.246384,PhD,False,0,Williamshaven,Lawyer,97.964887,maggi,food
4,29,Female,44792.627094,Master,False,0,New Paul,Businessman,86.847281,carpet,home


wandb: Network error (ConnectionError), entering retry loop.
