In [1]:
import sys
import os

# Add the src directory to the Python path
src_path = os.path.join(os.getcwd(), 'src')
if src_path not in sys.path:
    sys.path.append(src_path)

In [2]:
import numpy as np
import pandas as pd
from itertools import product
from sklearn.model_selection import train_test_split
from src.optimizers import *
from src.activation_functions import * 
from src.utils import *
from src.model_regularization import *
from src.layer import *
np.random.seed(0)


#### Data pre-processing for MONK Datasets 

In [3]:
########################################################
### Don't forget to change the path to the data file ###
########################################################

df = pd.read_csv("../ML_project/data/Monk_3/monks-3.train", names=[0,1,2,3,4,5,6,"index"], delimiter= " ")
# df = pd.read_csv("../ML_project/data/Monk_2/monks-2.train",
#                  names=[0, 1, 2, 3, 4, 5, 6, "index"], delimiter=" ")
df.set_index("index", inplace=True)
y = df.iloc[:, 0]  # First column as target
X = df.iloc[:, 1:]  # All other columns as features
for i in range(1, X.shape[1]):
    X.iloc[:, i] = (X.iloc[:, i] - np.mean(X.iloc[:, i])) / \
        np.std(X.iloc[:, i])
X.head()

data_2     -1.255411
data_3     -1.255411
data_4     -1.255411
data_5     -1.255411
data_7     -1.255411
              ...   
data_420    1.214913
data_422    1.214913
data_425    1.214913
data_430    1.214913
data_432    1.214913
Name: 2, Length: 122, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  X.iloc[:, i] = (X.iloc[:, i] - np.mean(X.iloc[:, i])) / \
data_2     -0.936442
data_3     -0.936442
data_4     -0.936442
data_5     -0.936442
data_7     -0.936442
              ...   
data_420    1.067872
data_422    1.067872
data_425    1.067872
data_430    1.067872
data_432    1.067872
Name: 3, Length: 122, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  X.iloc[:, i] = (X.iloc[:, i] - np.mean(X.iloc[:, i])) / \
data_2     -1.245633
data_3     -1.245633
data_4     -1.245633
data_5     -1.245633
data_7     -1.245633
              ...   
data_420   -0.039544
data_422   -0.039544


Unnamed: 0_level_0,1,2,3,4,5,6
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
data_2,1,-1.255411,-0.936442,-1.245633,-1.30298,0.967733
data_3,1,-1.255411,-0.936442,-1.245633,-0.419849,-1.033342
data_4,1,-1.255411,-0.936442,-1.245633,-0.419849,0.967733
data_5,1,-1.255411,-0.936442,-1.245633,0.463282,-1.033342
data_7,1,-1.255411,-0.936442,-1.245633,1.346413,-1.033342


In [4]:
########################################################
### Don't forget to change the path to the data file ###
########################################################

test_data = pd.read_csv("../ML_project/data/Monk_3/monks-3.test",
names=[0, 1, 2, 3, 4, 5, 6, "index"], delimiter=" ")
test_data.set_index("index", inplace=True)
# test_data.head()
y_test = test_data.iloc[:, 0]
X_test = test_data.iloc[:, 1:]
for i in range(1, X_test.shape[1]):
    X_test.iloc[:, i] = (X_test.iloc[:, i] - np.mean(X_test.iloc[:, i])) / np.std(X_test.iloc[:, i])

data_1     -1.224745
data_2     -1.224745
data_3     -1.224745
data_4     -1.224745
data_5     -1.224745
              ...   
data_428    1.224745
data_429    1.224745
data_430    1.224745
data_431    1.224745
data_432    1.224745
Name: 2, Length: 432, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  X_test.iloc[:, i] = (X_test.iloc[:, i] - np.mean(X_test.iloc[:, i])) / np.std(X_test.iloc[:, i])
data_1     -1.0
data_2     -1.0
data_3     -1.0
data_4     -1.0
data_5     -1.0
           ... 
data_428    1.0
data_429    1.0
data_430    1.0
data_431    1.0
data_432    1.0
Name: 3, Length: 432, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  X_test.iloc[:, i] = (X_test.iloc[:, i] - np.mean(X_test.iloc[:, i])) / np.std(X_test.iloc[:, i])
data_1     -1.224745
data_2     -1.224745
data_3     -1.224745
data_4     -1.224745
data_5     -1.224745
              ...   
data_428    1.22474

In [5]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shape of the resulting datasets
print("Training Features Shape:", X_train.shape)
print("Validation Features Shape:", X_val.shape)
print("Training Target Shape:", y_train.shape)
print("Validation Target Shape:", y_val.shape)

Training Features Shape: (97, 6)
Validation Features Shape: (25, 6)
Training Target Shape: (97,)
Validation Target Shape: (25,)


In [6]:
class Loss:
    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss

class Loss_CategoricalCrossentropy(Loss):
    def forward(self, y_pred, y_true):
        samples = len(y_pred)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)

        if len(y_true.shape) == 1:
            correct_confidence = y_pred_clipped[range(samples), y_true]

        elif len(y_true.shape) == 2:
            correct_confidence = np.sum(y_pred_clipped * y_true, axis=1) 

        negative_log_likelihoods = np.log(correct_confidence)
        return negative_log_likelihoods

    def backward(self, dvalues, y_true):

        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample
        # We'll use the first sample to count them
        labels = len(dvalues[0])

        # If labels are sparse, turn them into one-hot vector
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        # Calculate gradient
        self.dinputs = -y_true / dvalues
        # Normalize gradient
        self.dinputs = self.dinputs / samples


class Activation_Softmax_Loss_CategoricalCrossentropy():

    def __init__(self):
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()


    def forward(self, inputs, y_true):
        # Output layer's activation function
        self.activation.forward(inputs)

        # Set the output
        self.output = self.activation.output

        return self.loss.calculate(self.output, y_true)

    def backward(self, dvalues, y_true):
        samples = len(dvalues)

        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)

        self.dinputs = dvalues.copy()

        self.dinputs[range(samples), y_true] -= 1

        self.dinputs = self.dinputs / samples

class MSE:
    def __init__(self):
        self.dinputs = None
        self.output = None
        
    def forward(self, y_pred, y_true):
        # Remove the shape condition - always calculate loss
        self.output = np.mean((y_pred - y_true)**2)
        return self.output
    
    def backward(self, dvalues, y_true):
        samples = len(dvalues)
        self.dinputs = 2 * (dvalues - y_true) / samples

In [7]:
class NN:
    def __init__(self, l1, l2, input_size, hidden_sizes, output_size, 
                 hidden_activations=None, dropout_rates=None, output_activation=Activation_Sigmoid()):
        self.layers = []
        prev_size = input_size
        
        # Default to ReLU if no activations specified
        if hidden_activations is None:
            hidden_activations = [Activation_ReLU() for _ in hidden_sizes]
        
        # Default to no dropout
        if dropout_rates is None:
            dropout_rates = [0.0] * len(hidden_sizes)
            
        # Create hidden layers
        for size, activation, rate in zip(hidden_sizes, hidden_activations, dropout_rates):
            self.layers.append(Layer_Dense(prev_size, size,l1=l1, l2=l2))
            self.layers.append(activation())
            if rate > 0:
                self.layers.append(Dropout(rate))
            prev_size = size
        
        # Output layer (no activation)
        self.layers.append(Layer_Dense(prev_size, output_size))
        # self.layers.append(output_activation)
        
    def forward(self, inputs, training=True):
        for layer in self.layers:
            if isinstance(layer, Dropout):
                layer.forward(inputs, training)
            else:
                layer.forward(inputs)
            inputs = layer.output
        self.output = inputs

In [8]:
# def train_and_evaluate(learning_rate, l1, l2, dropout_rate, batch_size, n_epochs, activation):
#     # Initialize components
#     model = NN(
#         l1=l1,
#         l2=l2,
#         input_size=6,
#         hidden_sizes=[10],
#         output_size=2,
#         hidden_activations=[activation],
#         dropout_rates=[dropout_rate]
#     )
    
#     loss_activation = MSE()
#     optimizer = Optimizer_Adam(learning_rate=learning_rate, decay=1e-3)

#     train_losses = []
#     train_accuracies = []
#     val_losses = []
#     val_accuracies = []

#     for epoch in range(n_epochs):
#         batch_losses = []
#         batch_accuracies = []
        
#         for X_batch, y_batch in create_batches(X_train, y_train, batch_size):
#             # Forward pass through model
#             model.forward(X_batch, training=True)
            
#             # Calculate loss through separate loss activation
#             loss = loss_activation.forward(model.output, y_batch)
#             # print(y_batch.shape)
#             # Calculate accuracy
#             predictions = np.argmax(loss_activation.output, axis=1)
#             accuracy = np.mean(predictions == y_batch)
            
#             # Backward pass
#             loss_activation.backward(loss_activation.output, y_batch)
#             dvalues = loss_activation.dinputs
            
#             # Propagate gradients through model layers in reverse
#             for layer in reversed(model.layers):
#                 layer.backward(dvalues)
#                 dvalues = layer.dinputs
                
#             # Update parameters
#             optimizer.pre_update_params()
#             for layer in model.layers:
#                 if isinstance(layer, Layer_Dense):
#                     optimizer.update_params(layer)
#             optimizer.post_update_params()
            
#             batch_losses.append(loss)
#             batch_accuracies.append(accuracy)

#         # Epoch metrics
#         epoch_loss = np.mean(batch_losses)
#         epoch_accuracy = np.mean(batch_accuracies)
#         train_losses.append(epoch_loss)
#         train_accuracies.append(epoch_accuracy)

#         # Validation pass
#         model.forward(X_val, training=False)
#         val_loss = loss_activation.forward(model.output, y_val)
#         val_predictions = np.argmax(loss_activation.output, axis=1)
#         val_accuracy = np.mean(val_predictions == y_val)
#         val_losses.append(val_loss)
#         val_accuracies.append(val_accuracy)

#     return val_accuracies[-1]

In [9]:
def accuracy(target, output):
    predictions = np.round(output.squeeze())
    return np.mean(predictions == target.squeeze()) * 100

In [10]:
def train_and_evaluate(learning_rate, l1, l2, dropout_rate, batch_size, n_epochs, activation):
    # Initialize components
    model = NN(
        l1=l1,
        l2=l2,
        input_size=6,
        hidden_sizes=[10],
        output_size=1,
        hidden_activations=[activation],
        dropout_rates=[dropout_rate],
        output_activation=Activation_Sigmoid()
    )
    
    loss_function = MSE()
    optimizer = Optimizer_Adam(learning_rate=learning_rate, decay=1e-3)

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    for epoch in range(n_epochs):
        batch_losses = []
        batch_accuracies = []
        
        for X_batch, y_batch in create_batches(X_train, y_train, batch_size):
            # Forward pass
            model.forward(X_batch, training=True)
            
            # Calculate loss
            loss = loss_function.forward(model.output, y_batch)
            
            predictions = np.round(model.output.squeeze())
            accuracy = np.mean(predictions == y_batch.squeeze())

            # Backward pass with shape validation
            loss_function.backward(model.output, y_batch)
            dvalues = loss_function.dinputs
            
            # Verify gradient shape matches output
            assert dvalues.shape == model.output.shape, \
                f"Gradient shape mismatch: {dvalues.shape} vs {model.output.shape}"
            
            # Propagate gradients
            for layer in reversed(model.layers):
                layer.backward(dvalues)
                dvalues = layer.dinputs
                
                # Ensure numpy arrays
                if isinstance(dvalues, pd.DataFrame):
                    dvalues = dvalues.values
                elif isinstance(dvalues, pd.Series):
                    dvalues = dvalues.values.reshape(-1, 1)

            # Update parameters
            optimizer.pre_update_params()
            for layer in model.layers:
                if isinstance(layer, Layer_Dense):
                    optimizer.update_params(layer)
            optimizer.post_update_params()
            
            batch_losses.append(loss)
            batch_accuracies.append(accuracy)

        # Epoch metrics
        epoch_loss = np.mean(batch_losses)
        epoch_acc = np.mean(batch_accuracies)
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)

        # Validation pass
        model.forward(X_val.values if isinstance(X_val, pd.DataFrame) else X_val, 
                     training=False)
        val_loss = loss_function.forward(model.output, 
                                        y_val.values if isinstance(y_val, (pd.Series, pd.DataFrame)) else y_val)
        val_predictions = np.round(model.output.squeeze())
        val_accuracy = np.mean(val_predictions == y_val.squeeze())
        
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}: "
                  f"Train Loss: {epoch_loss:.4f}, Acc: {epoch_acc*100:.2f}% | "
                  f"Val Loss: {val_loss:.4f}, Acc: {val_accuracy*100:.2f}%")
    # print(val_accuracies, val_accuracies[-1])
    return val_losses[-1], val_accuracies[-1]

In [11]:
# hyperparameter_grid = {
#     'learning_rate': [0.1,0.01,0.001, 0.003],
#     'l1': [0.0, 1e-5],
#     'l2': [0.0, 1e-4],
#     'dropout_rate': [0.1, 0.3, 0.5],
#     'batch_size': [1, 4, 16, 32],
#     'n_epochs': [50, 100, 200, 400],
#     'activation': [Activation_Sigmoid, Activation_Leaky_ReLU, Activation_Tanh]
# }

hyperparameter_grid = {
    'learning_rate': [0.1],
    'l1': [0.0, 1e-5],
    'l2': [0.0, 1e-4],
    'dropout_rate': [0.1,],
    'batch_size': [1,],
    'n_epochs': [50, 100],
    'activation': [Activation_Sigmoid]
}

In [12]:
# print(len(val_accuracies))

In [13]:
# Initialize variables to store the best hyperparameters and performance
best_hyperparams = []
best_performance = (-np.inf, -np.inf)  # Assuming we are maximizing validation accuracy

# Iterate over all combinations of hyperparameters
for params in product(*hyperparameter_grid.values()):
    # Unpack the hyperparameters
    learning_rate, l1, l2, dropout_rate, batch_size, n_epochs, activation = params
    # Train and evaluate the model
    val_accuracy = train_and_evaluate(learning_rate, l1, l2, dropout_rate, batch_size, n_epochs, activation)
    # Update the best hyperparameters if the current model is better
    if val_accuracy[1] > best_performance[1]:
        best_hyperparams.clear()
        best_performance = val_accuracy
        best_hyperparams.append({
            'learning_rate': learning_rate,
            'l1': l1,
            'l2': l2,
            'dropout_rate': dropout_rate,
            'batch_size': batch_size,
            'n_epochs': n_epochs,
            'activation': activation
        })
    elif val_accuracy[1] == 1.0:
        best_hyperparams.append({
            'learning_rate': learning_rate,
            'l1': l1,
            'l2': l2,
            'dropout_rate': dropout_rate,
            'batch_size': batch_size,
            'n_epochs': n_epochs,
            'activation': activation
        })

# Print the best hyperparameters and performance
print("Best Hyperparameters:", best_hyperparams)
print("Best Validation Accuracy:", best_performance)

Epoch 0: Train Loss: 0.2889, Acc: 60.82% | Val Loss: 0.3414, Acc: 72.00%
Epoch 0: Train Loss: 0.2382, Acc: 63.92% | Val Loss: 0.3927, Acc: 80.00%
Epoch 0: Train Loss: 0.2392, Acc: 70.10% | Val Loss: 0.3317, Acc: 68.00%
Epoch 0: Train Loss: 0.2755, Acc: 64.95% | Val Loss: 0.3593, Acc: 60.00%
Epoch 0: Train Loss: 0.2734, Acc: 64.95% | Val Loss: 0.3664, Acc: 68.00%
Epoch 0: Train Loss: 0.2238, Acc: 69.07% | Val Loss: 0.3600, Acc: 76.00%
Epoch 0: Train Loss: 0.2892, Acc: 59.79% | Val Loss: 0.2930, Acc: 80.00%
Epoch 0: Train Loss: 0.2893, Acc: 58.76% | Val Loss: 0.4467, Acc: 52.00%
Best Hyperparameters: [{'learning_rate': 0.1, 'l1': 1e-05, 'l2': 0.0, 'dropout_rate': 0.1, 'batch_size': 1, 'n_epochs': 50, 'activation': <class 'src.activation_functions.Activation_Sigmoid'>}]
Best Validation Accuracy: (0.476158113033482, 0.96)


In [14]:
print(best_hyperparams)

[{'learning_rate': 0.1, 'l1': 1e-05, 'l2': 0.0, 'dropout_rate': 0.1, 'batch_size': 1, 'n_epochs': 50, 'activation': <class 'src.activation_functions.Activation_Sigmoid'>}]


In [15]:
# final_model_performance = train_and_evaluate(**best_hyperparams)
# print("Final Model Performance:", final_model_performance)

In [16]:
# learning_rate, l1, l2, dropout_rate, batch_size, n_epochs, activation = best_hyperparams.values()
# learning_rate, l1, l2, dropout_rate, batch_size, n_epochs, activation

In [17]:
test_accuracies = []

In [18]:
for i in best_hyperparams: 
    learning_rate, l1, l2, dropout_rate, batch_size, n_epochs, activation = i.values()
    model = NN(
        l1=l1,
        l2=l2,
        input_size=6,
        hidden_sizes=[10],
        output_size=1,
        hidden_activations=[activation],
        dropout_rates=[dropout_rate]
    )
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    # early_stopping = EarlyStopping(
    #     patience=20,
    #     min_delta_loss=0.0001,
    #     min_delta_accuracy=0.0001,
    #     restore_best_weights=True
    # )
    # loss_activation = MSE()
    loss_function = MSE()
    optimizer = Optimizer_Adam(learning_rate=learning_rate)
    # Before training loop:
    print("Data shapes:")
    print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Sample prediction: {model.forward(X_train[:1])}")  # Should output ~0.5
    print(f"Initial loss: {loss_function.forward(model.output, y_train[:1].squeeze())}")
    # Training loop
    for epoch in range(n_epochs):
        batch_losses = []
        batch_accuracies = []
        
        for X_batch, y_batch in create_batches(X_train, y_train, batch_size):
            # Forward pass
            model.forward(X_batch, training=True)
            
            # Calculate loss
            loss = loss_function.forward(model.output, y_batch)
            
            predictions = np.round(model.output.squeeze())
            accuracy = np.mean(predictions == y_batch.squeeze())

            # Backward pass with shape validation
            loss_function.backward(model.output, y_batch)

            max_grad = max(
                np.max(np.abs(layer.dweights)) 
                for layer in model.layers 
                if isinstance(layer, Layer_Dense)
            )
            print(f"Max gradient: {max_grad:.4f}")
            dvalues = loss_function.dinputs
            
            # Verify gradient shape matches output
            assert dvalues.shape == model.output.shape, \
                f"Gradient shape mismatch: {dvalues.shape} vs {model.output.shape}"
            
            # Propagate gradients
            for layer in reversed(model.layers):
                layer.backward(dvalues)
                dvalues = layer.dinputs
                
                # Ensure numpy arrays
                if isinstance(dvalues, pd.DataFrame):
                    dvalues = dvalues.values
                elif isinstance(dvalues, pd.Series):
                    dvalues = dvalues.values.reshape(-1, 1)

            # Update parameters
            optimizer.pre_update_params()
            for layer in model.layers:
                if isinstance(layer, Layer_Dense):
                    optimizer.update_params(layer)
            optimizer.post_update_params()
            
            batch_losses.append(loss)
            batch_accuracies.append(accuracy)

        # Epoch metrics
        epoch_loss = np.mean(batch_losses)
        epoch_acc = np.mean(batch_accuracies)
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)
        print(epoch_loss, epoch_acc)
        # Validation pass
        model.forward(X_val.values if isinstance(X_val, pd.DataFrame) else X_val, 
                        training=False)
        # print(model.output)
        val_loss = loss_function.forward(model.output, y_val.values if isinstance(y_val, (pd.Series, pd.DataFrame)) else y_val)
        val_predictions = np.round(model.output.squeeze())
        val_accuracy = np.mean(val_predictions == y_val.squeeze())
        
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
    # print(len(val_accuracies), len(train_accuracies))
    plot_accuracies(train_accuracies, val_accuracies, label1="Training Accuracies", label2="Validation Accuracies", title="Accuracy Over Epochs")
    plot_losses(train_losses, val_losses, label1="Training Loss", label2="Validation Loss", title="Loss Over Epochs")
    
    model.forward(X_test, training=False)
    # Compute softmax probabilities for the test output
    # print(X_test.shape, y_test.shape)
    # print(model.output, y_test)
    loss_function.forward(model.output.squeeze(), y_test) 
    # Calculate accuracy for the test set
    predictions = np.round(model.output.squeeze())
    if len(y_test.shape) == 2:
        y_true = np.argmax(y_test, axis=1) 
    else:
        y_true = y_test

    # Compute test accuracy
    test_accuracy = np.mean(predictions == y_true)
    test_accuracies.append(test_accuracy)
    print(f"Test Accuracy: {test_accuracy:.4f}")

Data shapes:
X_train: (97, 6), y_train: (97,)
Sample prediction: None
Initial loss: 0.001363589250398477


TypeError: bad operand type for abs(): 'NoneType'

In [34]:
print(train_losses, val_losses) 

[0.0, 0.3408472197476576, 0.39220134100533427, 0.3517592001330052, 0.359354626973759, 0.445301660384225, 0.46967723340757234, 0.30671469885389113, 0.3675657169746863, 0.437299500739877, 0.5756837948880944, 0.5379057026864668, 0.4683187149758564, 0.4278020091041673, 0.40494186152012446, 0.453661344935394, 0.4523533935901557, 0.539615405863994, 0.3614852711991134, 0.39071796202154496, 0.3841183670576393, 0.4307056684828311, 0.3853687111984917, 0.36221899390770357, 0.33369875162904833, 0.560752678750922, 0.45625943946873454, 0.4556209647444425, 0.4685704252473555, 0.4492434175558663, 0.4503054233621944, 0.40910254011228997, 0.5036253061968651, 0.4382209674054665, 0.5001077702966245, 0.43038748428287243, 0.37163782020395264, 0.3436477572927342, 0.4054560148225245, 0.3513557713835197, 0.3763797134081373, 0.42860558878257576, 0.4171505740806912, 0.3762509532990682, 0.4299824560405011, 0.48987450584562425, 0.35424650418614084, 0.36580797117384034, 0.43854379546599465, 0.3182861872076362, 0.43

In [35]:
print(np.argmax(test_accuracies))
print(test_accuracies[15])
print(len(test_accuracies))
print(best_hyperparams[15])

0


IndexError: list index out of range

In [46]:
model.forward(X_test, training=False)
# Compute softmax probabilities for the test output
# print(X_test.shape, y_test.shape)
# print(model.output, y_test)
loss_function.forward(model.output.squeeze(), y_test) 
# Calculate accuracy for the test set
predictions = np.round(model.output.squeeze())
if len(y_test.shape) == 2:
    y_true = np.argmax(y_test, axis=1) 
else:
    y_true = y_test

# Compute test accuracy
test_accuracy = np.mean(predictions == y_true)
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.8727


(0.1, 1e-05, 0.0, 0.3, 16, 100, src.activation_functions.Activation_Leaky_ReLU) MONK2 Cross entropy ADAM

(0.1, 0.0, 0.0, 0.1, 16, 200, src.activation_functions.Activation_Tanh) MONK3 Cross entropy ADAM
test_result: 0.9144

MONK1:
    
    
MONK2:



MONK3:
Cross Entropy: 
(0.1, 0.0, 0.0, 0.1, 1, 100, src.activation_functions.Activation_Sigmoid) RMSprop
Test Accuracy: 0.9213
(0.1, 0.0, 0.0, 0.1, 1, 400, src.activation_functions.Activation_Sigmoid) Adam
Test Accuracy: 0.9606

MSE: 
{'learning_rate': 0.1, 'l1': 1e-05, 'l2': 0.0001, 'dropout_rate': 0.1, 'batch_size': 4, 'n_epochs': 50, 'activation': <class 'src.activation_functions.Activation_Tanh'>} 

In [20]:
# n_epochs = 350
# batch_size = 1


# train_losses = []
# train_accuracies = []
# val_losses = []
# val_accuracies = []

# # early_stopping = EarlyStopping(
# #     patience=20,
# #     min_delta_loss=0.0001,
# #     min_delta_accuracy=0.0001,
# #     restore_best_weights=True
# # )

# # Training loop
# for epoch in range(n_epochs):
#     batch_losses = []
#     batch_accuracies = []
    
#     # Mini-batch training
#     for X_batch, y_batch in create_batches(X_train, y_train, batch_size):
#         # Forward pass
#         dense1.forward(X_batch)
#         activation4.forward(dense1.output)
#         dense2.forward(activation4.output)
#         loss = loss_activation.forward(dense2.output, y_batch)
        
#         # Calculate accuracy for this batch
#         predictions = np.argmax(loss_activation.output, axis=1)
#         if len(y_batch.shape) == 2:
#             y_true = np.argmax(y_batch, axis=1)
#         else:
#             y_true = y_batch
#         accuracy = np.mean(predictions == y_true)
        
#         # Backward pass
#         loss_activation.backward(loss_activation.output, y_batch)
#         dense2.backward(loss_activation.dinputs)
#         activation4.backward(dense2.dinputs)
#         dense1.backward(activation4.dinputs)
        
#         # Update weights and biases
#         optimizer.pre_update_params()
#         optimizer.update_params(dense1)
#         optimizer.update_params(dense2)
#         optimizer.post_update_params()
        
#         batch_losses.append(loss)
#         batch_accuracies.append(accuracy)
    
#     # Calculate epoch-level training metrics
#     epoch_loss = np.mean(batch_losses)
#     epoch_accuracy = np.mean(batch_accuracies)
#     train_losses.append(epoch_loss)
#     train_accuracies.append(epoch_accuracy)

#     # Validation pass (entire validation dataset)
#     dense1.forward(X_val)
#     activation4.forward(dense1.output)
#     dense2.forward(activation4.output)
#     val_loss = loss_activation.forward(dense2.output, y_val)
    
#     # Calculate validation accuracy
#     val_predictions = np.argmax(loss_activation.output, axis=1)
#     if len(y_val.shape) == 2:
#         y_val_true = np.argmax(y_val, axis=1)
#     else:
#         y_val_true = y_val
#     val_accuracy = np.mean(val_predictions == y_val_true)
    
#     # Append validation metrics
#     val_losses.append(val_loss)
#     val_accuracies.append(val_accuracy)
    
#     # early_stopping.on_epoch_end(
#     #     current_loss=val_loss,
#     #     current_accuracy=val_accuracy,
#     #     model=[dense1, dense2], 
#     #     epoch=epoch
#     # )
#     # if early_stopping.stop_training:
#     #     print(f"Early stopping at epoch {epoch}")
#     #     break
    
#     # Print progress
#     if not epoch % 100:
#         print(f"epoch: {epoch}, "
#               f"train_acc: {epoch_accuracy:.3f}, train_loss: {epoch_loss:.3f}, "
#               f"val_acc: {val_accuracy:.3f}, val_loss: {val_loss:.3f}, "
#               f"learning_rate: {optimizer.current_learning_rate}")

# plot_accuracies(train_accuracies, val_accuracies, label1="Training Accuracies", label2="Validation Accuracies", title="Accuracy Over Epochs")
# plot_accuracies(train_losses, val_losses, label1="Training Loss", label2="Validation Loss", title="Loss Over Epochs")