In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits made to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import os

# Expose the local ./src folder to the import system so that its modules can
# be imported by bare name; nothing is added when the entry is already there.
src_path = os.path.join(os.getcwd(), 'src')
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)

In [None]:
import ast
import csv
import itertools
from itertools import product
import numpy as np
import pandas as pd

from loss_functions import *
from src.activation_functions import *
from src.batch_normalization import *
from src.data_preprocessing import *
from src.cascade_correlation import CascadeCorrelation
from src.k_fold_cross_validation import *
from src.layer import *
from src.early_stopping import EarlyStopping
from src.neural_network import *
from src.optimizers import *
from src.random_search import *
from src.train_and_evaluate import Train
from src.utils import *

# Seed NumPy's global random generator so that code drawing from it produces
# the same numbers on every fresh run of the notebook.
np.random.seed(0)

#### Data pre-processing for MONK datasets

In [None]:
# Select which MONK problem this run works on, then read its two splits with
# the project's `load_data` helper: the default call feeds the training
# names, the `train=False` call feeds the test names.
MONK_NUM = 1
(X_train, y_train), (X_test, y_test) = (
    load_data(MONK_NUM=MONK_NUM),
    load_data(MONK_NUM=MONK_NUM, train=False),
)

In [None]:
# Carve a validation subset out of the training data and convert every split
# to a NumPy array.
# NOTE(review): features and targets are split by two independent calls; rows
# stay paired only if `train_test_split` cuts deterministically without
# shuffling — confirm against its implementation.
# NOTE(review): this cell overwrites X_train / y_train with their own split,
# so running it a second time splits the already reduced training set again.
X_train, X_val = (np.asarray(part) for part in train_test_split(X_train))
y_train, y_val = (np.asarray(part) for part in train_test_split(y_train))
X_test, y_test = np.asarray(X_test), np.asarray(y_test)

In [None]:
# Show how many samples ended up in the training and validation subsets.
print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")

In [None]:
# Print the shape of the resulting datasets, one labelled line per split so
# the four outputs can be told apart.
# A named loop variable is used instead of `_`: IPython reserves `_` for the
# most recent cell output, and assigning to it hides that value.
for split_name, split_array in [
    ("X_train", X_train),
    ("X_val", X_val),
    ("y_train", y_train),
    ("y_val", y_val),
]:
    print(f"{split_name} shape: {split_array.shape}")

In [None]:
# Search space handed to the random search: every key maps to the list of
# candidate values for that hyperparameter. Key order is kept as-is on purpose.
param_distributions = {
    # 30 log-spaced values between 1e-3 and 1e-1.
    "learning_rate": np.logspace(-3, -1, num=30).tolist(),
    # 20 log-spaced values between 1e-5 and 1e-1, generated separately per key.
    "l1": np.logspace(-5, -1, num=20).tolist(),
    "l2": np.logspace(-5, -1, num=20).tolist(),
    "dropout_rate": np.logspace(-5, -1, num=20).tolist(),
    "batch_size": [8, 16, 32],
    "n_epochs": [50, 100],
    "hidden_size": [3, 4, 5, 6],
    # Hidden activations are listed as classes (not instances).
    "hidden_activation": [
        Activation_Tanh,
        Activation_Leaky_ReLU,
        Activation_Sigmoid,
        Activation_ReLU,
    ],
    "batch_norm": [False],
    "weight_decay": [0, 5e-2, 1e-2, 1e-3, 1e-5],
    "patience": [10, 30, 50],
    # Single-valued entries: these settings are fixed for the whole search.
    "n_h_layers": [1],
    "weights_init": ["he"],
    # Unlike the hidden activations, this is an already constructed instance.
    "output_activation": [Activation_Sigmoid()],
    "sched_decay": [1],
}

# Size of the search space as computed by the project's `count_permutations`.
print(
    f"Number of possible combinations: {count_permutations(param_distributions)}"
)

In [None]:
# Run the project's random search on the training split. It returns the
# selected configuration together with its performance; a MONK-specific CSV
# path is passed along for the search results.
best_hyperparams, best_performance = random_search(
    X_train=X_train,
    y_train=y_train,
    param_distributions=param_distributions,
    n_iters=500,  # adjust n_iters as needed
    csv_path=f"monk{MONK_NUM}_top5res.csv",
)

print(best_hyperparams)

In [None]:
# Show the configuration selected by the random search.
print(best_hyperparams)

#### Final model: train with the best random-search hyperparameters and evaluate on the test set

In [None]:
# Build the final model from the configuration chosen by the random search.
# The 'CC' switch is read with `.get`: the search space defined in this
# notebook has no 'CC' entry, so a missing key now selects the plain NN
# instead of raising KeyError.
# NOTE(review): input_size=17 is hard-coded — presumably the width of the
# encoded MONK feature matrix; confirm against X_train.shape[1].
if best_hyperparams.get('CC', False):
    model = CascadeCorrelation(
        input_size=17,
        output_size=1,
        activation=Activation_Leaky_ReLU,
        output_activation=Activation_Sigmoid,
    )
else:
    model = NN(
        l1=best_hyperparams['l1'],
        l2=best_hyperparams['l2'],
        input_size=17,
        hidden_size=best_hyperparams['hidden_size'],
        output_size=1,
        hidden_activation=best_hyperparams['hidden_activation'],
        dropout_rate=best_hyperparams['dropout_rate'],
        use_batch_norm=best_hyperparams['batch_norm'],
        n_h_layers=best_hyperparams['n_h_layers'],
        output_activation=best_hyperparams['output_activation'],
        weights_init=best_hyperparams['weights_init']
    )

# Fit on the training split while monitoring the validation split, then score
# the fitted model on the test split and plot the recorded curves.
train = Train(best_hyperparams, model)
train.train_and_evaluate(X_train, y_train, X_val, y_val)
train.test(X_test, y_test)
train.plot(score=True)

In [None]:
# Dump the training and validation loss histories stored on the trainer.
print(train.train_losses, train.val_losses)

In [None]:
# Report the trainer's `test_score` attribute with four decimals
# (presumably filled in by the earlier `train.test(...)` call — confirm).
print(f"Test Accuracy: {train.test_score:.4f}")

In [None]:
class EnsembleNN:
    """Majority-vote ensemble of identically configured `NN` models.

    Every member is built from the same hyperparameter dictionary and fitted
    through the project's `Train` helper. `predict` converts each member's
    output into a class label and returns the most frequent label per sample.
    """

    def __init__(self, n_models=5, input_size=17, output_size=1):
        """Create an empty ensemble.

        Args:
            n_models: number of networks to train.
            input_size: number of input features given to every network
                (default 17, the value that used to be hard-coded).
            output_size: number of output units of every network
                (default 1, the value that used to be hard-coded).
        """
        self.models = []
        self.n_models = n_models
        self.input_size = input_size
        self.output_size = output_size
        # Kept so existing code reading this attribute keeps working;
        # `predict` no longer needs it.
        self.loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()
        # Trainer of the most recently fitted member (set while training).
        self.train = None

    def create_and_train_models(self, hyperparams):
        """Build and fit `n_models` networks that all share `hyperparams`.

        The training and validation arrays are read from the notebook-level
        names `X_train`, `y_train`, `X_val` and `y_val`. Models left over from
        a previous call are discarded, so re-running the training cell does
        not grow the ensemble beyond `n_models`.

        Args:
            hyperparams: dict providing 'l1', 'l2', 'hidden_size',
                'hidden_activation', 'dropout_rate', 'batch_norm' and
                'n_h_layers'. 'output_activation' and 'weights_init' are
                forwarded to `NN` when present, matching how the single
                final model is built elsewhere in this notebook.
        """
        self.models = []

        # Forwarded only when available so dictionaries without these keys
        # still fall back to the NN defaults, as before.
        optional_kwargs = {
            key: hyperparams[key]
            for key in ('output_activation', 'weights_init')
            if key in hyperparams
        }

        for i in range(self.n_models):
            model = NN(
                l1=hyperparams['l1'],
                l2=hyperparams['l2'],
                input_size=self.input_size,
                hidden_size=hyperparams['hidden_size'],
                output_size=self.output_size,
                hidden_activation=hyperparams['hidden_activation'],
                dropout_rate=hyperparams['dropout_rate'],
                use_batch_norm=hyperparams['batch_norm'],
                n_h_layers=hyperparams['n_h_layers'],
                **optional_kwargs
            )
            print(f"Training model {i+1}/{self.n_models}")
            # `self.train` is reassigned on every iteration, so after the
            # loop it refers to the trainer of the last member only.
            self.train = Train(hyperparams, model)
            model, val_accuracy = self.train.train_and_evaluate(
                X_train=X_train,
                y_train=y_train,
                X_val=X_val,
                y_val=y_val,
            )
            self.models.append(model)
            print(f"Model {i+1} validation accuracy: {val_accuracy:.4f}")

    def predict(self, X, threshold=0.5):
        """Make predictions using majority voting.

        Args:
            X: input samples, passed unchanged to each model's `forward`.
            threshold: cut-off applied to single-column outputs; scores
                greater than or equal to it become class 1.

        Returns:
            1-D integer array with one voted label per sample. Ties go to
            the smallest label.

        Raises:
            ValueError: if the ensemble holds no trained models.
        """
        if not self.models:
            raise ValueError(
                "No trained models: call create_and_train_models() first."
            )

        votes = []
        for model in self.models:
            model.forward(X, training=False)
            scores = np.asarray(model.output)
            if scores.ndim == 1 or scores.shape[1] == 1:
                # A single output column cannot be argmax-ed (the result is
                # always 0), so the score is thresholded instead.
                # Assumes the score is a probability-like value in [0, 1]
                # (e.g. a sigmoid output) — TODO confirm for other activations.
                labels = (scores.reshape(-1) >= threshold).astype(int)
            else:
                # Softmax is monotonic, so taking argmax of the raw output
                # selects the same class as argmax of its softmax.
                labels = np.argmax(scores, axis=1)
            votes.append(labels)

        # Rows are models, columns are samples: vote down each column.
        votes = np.array(votes)
        final_predictions = np.apply_along_axis(
            lambda column: np.bincount(column).argmax(),
            axis=0,
            arr=votes
        )
        return final_predictions

In [None]:
# Train a five-member ensemble with the configuration chosen by the search.
ensemble = EnsembleNN(n_models=5)

ensemble.create_and_train_models(best_hyperparams)

# `ensemble.train` is reassigned for every member during training, so here it
# is the trainer of the last fitted member. The figure below is therefore that
# single member's test score, not a majority-vote score, and is labelled so.
# The unused first return value gets a regular name rather than `_`, which
# IPython reserves for the most recent cell output.
_ignored, test_accuracy = ensemble.train.test(X_test, y_test)

print(f"Last ensemble member test accuracy: {test_accuracy:.4f}")

ensemble.train.plot(score=True)