# In [675]:
import numpy as np
import random
from tqdm import tqdm
import itertools
from dataset import dataset
from joblib import Parallel, delayed
from additional_functions import process_all
from additional_functions import load_arrays_from_csv

# Learning Algorithms

# In [676]:
class backprop_nn:
    def __init__(self, data: dataset, prediction_type_flag: str, hidden_layer_count=0, network_shape=[], hidden_node_count=1, epochs=100, momentum=.9, learning_rate=.01, batch_size=10):
        self.epochs = epochs
        self.momentum = momentum
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.hidden_layer_count = hidden_layer_count
        if hidden_layer_count == 0:
            hidden_node_count = 0
        else:
            hidden_node_count = [hidden_node_count] * hidden_layer_count
        self.tune_set = data.tune_set
        self.validate_set = data.validate_set
        self.prediction_type = prediction_type_flag

        if self.prediction_type == "classification":
            self.class_count = len(np.unique(self.tune_set[:,-1]))
        else:
            self.class_count = 0

        input_size = self.tune_set.shape[1] - 1
        self.input_size = input_size
        if network_shape == []:
            self.network_shape = [input_size] + (hidden_node_count if hidden_node_count else []) + ([self.class_count] if (self.prediction_type == "classification") else [1])
        else:
            self.network_shape = network_shape
        self.biases = []
        self.weights = []
        self.bias_velocity = []
        self.weight_velocity = []
    def init_weights_biases_momentum(self):
        '''
        Initializes weights randomly based on the network shape list
        '''
        self.biases = [np.random.randn(next_size, 1) for next_size in self.network_shape[1:]]
        self.weights = [np.random.randn(next_size, cur_size) for cur_size, next_size in zip(self.network_shape[:-1], self.network_shape[1:])]
        self.bias_velocity = [np.zeros(bias.shape) for bias in self.biases]
        self.weight_velocity = [np.zeros(weight.shape) for weight in self.weights]
    def for_prop(self, input: np):
        '''
        Feeds forward a single example through the network
        '''
        output = input
        for bias, weight in zip(self.biases[:-1], self.weights[:-1]):
            output = self.sigmoid(np.dot(weight, output) + bias)        #for each weight calculate the output of the activation function
                
        bias, weight = self.biases[-1], self.weights[-1]
        #For regression, use a linear combination for output activation
        #For classification, use a softmax output activation
        output = (np.dot(weight, output) + bias)    
        if self.prediction_type == "classification":
            output = self.softmax(output)
        return output
    def get_training_data(self, i: int):
        '''
        method needs to take in training data and compile 9 of the 10 folds (not fold I) into an array
        we then want to format the data as follows: each example = (attributes, label)
        I is used to indicate which fold is the hold out fold
        '''
        desired_data = np.concatenate([self.validate_set[j] for j in range(10) if j != i])  #Get all folds other than fold I and compile into its own array
        training_data = [(example[:-1], example[-1]) for example in desired_data]   #Format properly
        return training_data
    def get_testing_data(self, i: int):
        '''
        method needs to take in training data and compile 1 of the 10 folds (fold I) into an array
        Then format the data as follows: each example = (attributes, label)
        i is used to indicate which training set you want returned
        '''
        desired_data = self.validate_set[i]         #Get the test set
        testing_data = [(example[:-1], example[-1]) for example in desired_data] #Format properly
        return testing_data
    def get_tuning_data(self):
        '''
        method needs to take in the tuning set and properly format it
        Then format the data as follows: each example = (attributes, label)
        i is used to indicate which training set you want returned
        '''
        desired_data = self.tune_set  #Get the tuning set
        tuning_data = [(example[:-1], example[-1]) for example in desired_data] #Format properly
        return tuning_data
    def grad_desc(self, training_data, epochs, momentum, learning_rate, batch_size):
        # ***THINK OF THIS METHOD AS TRAINING VIA BACKPROPOGATION*** #
        '''
        Takes in a traing set from get_training_data. The format is a list of tuples, where each tuple
        represents an example. Within each tuple the first value is the feature vector and the second
        value is the label.

        We want to complete mini batch gradient descent

        This method effectively trains the model
        '''
        example_count = len(training_data)
        for epoch in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+batch_size] for k in range(0, example_count, batch_size)]         #Form the mini batches based on the batch size hyperparameters
            for mini_batch in mini_batches:
                self.update_weights(mini_batch, momentum, learning_rate)                                        #for each mini batch, update the weights
    def update_weights(self, mini_batch, momentum, learning_rate):
        '''
        This method takes in a mini batch and the momentum and learning rate hyperparameters. It needs to compute the gradient
        for the biases and the weights, use the gradients to calculate the velocities, and use the velocities to update the weights
        and biases.
        '''
        bias_gradient = [np.zeros(bias.shape) for bias in self.biases]          #initialize an empty array to store the gradients
        weight_gradient = [np.zeros(weight.shape) for weight in self.weights]

        # Compute gradients for the mini-batch
        for feature, label in mini_batch:           
            if not np.isnan(label):
                delta_bias_gradient, delta_weight_gradient = self.epoch(feature, label)
                bias_gradient = [gradient + delta for gradient, delta in zip(bias_gradient, delta_bias_gradient)]
                weight_gradient = [gradient + delta for gradient, delta in zip(weight_gradient, delta_weight_gradient)]
        
        # Update velocities and apply updates with momentum
        self.bias_velocity = [momentum * velocity - (learning_rate / len(mini_batch)) * gradient for velocity, gradient in zip(self.bias_velocity, bias_gradient)]
        self.weight_velocity = [momentum * velocity - (learning_rate / len(mini_batch)) * gradient for velocity, gradient in zip(self.weight_velocity, weight_gradient)]

        # Update weights and biases
        self.biases = [bias + velocity for bias, velocity in zip(self.biases, self.bias_velocity)]
        self.weights = [bias + velocity for bias, velocity in zip(self.weights, self.weight_velocity)]
    def epoch(self, feature, label):
        '''
        This method should complete one full cycle of forward propogation, loss calculation, calculate gradients based on loss
        and back propogation to update weights. 
        '''
        
        bias_gradient = [np.zeros(bias.shape) for bias in self.biases]
        weight_gradient = [np.zeros(weight.shape) for weight in self.weights]
        #forprop
        activation = feature
        activations = [feature] # list to store all the activations
        weighted_inputs = []
        for bias, weight in zip(self.biases[:-1], self.weights[:-1]):
            weighted_input = np.dot(weight, activation.reshape(-1,1)) + bias
            activation = self.sigmoid(weighted_input)
            weighted_inputs.append(weighted_input)
            activations.append(activation)
        # The output layer uses different activation functions
        bias, weight = self.biases[-1], self.weights[-1]
        weighted_input = np.dot(weight, activation.reshape(-1,1)) + bias
        #weighted_input = np.dot(weight, activation)
        activation = weighted_input
        if self.prediction_type == "classification":
            activation = self.softmax(weighted_input)
        weighted_inputs.append(weighted_input)
        activations.append(activation)

    


        # backprop
       
        if self.prediction_type == "classification":
            one_hot_label = [0] * self.class_count
            one_hot_label[int(label)] = 1
            one_hot_label = np.array(one_hot_label).reshape(-1, 1)
        else:
            one_hot_label = label       
        delta = (activations[-1] - one_hot_label) # * self.softmax(weighted_inputs[-1])
        bias_gradient[-1] = delta
        
        weight_gradient[-1] = np.dot(delta, activations[-2].reshape(1,-1))

        for layer_idx in range(2, len(self.network_shape)):
            weighted_input = weighted_inputs[-layer_idx]
            activation_prime = self.sigmoid_prime(weighted_input)
            delta = np.dot(self.weights[-layer_idx+1].transpose(), delta) * activation_prime
            
            if delta.shape == (1,1):
                bias_gradient[-layer_idx] = delta.reshape(-1)
                weight_gradient[-layer_idx] = (delta.reshape(-1) * activations[-layer_idx-1].transpose())
            else:
                bias_gradient[-layer_idx] = delta
                weight_gradient[-layer_idx] = np.dot(delta, activations[-layer_idx-1].reshape(1,-1))       
        return (bias_gradient, weight_gradient)
    def tune(self):
        '''
        Tunes the hyperparameters one at a time, storing each winner on the instance before
        the next one is tuned. The order is: hidden node counts (only when there are hidden
        layers; every combination across the hidden layers is tried), epochs, momentum,
        learning rate and batch size.

        Each candidate is scored with train_test(tuning_flag=True) and its scores are averaged.
        The highest mean wins for classification, the lowest mean wins for regression.

        Returns [network_shape, epochs, momentum, learning_rate, batch_size] after tuning.
        '''
        hidden_node_vals = [1, 3, 5, 7, 9]
        epoch_vals = [10, 50, 100, 200, 500]
        momentum_vals = [0.5, 0.7, 0.9, 0.95, 0.99]
        learning_rate_vals = [0.0001, 0.001, 0.01, 0.1, 1.0]
        batch_size_vals = [16, 32, 64, 128, 256]

        classifying = self.prediction_type == "classification"
        output_layer = [self.class_count] if classifying else [1]

        def best_of(candidates, mean_scores):
            # Returns the candidate whose mean score is best for the current prediction type
            mean_scores = np.array(mean_scores)
            winner = np.argmax(mean_scores) if classifying else np.argmin(mean_scores)
            return candidates[winner]

        def sweep(name, values, desc):
            # Tries every value for one hyperparameter while the others keep their current values
            mean_scores = []
            for value in tqdm(values, desc=desc, leave=False):
                settings = {
                    "epochs": self.epochs,
                    "momentum": self.momentum,
                    "learning_rate": self.learning_rate,
                    "batch_size": self.batch_size,
                }
                settings[name] = value
                fold_scores = self.train_test(tuning_flag=True, **settings)
                mean_scores.append(np.mean(fold_scores))
            return best_of(values, mean_scores)

        # Hidden node count tuning: grid search over every combination of node counts
        if self.hidden_layer_count > 0:       #skip if there are no hidden layers
            hidden_node_combinations = list(itertools.product(hidden_node_vals, repeat=self.hidden_layer_count))
            combination_scores = []
            for combination in tqdm(hidden_node_combinations, desc="Tuning Hidden Node Count", leave=False):
                self.network_shape = [self.input_size] + list(combination) + output_layer
                fold_scores = self.train_test(tuning_flag=True, epochs=self.epochs, momentum=self.momentum, learning_rate=self.learning_rate, batch_size=self.batch_size)
                combination_scores.append(np.mean(fold_scores))
            best_combination = best_of(hidden_node_combinations, combination_scores)
            self.network_shape = [self.input_size] + list(best_combination) + output_layer
            print(f"Tuned Network Shape: {self.network_shape}")

        # Epoch tuning
        self.epochs = sweep("epochs", epoch_vals, "Tuning Epochs")
        print(f"Tuned Epoch Value: {self.epochs}")

        # Momentum tuning
        self.momentum = sweep("momentum", momentum_vals, "Tuning Momentum")
        print(f"Tuned Momentum Value: {self.momentum}")

        # Learning rate tuning
        self.learning_rate = sweep("learning_rate", learning_rate_vals, "Tuning Learning Rate")
        print(f"Tuned Learning Rate: {self.learning_rate}")

        # Batch size tuning
        self.batch_size = sweep("batch_size", batch_size_vals, "Tuning Batch Size")
        print(f"Tuned Batch Size: {self.batch_size}")

        return [self.network_shape, self.epochs, self.momentum, self.learning_rate, self.batch_size]
    def train_test(self, tuning_flag: bool, epochs=100, momentum=.9, learning_rate=.01, batch_size=10):
        '''
        This method should take in the hyperparameters determined during tuning. It should use those hyperparameter
        values to train and test the model and return the calculated loss scores
        '''
        scores = []
        if tuning_flag:
            for i in range(10):
                self.init_weights_biases_momentum()     
                self.grad_desc(self.get_training_data(i), epochs, momentum, learning_rate, batch_size)
                score = self.loss(self.get_tuning_data())
                scores.append(score)
        else:
            for i in tqdm(range(10), desc="Evaluating Test Data", leave=False):
                self.init_weights_biases_momentum()
                self.grad_desc(self.get_training_data(i), self.epochs, self.momentum, self.learning_rate, self.batch_size)
                score = self.loss(self.get_testing_data(i))
                scores.append(score)
        return np.array(scores)
    def loss(self, test_data):
        '''
        This method calculates the loss based on our evaluation metrics
        For classification: 0/1 loss
        For regression: Mean squared error
        '''
        if self.prediction_type == "classification":
            results = [(np.argmax(self.for_prop(example)), label) for (example, label) in test_data if not np.isnan(label)]
            correct_results = sum(int(example == label) for (example, label) in results)
            total_examples = len(results)
            return correct_results / total_examples
        else:
            results = [(self.for_prop(x), y) for (x, y) in test_data if not np.isnan(y)]
            # Ensure predictions and labels are both 1D arrays of the same length
            predictions = np.array([prediction.flatten()[0] if prediction.size == 1 else np.argmax(prediction) for (prediction, label) in results], dtype=float)
            labels = np.array([label for (prediction, label) in results], dtype=float)

            # Calculate MSE
            mse = np.mean((predictions - labels) ** 2)
            return mse
    def sigmoid(self, input: np):       #used the logistic function as our activation
        return 1.0/(1.0+np.exp(-input))
    def sigmoid_prime(self, input: np): #derivative of the logistic function
        return self.sigmoid(input)*(1-self.sigmoid(input))
    def softmax(self, input):   #softmax for output activation during classification
        exp = np.exp(input - np.max(input))
        return exp / np.sum(exp)    

# In [677]:
class DE_nn(backprop_nn):
    def __init__(self, data: dataset, prediction_type_flag: str, network_shape: list, population_size=50, epochs=100, scaling_factor=0.7, crossover_rate=0.7):
        '''
        Stores the differential evolution hyperparameters, the data splits and the fixed
        network architecture. The parent initializer is not called.

        network_shape: list of layer sizes used for every candidate model
        population_size, epochs, scaling_factor, crossover_rate: defaults used when tuning is
        not called
        '''
        # Differential evolution hyperparameters
        self.pop_size, self.epochs = population_size, epochs
        self.f, self.cr = scaling_factor, crossover_rate

        # Data splits and prediction type
        self.tune_set, self.validate_set = data.tune_set, data.validate_set
        self.prediction_type = prediction_type_flag

        # One class per distinct label in the tuning set; unused (0) for regression
        classifying = self.prediction_type == "classification"
        self.class_count = len(np.unique(self.tune_set[:,-1])) if classifying else 0

        self.input_size = self.tune_set.shape[1] - 1
        self.network_shape = network_shape

        # Filled in by init_population() and evolve()
        self.population = []
        self.model = ()
    def init_population(self, pop_size):
        '''
        Replaces self.population with pop_size random candidates. Every candidate is a
        (weights, biases) tuple of standard-normal arrays shaped after self.network_shape.
        '''
        layer_sizes = self.network_shape
        layer_pairs = list(zip(layer_sizes[:-1], layer_sizes[1:]))

        candidates = []
        for _ in range(pop_size):
            # Biases are drawn before weights for every candidate
            biases = [np.random.randn(fan_out, 1) for fan_out in layer_sizes[1:]]
            weights = [np.random.randn(fan_out, fan_in) for fan_in, fan_out in layer_pairs]
            candidates.append((weights, biases))
        self.population = candidates
    def donor_vector(self, f):
        '''
        Builds a donor (mutated) candidate from three randomly sampled members of the
        population using V = X_1 + f * (X_2 - X_3), applied to every weight matrix and
        every bias vector. The three candidates and the result are printed.

        Returns the donor as a (weights, biases) tuple.
        '''
        # Sample three distinct population members
        candidates = random.sample(self.population, 3)
        (weights_1, biases_1), (weights_2, biases_2), (weights_3, biases_3) = candidates

        # Apply the mutation equation layer by layer
        new_weights = [w1 + f * (w2 - w3) for w1, w2, w3 in zip(weights_1, weights_2, weights_3)]
        new_biases = [b1 + f * (b2 - b3) for b1, b2, b3 in zip(biases_1, biases_2, biases_3)]

        print("DE Mutation Demonstration (Based on V_i = X_1 + F * (X_2 - X_3)):")
        print(f"Candidate 1:\n{candidates[0]}")
        print(f"Candidate 2:\n{candidates[1]}")
        print(f"Candidate 3:\n{candidates[2]}")
        print(f"Mutated Candidate:\n{(new_weights, new_biases)}")

        return (new_weights, new_biases)
    def trial_vector(self, target_candidate, f, cr):
        '''
        Builds a trial candidate by crossing the target candidate with a new donor vector.
        Every individual weight and bias is taken from the donor when a uniform random draw
        is below cr, and from the target otherwise. Both parents and the result are printed.

        Returns the trial candidate as a (weights, biases) tuple.
        '''
        def cross(target_layers, donor_layers):
            # Element-wise crossover of two lists of equally shaped arrays
            crossed = []
            for target_layer, donor_layer in zip(target_layers, donor_layers):
                take_donor = np.random.rand(*target_layer.shape) < cr
                crossed.append(np.where(take_donor, donor_layer, target_layer))
            return crossed

        target_weights, target_biases = target_candidate
        donor_vector = self.donor_vector(f)
        donor_weights, donor_biases = donor_vector

        # Weights are crossed before biases
        trial_weights = cross(target_weights, donor_weights)
        trial_biases = cross(target_biases, donor_biases)

        print()
        print()
        print("DE Crossover Demonstration:")
        print(f"Parent 1:\n{target_candidate}")
        print(f"Parent 2:\n{donor_vector}")
        print(f"Crossover Result:\n{(trial_weights, trial_biases)}")

        return (trial_weights, trial_biases)
    def evaluate_fitness(self, test_data, target, trial):
        '''
        Scores the target and the trial on test_data with loss() and returns the better model:
        the higher score for classification, the lower score for regression.
        The target is returned when both scores are equal.
        '''
        target_fitness = self.loss(test_data, target)
        trial_fitness = self.loss(test_data, trial)

        # loss() returns the fraction correct for classification and MSE for regression
        pick_best = max if self.prediction_type == "classification" else min
        if pick_best(target_fitness, trial_fitness) == target_fitness:
            return target
        return trial
    def for_prop(self, input: np, weights, biases):
        '''
        Feeds forward a single example through the network described by the given
        weights and biases and returns the output activation.

        input: feature vector of one example. A 1-D vector is promoted to a column vector.
        weights, biases: per-layer parameter lists of the candidate model to evaluate
        Returns the output of the last layer: softmax probabilities for classification,
        the plain linear combination otherwise. For a 1-D input this is a column of
        shape (output nodes, 1).
        '''
        output = np.asarray(input)
        if output.ndim == 1:
            # Biases are (n, 1) columns. Adding one to a 1-D (n,) product broadcasts to an
            # (n, n) matrix and corrupts every later layer, so use a column vector instead.
            output = output.reshape(-1, 1)

        for bias, weight in zip(biases[:-1], weights[:-1]):
            output = self.sigmoid(np.dot(weight, output) + bias)        #hidden layers use the logistic activation

        bias, weight = biases[-1], weights[-1]
        #For regression, use a linear combination for output activation
        #For classification, use a softmax output activation
        output = (np.dot(weight, output) + bias)
        if self.prediction_type == "classification":
            output = self.softmax(output)
        return output
    def loss(self, test_data, model):
        '''
        Scores a candidate model, given as a (weights, biases) tuple, on a list of
        (attributes, label) examples. Examples whose label is NaN are ignored.
        For classification: returns the fraction of examples predicted correctly (higher is better)
        For regression: returns the mean squared error (lower is better)
        '''
        weights, biases = model
        labelled = [(x, y) for (x, y) in test_data if not np.isnan(y)]

        if self.prediction_type == "classification":
            hits = 0
            for x, y in labelled:
                predicted_class = np.argmax(self.for_prop(x, weights, biases))
                hits += int(predicted_class == y)
            return hits / len(labelled)

        # Regression: reduce every network output to one number per example
        predictions = []
        for x, _ in labelled:
            raw = self.for_prop(x, weights, biases)
            predictions.append(raw.flatten()[0] if raw.size == 1 else np.argmax(raw))
        predictions = np.array(predictions, dtype=float)
        labels = np.array([y for _, y in labelled], dtype=float)

        squared_errors = (predictions - labels) ** 2
        return np.mean(squared_errors)
    def best_candidate(self, test_data):
        '''
        Scores every member of the population on test_data and returns the best one:
        the highest score for classification, the lowest score for regression.
        '''
        scores = np.array([self.loss(test_data, candidate) for candidate in self.population])
        locate_best = np.argmax if self.prediction_type == "classification" else np.argmin
        return self.population[locate_best(scores)]
    def evolve(self, test_data, epochs, f, cr):
        '''
        Evolves the population for the given number of generations. In every generation each
        candidate competes against a trial vector built from it, and the winner according to
        evaluate_fitness takes its place. The population is replaced only after all of its
        members have been processed. Afterwards the best candidate is stored in self.model.

        test_data: examples used to score the candidates
        f, cr: scaling factor and crossover rate passed on to trial_vector
        '''
        for _ in range(epochs):
            survivors = []
            for parent in self.population:
                challenger = self.trial_vector(parent, f, cr)
                survivors.append(self.evaluate_fitness(test_data, parent, challenger))
            self.population = survivors
        self.model = self.best_candidate(test_data)
    def train_test(self, tuning_flag: bool, pop_size=50, epochs=100, f=0.7, cr=0.5, n_jobs=12):
        '''
        Evolves one model per fold (fold indices 0 through 9) in parallel and returns the
        ten resulting scores from loss() as an array.

        tuning_flag=True: evolve with the pop_size, epochs, f and cr passed as arguments and
                          score every model on the tuning set
        tuning_flag=False: those arguments are ignored; evolve with the hyperparameters stored
                           on the instance and score every model on its own hold-out fold
        n_jobs: number of parallel joblib workers (previously fixed at 12)

        NOTE(review): with a process-based joblib backend every worker presumably operates on
        its own copy of self, so self.population and self.model of the calling instance would
        not be updated by this method - confirm before relying on them afterwards.
        '''

        # Work done for a single fold; runs inside a joblib worker
        def train_single_model(i):
            if tuning_flag:
                self.init_population(pop_size)
                self.evolve(self.get_training_data(i), epochs, f, cr)
            else:
                self.init_population(self.pop_size)
                self.evolve(self.get_training_data(i), self.epochs, self.f, self.cr)
            return self.loss(self.get_tuning_data() if tuning_flag else self.get_testing_data(i), self.model)

        # Parallel execution using joblib
        scores = Parallel(n_jobs=n_jobs)(
            delayed(train_single_model)(i)
            for i in tqdm(range(10), desc="Evaluating Models", leave=False)
        )
        return np.array(scores)
    def tune(self, tuning_pop_size=False, tuning_epochs=True, tuning_f=True, tuning_cr=True):
        '''
        Tunes up to four hyperparameters one at a time, in this order: population size,
        epochs, scaling factor and crossover rate. Each flag switches the tuning of one
        hyperparameter on or off, and each winner is stored on the instance before the next
        hyperparameter is tuned.

        Each candidate is scored with train_test(tuning_flag=True) and its scores are averaged.
        The highest mean wins for classification, the lowest mean wins for regression.

        Returns [pop_size, epochs, f, cr] after tuning.
        '''
        pop_size_vals = [10, 50, 100, 200]
        epoch_vals = [10, 50, 100, 200, 500]
        f_vals = [0.4, 0.5, 0.7, 0.9, 1.0]
        cr_vals = [0.1, 0.3, 0.5, 0.7, 0.9]

        classifying = self.prediction_type == "classification"

        def sweep(name, values, desc):
            # Tries every value for one hyperparameter while the others keep their current values
            mean_scores = []
            for value in tqdm(values, desc=desc, leave=False):
                settings = {"pop_size": self.pop_size, "epochs": self.epochs, "f": self.f, "cr": self.cr}
                settings[name] = value
                fold_scores = self.train_test(tuning_flag=True, **settings)
                mean_scores.append(np.mean(fold_scores))
            mean_scores = np.array(mean_scores)
            winner = np.argmax(mean_scores) if classifying else np.argmin(mean_scores)
            return values[winner]

        if tuning_pop_size:
            # Population size tuning
            self.pop_size = sweep("pop_size", pop_size_vals, "Tuning Population Size")
            print(f"Tuned Population Size: {self.pop_size}")

        if tuning_epochs:
            # Epoch tuning
            self.epochs = sweep("epochs", epoch_vals, "Tuning Epochs")
            print(f"Tuned Epoch Value: {self.epochs}")

        if tuning_f:
            # Scaling factor tuning
            self.f = sweep("f", f_vals, "Tuning Scaling Factor")
            print(f"Tuned Scaling Factor: {self.f}")

        if tuning_cr:
            # Crossover rate tuning
            self.cr = sweep("cr", cr_vals, "Tuning Crossover Rate")
            print(f"Tuned Crossover Rate: {self.cr}")

        return [self.pop_size, self.epochs, self.f, self.cr]

# In [678]:
class GA_nn(DE_nn):
    def __init__(self, data: dataset, prediction_type_flag: str, network_shape: list, population_size=50, epochs=100, selection_size=0.4, mutation_rate=0.05, crossover_rate=0.5):
        '''
        Initializes the hyperparameters, network architecture, etc. If tuning is not called, default hyperparameters are used
        '''
        self.pop_size = population_size
        self.epochs = epochs
        self.select_size = selection_size
        self.mr = mutation_rate
        self.cr = crossover_rate
        self.tune_set = data.tune_set
        self.validate_set = data.validate_set
        self.prediction_type = prediction_type_flag

        if self.prediction_type == "classification":
            self.class_count = len(np.unique(self.tune_set[:,-1]))
        else:
            self.class_count = 0
            
        input_size = self.tune_set.shape[1] - 1
        self.input_size = input_size
        self.network_shape = network_shape
        self.population = []
        self.model = ()
    def select_candidates(self, test_data, select_size):
        '''
        Scores every model in the population on 'test_data' and returns the
        'select_size' best and the 'select_size' worst of them.

        test_data: examples handed to self.loss for each candidate
        select_size: how many candidates to take from each end of the ranking
        Returns (top_candidates, bottom_candidates). Both lists are ordered from
        best to worst; two empty lists are returned when 'select_size' is not positive.
        '''
        # A non-positive size selects nobody. Without this guard the slice
        # [-0:] further down would hand back the ENTIRE population as "worst".
        if select_size <= 0:
            return [], []

        # Evaluate all candidates
        scores = np.array([
            self.loss(test_data, candidate)
            for candidate in tqdm(self.population, desc="Evaluating Population", leave=True)
        ])

        # Rank best-first: high scores win for classification, low scores otherwise
        sorted_indices = scores.argsort()
        if self.prediction_type == "classification":
            sorted_indices = sorted_indices[::-1]

        # Take the two ends of the ranking
        top_candidates = [self.population[i] for i in sorted_indices[:select_size]]
        bottom_candidates = [self.population[i] for i in sorted_indices[-select_size:]]

        return top_candidates, bottom_candidates
    def crossover(self, candidates, cr):
        '''
        Pairs the candidates up in order (0 with 1, 2 with 3, ...) and produces one
        child per pair using binomial crossover: every individual weight/bias is
        taken from the second parent with probability 'cr', otherwise from the first.

        candidates: list of (weights, biases) tuples of numpy arrays
        cr: crossover probability
        Returns the list of children. When the number of candidates is odd, the
        last candidate has no partner and is carried over as an unchanged copy
        (previously it was silently crossed with the partner of the preceding
        pair, or raised an error when it was the only candidate).
        '''
        def blend(first, second):
            # True entries of the mask pick the value from the second parent
            mask = np.random.rand(*first.shape) < cr
            return np.where(mask, second, first)

        new_candidates = []

        for i in range(0, len(candidates), 2):
            parent_1 = candidates[i]
            parent_1_weights, parent_1_biases = parent_1

            if i + 1 >= len(candidates):
                # Unpaired leftover: nothing to cross with, keep it as is
                new_candidates.append(([np.copy(w) for w in parent_1_weights],
                                       [np.copy(b) for b in parent_1_biases]))
                continue

            parent_2 = candidates[i + 1]
            parent_2_weights, parent_2_biases = parent_2

            # Weights are crossed first, then biases
            weights = [blend(w1, w2) for w1, w2 in zip(parent_1_weights, parent_2_weights)]
            biases = [blend(b1, b2) for b1, b2 in zip(parent_1_biases, parent_2_biases)]
            new_candidates.append((weights, biases))

            print(f"Parent 1:\n{parent_1}")
            print(f"Parent 2:\n{parent_2}")
            print(f"Crossover Result:\n{(weights, biases)}")

        # Return the crossed-over candidates
        return new_candidates
    def mutate(self, candidates, mr):
        '''
        Applies Gaussian mutation to a list of candidates.

        For every weight array and every bias array one random draw is made; when
        it falls below 'mr' the WHOLE array receives additive noise drawn from a
        normal distribution (mean 0, standard deviation 0.1). Otherwise the
        original array object is reused untouched.

        candidates: list of (weights, biases) tuples
        mr: mutation probability per array
        Returns a new list of (weights, biases) tuples in the same order.
        '''
        def maybe_perturb(layer):
            # Single draw per array decides whether it mutates
            if np.random.rand() < mr:
                return layer + np.random.normal(0, 0.1, size=layer.shape)
            return layer

        mutated_candidates = []
        for weights, biases in candidates:
            # Weights are processed before biases
            new_weights = [maybe_perturb(weight) for weight in weights]
            new_biases = [maybe_perturb(bias) for bias in biases]
            mutated_candidates.append((new_weights, new_biases))
            print(f"Original Candidate:\n{(weights, biases)}")
            print(f"Mutated Candidate:\n{(new_weights, new_biases)}")

        return mutated_candidates
    def selection(self, test_data, select_size, mr, cr):
        '''
        Builds the next generation: the best candidates are crossed over and
        mutated, and the resulting offspring take the place of the worst
        candidates. The returned population has the same size as the current one.

        test_data: examples used to rank the population
        select_size: number of best (parents) and worst (replaceable) candidates
        mr: mutation probability passed to mutate
        cr: crossover probability passed to crossover

        Crossover yields one child per pair of parents, so there are fewer
        offspring than replaceable candidates; the offspring replace the very
        worst candidates first and the remaining ones are kept. (Earlier the
        offspring were computed but never used: the worst candidates were
        overwritten with plain copies of the best ones instead.)
        '''
        # Function to check if two tuples containing lists of numpy arrays are equal
        def are_tuples_equal(tuple1, tuple2):
            if tuple1 is tuple2:
                return True
            weights_1, biases_1 = tuple1
            weights_2, biases_2 = tuple2
            if len(weights_1) != len(weights_2) or len(biases_1) != len(biases_2):
                return False
            return (all(np.array_equal(w1, w2) for w1, w2 in zip(weights_1, weights_2))
                    and all(np.array_equal(b1, b2) for b1, b2 in zip(biases_1, biases_2)))

        # Step 1: Select X best and X worst candidates
        top_candidates, bottom_candidates = self.select_candidates(test_data, select_size)
        # Step 2 + 3: Crossover the best candidates, then mutate the children
        offspring = self.mutate(self.crossover(top_candidates, cr), mr)
        # Step 4: Pair each child with a candidate to evict. bottom_candidates is
        # ordered best-to-worst, so walking it backwards evicts the worst first.
        pending = list(zip(reversed(bottom_candidates), offspring))

        new_population = []
        for candidate in self.population:
            for slot, (evicted, child) in enumerate(pending):
                if are_tuples_equal(candidate, evicted):
                    new_population.append(child)
                    # Each child enters the population exactly once
                    del pending[slot]
                    break
            else:
                # Not scheduled for replacement, keep the original item
                new_population.append(candidate)
        return new_population
    def evolve(self, test_data, epochs, select_size, mr, cr):
        '''
        Training loop: replaces the population with the result of one selection
        round, 'epochs' times in a row, then stores the best candidate of the
        final population in self.model.
        '''
        generation = 0
        while generation < epochs:
            # One generation = one full selection round
            self.population = self.selection(test_data, select_size, mr, cr)
            generation += 1
        self.model = self.best_candidate(test_data)
    def train_test(self, tuning_flag: bool, pop_size=50, epochs=100, select_size=0.4, mr=0.05, cr=0.5):
        '''
        Trains one model per fold (10 folds, run in parallel) and returns the 10
        resulting scores as a numpy array.

        tuning_flag: True  -> use the hyperparameters passed as arguments and score
                              each model on the tuning set
                     False -> use the hyperparameters stored on the instance and
                              score each model on the held-out fold
        select_size: number of candidates to select. A value strictly between 0
                     and 1 (such as the default 0.4) is treated as a fraction of
                     'pop_size'; it used to be passed on unchanged and failed
                     later when used as a slice bound.
        '''
        def to_count(size, population_size):
            # Fractions become a head count; counts pass through as integers
            if 0 < size < 1:
                return int(size * population_size)
            return int(size)

        # This function acts as what would occur during a single iteration so this process can be parallelized
        def train_single_model(i):
            if tuning_flag:
                self.init_population(pop_size)
                self.evolve(self.get_training_data(i), epochs, to_count(select_size, pop_size), mr, cr)
            else:
                self.init_population(self.pop_size)
                self.evolve(self.get_training_data(i), self.epochs, int(self.select_size*self.pop_size), self.mr, self.cr)
            return self.loss(self.get_tuning_data() if tuning_flag else self.get_testing_data(i), self.model)

        # Parallel execution
        scores = Parallel(n_jobs=12)(
            delayed(train_single_model)(i)
            for i in tqdm(range(10), desc="Evaluating Models", leave=False)
        )
        return np.array(scores)
    def tune(self, tuning_pop_size=False, tuning_epochs=True, tuning_select_size=True, tuning_mr=True, tuning_cr=True):
        '''
        Tunes up to five hyperparameters one after another: population size,
        epochs, selection size, mutation rate and crossover rate. Each flag
        switches the corresponding sweep on or off.

        For every candidate value the model is trained with train_test in tuning
        mode and the mean score is recorded; the value with the highest mean
        (classification) or lowest mean (otherwise) is stored on the instance
        before the next sweep starts.

        Returns [pop_size, epochs, select_size, mr, cr] as stored on the instance.
        '''
        pop_size_vals = [10, 50, 100, 200]
        epoch_vals = [10, 50, 100, 200, 500]
        select_size_vals = [0.2, 0.4, 0.6, 0.8]
        mr_vals = [0.01, 0.03, 0.05, 0.07, 0.1]
        cr_vals = [0.1, 0.3, 0.5, 0.7, 0.9]

        def trial(**overrides):
            # Current settings with the hyperparameter under test swapped in
            settings = dict(pop_size=self.pop_size, epochs=self.epochs, select_size=int(self.select_size*self.pop_size), mr=self.mr, cr=self.cr)
            settings.update(overrides)
            return self.train_test(tuning_flag=True, **settings)

        def sweep(values, desc, run):
            # Mean score per candidate value, then pick the winner
            means = np.array([np.mean(run(value)) for value in tqdm(values, desc=desc, leave=False)])
            if self.prediction_type == "classification":
                return values[np.argmax(means)]
            return values[np.argmin(means)]

        if tuning_pop_size:
            # The selection count scales with the population size being tried
            self.pop_size = sweep(pop_size_vals, "Tuning Population Size",
                                  lambda size: trial(pop_size=size, select_size=int(self.select_size*size)))
            print(f"Tuned Population Size: {self.pop_size}")

        if tuning_epochs:
            self.epochs = sweep(epoch_vals, "Tuning Epochs",
                                lambda count: trial(epochs=count))
            print(f"Tuned Epoch Value: {self.epochs}")

        if tuning_select_size:
            # Candidate values are fractions; convert to a count for each trial
            self.select_size = sweep(select_size_vals, "Tuning Selection Size",
                                     lambda share: trial(select_size=int(share*self.pop_size)))
            print(f"Tuned Selection Size: {self.select_size}")

        if tuning_mr:
            self.mr = sweep(mr_vals, "Tuning Mutation Rate",
                            lambda rate: trial(mr=rate))
            print(f"Tuned Mutation Rate: {self.mr}")

        if tuning_cr:
            self.cr = sweep(cr_vals, "Tuning Crossover Rate",
                            lambda rate: trial(cr=rate))
            print(f"Tuned Crossover Rate: {self.cr}")

        return [self.pop_size, self.epochs, self.select_size, self.mr, self.cr]

In [679]:
class PSO_nn:
    def __init__(self, data: dataset, prediction_type_flag: str, network_shape: list, population_size=50, epochs=50, social_update_rate=0.5, cognitive_update_rate=0.5, inertia=1):
        self.epochs = epochs
        self.pop_size = population_size
        self.c1 = cognitive_update_rate  # Cognitive update rate
        self.c2 = social_update_rate     # Social update rate
        self.w = inertia                 # Inertia weight
        self.tune_set = data.tune_set
        self.validate_set = data.validate_set
        self.prediction_type = prediction_type_flag

        if self.prediction_type == "classification":
            self.class_count = len(np.unique(self.tune_set[:, -1]))
        else:
            self.class_count = 0

        input_size = self.tune_set.shape[1] - 1
        self.input_size = input_size
        self.network_shape = network_shape
        self.population = []
        self.velocities = []
        self.personal_best_positions = []
        self.personal_best_scores = []
        self.global_best_position = None
        self.global_best_score = None
        self.model = ()
        self.pbest_flag = True
        self.gbest_flag = True

    def init_population(self):
        '''
        Initializes a population of particles with random weights and velocities.
        '''
        self.population = []
        self.velocities = []
        self.personal_best_positions = []
        self.personal_best_scores = []

        for i in range(self.pop_size):
            biases = [np.random.randn(next_size, 1) for next_size in self.network_shape[1:]]
            weights = [np.random.randn(next_size, cur_size) for cur_size, next_size in zip(self.network_shape[:-1], self.network_shape[1:])]
            particle = (weights, biases)
            self.population.append(particle)

            # Initialize velocities 
            vel_biases = [np.zeros_like(bias) for bias in biases]
            vel_weights = [np.zeros_like(weight) for weight in weights]
            velocity = (vel_weights, vel_biases)
            self.velocities.append(velocity)

            # Initialize personal bests
            self.personal_best_positions.append(particle)
            score = self.loss(self.get_training_data(i % 10), particle)
            self.personal_best_scores.append(score)

        # Initialize global best
        best_idx = np.argmin(self.personal_best_scores) if self.prediction_type != "classification" else np.argmax(self.personal_best_scores)
        self.global_best_position = self.personal_best_positions[best_idx]
        self.global_best_score = self.personal_best_scores[best_idx]

    def update_velocity(self, idx):
        '''
        Updates the velocity of a particle based on inertia, cognitive update rate, and social update rate.
        '''
        particle = self.population[idx]
        velocity = self.velocities[idx]
        personal_best = self.personal_best_positions[idx]
        global_best = self.global_best_position

        new_vel_weights = []
        new_vel_biases = []

        for v_w, x_w, pbest_w, gbest_w in zip(velocity[0], particle[0], personal_best[0], global_best[0]):
            r1 = np.random.rand(*x_w.shape)
            r2 = np.random.rand(*x_w.shape)
            cognitive_component_w = self.c1 * r1 * (pbest_w - x_w)
            social_component_w = self.c2 * r2 * (gbest_w - x_w)
            new_velocity_w = self.w * v_w + cognitive_component_w + social_component_w
            new_vel_weights.append(new_velocity_w)

        for v_b, x_b, pbest_b, gbest_b in zip(velocity[1], particle[1], personal_best[1], global_best[1]):
            r1 = np.random.rand(*x_b.shape)
            r2 = np.random.rand(*x_b.shape)
            cognitive_component_b = self.c1 * r1 * (pbest_b - x_b)
            social_component_b = self.c2 * r2 * (gbest_b - x_b)
            new_velocity_b = self.w * v_b + cognitive_component_b + social_component_b
            new_vel_biases.append(new_velocity_b)

        velocity_limit = 3  #Limit max velocity
        new_vel_weights = [np.clip(vw, -velocity_limit, velocity_limit) for vw in new_vel_weights]
        new_vel_biases = [np.clip(vb, -velocity_limit, velocity_limit) for vb in new_vel_biases]

        self.velocities[idx] = (new_vel_weights, new_vel_biases)
        print("Velocity Update Demonstration:")
        print(f"Original Velocity:\n{velocity}")
        print(f"Updated Velocity:\n{(new_vel_weights,new_vel_biases)}")

    def update_position(self, idx):
        '''
        Updates the position of a particle
        '''
        particle = self.population[idx]
        velocity = self.velocities[idx]

        new_weights = []
        new_biases = []

        for x_w, v_w in zip(particle[0], velocity[0]):
            new_w = x_w + v_w
            new_weights.append(new_w)

        for x_b, v_b in zip(particle[1], velocity[1]):
            new_b = x_b + v_b
            new_biases.append(new_b)

        self.population[idx] = (new_weights, new_biases)
        print("Position Update Demonstration:")
        print(f"Original Position:\n{particle}")
        print(f"Updated Position:\n{(new_weights,new_biases)}")

    def evaluate_fitness(self, idx, test_data):
        '''
        Evaluates the fitness of a particle and updates personal and global bests if necessary
        '''
        particle = self.population[idx]
        score = self.loss(test_data, particle)

        # Update personal best
        if self.prediction_type == "classification":
            if score > self.personal_best_scores[idx]:
                if self.pbest_flag:
                    print("pbest Calculation Demonstration:")
                    print(f"Original pbest:\n{self.personal_best_positions[idx]}")
                self.personal_best_positions[idx] = particle
                self.personal_best_scores[idx] = score
                if self.pbest_flag:
                    print(f"Updated pbest:\n{self.personal_best_positions[idx]}")
                    print()
                    print()
                    self.pbest_flag = False
        else:
            if score < self.personal_best_scores[idx]:
                self.personal_best_positions[idx] = particle
                self.personal_best_scores[idx] = score

        # Update global best
        if self.prediction_type == "classification":
            if score > self.global_best_score:
                if self.gbest_flag:
                    print("gbest Calculation Demonstration:")
                    print(f"Original gbest:\n{self.global_best_position}")
                self.global_best_position = particle
                self.global_best_score = score
                if self.gbest_flag:
                    print(f"Updated gbest:\n{self.global_best_position}")
                    self.gbest_flag = False
        else:
            if score < self.global_best_score:
                self.global_best_position = particle
                self.global_best_score = score

    def for_prop(self, input_vector: np.ndarray, weights, biases):
        '''
        Feeds forward a single example through the network.
        '''
        output = input_vector
        for bias, weight in zip(biases[:-1], weights[:-1]):
            output = self.sigmoid(np.dot(weight, output) + bias)
        bias, weight = biases[-1], weights[-1]
        output = np.dot(weight, output) + bias
        if self.prediction_type == "classification":
            output = self.softmax(output)
        return output

    def loss(self, test_data, model):
        '''
        Calculates the loss based on evaluation metrics.
        For classification: 0/1 loss
        For regression: Mean squared error
        '''
        weights, biases = model
        if self.prediction_type == "classification":
            results = [(np.argmax(self.for_prop(example.reshape(-1,1), weights, biases)), int(label)) for (example, label) in test_data if not np.isnan(label)]
            correct_results = sum(int(pred == label) for (pred, label) in results)
            total_examples = len(results)
            return correct_results / total_examples
        else:
            results = [(self.for_prop(x.reshape(-1,1), weights, biases), y) for (x, y) in test_data if not np.isnan(y)]
            predictions = np.array([prediction.flatten()[0] for (prediction, label) in results], dtype=float)
            labels = np.array([label for (prediction, label) in results], dtype=float)
            mse = np.mean((predictions - labels) ** 2)
            return mse

    def best_candidate(self, test_data):
        '''
        Returns the best performing partiicle among the population.
        '''
        scores = []
        for candidate in self.population:
            scores.append(self.loss(test_data, candidate))
        scores = np.array(scores)
        if self.prediction_type == "classification":
            return self.population[np.argmax(scores)]
        else:
            return self.population[np.argmin(scores)]

    def evolve(self, test_data, epochs):
        '''
        Runs the swarm for 'epochs' rounds. In every round each particle, in index
        order, gets a new velocity, is moved, and is re-scored on 'test_data'
        (which may update the personal and global bests). Afterwards the global
        best position is stored as self.model.
        '''
        progress = tqdm(range(epochs), desc="Evolving Particles", leave=False)
        for _ in progress:
            particle_idx = 0
            while particle_idx < self.pop_size:
                self.update_velocity(particle_idx)
                self.update_position(particle_idx)
                self.evaluate_fitness(particle_idx, test_data)
                particle_idx += 1
        self.model = self.global_best_position


    def train_test(self, tuning_flag: bool, pop_size=50, epochs=50, c1=0.5, c2=0.5, w=1):
        '''
        Trains one swarm per fold (10 folds) and returns the 10 scores as a numpy array.

        tuning_flag: True  -> the passed hyperparameters are written onto the
                              instance and each model is scored on the tuning set
                     False -> the hyperparameters already stored on the instance
                              are used (the arguments are ignored) and each model
                              is scored on its held-out fold
        '''
        if tuning_flag:
            # Trial settings live on the instance because init_population and
            # update_velocity read them from there
            self.pop_size, self.epochs = pop_size, epochs
            self.c1, self.c2, self.w = c1, c2, w
            folds = range(10)
        else:
            folds = tqdm(range(10), desc="Evaluating Test Data", leave=False)

        scores = []
        for fold in folds:
            self.init_population()
            self.evolve(self.get_training_data(fold), self.epochs)
            held_out = self.get_tuning_data() if tuning_flag else self.get_testing_data(fold)
            scores.append(self.loss(held_out, self.model))
        return np.array(scores)


    def tune(self):
        '''
        Tunes the hyperparameters one after another: population size, epochs,
        cognitive update rate, social update rate, inertia weight.

        For every candidate value the model is trained with train_test in tuning
        mode and the mean score is recorded; the value with the highest mean
        (classification) or lowest mean (otherwise) is stored on the instance
        before the next sweep starts.

        Returns [pop_size, epochs, c1, c2, w] as stored on the instance.
        '''
        pop_size_vals = [10, 50, 100]
        epoch_vals = [10, 50, 75, 100, 200]
        c1_vals = [0.1, 0.5, 1.0, 1.5, 2.0, 2.5]        #Cognitive update rate
        c2_vals = [0.1, 0.5, 1.0, 1.5, 2.0, 2.5]        #Social Update Rate
        w_vals = [0.4, 0.7, 1.0]                        #Inertia

        def trial(**overrides):
            # Current settings with the hyperparameter under test swapped in
            settings = dict(pop_size=self.pop_size, epochs=self.epochs, c1=self.c1, c2=self.c2, w=self.w)
            settings.update(overrides)
            return self.train_test(tuning_flag=True, **settings)

        def sweep(values, desc, run):
            # Mean score per candidate value, then pick the winner
            means = np.array([np.mean(run(value)) for value in tqdm(values, desc=desc, leave=False)])
            if self.prediction_type == "classification":
                return values[np.argmax(means)]
            return values[np.argmin(means)]

        # Tune Population Size
        self.pop_size = sweep(pop_size_vals, "Tuning Population Size", lambda size: trial(pop_size=size))
        print(f"Tuned Population Size: {self.pop_size}")

        # Tune Epochs
        self.epochs = sweep(epoch_vals, "Tuning Epochs", lambda count: trial(epochs=count))
        print(f"Tuned Epochs: {self.epochs}")

        # Tune Cognitive Update Rate (c1)
        self.c1 = sweep(c1_vals, "Tuning Cognitive Update Rate", lambda rate: trial(c1=rate))
        print(f"Tuned Cognitive Coefficient: {self.c1}")

        # Tune Social Update Rate (c2)
        self.c2 = sweep(c2_vals, "Tuning Social Update Rate", lambda rate: trial(c2=rate))
        print(f"Tuned Social Coefficient: {self.c2}")

        # Tune Inertia Weight (w)
        self.w = sweep(w_vals, "Tuning Inertia Weight", lambda weight: trial(w=weight))
        print(f"Tuned Inertia Weight: {self.w}")

        return [self.pop_size, self.epochs, self.c1, self.c2, self.w]


    def sigmoid(self, input_vector):
        '''
        Sigmoid activation function, 1 / (1 + exp(-x)), applied element-wise.

        Computed as exp(-log(1 + exp(-x))) via np.logaddexp so that large
        negative inputs do not overflow inside exp (the direct formula emits an
        overflow warning there, although it still rounds to 0).
        '''
        return np.exp(-np.logaddexp(0, -input_vector))

    def softmax(self, input_vector):
        '''
        Softmax activation function for the classification output layer.
        The overall maximum is subtracted before exponentiating (this does not
        change the result but keeps exp from overflowing); normalisation is
        along axis 0.
        '''
        shifted = input_vector - np.max(input_vector)
        exponentials = np.exp(shifted)
        column_totals = exponentials.sum(axis=0)
        return exponentials / column_totals

    def get_training_data(self, i: int):
        '''
        method needs to take in training data and compile 9 of the 10 folds (not fold I) into an array
        we then want to format the data as follows: each example = (attributes, label)
        I is used to indicate which fold is the hold out fold
        '''
        desired_data = np.concatenate([self.validate_set[j] for j in range(10) if j != i])  #Get all folds other than fold I and compile into its own array
        training_data = [(example[:-1], example[-1]) for example in desired_data]   #Format properly
        return training_data
    def get_testing_data(self, i: int):
        '''
        method needs to take in training data and compile 1 of the 10 folds (fold I) into an array
        Then format the data as follows: each example = (attributes, label)
        i is used to indicate which training set you want returned
        '''
        desired_data = self.validate_set[i]         #Get the test set
        testing_data = [(example[:-1], example[-1]) for example in desired_data] #Format properly
        return testing_data
    def get_tuning_data(self):
        '''
        method needs to take in the tuning set and properly format it
        Then format the data as follows: each example = (attributes, label)
        i is used to indicate which training set you want returned
        '''
        desired_data = self.tune_set  #Get the tuning set
        tuning_data = [(example[:-1], example[-1]) for example in desired_data] #Format properly
        return tuning_data

# Pre-Processing

In [680]:
# Load the six datasets used by every model below. process_all lives in
# additional_functions; the meaning of its two arguments is not visible from
# this file -- presumably a user/path selector and an on/off flag (TODO confirm).
# The six return values are unpacked in this fixed order.
abalone_data, cancer_data, fire_data, glass_data, machine_data, soybean_data = process_all('carlthedog3', True)

# Backpropagation NN Instantiation

In [681]:
# One backprop network per dataset and per hidden-layer count (0, 1, 2).
# Naming scheme: <dataset>_back_<hidden layer count>.
# The *_0 models pass no network_shape, so the constructor derives it from the
# data; the *_1 and *_2 models pass an explicit list of layer sizes.
# epochs / momentum / learning_rate / batch_size are fixed per model here
# (presumably the results of an earlier tuning run -- TODO confirm).

# Classification Sets
cancer_back_0 = backprop_nn(cancer_data, "classification", hidden_layer_count=0, epochs=10,momentum=.5,learning_rate=.01,batch_size=16)
cancer_back_1 = backprop_nn(cancer_data, "classification", hidden_layer_count=1, network_shape=[9,1,2],epochs=50,momentum=.7,learning_rate=.1,batch_size=16)
cancer_back_2 = backprop_nn(cancer_data, "classification", hidden_layer_count=2, network_shape=[9,1,5,2],epochs=50,momentum=.9,learning_rate=.01,batch_size=16)

glass_back_0 = backprop_nn(glass_data, "classification", hidden_layer_count=0, epochs=100,momentum=.5,learning_rate=.001,batch_size=16)
glass_back_1 = backprop_nn(glass_data, "classification", hidden_layer_count=1, network_shape=[9,1,6],epochs=500,momentum=.95,learning_rate=.01,batch_size=32)
glass_back_2 = backprop_nn(glass_data, "classification", hidden_layer_count=2, network_shape=[9,1,9,6],epochs=500,momentum=.95,learning_rate=.01,batch_size=32)

soybean_back_0 = backprop_nn(soybean_data, "classification", hidden_layer_count=0, epochs=10,momentum=.99,learning_rate=.01,batch_size=16)
soybean_back_1 = backprop_nn(soybean_data, "classification", hidden_layer_count=1, network_shape=[35,1,4],epochs=500,momentum=.99,learning_rate=.01,batch_size=16)
soybean_back_2 = backprop_nn(soybean_data, "classification", hidden_layer_count=2, network_shape=[35,1,9,4],epochs=500,momentum=.95,learning_rate=.1,batch_size=256)


# Regression Sets (the last layer always has a single output node)
abalone_back_0 = backprop_nn(abalone_data, "regression", hidden_layer_count=0, epochs=200,momentum=.9,learning_rate=.01,batch_size=32)
abalone_back_1 = backprop_nn(abalone_data, "regression", hidden_layer_count=1, network_shape=[8,1,1],epochs=500,momentum=.95,learning_rate=.01,batch_size=16)
abalone_back_2 = backprop_nn(abalone_data, "regression", hidden_layer_count=2, network_shape=[8,1,9,1],epochs=500,momentum=.99,learning_rate=.001,batch_size=16)

fire_back_0 = backprop_nn(fire_data, "regression", hidden_layer_count=0, epochs=200,momentum=.9,learning_rate=.01,batch_size=16)
fire_back_1 = backprop_nn(fire_data, "regression", hidden_layer_count=1, network_shape=[12,1,1],epochs=200,momentum=.95,learning_rate=.01,batch_size=16)
fire_back_2 = backprop_nn(fire_data, "regression", hidden_layer_count=2, network_shape=[12,1,1,1],epochs=500,momentum=.95,learning_rate=.001,batch_size=16)

machine_back_0 = backprop_nn(machine_data, "regression", hidden_layer_count=0, epochs=500,momentum=.95,learning_rate=.01,batch_size=16)
machine_back_1 = backprop_nn(machine_data, "regression", hidden_layer_count=1, network_shape=[9,1,1],epochs=500,momentum=.99,learning_rate=.01,batch_size=16)
machine_back_2 = backprop_nn(machine_data, "regression", hidden_layer_count=2, network_shape=[9,1,7,1],epochs=500,momentum=.99,learning_rate=.01,batch_size=16)

# Genetic Algorithm NN Instantiation

In [682]:
# One genetic-algorithm network per dataset and per hidden-layer count.
# Naming scheme: <dataset>_GA_<hidden layer count>; network_shape lists the
# layer sizes from input to output, so a two-element list has no hidden layer.
# No hyperparameters are passed, so the GA_nn constructor defaults apply.

# Classification Sets
cancer_GA_0 = GA_nn(cancer_data, "classification", network_shape=[9,2])
cancer_GA_1 = GA_nn(cancer_data, "classification", network_shape=[9,1,2])
cancer_GA_2 = GA_nn(cancer_data, "classification", network_shape=[9,1,5,2])

glass_GA_0 = GA_nn(glass_data, "classification", network_shape=[9,6])
glass_GA_1 = GA_nn(glass_data, "classification", network_shape=[9,1,6])
glass_GA_2 = GA_nn(glass_data, "classification", network_shape=[9,1,9,6])

soybean_GA_0 = GA_nn(soybean_data, "classification", network_shape=[35,4])
soybean_GA_1 = GA_nn(soybean_data, "classification", network_shape=[35,1,4])
soybean_GA_2 = GA_nn(soybean_data, "classification", network_shape=[35,1,9,4])


# Regression Sets (the last layer always has a single output node)
abalone_GA_0 = GA_nn(abalone_data, "regression", network_shape=[8,1])
abalone_GA_1 = GA_nn(abalone_data, "regression", network_shape=[8,1,1])
abalone_GA_2 = GA_nn(abalone_data, "regression", network_shape=[8,1,9,1])

fire_GA_0 = GA_nn(fire_data, "regression", network_shape=[12,1])
fire_GA_1 = GA_nn(fire_data, "regression", network_shape=[12,1,1])
fire_GA_2 = GA_nn(fire_data, "regression", network_shape=[12,1,1,1])

machine_GA_0 = GA_nn(machine_data, "regression", network_shape=[9,1])
machine_GA_1 = GA_nn(machine_data, "regression", network_shape=[9,1,1])
machine_GA_2 = GA_nn(machine_data, "regression", network_shape=[9,1,7,1])

# Differential Evolution NN Instantiation

In [683]:
# One differential-evolution network per dataset and per hidden-layer count.
# Naming scheme: <dataset>_DE_<hidden layer count>; network_shape lists the
# layer sizes from input to output, so a two-element list has no hidden layer.
# epochs / scaling_factor / crossover_rate are fixed per model here
# (presumably the results of an earlier tuning run -- TODO confirm).

# Classification Sets
cancer_DE_0 = DE_nn(cancer_data, "classification", network_shape=[9,2],epochs=200,scaling_factor=0.4,crossover_rate=0.9)
cancer_DE_1 = DE_nn(cancer_data, "classification", network_shape=[9,1,2],epochs=10,scaling_factor=0.5,crossover_rate=0.7)
cancer_DE_2 = DE_nn(cancer_data, "classification", network_shape=[9,1,5,2],epochs=200,scaling_factor=0.7,crossover_rate=0.9)

glass_DE_0 = DE_nn(glass_data, "classification", network_shape=[9,6],epochs=100,scaling_factor=0.4,crossover_rate=0.5)
glass_DE_1 = DE_nn(glass_data, "classification", network_shape=[9,1,6],epochs=500,scaling_factor=0.4,crossover_rate=0.1)
glass_DE_2 = DE_nn(glass_data, "classification", network_shape=[9,1,9,6],epochs=500,scaling_factor=0.4,crossover_rate=0.9)

soybean_DE_0 = DE_nn(soybean_data, "classification", network_shape=[35,4],epochs=500,scaling_factor=0.5,crossover_rate=0.3)
soybean_DE_1 = DE_nn(soybean_data, "classification", network_shape=[35,1,4],epochs=200,scaling_factor=0.5,crossover_rate=0.7)
soybean_DE_2 = DE_nn(soybean_data, "classification", network_shape=[35,1,9,4],epochs=500,scaling_factor=0.5,crossover_rate=0.5)


# Regression Sets (the last layer always has a single output node)
abalone_DE_0 = DE_nn(abalone_data, "regression", network_shape=[8,1],epochs=500,scaling_factor=0.4,crossover_rate=0.7)
abalone_DE_1 = DE_nn(abalone_data, "regression", network_shape=[8,1,1],epochs=500,scaling_factor=0.5,crossover_rate=0.9)
abalone_DE_2 = DE_nn(abalone_data, "regression", network_shape=[8,1,9,1],epochs=500,scaling_factor=0.4,crossover_rate=0.1)

fire_DE_0 = DE_nn(fire_data, "regression", network_shape=[12,1],epochs=500,scaling_factor=0.4,crossover_rate=0.7)
fire_DE_1 = DE_nn(fire_data, "regression", network_shape=[12,1,1],epochs=500,scaling_factor=0.7,crossover_rate=0.1)
fire_DE_2 = DE_nn(fire_data, "regression", network_shape=[12,1,1,1],epochs=50,scaling_factor=1.0,crossover_rate=0.3)

machine_DE_0 = DE_nn(machine_data, "regression", network_shape=[9,1],epochs=500,scaling_factor=0.7,crossover_rate=0.5)
machine_DE_1 = DE_nn(machine_data, "regression", network_shape=[9,1,1],epochs=100,scaling_factor=0.4,crossover_rate=0.9)
machine_DE_2 = DE_nn(machine_data, "regression", network_shape=[9,1,7,1],epochs=200,scaling_factor=0.4,crossover_rate=0.1)

# PSO NN Instantiation

In [684]:
# Classification sets: one PSO-trained network per dataset for each of the
# three shape variants (_0, _1, _2). Networks are constructed in the order
# _0, _1, _2 and bound to their names in a single unpacking step.
soybean_PSO_0, soybean_PSO_1, soybean_PSO_2 = (
    PSO_nn(soybean_data, "classification", network_shape=shape)
    for shape in ([35, 4], [35, 1, 4], [35, 1, 9, 4])
)

cancer_PSO_0, cancer_PSO_1, cancer_PSO_2 = (
    PSO_nn(cancer_data, "classification", network_shape=shape)
    for shape in ([9, 2], [9, 1, 2], [9, 1, 5, 2])
)

glass_PSO_0, glass_PSO_1, glass_PSO_2 = (
    PSO_nn(glass_data, "classification", network_shape=shape)
    for shape in ([9, 6], [9, 1, 6], [9, 1, 9, 6])
)

# Regression sets: same three variants, each shape ending in a single unit.
abalone_PSO_0, abalone_PSO_1, abalone_PSO_2 = (
    PSO_nn(abalone_data, "regression", network_shape=shape)
    for shape in ([8, 1], [8, 1, 1], [8, 1, 9, 1])
)

fire_PSO_0, fire_PSO_1, fire_PSO_2 = (
    PSO_nn(fire_data, "regression", network_shape=shape)
    for shape in ([12, 1], [12, 1, 1], [12, 1, 1, 1])
)

machine_PSO_0, machine_PSO_1, machine_PSO_2 = (
    PSO_nn(machine_data, "regression", network_shape=shape)
    for shape in ([9, 1], [9, 1, 1], [9, 1, 7, 1])
)

# Loading Performance Data

In [685]:
# Read the stored score arrays for each training algorithm from disk.
backprop_scores = load_arrays_from_csv(
    '/home/carlthedog3/CSCI_447/Project_4/Code/Performance_Data/Backprop_Data.csv'
)
GA_scores = load_arrays_from_csv(
    '/home/carlthedog3/CSCI_447/Project_4/Code/Performance_Data/GA_Data.csv'
)
DE_scores = load_arrays_from_csv(
    '/home/carlthedog3/CSCI_447/Project_4/Code/Performance_Data/DE_Data.csv'
)
PSO_scores = load_arrays_from_csv(
    '/home/carlthedog3/CSCI_447/Project_4/Code/Performance_Data/PSO_Data.csv'
)

# Entries 0, 1, 2 are bound to the cancer networks (_0, _1, _2 variants).
# NOTE(review): the entry order is presumably the order in which the networks
# were instantiated -- confirm against the code that wrote these CSV files.
cancer_back_0_score, cancer_back_1_score, cancer_back_2_score = (
    backprop_scores[entry] for entry in (0, 1, 2)
)

cancer_GA_0_score, cancer_GA_1_score, cancer_GA_2_score = (
    GA_scores[entry] for entry in (0, 1, 2)
)

cancer_DE_0_score, cancer_DE_1_score, cancer_DE_2_score = (
    DE_scores[entry] for entry in (0, 1, 2)
)

cancer_PSO_0_score, cancer_PSO_1_score, cancer_PSO_2_score = (
    PSO_scores[entry] for entry in (0, 1, 2)
)

# Entries 15, 16, 17 are bound to the machine networks (_0, _1, _2 variants).
machine_back_0_score, machine_back_1_score, machine_back_2_score = (
    backprop_scores[entry] for entry in (15, 16, 17)
)

machine_GA_0_score, machine_GA_1_score, machine_GA_2_score = (
    GA_scores[entry] for entry in (15, 16, 17)
)

machine_DE_0_score, machine_DE_1_score, machine_DE_2_score = (
    DE_scores[entry] for entry in (15, 16, 17)
)

machine_PSO_0_score, machine_PSO_1_score, machine_PSO_2_score = (
    PSO_scores[entry] for entry in (15, 16, 17)
)

# Video Demo
## Performance From One Test Fold

In [686]:
# Task 1
# Report the first stored score (index 0) of the *_2 network for every
# algorithm, once for the cancer data and once for the machine data.
# All lines go out through one print call; the empty string yields the blank
# separator line between the two sections.
print(
    "Classification 2-Layer Network Performance on Fold 0 (Cancer)",
    f"Backpropogation Performance:             {cancer_back_2_score[0]}",
    f"Genetic Algorithm Performance:           {cancer_GA_2_score[0]}",
    f"Differential Evolution Performance:      {cancer_DE_2_score[0]}",
    f"Particle Swarm Optimization Performance: {cancer_PSO_2_score[0]}",
    "",
    "Regression 2-Layer Network Performance on Fold 0 (Machine)",
    f"Backpropogation Performance:             {machine_back_2_score[0]}",
    f"Genetic Algorithm Performance:           {machine_GA_2_score[0]}",
    f"Differential Evolution Performance:      {machine_DE_2_score[0]}",
    f"Particle Swarm Optimization Performance: {machine_PSO_2_score[0]}",
    sep="\n",
)

Classification 2-Layer Network Performance on Fold 0 (Cancer)
Backpropogation Performance:             0.9841269841269841
Genetic Algorithm Performance:           0.7619047619047619
Differential Evolution Performance:      0.9682539682539683
Particle Swarm Optimization Performance: 0.9841269841269841

Regression 2-Layer Network Performance on Fold 0 (Machine)
Backpropogation Performance:             0.00013029516636413
Genetic Algorithm Performance:           0.010959068800884389
Differential Evolution Performance:      0.0006995035901707402
Particle Swarm Optimization Performance: 0.004166883666168647


## GA Operations: Selection, Crossover, Mutation

In [687]:
# Task 2
# Walk through one round of the GA operators on cancer_GA_1.
# Statement order matters: the called methods emit their own output, which
# interleaves with the headings printed here.
cancer_GA_1.init_population(50)

# Selection
print("GA Selection Demonstration:")
# select_candidates returns a pair; only the first element is used below.
# NOTE(review): get_training_data(0) presumably returns the training split for
# fold 0, and the trailing 2 presumably is the number of candidates to select -- confirm.
top_candidates, bottom_candidates = cancer_GA_1.select_candidates(cancer_GA_1.get_training_data(0), 2)
print(f"Total Population Size:         {len(cancer_GA_1.population)}")
print(f"Number of Selected Candidates: {len(top_candidates)}")
print()
print()

# Crossover
print("GA Crossover Demonstration:")
# NOTE(review): 0.5 is presumably the crossover rate -- confirm against crossover's signature.
new_candidates = cancer_GA_1.crossover(top_candidates, 0.5)
print()
print()

# Mutation
print("GA Mutation Demonstration:")
# The crossover result is passed to mutate and new_candidates is rebound to what it returns.
# NOTE(review): 0.5 is presumably the mutation rate -- confirm against mutate's signature.
new_candidates = cancer_GA_1.mutate(new_candidates, 0.5)

GA Selection Demonstration:


Evaluating Population: 100%|██████████| 50/50 [00:01<00:00, 45.01it/s]

Total Population Size:         50
Number of Selected Candidates: 2


GA Crossover Demonstration:
Parent 1:
([array([[ 0.12727378,  1.93154979, -0.06798994,  1.26949288,  0.4827801 ,
         1.29730574,  1.25086382, -1.63474196,  0.45677646]]), array([[-0.54281872],
       [ 1.29652271]])], [array([[-1.80102937]]), array([[ 1.16591348],
       [-0.08889926]])])
Parent 2:
([array([[-0.58418627, -0.63065987, -1.45704883, -1.70887627, -0.24885898,
         0.27860196, -1.83769887,  0.6622846 , -0.99973307]]), array([[ 0.40888025],
       [-1.28706856]])], [array([[1.32576453]]), array([[-0.83709984],
       [-0.63413404]])])
Crossover Result:
([array([[ 0.12727378,  1.93154979, -0.06798994,  1.26949288, -0.24885898,
         1.29730574, -1.83769887,  0.6622846 , -0.99973307]]), array([[-0.54281872],
       [-1.28706856]])], [array([[-1.80102937]]), array([[-0.83709984],
       [-0.63413404]])])


GA Mutation Demonstration:
Original Candidate:
([array([[ 0.12727378,  1.93154979, -0.0679899




## DE Operations: Crossover, Mutation

In [688]:
# Task 3 (Crossover and Mutation)
# Build a population of 50 for cancer_DE_1, then call trial_vector on the
# first population member. As the last expression in the cell, the returned
# value is what the notebook displays.
cancer_DE_1.init_population(50)
# NOTE(review): cancer_DE_1 was constructed with scaling_factor=0.5 and
# crossover_rate=0.7, while the positional values here are 0.7 then 0.5 --
# confirm the argument order against trial_vector's signature.
cancer_DE_1.trial_vector(cancer_DE_1.population[0], 0.7, 0.5)

DE Mutation Demonstration (Based on V_i = X_1 + F * (X_2 - X_3)):
Candidate 1:
([array([[-0.54160397,  0.68570388, -0.52879621,  1.18474452, -0.42028097,
        -0.34741806,  1.16431159, -1.07691831, -2.23377251]]), array([[-1.90660933],
       [ 0.20049704]])], [array([[0.29539386]]), array([[-1.13671025],
       [ 0.01544126]])])
Candidate 2:
([array([[ 0.07038105, -1.26422118,  0.92349748, -0.16556517, -0.5132611 ,
        -0.85512764,  0.96172845, -0.48416921, -1.08967418]]), array([[-1.53131859],
       [-0.27703023]])], [array([[0.85336463]]), array([[0.36768649],
       [0.7255923 ]])])
Candidate 3:
([array([[ 0.0845457 ,  0.46592268, -2.03432975, -0.84070599,  0.66719543,
        -1.0564024 ,  0.94444166,  2.09711173, -0.54626269]]), array([[-0.87998328],
       [-0.08838167]])], [array([[-2.31995959]]), array([[ 1.23611965],
       [-0.43054679]])])
Mutated Candidate:
([array([[-0.55151922, -0.52539682,  1.54168285,  1.65734309, -1.24660055,
        -0.20652573,  1.17641234, 

([array([[ 0.71166683, -0.52539682,  1.35237508,  1.65734309, -1.09848368,
           0.35949951,  1.17641234, -2.88381496, -1.81375338]]),
  array([[0.38150059],
         [0.50467811]])],
 [array([[1.34501428]]),
  array([[1.44023155],
         [0.51273745]])])

## PSO Operations: pbest Calculation, gbest Calculation, Velocity Update, Position Update

In [689]:
# Task 4
# Walk through the PSO steps on cancer_PSO_1. Statement order matters: the
# blank-line prints separate the sections of the demonstration output.
cancer_PSO_1.init_population()
# pbest/gbest Calculation
# Calls evaluate_fitness for indices 0..48, fetching get_training_data(0) anew on each pass.
# NOTE(review): range(49) stops at index 48; if the swarm holds 50 particles
# (the size used in the GA and DE demos), the last one is never evaluated --
# confirm against init_population's default size.
# NOTE(review): get_training_data(0) presumably returns the training split for fold 0 -- confirm.
for i in range(49):
    cancer_PSO_1.evaluate_fitness(i, cancer_PSO_1.get_training_data(0))
print()
print()

# Velocity Update
# Applied to index 0 only (presumably the first particle -- confirm).
cancer_PSO_1.update_velocity(0)
print()
print()

# Position Update
# Applied to the same index 0, after its velocity update above.
cancer_PSO_1.update_position(0)

pbest Calculation Demonstration:
Original pbest:
([array([[-0.34885189,  0.58646631, -0.48030552,  0.05468181, -0.70572225,
         1.24074491, -0.47441466, -1.24615936,  0.16336774]]), array([[1.87540473],
       [1.98335321]])], [array([[-0.07400044]]), array([[-0.7364713],
       [ 1.0497217]])])
Updated pbest:
([array([[-0.34885189,  0.58646631, -0.48030552,  0.05468181, -0.70572225,
         1.24074491, -0.47441466, -1.24615936,  0.16336774]]), array([[1.87540473],
       [1.98335321]])], [array([[-0.07400044]]), array([[-0.7364713],
       [ 1.0497217]])])




Velocity Update Demonstration:
Original Velocity:
([array([[0., 0., 0., 0., 0., 0., 0., 0., 0.]]), array([[0.],
       [0.]])], [array([[0.]]), array([[0.],
       [0.]])])
Updated Velocity:
([array([[ 0.08671293,  0.26793908, -0.00661794, -0.00085864,  0.21747755,
        -0.00404394, -0.31271041,  0.74882279, -0.68208358]]), array([[-0.08319638],
       [-0.17597   ]])], [array([[0.2361459]]), array([[0.23524331],
      

## Average Performance for 0, 1, and 2 Hidden Layers on Cancer and Machine Datasets for Each Algorithm

In [690]:
# Task 5
# Print the mean of each stored score array: four algorithms x three network
# variants, first for the cancer data and then for the machine data.
# One print call emits every line; each empty string is a blank separator line
# (two in a row between the cancer and machine sections).
print(
    f"Cancer Backpropogation Net w/ 0 Hidden Layers Average Performance:              {np.mean(cancer_back_0_score)}",
    f"Cancer Backpropogation Net w/ 1 Hidden Layers Average Performance:              {np.mean(cancer_back_1_score)}",
    f"Cancer Backpropogation Net w/ 2 Hidden Layers Average Performance:              {np.mean(cancer_back_2_score)}",
    "",
    f"Cancer Genetic Algorithm Net w/ 0 Hidden Layers Average Performance:            {np.mean(cancer_GA_0_score)}",
    f"Cancer Genetic Algorithm Net w/ 1 Hidden Layers Average Performance:            {np.mean(cancer_GA_1_score)}",
    f"Cancer Genetic Algorithm Net w/ 2 Hidden Layers Average Performance:            {np.mean(cancer_GA_2_score)}",
    "",
    f"Cancer Differential Evolution Net w/ 0 Hidden Layers Average Performance:       {np.mean(cancer_DE_0_score)}",
    f"Cancer Differential Evolution Net w/ 1 Hidden Layers Average Performance:       {np.mean(cancer_DE_1_score)}",
    f"Cancer Differential Evolution Net w/ 2 Hidden Layers Average Performance:       {np.mean(cancer_DE_2_score)}",
    "",
    f"Cancer Particle Swarm Optimization Net w/ 0 Hidden Layers Average Performance:  {np.mean(cancer_PSO_0_score)}",
    f"Cancer Particle Swarm Optimization Net w/ 1 Hidden Layers Average Performance:  {np.mean(cancer_PSO_1_score)}",
    f"Cancer Particle Swarm Optimization Net w/ 2 Hidden Layers Average Performance:  {np.mean(cancer_PSO_2_score)}",
    "",
    "",
    f"Machine Backpropogation Net w/ 0 Hidden Layers Average Performance:             {np.mean(machine_back_0_score)}",
    f"Machine Backpropogation Net w/ 1 Hidden Layers Average Performance:             {np.mean(machine_back_1_score)}",
    f"Machine Backpropogation Net w/ 2 Hidden Layers Average Performance:             {np.mean(machine_back_2_score)}",
    "",
    f"Machine Genetic Algorithm Net w/ 0 Hidden Layers Average Performance:           {np.mean(machine_GA_0_score)}",
    f"Machine Genetic Algorithm Net w/ 1 Hidden Layers Average Performance:           {np.mean(machine_GA_1_score)}",
    f"Machine Genetic Algorithm Net w/ 2 Hidden Layers Average Performance:           {np.mean(machine_GA_2_score)}",
    "",
    f"Machine Differential Evolution Net w/ 0 Hidden Layers Average Performance:      {np.mean(machine_DE_0_score)}",
    f"Machine Differential Evolution Net w/ 1 Hidden Layers Average Performance:      {np.mean(machine_DE_1_score)}",
    f"Machine Differential Evolution Net w/ 2 Hidden Layers Average Performance:      {np.mean(machine_DE_2_score)}",
    "",
    f"Machine Particle Swarm Optimization Net w/ 0 Hidden Layers Average Performance: {np.mean(machine_PSO_0_score)}",
    f"Machine Particle Swarm Optimization Net w/ 1 Hidden Layers Average Performance: {np.mean(machine_PSO_1_score)}",
    f"Machine Particle Swarm Optimization Net w/ 2 Hidden Layers Average Performance: {np.mean(machine_PSO_2_score)}",
    sep="\n",
)

Cancer Backpropogation Net w/ 0 Hidden Layers Average Performance:              0.42910906298003065
Cancer Backpropogation Net w/ 1 Hidden Layers Average Performance:              0.9650281618023552
Cancer Backpropogation Net w/ 2 Hidden Layers Average Performance:              0.9682027649769583

Cancer Genetic Algorithm Net w/ 0 Hidden Layers Average Performance:            0.7550179211469533
Cancer Genetic Algorithm Net w/ 1 Hidden Layers Average Performance:            0.8219662058371735
Cancer Genetic Algorithm Net w/ 2 Hidden Layers Average Performance:            0.7252176139272912

Cancer Differential Evolution Net w/ 0 Hidden Layers Average Performance:       0.8490015360983103
Cancer Differential Evolution Net w/ 1 Hidden Layers Average Performance:       0.9634152585765487
Cancer Differential Evolution Net w/ 2 Hidden Layers Average Performance:       0.9666410650281618

Cancer Particle Swarm Optimization Net w/ 0 Hidden Layers Average Performance:  0.9666410650281618
Cancer