In [78]:
import numpy as np
import matplotlib.pyplot as plt
import wandb
import time

from keras.datasets import fashion_mnist

In [None]:
# Activation functions

def sigmoid(z):
    return 1.0 / (1 + np.exp(-(z)))
def sigmoid_derivative(z):
    return  (1.0 / (1 + np.exp(-(z))))*(1 -  1.0 / (1 + np.exp(-(z))))

def tanh(z):
    return np.tanh(z)
def tanh_derivative(z):
    return 1 - np.tanh(z) ** 2

def relu(z):
    return np.maximum(0.001, z) 
def relu_derivative(z):
    return (z>0)*1 + (z<0)*0.001 

In [None]:
class NeuralNetwork:
    def __init__(self, hidden_layers, hidden_neurons, train_images, train_labels, num_train, val_images,val_labels,num_val,test_images, test_labels, num_test,optimizer,batch_size,weight_decay,learning_rate,
    max_epochs,activation,initializer,loss_function):
        
        self.num_classes = np.max(train_labels) + 1
        self.hidden_layers = hidden_layers
        self.hidden_neurons = hidden_neurons
        self.output_size = self.num_classes
        self.img_height = train_images.shape[1]
        self.img_width = train_images.shape[2]
        self.input_size = self.img_height * self.img_width

        self.layer_structure = ([self.input_size]+ hidden_layers * [hidden_neurons]+ [self.output_size])
    

        self.num_train = num_train
        self.num_val = num_val
        self.num_test = num_test
        
        self.train_data = np.transpose(
            train_images.reshape(train_images.shape[0], -1)) / 255
        self.test_data = np.transpose(
            test_images.reshape(test_images.shape[0], -1)) / 255
        self.val_data = np.transpose(
            val_images.reshape(val_images.shape[0], -1)) / 255
        
        self.train_labels = self.one_hot_encode(train_labels)
        self.val_labels = self.one_hot_encode(val_labels)
        self.test_labels = self.one_hot_encode(test_labels)

        self.activation_functions = {"SIGMOID": sigmoid, "TANH": tanh, "RELU": relu}
        self.derivative_activations = {"SIGMOID": sigmoid_derivative,
                                       "TANH": tanh_derivative,
                                       "RELU": relu_derivative}


        self.initializers = {"XAVIER": self.xavier_initializer,"RANDOM": self.random_initializer}

        self.optimizers = {"SGD": self.sgd,    "MGD": self.mgd,
            "NGD": self.ngd,  "RMSPROP": self.rmsProp, "ADAM": self.adam}
        
    
        self.activation_func = self.activation_functions[activation]
        self.derivative_activations = self.derivative_activations[activation]
    
        self.optimizer = self.optimizers[optimizer]
        self.initializer_func = self.initializers[initializer]
        self.loss_function = loss_function
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        
        self.weights, self.biases = self.initialize_network(self.layer_structure)

    def one_hot_encode(self, labels):
        encoded_labels = np.zeros((self.num_classes, labels.shape[0]))
        for i in range(labels.shape[0]):
            encoded_labels[int(labels[i]), i] = 1.0
        return encoded_labels
     
    #  Loss functions
    def mean_squared_error(self, true_labels, predicted_labels):
        return np.mean((true_labels - predicted_labels) ** 2)

    def cross_entropy_loss(self, true_labels, predicted_labels):
        return np.mean([-true_labels[i] * np.log(predicted_labels[i]) for i in range(len(predicted_labels))])

    def l2_regularisation_loss(self, weight_decay):
        return weight_decay * np.sum([np.linalg.norm(self.weights[str(i + 1)]) ** 2 for i in range(len(self.weights))])
    

    def compute_accuracy(self, true_labels, predicted_labels, data_size):
        true_class_labels = [np.argmax(true_labels[:, i]) for i in range(data_size)]
        predicted_class_labels = [np.argmax(predicted_labels[:, i]) for i in range(data_size)]
        correct_predictions = sum(1 for i in range(data_size) if true_class_labels[i] == predicted_class_labels[i])
        return correct_predictions / data_size, true_class_labels, predicted_class_labels

    def xavier_initializer(self, size):
        in_dim, out_dim = size[1], size[0]
        std_dev = np.sqrt(2 / (in_dim + out_dim))
        return np.random.normal(0, std_dev, size=(out_dim, in_dim))

    def random_initializer(self, size):
        in_dim, out_dim = size[1], size[0]
        return np.random.normal(0, 1, size=(out_dim, in_dim))

        
    def initialize_network(self, layer_structure):
        weight_matrices = {}
        bias_vectors = {}
        total_layers = len(layer_structure)
        for i in range(0, total_layers - 1):
            weight_matrix = self.initializer_func(size=[layer_structure[i + 1], layer_structure[i]])
            bias_vector = np.zeros((layer_structure[i + 1], 1))
            weight_matrices[str(i + 1)] = weight_matrix
            bias_vectors[str(i + 1)] = bias_vector
        return weight_matrices, bias_vectors

    def forward_propagation(self, input_batch, weight_matrices, bias_vectors):
        """
        Returns the neural network output given input data, weights, and biases.
        Arguments:
                 : input_batch - input matrix
                 : weight_matrices - Weight matrices
                 : bias_vectors - Bias vectors 
        """
        num_layers = len(weight_matrices) + 1
        activation_outputs = {}
        pre_activations = {}
        activation_outputs["0"] = input_batch
        pre_activations["0"] = input_batch
        
        for i in range(0, num_layers - 2):
            if i == 0:
                W = weight_matrices[str(i + 1)]
                b = bias_vectors[str(i + 1)]
                pre_activations[str(i + 1)] = np.add(np.matmul(W, input_batch), b)
                activation_outputs[str(i + 1)] = self.activation_func(pre_activations[str(i + 1)])
            else:
                W = weight_matrices[str(i + 1)]
                b = bias_vectors[str(i + 1)]
                pre_activations[str(i + 1)] = np.add(np.matmul(W, activation_outputs[str(i)]), b)
                activation_outputs[str(i + 1)] = self.activation_func(pre_activations[str(i + 1)])

        W = weight_matrices[str(num_layers - 1)]
        b = bias_vectors[str(num_layers - 1)]
        pre_activations[str(num_layers - 1)] = np.add(np.matmul(W, activation_outputs[str(num_layers - 2)]), b)
        final_output = sigmoid(pre_activations[str(num_layers - 1)])
        activation_outputs[str(num_layers - 1)] = final_output
        return final_output, activation_outputs, pre_activations
    
    def back_propagation(self, predicted_output, activation_outputs, pre_activations, true_output, weight_decay=0):

        # Performs backpropagation to compute gradients of weights and biases.
    
        #  Arguments:
        # : predicted_output - Output of the neural network
        # : activation_outputs - Dictionary of activation outputs from forward propagation
        # : pre_activations - Dictionary of pre-activation values
        # : true_output - True labels
        # : weight_decay - Regularization parameter (default: 0)
        
        # Returns:
        # : - Gradients for weight matrices
        # : bias_gradients - Gradients for bias vectors 
        

        alpha = weight_decay
        weight_gradients= {}
        bias_gradients = {}
        num_layers = len(self.layer_structure)
    
    # Compute gradient of the output layer
        activation_gradients = {}
        if self.loss_function == "CROSS":
           activation_gradients[str(num_layers - 1)] = -(true_output - predicted_output)
        elif self.loss_function == "MSE":
            activation_gradients[str(num_layers - 1)] = np.multiply(
            2 * (predicted_output - true_output), np.multiply(predicted_output, (1 - predicted_output))
        )

    # Backpropagate through the layers
        for i in range(num_layers - 2, -1, -1):
            if alpha != 0:
              weight_gradients[str(i + 1)] = (
                np.outer(activation_gradients[str(i + 1)], activation_outputs[str(i)])
                + alpha * self.weights[str(i + 1)]
            )
            else:
              weight_gradients[str(i + 1)] = np.outer(activation_gradients[str(i + 1)], activation_outputs[str(i)])

            bias_gradients[str(i + 1)] = activation_gradients[str(i + 1)]

            if i != 0:
               hidden_gradient = np.matmul(self.weights[str(i + 1)].T, activation_gradients[str(i + 1)])

               activation_gradients[str(i)] = np.multiply(hidden_gradient, self.derivative_activations(pre_activations[str(i)]))
            #    activation_gradients[str(i)] = np.multiply(hidden_gradient, self.derivative_activations[int(i)](pre_activations[str(i)]))


    
            #    activation_gradients[str(i)] = np.multiply(hidden_gradient, self.derivative_activations[str(i)](pre_activations[str(i)]))


            else:
               hidden_gradient = np.matmul(self.weights[str(i + 1)].T, activation_gradients[str(i + 1)])
               activation_gradients[str(i)] = np.multiply(hidden_gradient, pre_activations[str(i)])

        return weight_gradients, bias_gradients
    
    
    def predict(self, input_batch, length_dataset):
        predictions = []        
    
        for i in range(length_dataset):
            final_output, activation_outputs, pre_activations = self.forward_propagation(
                input_batch[:, i].reshape(self.input_size, 1),
                self.weights,
                self.biases,
        )

            predictions.append(final_output.reshape(self.num_classes,))
    
        predictions = np.array(predictions).transpose()
        return predictions


    def sgd(self, epochs, length_dataset, learning_rate, weight_decay=0):

    # Implements Stochastic Gradient Descent (SGD) for training the neural network.
    
    # Arguments:
    #     : epochs - Number of training epochs
    #     : length_dataset - Number of training samples to use
    #     : learning_rate - Step size for weight updates
    #     : weight_decay - Regularization parameter (default: 0)
    
    # Returns:
    #     : training_loss - List of loss values over epochs
    #     : training_accuracy - List of training accuracies
    #     : validation_accuracy - List of validation accuracies
    #     : final_predictions - Model predictions after training
    

        training_loss = []
        training_accuracy = []
        validation_accuracy = []
    
        num_layers = len(self.layer_structure)

        X_train = self.train_data[:, :length_dataset]
        Y_train = self.train_labels[:, :length_dataset]

        for epoch in range(epochs):
            start_time = time.time()
        
            indices = np.arange(length_dataset)
            np.random.shuffle(indices)
            X_train = X_train[:, indices].reshape(self.input_size, length_dataset)
            Y_train = Y_train[:, indices].reshape(self.num_classes, length_dataset)
        
            batch_loss = []
        
            weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
            bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

            for i in range(length_dataset):
                output, activation_outputs, pre_activations = self.forward_propagation(
                    X_train[:, i].reshape(self.input_size, 1),
                    self.weights,
                    self.biases,
            )
                weight_gradients, bias_gradients = self.back_propagation(
                    output, activation_outputs, pre_activations, Y_train[:, i].reshape(self.num_classes, 1), weight_decay
            )

                for l in range(num_layers - 1):
                    weight_updates[str(l + 1)] = weight_gradients[str(l + 1)]
                    bias_updates[str(l + 1)] = bias_gradients[str(l + 1)]

                if self.loss_function == "MSE":
                    batch_loss.append(
                       self.mean_squared_error(Y_train[:, i].reshape(self.num_classes, 1), output)
                         + self.l2_regularisation_loss(weight_decay)
                )
                elif self.loss_function == "CROSS":
                    batch_loss.append(
                    self.cross_entropy_loss(Y_train[:, i].reshape(self.num_classes, 1), output)
                    + self.l2_regularisation_loss(weight_decay)
                )

                self.weights = {str(l + 1): (self.weights[str(l + 1)] - learning_rate * weight_updates[str(l + 1)])
                            for l in range(len(self.weights))}
                self.biases = {str(l + 1): (self.biases[str(l + 1)] - learning_rate * bias_updates[str(l + 1)])
                           for l in range(len(self.biases))}

            elapsed_time = time.time() - start_time
        
            predictions = self.predict(self.train_data, self.num_train)
        
            training_loss.append(np.mean(batch_loss))
            training_accuracy.append(self.compute_accuracy(Y_train, predictions, length_dataset)[0])
            validation_accuracy.append(self.compute_accuracy(self.num_test, self.predict(self.num_train, self.num_val), self.num_val)[0])
        
            print(
                "Epoch: %d, Loss: %.3e, Training Accuracy: %.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                % (
                epoch,
                training_loss[epoch],
                training_accuracy[epoch],
                validation_accuracy[epoch],
                elapsed_time,
                learning_rate,
            )
        )

            wandb.log({'loss': np.mean(batch_loss),
                   'training_accuracy': training_accuracy[epoch],
                   'validation_accuracy': validation_accuracy[epoch],
                   'epoch': epoch})

        return training_loss, training_accuracy, validation_accuracy, predictions

   
    def mgd(self, epochs, length_dataset, batch_size, learning_rate, weight_decay=0):
    
    # Implements Mini-Batch Gradient Descent (MGD) for training the neural network.

    # Arguments:
    #     epochs (int): Number of training epochs.
    #     length_dataset (int): Number of training samples to use.
    #     batch_size (int): Mini-batch size for gradient updates.
    #     learning_rate (float): Step size for weight updates.
    #     weight_decay (float, optional): Regularization parameter (default: 0).

    # Returns:
    #     tuple: (training_loss, training_accuracy, validation_accuracy, final_predictions)
      
    
        MOMENTUM = 0.9
        training_loss = []
        training_accuracy = []
        validation_accuracy = []

        num_layers = len(self.layer_structure)

        X_train = self.train_data[:, :length_dataset]
        Y_train = self.train_labels[:, :length_dataset]

        prev_velocity_w = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
        prev_velocity_b = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

        num_points_seen = 0

        for epoch in range(epochs):
            start_time = time.time()

            indices = np.arange(length_dataset)
            np.random.shuffle(indices)
            X_train = X_train[:, indices].reshape(self.input_size, length_dataset)
            Y_train = Y_train[:, indices].reshape(self.num_classes, length_dataset)

            batch_loss = []

            weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
            bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

            for i in range(length_dataset):
                output, activation_outputs, pre_activations = self.forward_propagation(
                    X_train[:, i].reshape(self.input_size, 1),
                    self.weights,
                    self.biases,
            )
                weight_gradients, bias_gradients = self.back_propagation(
                    output, activation_outputs, pre_activations, Y_train[:, i].reshape(self.num_classes, 1)
            )

                for l in range(num_layers - 1):
                    weight_updates[str(l + 1)] += weight_gradients[str(l + 1)]
                    bias_updates[str(l + 1)] += bias_gradients[str(l + 1)]

                if self.loss_function == "MSE":
                    batch_loss.append(
                        self.mean_squared_error(Y_train[:, i].reshape(self.num_classes, 1), output)
                        + self.l2_regularisation_loss(weight_decay)
                )
                elif self.loss_function == "CROSS":
                    batch_loss.append(
                        self.cross_entropy_loss(Y_train[:, i].reshape(self.num_classes, 1), output)
                        + self.l2_regularisation_loss(weight_decay)
                )

                num_points_seen += 1

                if num_points_seen % batch_size == 0:
                    velocity_w = {
                       str(l + 1): MOMENTUM * prev_velocity_w[str(l + 1)] + learning_rate * weight_updates[str(l + 1)] / batch_size
                       for l in range(num_layers - 1)
                }
                    velocity_b = {
                       str(l + 1): MOMENTUM * prev_velocity_b[str(l + 1)] + learning_rate * bias_updates[str(l + 1)] / batch_size
                        for l in range(num_layers - 1)
                }

                    self.weights = {str(l + 1): self.weights[str(l + 1)] - velocity_w[str(l + 1)] for l in range(num_layers - 1)}
                    self.biases = {str(l + 1): self.biases[str(l + 1)] - velocity_b[str(l + 1)] for l in range(num_layers - 1)}

                    prev_velocity_w = velocity_w
                    prev_velocity_b = velocity_b

                    # Reset batch gradients
                    weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
                    bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

            elapsed_time = time.time() - start_time

            predictions = self.predict(self.train_data, self.num_train)

            training_loss.append(np.mean(batch_loss))
            training_accuracy.append(self.compute_accuracy(Y_train, predictions, length_dataset)[0])
            validation_accuracy.append(self.compute_accuracy(self.test_labels, self.predict(self.test_data, self.num_test), self.num_test)[0])

            print(
            "Epoch: %d, Loss: %.3e, Training Accuracy: %.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
            % (
                epoch,
                training_loss[epoch],
                training_accuracy[epoch],
                validation_accuracy[epoch],
                elapsed_time,
                learning_rate,
            )
        )

            wandb.log({
            'loss': np.mean(batch_loss),
            'training_accuracy': training_accuracy[epoch],
            'validation_accuracy': validation_accuracy[epoch],
            'epoch': epoch
        })

        return training_loss, training_accuracy, validation_accuracy, predictions
    
    def ngd(self, epochs, length_dataset, batch_size, learning_rate, weight_decay=0):
    
    # Implements Nesterov Accelerated Gradient (NGD) for training the neural network.

    # Arguments:
    #     epochs (int): Number of training epochs.
    #     length_dataset (int): Number of training samples to use.
    #     batch_size (int): Mini-batch size for gradient updates.
    #     learning_rate (float): Step size for weight updates.
    #     weight_decay (float, optional): Regularization parameter (default: 0).

    # Returns:
    #     tuple: (training_loss, training_accuracy, validation_accuracy, final_predictions)
    
        GAMMA = 0.9
    
        X_train = self.train_data[:, :length_dataset]
        Y_train = self.train_labels[:, :length_dataset]

        training_loss = []
        training_accuracy = []
        validation_accuracy = []
    
        num_layers = len(self.layer_structure)
    
        prev_velocity_w = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
        prev_velocity_b = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}
    
        num_points_seen = 0
        for epoch in range(epochs):
            start_time = time.time()
        
            indices = np.arange(length_dataset)
            np.random.shuffle(indices)
            X_train = X_train[:, indices].reshape(self.input_size, length_dataset)
            Y_train = Y_train[:, indices].reshape(self.num_classes, length_dataset)

            batch_loss = []
        
            weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
            bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}
        
            velocity_w = {str(l + 1): GAMMA * prev_velocity_w[str(l + 1)] for l in range(num_layers - 1)}
            velocity_b = {str(l + 1): GAMMA * prev_velocity_b[str(l + 1)] for l in range(num_layers - 1)}
        
            for i in range(length_dataset):
                winter = {str(l + 1): self.weights[str(l + 1)] - velocity_w[str(l + 1)] for l in range(num_layers - 1)}
                binter = {str(l + 1): self.biases[str(l + 1)] - velocity_b[str(l + 1)] for l in range(num_layers - 1)}
            
                output, activation_outputs, pre_activations = self.forward_propagation(
                    X_train[:, i].reshape(self.input_size, 1), winter, binter)
                weight_gradients, bias_gradients = self.back_propagation(
                    output, activation_outputs, pre_activations, Y_train[:, i].reshape(self.num_classes, 1))
            
                for l in range(num_layers - 1):
                    weight_updates[str(l + 1)] += weight_gradients[str(l + 1)]
                    bias_updates[str(l + 1)] += bias_gradients[str(l + 1)]
            
                if self.loss_function == "MSE":
                    batch_loss.append(
                    self.mean_squared_error(Y_train[:, i].reshape(self.num_classes, 1), output)
                    + self.l2_regularisation_loss(weight_decay)
                )
                elif self.loss_function == "CROSS":
                    batch_loss.append(
                    self.cross_entropy_loss(Y_train[:, i].reshape(self.num_classes, 1), output)
                    + self.l2_regularisation_loss(weight_decay)
                )
            
                num_points_seen += 1
            
                if num_points_seen % batch_size == 0:
                    velocity_w = {
                        str(l + 1): GAMMA * prev_velocity_w[str(l + 1)] + learning_rate * weight_updates[str(l + 1)] / batch_size
                        for l in range(num_layers - 1)
                }
                velocity_b = {
                    str(l + 1): GAMMA * prev_velocity_b[str(l + 1)] + learning_rate * bias_updates[str(l + 1)] / batch_size
                    for l in range(num_layers - 1)
                }
                
                self.weights = {str(l + 1): self.weights[str(l + 1)] - velocity_w[str(l + 1)] for l in range(num_layers - 1)}
                self.biases = {str(l + 1): self.biases[str(l + 1)] - velocity_b[str(l + 1)] for l in range(num_layers - 1)}
                
                prev_velocity_w = velocity_w
                prev_velocity_b = velocity_b
                
                weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
                bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}
        
            elapsed_time = time.time() - start_time
            predictions = self.predict(self.train_data, self.num_train)
        
            training_loss.append(np.mean(batch_loss))
            training_accuracy.append(self.compute_accuracy(Y_train, predictions, length_dataset)[0])
            validation_accuracy.append(self.compute_accuracy(self.test_labels, self.predict(self.     test_data, self.num_test), self.num_test)[0])
        
            print(
            "Epoch: %d, Loss: %.3e, Training Accuracy: %.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
            % (
                epoch,
                training_loss[epoch],
                training_accuracy[epoch],
                validation_accuracy[epoch],
                elapsed_time,
                learning_rate
            )
        )
        
            wandb.log({
            'loss': np.mean(batch_loss),
            'training_accuracy': training_accuracy[epoch],
            'validation_accuracy': validation_accuracy[epoch],
            'epoch': epoch
        })
        
        return training_loss, training_accuracy, validation_accuracy, predictions

    
    def rmsProp(self, epochs, length_dataset, batch_size, learning_rate, weight_decay=0): 
    
    # Implements RMSProp optimizer for training the neural network.

    # Arguments:
    #     epochs (int): Number of training epochs.
    #     length_dataset (int): Number of training samples to use.
    #     batch_size (int): Mini-batch size for gradient updates.
    #     learning_rate (float): Step size for weight updates.
    #     weight_decay (float, optional): Regularization parameter (default: 0).

    # Returns:
    #     tuple: (training_loss, training_accuracy, validation_accuracy, final_predictions)
    

        EPSILON = 1e-8
        BETA = 0.9

        X_train = self.train_data[:, :length_dataset]
        Y_train = self.train_labels[:, :length_dataset]

        training_loss = []
        training_accuracy = []
        validation_accuracy = []
    
        num_layers = len(self.layer_structure)

        v_w = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
        v_b = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

        num_points_seen = 0
        for epoch in range(epochs):
            start_time = time.time()

            indices = np.arange(length_dataset)
            np.random.shuffle(indices)
            X_train = X_train[:, indices].reshape(self.input_size, length_dataset)
            Y_train = Y_train[:, indices].reshape(self.num_classes, length_dataset)

            batch_loss = []

            weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
            bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

            for i in range(length_dataset):
                output, activation_outputs, pre_activations = self.forward_propagation(
                    X_train[:, i].reshape(self.input_size, 1), self.weights, self.biases)
                
                weight_gradients, bias_gradients = self.back_propagation(
                    output, activation_outputs, pre_activations, Y_train[:, i].reshape(self.num_classes, 1))

                for l in range(num_layers - 1):
                    weight_updates[str(l + 1)] += weight_gradients[str(l + 1)]
                    bias_updates[str(l + 1)] += bias_gradients[str(l + 1)]

                if self.loss_function == "MSE":
                    batch_loss.append(
                    self.mean_squared_error(Y_train[:, i].reshape(self.num_classes, 1), output)
                    + self.l2_regularisation_loss(weight_decay)
                )
                elif self.loss_function == "CROSS":
                    batch_loss.append(
                    self.cross_entropy_loss(Y_train[:, i].reshape(self.num_classes, 1), output)
                    + self.l2_regularisation_loss(weight_decay)
                )

                num_points_seen += 1

                if num_points_seen % batch_size == 0:
                    v_w = {
                        str(l + 1): BETA * v_w[str(l + 1)] + (1 - BETA) * (weight_updates[str(l + 1)] / batch_size) ** 2
                        for l in range(num_layers - 1)
                }
                    v_b = {
                        str(l + 1): BETA * v_b[str(l + 1)] + (1 - BETA) * (bias_updates[str(l + 1)] / batch_size) ** 2
                        for l in range(num_layers - 1)
                }

                    self.weights = {
                        str(l + 1): self.weights[str(l + 1)] - (learning_rate / np.sqrt(v_w[str(l + 1)] + EPSILON)) * weight_updates[str(l + 1)] / batch_size
                        for l in range(num_layers - 1)
                }
                    self.biases = {
                        str(l + 1): self.biases[str(l + 1)] - (learning_rate / np.sqrt(v_b[str(l + 1)] + EPSILON)) * bias_updates[str(l + 1)] / batch_size
                        for l in range(num_layers - 1)
                }

                    weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
                    bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

            elapsed_time = time.time() - start_time
            predictions = self.predict(self.train_data, self.num_train)

            training_loss.append(np.mean(batch_loss))
            training_accuracy.append(self.compute_accuracy(Y_train, predictions, length_dataset)[0])
            validation_accuracy.append(self.compute_accuracy(self.test_labels, self.predict(self.test_data, self.num_test), self.num_test)[0])

            print(
            "Epoch: %d, Loss: %.3e, Training Accuracy: %.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
            % (
                epoch,
                training_loss[epoch],
                training_accuracy[epoch],
                validation_accuracy[epoch],
                elapsed_time,
                learning_rate
            )
                )

            wandb.log({
            'loss': np.mean(batch_loss),
            'training_accuracy': training_accuracy[epoch],
            'validation_accuracy': validation_accuracy[epoch],
            'epoch': epoch
               })

        return training_loss, training_accuracy, validation_accuracy, predictions
    
    def adam(self, epochs, length_dataset, batch_size, learning_rate, weight_decay=0):
        """
        Implements the Adam optimizer for training the neural network.

        Arguments:
            epochs (int): Number of training epochs.
            length_dataset (int): Number of training samples to use.
            batch_size (int): Mini-batch size for gradient updates.
            learning_rate (float): Step size for weight updates.
            weight_decay (float, optional): Regularization parameter (default: 0).

        Returns:
            tuple: (training_loss, training_accuracy, validation_accuracy, final_predictions)
        """
        
        EPSILON = 1e-8
        BETA1, BETA2 = 0.9, 0.99

        X_train = self.train_data[:, :length_dataset]
        Y_train = self.train_labels[:, :length_dataset]

        training_loss = []
        training_accuracy = []
        validation_accuracy = []
        
        num_layers = len(self.layer_structure)

        # Initialize first and second moment estimates
        m_w = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
        m_b = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

        v_w = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
        v_b = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

        num_points_seen = 0
        for epoch in range(epochs):
            start_time = time.time()

            indices = np.arange(length_dataset)
            np.random.shuffle(indices)
            X_train = X_train[:, indices].reshape(self.input_size, length_dataset)
            Y_train = Y_train[:, indices].reshape(self.num_classes, length_dataset)

            batch_loss = []

            weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
            bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

            for i in range(length_dataset):
                output, activation_outputs, pre_activations = self.forward_propagation(
                    X_train[:, i].reshape(self.input_size, 1), self.weights, self.biases)
                weight_gradients, bias_gradients = self.back_propagation(
                    output, activation_outputs, pre_activations, Y_train[:, i].reshape(self.num_classes, 1))

                for l in range(num_layers - 1):
                    weight_updates[str(l + 1)] += weight_gradients[str(l + 1)]
                    bias_updates[str(l + 1)] += bias_gradients[str(l + 1)]

                if self.loss_function == "MSE":
                    batch_loss.append(
                        self.mean_squared_error(Y_train[:, i].reshape(self.num_classes, 1), output)
                        + self.l2_regularisation_loss(weight_decay)
                    )
                elif self.loss_function == "CROSS":
                    batch_loss.append(
                        self.cross_entropy_loss(Y_train[:, i].reshape(self.num_classes, 1), output)
                        + self.l2_regularisation_loss(weight_decay)
                    )

                num_points_seen += 1

                if num_points_seen % batch_size == 0:
                    # Compute biased first and second moment estimates
                    m_w = {str(l + 1): BETA1 * m_w[str(l + 1)] + (1 - BETA1) * (weight_updates[str(l + 1)] / batch_size)
                        for l in range(num_layers - 1)}
                    m_b = {str(l + 1): BETA1 * m_b[str(l + 1)] + (1 - BETA1) * (bias_updates[str(l + 1)] / batch_size)
                        for l in range(num_layers - 1)}

                    v_w = {str(l + 1): BETA2 * v_w[str(l + 1)] + (1 - BETA2) * (weight_updates[str(l + 1)] / batch_size) ** 2
                        for l in range(num_layers - 1)}
                    v_b = {str(l + 1): BETA2 * v_b[str(l + 1)] + (1 - BETA2) * (bias_updates[str(l + 1)] / batch_size) ** 2
                        for l in range(num_layers - 1)}

                    # Bias correction
                    m_w_hat = {str(l + 1): m_w[str(l + 1)] / (1 - BETA1 ** (epoch + 1)) for l in range(num_layers - 1)}
                    m_b_hat = {str(l + 1): m_b[str(l + 1)] / (1 - BETA1 ** (epoch + 1)) for l in range(num_layers - 1)}

                    v_w_hat = {str(l + 1): v_w[str(l + 1)] / (1 - BETA2 ** (epoch + 1)) for l in range(num_layers - 1)}
                    v_b_hat = {str(l + 1): v_b[str(l + 1)] / (1 - BETA2 ** (epoch + 1)) for l in range(num_layers - 1)}

                    # Update weights and biases
                    self.weights = {
                        str(l + 1): self.weights[str(l + 1)] - (learning_rate / (np.sqrt(v_w_hat[str(l + 1)] + EPSILON))) * m_w_hat[str(l + 1)]
                        for l in range(num_layers - 1)
                    }
                    self.biases = {
                        str(l + 1): self.biases[str(l + 1)] - (learning_rate / (np.sqrt(v_b_hat[str(l + 1)] + EPSILON))) * m_b_hat[str(l + 1)]
                        for l in range(num_layers - 1)
                    }

                    weight_updates = {str(l + 1): np.zeros_like(self.weights[str(l + 1)]) for l in range(num_layers - 1)}
                    bias_updates = {str(l + 1): np.zeros_like(self.biases[str(l + 1)]) for l in range(num_layers - 1)}

            elapsed_time = time.time() - start_time
            predictions = self.predict(self.train_data, self.num_train)

            training_loss.append(np.mean(batch_loss))
            training_accuracy.append(self.compute_accuracy(Y_train, predictions, length_dataset)[0])
            validation_accuracy.append(self.compute_accuracy(self.test_labels, self.predict(self.test_data, self.num_test), self.num_test)[0])

            print(
                "Epoch: %d, Loss: %.3e, Training Accuracy: %.2f, Validation Accuracy: %.2f, Time: %.2f, Learning Rate: %.3e"
                % (
                    epoch,
                    training_loss[epoch],
                    training_accuracy[epoch],
                    validation_accuracy[epoch],
                    elapsed_time,
                    learning_rate,
                )
            )

            wandb.log({
                'loss': np.mean(batch_loss),
                'training_accuracy': training_accuracy[epoch],
                'validation_accuracy': validation_accuracy[epoch],
                'epoch': epoch
            })

        return training_loss, training_accuracy, validation_accuracy, predictions

In [89]:
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Define dataset sizes
total_train_samples = train_labels.shape[0]
train_size = int(0.9 * total_train_samples)  # 90% for training
validation_size = total_train_samples - train_size  # 10% for validation
test_size = test_labels.shape[0]

# Shuffle indices to randomize the dataset
train_indices = np.random.permutation(total_train_samples)

# Split dataset into training, validation, and test sets
shuffled_train_images = train_images[train_indices]
shuffled_train_labels = train_labels[train_indices]

X_train = shuffled_train_images[:train_size, :]
Y_train = shuffled_train_labels[:train_size]

X_valid = shuffled_train_images[train_size:, :]
Y_valid = shuffled_train_labels[train_size:]

X_test = test_images
Y_test = test_labels

# Define sweep configuration for hyperparameter tuning
sweep_config = {
    "name": "Bayes_Hyperparam_Tuning",
    "method": "bayes",
    "metric": {
        "name": "val_accuracy",
        "goal": "maximize"
    },
    "parameters": {
        "epochs": {
            "values": [5, 10]
        },
        "init_method": {
            "values": ["RANDOM", "XAVIER"]
        },
        "layers": {
            "values": [2, 3, 4]
        },
        "hidden_neurons": {
            "values": [32, 64, 128]
        },
        "activation_func": {
            "values": ['TANH', 'SIGMOID', 'RELU']
        },
        "learning_rate": {
            "values": [0.001, 0.0001]
        },
        "weight_decay": {
            "values": [0, 0.0005, 0.5]
        },
        "optimizer": {
            "values": ["SGD", "MGD", "NGD", "RMSPROP", "ADAM"]
        },
        "batch_size": {
            "values": [16, 32, 64]
        }
    }
}

# Initialize the sweep
sweep_id = wandb.sweep(sweep_config, project='Fashion_MNIST_Images', entity='singhsonalika5-indian-institute-of-technology-madras')


Create sweep with ID: r7vljbht
Sweep URL: https://wandb.ai/singhsonalika5-indian-institute-of-technology-madras/Fashion_MNIST_Images/sweeps/r7vljbht


In [90]:
def train():    
        config_defaults = dict(
            max_epochs=5,
            hidden_layers=3,
            hidden_neurons=32,
            weight_decay=0,
            learning_rate=1e-3,
            optimizer="MGD",
            batch_size=16,
            activation="TANH",
            initializer="XAVIER",
            loss_function="CROSS",
        )
        
        wandb.init(config=config_defaults)
    
        wandb.run.name = "hl_" + str(wandb.config.hidden_layers) + "_hn_" + str(wandb.config.hidden_neurons) + "_opt_" + wandb.config.optimizer + "_act_" + wandb.config.activation + "_lr_" + str(wandb.config.learning_rate) + "_bs_"+str(wandb.config.batch_size) + "_init_" + wandb.config.initializer + "_ep_"+ str(wandb.config.max_epochs)+ "_l2_" + str(wandb.config.weight_decay) 
    
        CONFIG = wandb.config

        NN = NeuralNetwork(
        hidden_layers=CONFIG.hidden_layers,
        hidden_neurons=CONFIG.hidden_neurons,
        train_images=train_images,
        train_labels=train_labels,
        num_train=train_size,
        val_images=X_valid,
        val_labels=Y_valid,
        num_val=validation_size,
        test_images=X_test,
        test_labels=Y_test,
        num_test=test_size,
        optimizer=CONFIG.optimizer,
        batch_size=CONFIG.batch_size,
        weight_decay=CONFIG.weight_decay,
        learning_rate=CONFIG.learning_rate,
        max_epochs=CONFIG.max_epochs,
        activation=CONFIG.activation,
        initializer=CONFIG.initializer,
        loss_function=CONFIG.loss_function
    )

        training_loss, training_accuracy, validation_accuracy, predictions = NN.optimizer(
        NN.max_epochs, NN.num_train, NN.batch_size, NN.learning_rate)
    


In [92]:
wandb.agent(sweep_id, train, count=10)

[34m[1mwandb[0m: Agent Starting Run: h9xlu2c8 with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	weight_decay: 0


  return 1.0 / (1 + np.exp(-(z)))
  return np.mean([-true_labels[i] * np.log(predicted_labels[i]) for i in range(len(predicted_labels))])
  return np.mean([-true_labels[i] * np.log(predicted_labels[i]) for i in range(len(predicted_labels))])


Run h9xlu2c8 errored:
Traceback (most recent call last):
  File "c:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\1481853125.py", line 43, in train
    training_loss, training_accuracy, validation_accuracy, predictions = NN.optimizer(
                                                                         ^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 293, in sgd
    validation_accuracy.append(self.compute_accuracy(self.num_test, self.predict(self.num_train, self.num_val), self.num_val)[0])
                                                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 207, in predict
    input_batch[:, i].reshape(self.input_size, 1),
    ~~~~~~~~~~~^^^^^^
TypeError: 'int' object is 

Epoch: 0, Loss: 1.443e-01, Training Accuracy: 0.10, Validation Accuracy: 0.65, Time: 124.93, Learning Rate: 1.000e-04
Epoch: 1, Loss: 9.684e-02, Training Accuracy: 0.10, Validation Accuracy: 0.71, Time: 134.10, Learning Rate: 1.000e-04
Epoch: 2, Loss: 8.019e-02, Training Accuracy: 0.10, Validation Accuracy: 0.74, Time: 137.70, Learning Rate: 1.000e-04
Epoch: 3, Loss: 7.132e-02, Training Accuracy: 0.10, Validation Accuracy: 0.76, Time: 165.98, Learning Rate: 1.000e-04
Epoch: 4, Loss: 6.587e-02, Training Accuracy: 0.10, Validation Accuracy: 0.77, Time: 150.46, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▄▂▁▁
training_accuracy,▄▆█▁▃
validation_accuracy,▁▄▇██

0,1
epoch,4.0
loss,0.06587
training_accuracy,0.09998
validation_accuracy,0.767


[34m[1mwandb[0m: Agent Starting Run: 9nuzieeq with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 32
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: RMSPROP
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch: 0, Loss: 6.220e-02, Training Accuracy: 0.10, Validation Accuracy: 0.83, Time: 47.92, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.162e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 49.91, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.768e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 49.78, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.568e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 47.83, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.418e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 48.55, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,█▃▅▁▂
validation_accuracy,▁▆▇▇█

0,1
epoch,4.0
loss,0.03418
training_accuracy,0.09954
validation_accuracy,0.8675


[34m[1mwandb[0m: Agent Starting Run: 6b9jufm3 with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 32
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	weight_decay: 0.0005


Run 6b9jufm3 errored:
Traceback (most recent call last):
  File "c:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\1481853125.py", line 43, in train
    training_loss, training_accuracy, validation_accuracy, predictions = NN.optimizer(
                                                                         ^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 293, in sgd
    validation_accuracy.append(self.compute_accuracy(self.num_test, self.predict(self.num_train, self.num_val), self.num_val)[0])
                                                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 207, in predict
    input_batch[:, i].reshape(self.input_size, 1),
    ~~~~~~~~~~~^^^^^^
TypeError: 'int' object is 

Epoch: 0, Loss: 5.308e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 46.59, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.939e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 47.77, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.587e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 46.74, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.378e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 46.42, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.217e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 45.95, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▂▁
training_accuracy,▆▁▄█▄
validation_accuracy,▂▁▃▆█

0,1
epoch,4.0
loss,0.03217
training_accuracy,0.09852
validation_accuracy,0.8704


[34m[1mwandb[0m: Agent Starting Run: 7mgp9ez2 with config:
[34m[1mwandb[0m: 	activation_func: SIGMOID
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 2
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: NGD
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch: 0, Loss: 8.663e-02, Training Accuracy: 0.10, Validation Accuracy: 0.72, Time: 111.32, Learning Rate: 1.000e-03
Epoch: 1, Loss: 6.372e-02, Training Accuracy: 0.10, Validation Accuracy: 0.78, Time: 135.12, Learning Rate: 1.000e-03
Epoch: 2, Loss: 5.872e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 148.07, Learning Rate: 1.000e-03
Epoch: 3, Loss: 5.530e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 134.99, Learning Rate: 1.000e-03
Epoch: 4, Loss: 5.310e-02, Training Accuracy: 0.10, Validation Accuracy: 0.81, Time: 142.53, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,█▇▆▅▁
validation_accuracy,▁▆███

0,1
epoch,4.0
loss,0.0531
training_accuracy,0.09846
validation_accuracy,0.8075


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: avb8has3 with config:
[34m[1mwandb[0m: 	activation_func: SIGMOID
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 64
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: NGD
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch: 0, Loss: 8.981e-02, Training Accuracy: 0.10, Validation Accuracy: 0.71, Time: 76.79, Learning Rate: 1.000e-03
Epoch: 1, Loss: 6.455e-02, Training Accuracy: 0.10, Validation Accuracy: 0.79, Time: 75.32, Learning Rate: 1.000e-03
Epoch: 2, Loss: 5.965e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 76.04, Learning Rate: 1.000e-03
Epoch: 3, Loss: 5.627e-02, Training Accuracy: 0.10, Validation Accuracy: 0.81, Time: 78.93, Learning Rate: 1.000e-03
Epoch: 4, Loss: 5.473e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 76.19, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▃▄▇█▁
validation_accuracy,▁▇▇█▇

0,1
epoch,4.0
loss,0.05473
training_accuracy,0.09976
validation_accuracy,0.8019


[34m[1mwandb[0m: Agent Starting Run: yfnikiuz with config:
[34m[1mwandb[0m: 	activation_func: TANH
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 32
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: MGD
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 1.387e-01, Training Accuracy: 0.10, Validation Accuracy: 0.68, Time: 47.01, Learning Rate: 1.000e-03
Epoch: 1, Loss: 8.554e-02, Training Accuracy: 0.10, Validation Accuracy: 0.74, Time: 39.94, Learning Rate: 1.000e-03
Epoch: 2, Loss: 6.746e-02, Training Accuracy: 0.10, Validation Accuracy: 0.78, Time: 40.91, Learning Rate: 1.000e-03
Epoch: 3, Loss: 5.868e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 39.87, Learning Rate: 1.000e-03
Epoch: 4, Loss: 5.312e-02, Training Accuracy: 0.10, Validation Accuracy: 0.82, Time: 40.19, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▄▂▁▁
training_accuracy,▁▅▃▄█
validation_accuracy,▁▄▆▇█

0,1
epoch,4.0
loss,0.05312
training_accuracy,0.10278
validation_accuracy,0.8179


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0ak06evn with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 2
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: MGD
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 1.226e-01, Training Accuracy: 0.10, Validation Accuracy: 0.73, Time: 99.64, Learning Rate: 1.000e-04
Epoch: 1, Loss: 7.981e-02, Training Accuracy: 0.10, Validation Accuracy: 0.77, Time: 94.88, Learning Rate: 1.000e-04
Epoch: 2, Loss: 6.652e-02, Training Accuracy: 0.10, Validation Accuracy: 0.79, Time: 98.81, Learning Rate: 1.000e-04
Epoch: 3, Loss: 5.954e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 90.73, Learning Rate: 1.000e-04
Epoch: 4, Loss: 5.522e-02, Training Accuracy: 0.10, Validation Accuracy: 0.81, Time: 89.91, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▄▂▁▁
training_accuracy,▃▇▅▁█
validation_accuracy,▁▄▆▇█

0,1
epoch,4.0
loss,0.05522
training_accuracy,0.10124
validation_accuracy,0.8131


[34m[1mwandb[0m: Agent Starting Run: p739jje9 with config:
[34m[1mwandb[0m: 	activation_func: TANH
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 7.020e-02, Training Accuracy: 0.10, Validation Accuracy: 0.82, Time: 104.86, Learning Rate: 1.000e-04
Epoch: 1, Loss: 4.611e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 101.49, Learning Rate: 1.000e-04
Epoch: 2, Loss: 4.208e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 96.37, Learning Rate: 1.000e-04
Epoch: 3, Loss: 3.979e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 101.67, Learning Rate: 1.000e-04
Epoch: 4, Loss: 3.827e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 110.41, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▆█▄▁▇
validation_accuracy,▁▅▆▇█

0,1
epoch,4.0
loss,0.03827
training_accuracy,0.10131
validation_accuracy,0.855


In [93]:
wandb.agent(sweep_id, train, count=10)

[34m[1mwandb[0m: Agent Starting Run: 5wyo9bym with config:
[34m[1mwandb[0m: 	activation_func: TANH
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 64
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 2
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: NGD
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 1.750e-01, Training Accuracy: 0.10, Validation Accuracy: 0.50, Time: 71.67, Learning Rate: 1.000e-04
Epoch: 1, Loss: 1.588e-01, Training Accuracy: 0.10, Validation Accuracy: 0.52, Time: 69.59, Learning Rate: 1.000e-04
Epoch: 2, Loss: 1.342e-01, Training Accuracy: 0.10, Validation Accuracy: 0.60, Time: 70.38, Learning Rate: 1.000e-04
Epoch: 3, Loss: 1.174e-01, Training Accuracy: 0.10, Validation Accuracy: 0.62, Time: 71.43, Learning Rate: 1.000e-04
Epoch: 4, Loss: 1.078e-01, Training Accuracy: 0.10, Validation Accuracy: 0.64, Time: 70.29, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▆▄▂▁
training_accuracy,▇█▃▁█
validation_accuracy,▁▂▆▇█

0,1
epoch,4.0
loss,0.10782
training_accuracy,0.10085
validation_accuracy,0.6371


[34m[1mwandb[0m: Agent Starting Run: 7idvgeel with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 32
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: MGD
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 8.946e-02, Training Accuracy: 0.10, Validation Accuracy: 0.78, Time: 47.58, Learning Rate: 1.000e-03
Epoch: 1, Loss: 5.188e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 43.25, Learning Rate: 1.000e-03
Epoch: 2, Loss: 4.458e-02, Training Accuracy: 0.10, Validation Accuracy: 0.83, Time: 43.71, Learning Rate: 1.000e-03
Epoch: 3, Loss: 4.120e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 43.00, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.926e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 42.22, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▇▇▃▁█
validation_accuracy,▁▆▅▇█

0,1
epoch,4.0
loss,0.03926
training_accuracy,0.10037
validation_accuracy,0.8524


[34m[1mwandb[0m: Agent Starting Run: emcy8prw with config:
[34m[1mwandb[0m: 	activation_func: SIGMOID
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 32
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 1.035e-01, Training Accuracy: 0.10, Validation Accuracy: 0.77, Time: 47.69, Learning Rate: 1.000e-04
Epoch: 1, Loss: 6.159e-02, Training Accuracy: 0.10, Validation Accuracy: 0.81, Time: 44.02, Learning Rate: 1.000e-04
Epoch: 2, Loss: 5.214e-02, Training Accuracy: 0.10, Validation Accuracy: 0.82, Time: 46.45, Learning Rate: 1.000e-04
Epoch: 3, Loss: 4.751e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 44.22, Learning Rate: 1.000e-04
Epoch: 4, Loss: 4.481e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 44.06, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▅▃█▁▆
validation_accuracy,▁▅▇██

0,1
epoch,4.0
loss,0.04481
training_accuracy,0.10044
validation_accuracy,0.8397


[34m[1mwandb[0m: Agent Starting Run: jsqjc44e with config:
[34m[1mwandb[0m: 	activation_func: SIGMOID
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 64
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 2
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: MGD
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 1.317e-01, Training Accuracy: 0.10, Validation Accuracy: 0.67, Time: 54.71, Learning Rate: 1.000e-04
Epoch: 1, Loss: 9.101e-02, Training Accuracy: 0.10, Validation Accuracy: 0.73, Time: 47.59, Learning Rate: 1.000e-04
Epoch: 2, Loss: 7.552e-02, Training Accuracy: 0.10, Validation Accuracy: 0.76, Time: 50.01, Learning Rate: 1.000e-04
Epoch: 3, Loss: 6.662e-02, Training Accuracy: 0.10, Validation Accuracy: 0.79, Time: 49.27, Learning Rate: 1.000e-04
Epoch: 4, Loss: 6.076e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 50.03, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▄▂▂▁
training_accuracy,▅▆▁█▄
validation_accuracy,▁▄▆▇█

0,1
epoch,4.0
loss,0.06076
training_accuracy,0.10026
validation_accuracy,0.8018


[34m[1mwandb[0m: Agent Starting Run: gp78i780 with config:
[34m[1mwandb[0m: 	activation_func: SIGMOID
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	weight_decay: 0.5


  return 1.0 / (1 + np.exp(-(z)))
  return np.mean([-true_labels[i] * np.log(predicted_labels[i]) for i in range(len(predicted_labels))])
  return np.mean([-true_labels[i] * np.log(predicted_labels[i]) for i in range(len(predicted_labels))])


Run gp78i780 errored:
Traceback (most recent call last):
  File "c:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\1481853125.py", line 43, in train
    training_loss, training_accuracy, validation_accuracy, predictions = NN.optimizer(
                                                                         ^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 293, in sgd
    validation_accuracy.append(self.compute_accuracy(self.num_test, self.predict(self.num_train, self.num_val), self.num_val)[0])
                                                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 207, in predict
    input_batch[:, i].reshape(self.input_size, 1),
    ~~~~~~~~~~~^^^^^^
TypeError: 'int' object is 

Epoch: 0, Loss: 5.935e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 43.80, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.128e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 42.14, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.782e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 41.28, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.589e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 42.80, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.441e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 42.67, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,█▁▇▇▇
validation_accuracy,▁▂▅▇█

0,1
epoch,4.0
loss,0.03441
training_accuracy,0.10004
validation_accuracy,0.8642


[34m[1mwandb[0m: Agent Starting Run: erxyp1kq with config:
[34m[1mwandb[0m: 	activation_func: TANH
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 5.006e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 106.12, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.673e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 104.26, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.286e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 103.37, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.053e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 13594.92, Learning Rate: 1.000e-03
Epoch: 4, Loss: 2.880e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 262.89, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▄▂▂▁
training_accuracy,▁▂▅▄█
validation_accuracy,▁▄▆▇█

0,1
epoch,4.0
loss,0.0288
training_accuracy,0.1012
validation_accuracy,0.8714


[34m[1mwandb[0m: Agent Starting Run: fcn6tlvj with config:
[34m[1mwandb[0m: 	activation_func: TANH
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	weight_decay: 0.0005


Run fcn6tlvj errored:
Traceback (most recent call last):
  File "c:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\1481853125.py", line 43, in train
    training_loss, training_accuracy, validation_accuracy, predictions = NN.optimizer(
                                                                         ^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 293, in sgd
    validation_accuracy.append(self.compute_accuracy(self.num_test, self.predict(self.num_train, self.num_val), self.num_val)[0])
                                                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 207, in predict
    input_batch[:, i].reshape(self.input_size, 1),
    ~~~~~~~~~~~^^^^^^
TypeError: 'int' object is 

Run xmu2wsjr errored:
Traceback (most recent call last):
  File "c:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\1481853125.py", line 43, in train
    training_loss, training_accuracy, validation_accuracy, predictions = NN.optimizer(
                                                                         ^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 293, in sgd
    validation_accuracy.append(self.compute_accuracy(self.num_test, self.predict(self.num_train, self.num_val), self.num_val)[0])
                                                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 207, in predict
    input_batch[:, i].reshape(self.input_size, 1),
    ~~~~~~~~~~~^^^^^^
TypeError: 'int' object is 

Epoch: 0, Loss: 1.199e-01, Training Accuracy: 0.10, Validation Accuracy: 0.71, Time: 242.19, Learning Rate: 1.000e-04
Epoch: 1, Loss: 7.849e-02, Training Accuracy: 0.10, Validation Accuracy: 0.75, Time: 292.19, Learning Rate: 1.000e-04
Epoch: 2, Loss: 6.649e-02, Training Accuracy: 0.10, Validation Accuracy: 0.78, Time: 279.92, Learning Rate: 1.000e-04
Epoch: 3, Loss: 5.982e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 275.03, Learning Rate: 1.000e-04
Epoch: 4, Loss: 5.557e-02, Training Accuracy: 0.10, Validation Accuracy: 0.81, Time: 286.81, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▃▁▃▁█
validation_accuracy,▁▄▆▇█

0,1
epoch,4.0
loss,0.05557
training_accuracy,0.10189
validation_accuracy,0.8092


In [94]:
wandb.agent(sweep_id, train, count=5)

[34m[1mwandb[0m: Agent Starting Run: 4sduv1er with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 32
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 2
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: NGD
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch: 0, Loss: 1.038e-01, Training Accuracy: 0.10, Validation Accuracy: 0.74, Time: 160.63, Learning Rate: 1.000e-03
Epoch: 1, Loss: 6.898e-02, Training Accuracy: 0.10, Validation Accuracy: 0.74, Time: 150.82, Learning Rate: 1.000e-03
Epoch: 2, Loss: 6.388e-02, Training Accuracy: 0.10, Validation Accuracy: 0.77, Time: 151.80, Learning Rate: 1.000e-03
Epoch: 3, Loss: 5.941e-02, Training Accuracy: 0.10, Validation Accuracy: 0.74, Time: 150.68, Learning Rate: 1.000e-03
Epoch: 4, Loss: 5.726e-02, Training Accuracy: 0.10, Validation Accuracy: 0.79, Time: 158.78, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▁▂▆▃█
validation_accuracy,▁▂▅▁█

0,1
epoch,4.0
loss,0.05726
training_accuracy,0.10213
validation_accuracy,0.7928


[34m[1mwandb[0m: Agent Starting Run: dxyyu738 with config:
[34m[1mwandb[0m: 	activation_func: SIGMOID
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: RMSPROP
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch: 0, Loss: 5.099e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 309.97, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.911e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 300.91, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.541e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 286.89, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.281e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 290.22, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.093e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 301.94, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▄▃▂▁
training_accuracy,█▅▆▁▇
validation_accuracy,▁▆▆██

0,1
epoch,4.0
loss,0.03093
training_accuracy,0.0998
validation_accuracy,0.8712


[34m[1mwandb[0m: Agent Starting Run: v2o2nztt with config:
[34m[1mwandb[0m: 	activation_func: SIGMOID
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 32
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch: 0, Loss: 7.807e-02, Training Accuracy: 0.10, Validation Accuracy: 0.82, Time: 135.66, Learning Rate: 1.000e-03
Epoch: 1, Loss: 4.322e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 126.49, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.917e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 119.53, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.666e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 603.41, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.521e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 117.40, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▂▂▁▁
training_accuracy,▅█▁▂▃
validation_accuracy,▁▆▇▆█

0,1
epoch,4.0
loss,0.03521
training_accuracy,0.10009
validation_accuracy,0.8642


[34m[1mwandb[0m: Agent Starting Run: egnqvkid with config:
[34m[1mwandb[0m: 	activation_func: SIGMOID
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 64
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 5.611e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 169.17, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.838e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 177.10, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.468e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 168.51, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.239e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 162.27, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.061e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 173.47, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▁▃█▆█
validation_accuracy,▁▅▄▇█

0,1
epoch,4.0
loss,0.03061
training_accuracy,0.10148
validation_accuracy,0.8727


[34m[1mwandb[0m: Agent Starting Run: kb64eo68 with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 64
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch: 0, Loss: 9.177e-02, Training Accuracy: 0.10, Validation Accuracy: 0.80, Time: 177.37, Learning Rate: 1.000e-04
Epoch: 1, Loss: 5.354e-02, Training Accuracy: 0.10, Validation Accuracy: 0.82, Time: 179.75, Learning Rate: 1.000e-04
Epoch: 2, Loss: 4.719e-02, Training Accuracy: 0.10, Validation Accuracy: 0.83, Time: 186.90, Learning Rate: 1.000e-04


[34m[1mwandb[0m: Network error (SSLError), entering retry loop.


Epoch: 3, Loss: 4.416e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 175.46, Learning Rate: 1.000e-04
Epoch: 4, Loss: 4.223e-02, Training Accuracy: 0.10, Validation Accuracy: 0.85, Time: 183.32, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▃█▄▁▅
validation_accuracy,▁▅▆▇█

0,1
epoch,4.0
loss,0.04223
training_accuracy,0.09965
validation_accuracy,0.846


In [95]:
wandb.agent(sweep_id, train, count=5)

[34m[1mwandb[0m: Agent Starting Run: 5valaimp with config:
[34m[1mwandb[0m: 	activation_func: TANH
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 32
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 2
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	weight_decay: 0.5


Run 5valaimp errored:
Traceback (most recent call last):
  File "c:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\1481853125.py", line 43, in train
    training_loss, training_accuracy, validation_accuracy, predictions = NN.optimizer(
                                                                         ^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 293, in sgd
    validation_accuracy.append(self.compute_accuracy(self.num_test, self.predict(self.num_train, self.num_val), self.num_val)[0])
                                                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\singh\AppData\Local\Temp\ipykernel_18712\2078877857.py", line 207, in predict
    input_batch[:, i].reshape(self.input_size, 1),
    ~~~~~~~~~~~^^^^^^
TypeError: 'int' object is 

Epoch: 0, Loss: 1.622e-01, Training Accuracy: 0.10, Validation Accuracy: 0.59, Time: 236.44, Learning Rate: 1.000e-04
Epoch: 1, Loss: 1.263e-01, Training Accuracy: 0.10, Validation Accuracy: 0.70, Time: 237.36, Learning Rate: 1.000e-04
Epoch: 2, Loss: 1.015e-01, Training Accuracy: 0.10, Validation Accuracy: 0.71, Time: 1272.66, Learning Rate: 1.000e-04
Epoch: 3, Loss: 8.801e-02, Training Accuracy: 0.10, Validation Accuracy: 0.74, Time: 234.08, Learning Rate: 1.000e-04
Epoch: 4, Loss: 8.057e-02, Training Accuracy: 0.10, Validation Accuracy: 0.73, Time: 230.11, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▅▃▂▁
training_accuracy,█▁▂▃▁
validation_accuracy,▁▆▇█▇

0,1
epoch,4.0
loss,0.08057
training_accuracy,0.09844
validation_accuracy,0.7265


[34m[1mwandb[0m: Agent Starting Run: foz3ybci with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: NGD
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 8.656e-02, Training Accuracy: 0.10, Validation Accuracy: 0.71, Time: 438.31, Learning Rate: 1.000e-03
Epoch: 1, Loss: 6.452e-02, Training Accuracy: 0.10, Validation Accuracy: 0.79, Time: 408.69, Learning Rate: 1.000e-03
Epoch: 2, Loss: 5.989e-02, Training Accuracy: 0.10, Validation Accuracy: 0.79, Time: 207.56, Learning Rate: 1.000e-03
Epoch: 3, Loss: 5.567e-02, Training Accuracy: 0.10, Validation Accuracy: 0.81, Time: 133.72, Learning Rate: 1.000e-03
Epoch: 4, Loss: 5.354e-02, Training Accuracy: 0.10, Validation Accuracy: 0.81, Time: 150.53, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
training_accuracy,▇█▄▁▇
validation_accuracy,▁▇▇██

0,1
epoch,4.0
loss,0.05354
training_accuracy,0.10081
validation_accuracy,0.8108


[34m[1mwandb[0m: Agent Starting Run: tflg8qsq with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 64
[34m[1mwandb[0m: 	init_method: RANDOM
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	weight_decay: 0.5


Epoch: 0, Loss: 5.171e-02, Training Accuracy: 0.10, Validation Accuracy: 0.84, Time: 74.39, Learning Rate: 1.000e-03
Epoch: 1, Loss: 3.771e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 62.37, Learning Rate: 1.000e-03
Epoch: 2, Loss: 3.404e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 59.97, Learning Rate: 1.000e-03
Epoch: 3, Loss: 3.181e-02, Training Accuracy: 0.10, Validation Accuracy: 0.86, Time: 57.17, Learning Rate: 1.000e-03
Epoch: 4, Loss: 3.012e-02, Training Accuracy: 0.10, Validation Accuracy: 0.87, Time: 59.25, Learning Rate: 1.000e-03


0,1
epoch,▁▃▅▆█
loss,█▃▂▂▁
training_accuracy,▆▁▄▁█
validation_accuracy,▁▅▇▆█

0,1
epoch,4.0
loss,0.03012
training_accuracy,0.10193
validation_accuracy,0.873


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aujr4zhx with config:
[34m[1mwandb[0m: 	activation_func: RELU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_neurons: 128
[34m[1mwandb[0m: 	init_method: XAVIER
[34m[1mwandb[0m: 	layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: NGD
[34m[1mwandb[0m: 	weight_decay: 0


Epoch: 0, Loss: 1.240e-01, Training Accuracy: 0.10, Validation Accuracy: 0.72, Time: 205.49, Learning Rate: 1.000e-04
Epoch: 1, Loss: 8.260e-02, Training Accuracy: 0.10, Validation Accuracy: 0.75, Time: 135.92, Learning Rate: 1.000e-04
Epoch: 2, Loss: 7.040e-02, Training Accuracy: 0.10, Validation Accuracy: 0.77, Time: 122.30, Learning Rate: 1.000e-04
Epoch: 3, Loss: 6.354e-02, Training Accuracy: 0.10, Validation Accuracy: 0.79, Time: 146.16, Learning Rate: 1.000e-04
Epoch: 4, Loss: 5.919e-02, Training Accuracy: 0.10, Validation Accuracy: 0.79, Time: 147.11, Learning Rate: 1.000e-04


0,1
epoch,▁▃▅▆█
loss,█▄▂▁▁
training_accuracy,▁▄▄█▂
validation_accuracy,▁▄▆██

0,1
epoch,4.0
loss,0.05919
training_accuracy,0.09993
validation_accuracy,0.7934
