# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Neuron Class

Activation Computation and output based on the provided aggregate signal. Each neuron has one output

In [2]:
class Neuron:
    def __init__(self, input_size):
        self.params = Parameters(input_size, 1)

# Parameter Class

Manages weights, bias, learning rate, regularization, and dropout or any other parameters for a layer during neural network training.

In [3]:
class Parameters:
    def __init__(self, input_size, num_neurons):
        self.weights = np.random.randn(input_size, num_neurons) * 0.1
        self.bias = np.random.randn(1, num_neurons) * 0.1

    def get_weights(self):
        return self.weights

    def get_bias(self):
        return self.bias

# Layer Class

Represents a layer in the neural network, applying forward propagation and managing its parameters and activation function.

In [4]:
class Layer:
    def __init__(self, input_size, num_neurons, activation_type):
        self.neurons = [Neuron(input_size) for _ in range(num_neurons)]
        self.activation_fn = Activation(activation_type)
        self.input_size = input_size
        self.num_neurons = num_neurons
        self.activation_type = activation_type
        self.inputs = None

# Loss Function

Defines various loss functions (MSE, RMSE, Cross-Entropy) for evaluating model performance.

In [5]:
class LossFunction:
    @staticmethod
    def mse(predicted, actual):
        return np.mean((predicted - actual) ** 2)

    @staticmethod
    def mse_derivative(predicted, actual):
        return 2 * (predicted - actual) / actual.size
    
    @staticmethod
    def binary_cross_entropy(predicted, actual):
        epsilon = 1e-15
        predicted = np.clip(predicted, epsilon, 1 - epsilon)
        return -np.mean(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))

    @staticmethod
    def binary_cross_entropy_derivative(predicted, actual):
        epsilon = 1e-15
        predicted = np.clip(predicted, epsilon, 1 - epsilon)
        return (predicted - actual) / (predicted * (1 - predicted))

# Activation Function

Implements various activation functions (ReLU, Sigmoid, Tanh, Softmax) and computes the activation or its derivative

In [6]:
class Activation:
    def __init__(self, type):
        self.type = type

    def forward(self, inputs):
        self.inputs = inputs
        if self.type == "relu":
            self.output = np.maximum(0, inputs)
        elif self.type == "sigmoid":
        # Clipping inputs to avoid overflow
            clipped_inputs = np.clip(inputs, -709, 709)  # np.exp(709) is close to the limit
            self.output = 1 / (1 + np.exp(-clipped_inputs))
        elif self.type == "linear":
            self.output = inputs
        elif self.type == "tanh":
            self.output = np.tanh(inputs)
        else:
            raise ValueError(f"Invalid activation function type: {self.type}")
        return self.output

    def derivative(self):
        if self.type == "relu":
            return np.where(self.inputs > 0, 1, 0)
        elif self.type == "sigmoid":
            return self.output * (1 - self.output)
        elif self.type == "linear":
            return np.ones_like(self.inputs)
        elif self.type == "tanh":
            return 1 - np.power(self.output, 2)
        else:
            raise ValueError(f"No derivative implemented for activation function type: {self.type}")


# Dropout & Regularization 

Implements various activation functions (ReLU, Sigmoid, Tanh, Softmax) and computes the activation or its derivative.
Computes regularization term for weights based on the specified regularization type ("l1" or "l2").

In [7]:
class Dropout:
    def __init__(self, rate):
        self.rate = rate
        self.mask = None

class Regularization:
    @staticmethod
    def l1(weights, lambda_val):
        return lambda_val * np.sum(np.abs(weights))

    @staticmethod
    def l1_derivative(weights, lambda_val):
        return lambda_val * np.sign(weights)

    @staticmethod
    def l2(weights, lambda_val):
        return (lambda_val / 2) * np.sum(np.square(weights))

    @staticmethod
    def l2_derivative(weights, lambda_val):
        return lambda_val * weights

    @staticmethod
    def compute(reg_type, weights, lambda_val):
        if reg_type == "l1":
            return Regularization.l1(weights, lambda_val)
        elif reg_type == "l2":
            return Regularization.l2(weights, lambda_val)
        else:
            raise ValueError("Invalid regularization type. Expected 'l1' or 'l2'.")

    @staticmethod
    def derivative(reg_type, weights, lambda_val):
        if reg_type == "l1":
            return Regularization.l1_derivative(weights, lambda_val)
        elif reg_type == "l2":
            return Regularization.l2_derivative(weights, lambda_val)
        else:
            raise ValueError("Invalid regularization type. Expected 'l1' or 'l2'.")


# Gradient Descent

Updates weights and biases using gradient descent during the training process.

In [8]:
class GradientDescent:
    @staticmethod
    def update_parameters(layers, learning_rate, lambda_val=0.01):
        for layer in layers:
            if hasattr(layer, 'neurons'):
                for neuron in layer.neurons:
                    neuron.params.weights -= learning_rate * neuron.d_weights
                    neuron.params.bias -= learning_rate * neuron.d_bias


# Forward Propagation

Applies forward propagation for the entire neural network, computing outputs for each layer.

In [9]:
class ForwardPropagation:
    @staticmethod
    def apply_dropout(layer, inputs, training=True):
        if not isinstance(layer, Dropout):
            return inputs  # If the layer is not a Dropout layer, just return the inputs

        if training:
            # Create a mask and apply it
            mask = np.random.binomial(1, 1 - layer.rate, size=inputs.shape) / (1 - layer.rate)
            return inputs * mask
        else:
            return inputs  # No dropout during evaluation
    
    @staticmethod
    def forward_layer(layer, inputs):
        # Save the inputs to the layer
        layer.inputs = inputs
        neuron_outputs = np.hstack([np.dot(inputs, neuron.params.get_weights()) + neuron.params.get_bias() for neuron in layer.neurons])
        return layer.activation_fn.forward(neuron_outputs)

    @staticmethod
    def forward(network, inputs):
        for layer in network.layers:
            inputs = ForwardPropagation.forward_layer(layer, inputs)
        return inputs

# Back Propagation

Performs backward propagation to compute gradients and update weights based on the loss function.

In [10]:
class BackwardPropagation:
    @staticmethod
    def backward_layer(layer, d_output):
        d_output_activation = layer.activation_fn.derivative() * d_output
        d_inputs = np.zeros((d_output_activation.shape[0], layer.input_size))
        for i, neuron in enumerate(layer.neurons):
            d_output_neuron = d_output_activation[:, i:i+1]
            neuron.d_weights = np.dot(layer.inputs.T, d_output_neuron)
            neuron.d_bias = np.sum(d_output_neuron, axis=0, keepdims=True)
            d_inputs += np.dot(d_output_neuron, neuron.params.get_weights().T)
        layer.inputs = d_inputs  # Update layer inputs for next layer backward pass

    @staticmethod
    def compute_gradients(network, loss_gradient):
        for layer in reversed(network.layers):
            BackwardPropagation.backward_layer(layer, loss_gradient)
            loss_gradient = layer.inputs  # Use updated layer inputs as next gradient

# Input Normalization

Normalizes input data by calculating mean and standard deviation for better convergence during training.

In [11]:
class InputNormalization:
    def __init__(self, method='zscore'):
        self.method = method
        self.min = None
        self.max = None
        self.mean = None
        self.std = None

    def fit(self, data):
        if self.method == 'minmax':
            self.min = np.min(data, axis=0)
            self.max = np.max(data, axis=0)
        elif self.method == 'zscore':
            self.mean = np.mean(data, axis=0)
            self.std = np.std(data, axis=0)
            self.std[self.std == 0] = 1  # Prevent division by zero for features with no variation
        elif self.method == 'max':
            self.max = np.max(np.abs(data), axis=0)
        else:
            raise ValueError(f"Unknown normalization method: {self.method}")

    def transform(self, data):
        if self.method == 'minmax':
            return (data - self.min) / (self.max - self.min)
        elif self.method == 'zscore':
            return (data - self.mean) / self.std
        elif self.method == 'max':
            return data / self.max
        else:
            raise ValueError(f"Unknown normalization method: {self.method}")

    def fit_transform(self, data):
        self.fit(data)
        return self.transform(data)


# Mini Batch Generation

Creates mini-batches from the training data to facilitate training with smaller subsets.

In [12]:
class MiniBatchGenerator:
    def __init__(self, X, y, batch_size=32):
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.n_samples = X.shape[0]
        self.indices = np.arange(self.n_samples)
        np.random.shuffle(self.indices)

    def get_batches(self):
        for start_idx in range(0, self.n_samples, self.batch_size):
            end_idx = min(start_idx + self.batch_size, self.n_samples)
            batch_indices = self.indices[start_idx:end_idx]
            yield self.X[batch_indices], self.y[batch_indices]

# Neural Network Model

Represents the entire neural network model, managing layers and their configurations for training and inference.

In [13]:
class NeuralNetwork:
    def __init__(self):
        self.layers = []

    def add_layer(self, layer):
        self.layers.append(layer)

    def forward(self, inputs):
        # Utilize the ForwardPropagation class for the forward pass
        return ForwardPropagation.forward(self, inputs)

    def backward(self, loss_gradient):
        # Utilize the BackwardPropagation class for computing gradients
        BackwardPropagation.compute_gradients(self, loss_gradient)

    def evaluate(self, X, y):
        predictions = self.predict(X)
        predictions = np.round(predictions)  # Adjust as needed for your case
        accuracy = np.mean(predictions == y)
        return accuracy

    def predict(self, inputs):
        return self.forward(inputs)

# Training Class

Manages the training process, including forward and backward propagation, updating parameters, and evaluating performance over multiple epochs.

In [14]:
class NetworkTrainer:
    def __init__(self, neural_network, learning_rate, lambda_val, batch_size):
        self.neural_network = neural_network
        self.learning_rate = learning_rate
        self.lambda_val = lambda_val
        self.batch_size = batch_size

    def train(self, X_train, y_train, epochs):
        # Optionally, you could implement mini-batch training by integrating the MiniBatchGenerator here
        for epoch in range(epochs):
            # If using mini-batches, this loop would iterate over batches instead of the entire dataset at once
            outputs = self.neural_network.forward(X_train)
            loss = LossFunction.binary_cross_entropy(outputs, y_train) + self.regularization_loss()

            print(f"Epoch {epoch+1}, Loss: {loss}")
            loss_gradient = LossFunction.mse_derivative(outputs, y_train)

            self.neural_network.backward(loss_gradient)
            GradientDescent.update_parameters(self.neural_network.layers, self.learning_rate, self.lambda_val)

    def regularization_loss(self):
        # Calculate and return the regularization loss
        reg_loss = 0
        reg_type = "l2"  # or "l1" depending on the chosen regularization
        for layer in self.neural_network.layers:
            for neuron in layer.neurons:
                reg_loss += Regularization.compute(reg_type, neuron.params.weights, self.lambda_val)
        return reg_loss

    def evaluate(self, X, y):
        # Utilize the NeuralNetwork's evaluate method
        return self.neural_network.evaluate(X, y)


# Loading Dataset

In [15]:
dataset = pd.read_csv("train_x.csv")
labels = pd.read_csv("train_label.csv")

# Analyzing the shapes of the dataset and labels
dataset_shape = dataset.shape
labels_shape = labels.shape

dataset_shape, labels_shape

((999, 784), (999, 10))

# Split Data into Train Test and Validation

In [16]:
# Splitting the dataset into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(dataset, labels, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Checking the shapes of the splits
(X_train.shape, y_train.shape), (X_val.shape, y_val.shape), (X_test.shape, y_test.shape)


(((799, 784), (799, 10)), ((100, 784), (100, 10)), ((100, 784), (100, 10)))

# Creating Numpy Arrays for processing

In [17]:
input_normalizer = InputNormalization(method='zscore')
# Convert the pandas dataframes to numpy arrays for processing with the neural network
X_train_np = input_normalizer.fit_transform(X_train.to_numpy())
X_val_np = input_normalizer.transform(X_val.to_numpy())
X_test_np = input_normalizer.transform(X_test.to_numpy())

y_train_np = y_train.to_numpy()
y_val_np = y_val.to_numpy()
y_test_np = y_test.to_numpy()

# Setting the Inputs

In [18]:
input_size = 784  
output_size = 10  
epochs = 100
learning_rate = 0.1
lambda_val = 0.001
batch_size = 32  

# Setting up the Neural Network Model

In [19]:
nn = NeuralNetwork()
nn.add_layer(Layer(input_size=input_size, num_neurons=10, activation_type='relu'))
nn.add_layer(Layer(input_size=10, num_neurons=8, activation_type='relu'))
nn.add_layer(Layer(input_size=8, num_neurons=8, activation_type='relu'))  # Second layer with 8 neurons and ReLU
nn.add_layer(Layer(input_size=8, num_neurons=4, activation_type='relu'))
nn.add_layer(Layer(input_size=4, num_neurons=1, activation_type='sigmoid')) 

# Instantiate the training Class

In [20]:
trainer = NetworkTrainer(nn, learning_rate, lambda_val, batch_size)

# Evaluate the Model

In [21]:
# Training the neural network using the trainer
print("Training the Neural Network...")
trainer.train(X_train_np, y_train_np, epochs)
# Evaluating the neural network
print("Evaluating on Validation Data...")
val_accuracy = nn.evaluate(X_val_np, y_val_np)
print(f"Validation Accuracy: {val_accuracy}")

print("Evaluating on Test Data...")
test_accuracy = nn.evaluate(X_test_np, y_test_np)
print(f"Test Accuracy: {test_accuracy}")

Training the Neural Network...
Epoch 1, Loss: 0.7441630344778378
Epoch 2, Loss: 0.743266403415926
Epoch 3, Loss: 0.7423723505890616
Epoch 4, Loss: 0.7414808741032028
Epoch 5, Loss: 0.7405919611710142
Epoch 6, Loss: 0.7397056055643951
Epoch 7, Loss: 0.7388218011216416
Epoch 8, Loss: 0.7379405400760305
Epoch 9, Loss: 0.7370618155193919
Epoch 10, Loss: 0.7361856205504597
Epoch 11, Loss: 0.7353119482721777
Epoch 12, Loss: 0.7344407917797298
Epoch 13, Loss: 0.7335721442140006
Epoch 14, Loss: 0.7327059987024096
Epoch 15, Loss: 0.7318423493703187
Epoch 16, Loss: 0.7309811908413881
Epoch 17, Loss: 0.7301225141499187
Epoch 18, Loss: 0.7292663126640787
Epoch 19, Loss: 0.7284125794276808
Epoch 20, Loss: 0.7275613123228645
Epoch 21, Loss: 0.726712501251079
Epoch 22, Loss: 0.7258661400107354
Epoch 23, Loss: 0.7250222204237592
Epoch 24, Loss: 0.7241807436410712
Epoch 25, Loss: 0.7233416976274626
Epoch 26, Loss: 0.7225050721459695
Epoch 27, Loss: 0.7216708607502929
Epoch 28, Loss: 0.7208390571745806
