In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

class Activation:
    """Element-wise activation function selected by name.

    Supported names are ``"relu"``, ``"sigmoid"``, ``"linear"`` and ``"tanh"``.
    The name is stored as given and is not checked on construction; an
    unsupported name raises ``ValueError`` when ``forward`` or ``derivative``
    is called.
    """

    # Name -> function of the raw inputs.
    _FORWARD = {
        "relu": lambda x: np.maximum(0, x),
        # Inputs are clipped to avoid overflow: np.exp(709) is close to the limit.
        "sigmoid": lambda x: 1 / (1 + np.exp(-np.clip(x, -709, 709))),
        "linear": lambda x: x,
        "tanh": lambda x: np.tanh(x),
    }

    # Name -> function of the Activation instance, reading the values cached
    # by the most recent forward() call.
    _DERIVATIVE = {
        "relu": lambda act: np.where(act.inputs > 0, 1, 0),
        "sigmoid": lambda act: act.output * (1 - act.output),
        "linear": lambda act: np.ones_like(act.inputs),
        "tanh": lambda act: 1 - np.power(act.output, 2),
    }

    def __init__(self, type):
        self.type = type

    def _lookup(self, table):
        """Return the handler for ``self.type`` from *table*, or None."""
        kind = self.type
        # Only strings can name an activation; anything else is unsupported.
        return table.get(kind) if isinstance(kind, str) else None

    def forward(self, inputs):
        """Apply the activation to *inputs*, caching inputs and output.

        Raises:
            ValueError: if the configured activation name is not supported.
        """
        self.inputs = inputs
        compute = self._lookup(self._FORWARD)
        if compute is None:
            raise ValueError(f"Invalid activation function type: {self.type}")
        self.output = compute(inputs)
        return self.output

    def derivative(self):
        """Return the element-wise derivative at the last forward() inputs.

        Raises:
            ValueError: if the configured activation name is not supported.
        """
        compute = self._lookup(self._DERIVATIVE)
        if compute is None:
            raise ValueError(f"No derivative implemented for activation function type: {self.type}")
        return compute(self)

class Parameters:
    """Randomly initialised weights and bias for a group of neurons.

    ``weights`` has shape ``(input_size, num_neurons)`` and ``bias`` has shape
    ``(1, num_neurons)``. Both are drawn with ``np.random.randn`` and scaled
    by 0.1.
    """

    def __init__(self, input_size, num_neurons):
        init_scale = 0.1
        # Weights are drawn before the bias; the order matters for
        # reproducibility under a fixed NumPy seed.
        self.weights = init_scale * np.random.randn(input_size, num_neurons)
        self.bias = init_scale * np.random.randn(1, num_neurons)

    def get_weights(self):
        """Return the weight matrix (the stored array itself, not a copy)."""
        return self.weights

    def get_bias(self):
        """Return the bias row vector (the stored array itself, not a copy)."""
        return self.bias

class Neuron:
    """A single linear unit: ``output = inputs . weights + bias``."""

    def __init__(self, input_size):
        # One output column per neuron.
        self.params = Parameters(input_size, 1)

    def forward(self, inputs):
        """Compute the pre-activation output and cache the inputs for backward()."""
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.params.get_weights())
        self.output = weighted_sum + self.params.get_bias()
        return self.output

    def backward(self, d_output):
        """Compute parameter gradients and return the gradient w.r.t. the inputs.

        Stores ``d_weights``, ``d_bias`` and ``d_inputs`` on the neuron. A 1-D
        *d_output* is treated as a column of shape (batch_size, 1).
        """
        grad = d_output[:, np.newaxis] if d_output.ndim == 1 else d_output

        self.d_weights = np.dot(self.inputs.T, grad)
        self.d_bias = grad.sum(axis=0, keepdims=True)

        weights = self.params.get_weights()
        self.d_inputs = np.dot(grad, weights.T)
        return self.d_inputs

class Layer:
    """Fully connected layer built from independent ``Neuron`` objects plus one activation."""

    def __init__(self, input_size, num_neurons, activation_type):
        self.activation_fn = Activation(activation_type)
        self.neurons = [Neuron(input_size) for _ in range(num_neurons)]

    def forward(self, inputs):
        """Run every neuron on *inputs*, stack the results column-wise and activate."""
        self.inputs = inputs
        columns = [unit.forward(inputs) for unit in self.neurons]
        pre_activation = np.hstack(columns)
        return self.activation_fn.forward(pre_activation)

    def backward(self, d_output):
        """Back-propagate *d_output* through the activation and every neuron.

        Returns the gradient with respect to the layer inputs, accumulated
        over all neurons in order.
        """
        # Chain rule through the activation (element-wise).
        upstream = self.activation_fn.derivative() * d_output
        grad_inputs = np.zeros_like(self.inputs, dtype=np.float64)
        for column, unit in enumerate(self.neurons):
            # Each neuron receives only its own column, kept 2-D.
            grad_inputs += unit.backward(upstream[:, column:column + 1])
        return grad_inputs

class LossFunction:
    """Mean squared error loss and its gradient."""

    @staticmethod
    def mse(predicted, actual):
        """Return the mean of the squared element-wise errors."""
        error = predicted - actual
        return np.mean(error ** 2)

    @staticmethod
    def mse_derivative(predicted, actual):
        """Return the gradient of the MSE with respect to *predicted*.

        The gradient is averaged over every element of *actual*
        (``actual.size``), not only over the batch dimension.
        """
        error = predicted - actual
        element_count = actual.size
        return 2 * error / element_count
    
class Dropout:
    """Inverted dropout layer.

    During training each element is zeroed with probability ``rate`` and the
    surviving elements are scaled by ``1 / (1 - rate)``. In inference mode
    the layer is the identity, for both the forward and the backward pass.
    """

    def __init__(self, rate):
        """Create the layer.

        Args:
            rate: probability of dropping an element; must satisfy 0 <= rate < 1.

        Raises:
            ValueError: if *rate* is outside ``[0, 1)``. A rate of 1 would
                divide by zero when scaling the mask.
        """
        if not 0 <= rate < 1:
            raise ValueError(f"Dropout rate must be in [0, 1), got {rate}")
        self.rate = rate
        self.mask = None

    def forward(self, inputs, training=True):
        """Apply dropout when *training* is true, otherwise pass *inputs* through."""
        if training:
            keep_prob = 1 - self.rate
            self.mask = np.random.binomial(1, keep_prob, size=inputs.shape) / keep_prob
            self.output = inputs * self.mask
        else:
            # Drop any mask left over from an earlier training pass so that
            # backward() cannot apply it to gradients of an inference pass.
            self.mask = None
            self.output = inputs
        return self.output

    def backward(self, d_output):
        """Propagate gradients through the mask used by the last forward pass."""
        if self.mask is None:
            # The last forward pass was the identity (or none happened yet).
            self.d_inputs = d_output
        else:
            # Only propagate gradients where the mask is not zero.
            self.d_inputs = d_output * self.mask
        return self.d_inputs

class Regularization:
    """L2 (weight decay) penalty and its gradient."""

    @staticmethod
    def l2(weights, lambda_val):
        """Return ``(lambda_val / 2) * sum(weights ** 2)``."""
        half_lambda = lambda_val / 2
        squared_norm = np.sum(weights ** 2)
        return half_lambda * squared_norm

    @staticmethod
    def l2_derivative(weights, lambda_val):
        """Return the gradient of the L2 penalty with respect to *weights*."""
        gradient = lambda_val * weights
        return gradient
    
class BackPropagation:
    """Gradient-descent parameter update for layers that own neurons."""

    @staticmethod
    def update_parameters(layers, learning_rate, lambda_val=0.01):
        """Apply one gradient-descent step in place to every neuron.

        Layers without a ``neurons`` attribute are skipped. The L2 penalty
        gradient is added to the weight gradient only; the bias is not
        regularised. Each neuron must already hold ``d_weights`` and
        ``d_bias`` from a backward pass.
        """
        trainable_layers = (layer for layer in layers if hasattr(layer, 'neurons'))
        for layer in trainable_layers:
            for neuron in layer.neurons:
                params = neuron.params
                weight_gradient = neuron.d_weights + Regularization.l2_derivative(params.weights, lambda_val)
                params.weights -= learning_rate * weight_gradient
                params.bias -= learning_rate * neuron.d_bias
                    
                    
class InputNormalization:
    """Column-wise feature scaling fitted on one data set and reused on others.

    Supported methods:
        * ``'minmax'`` - ``(x - min) / (max - min)``
        * ``'zscore'`` - ``(x - mean) / std``
        * ``'max'``    - ``x / max(|x|)``

    For every method a denominator of zero (a feature with no variation, or
    an all-zero feature for ``'max'``) is replaced by 1, so such features are
    mapped to finite values instead of NaN/inf.
    """

    def __init__(self, method='zscore'):
        self.method = method
        self.min = None
        self.max = None
        self.mean = None
        self.std = None

    @staticmethod
    def _safe_denominator(values):
        """Return *values* with zeros replaced by 1 to prevent division by zero."""
        # np.where also works when *values* is a scalar (1-D input data),
        # where boolean item assignment would fail.
        return np.where(values == 0, 1, values)

    def fit(self, data):
        """Compute the per-column statistics required by the chosen method.

        Raises:
            ValueError: if the configured method is unknown.
        """
        if self.method == 'minmax':
            self.min = np.min(data, axis=0)
            self.max = np.max(data, axis=0)
        elif self.method == 'zscore':
            self.mean = np.mean(data, axis=0)
            # Features with no variation get std 1 so they map to 0.
            self.std = self._safe_denominator(np.std(data, axis=0))
        elif self.method == 'max':
            self.max = np.max(np.abs(data), axis=0)
        else:
            raise ValueError(f"Unknown normalization method: {self.method}")

    def transform(self, data):
        """Scale *data* using the statistics computed by ``fit``.

        Raises:
            ValueError: if the configured method is unknown.
        """
        if self.method == 'minmax':
            value_range = self._safe_denominator(self.max - self.min)
            return (data - self.min) / value_range
        elif self.method == 'zscore':
            return (data - self.mean) / self.std
        elif self.method == 'max':
            return data / self._safe_denominator(self.max)
        else:
            raise ValueError(f"Unknown normalization method: {self.method}")

    def fit_transform(self, data):
        """Fit on *data* and return the transformed *data*."""
        self.fit(data)
        return self.transform(data)

                    
                    
class NeuralNetwork:
    """Sequential container of layers trained with full-batch gradient descent."""

    def __init__(self):
        self.layers = []

    def add_layer(self, layer):
        """Append *layer* to the end of the network."""
        self.layers.append(layer)

    @staticmethod
    def _accepts_training(layer):
        """Return True if ``layer.forward`` has a ``training`` parameter."""
        import inspect

        try:
            parameters = inspect.signature(layer.forward).parameters
        except (TypeError, ValueError):
            # No introspectable signature: call it with inputs only.
            return False
        return "training" in parameters

    def forward(self, inputs, training=True):
        """Run *inputs* through every layer in order and return the result.

        Args:
            inputs: data passed to the first layer.
            training: forwarded to layers whose ``forward`` accepts a
                ``training`` argument (such as dropout). The default of True
                matches the earlier behaviour of ``forward(inputs)``;
                ``predict`` passes False so that stochastic layers are
                disabled at inference time.
        """
        for layer in self.layers:
            if self._accepts_training(layer):
                inputs = layer.forward(inputs, training=training)
            else:
                inputs = layer.forward(inputs)
        return inputs

    def backward(self, loss_gradient):
        """Back-propagate *loss_gradient* from the last layer to the first."""
        for layer in reversed(self.layers):
            loss_gradient = layer.backward(loss_gradient)

    def train(self, inputs, targets, epochs, learning_rate, lambda_val=0.01):
        """Train on the whole of *inputs* for *epochs* gradient-descent steps.

        The loss printed each epoch is the MSE plus the L2 penalty of every
        neuron's weights. The gradient passed to ``backward`` is the MSE
        gradient only; the L2 gradient is added in the parameter update.
        """
        for epoch in range(epochs):
            outputs = self.forward(inputs, training=True)
            loss = LossFunction.mse(outputs, targets)
            # Add regularization loss
            reg_loss = 0
            for layer in self.layers:
                if hasattr(layer, 'neurons'):
                    for neuron in layer.neurons:
                        reg_loss += Regularization.l2(neuron.params.weights, lambda_val)
            loss += reg_loss
            print(f"Epoch {epoch+1}, Loss: {loss}")
            loss_gradient = LossFunction.mse_derivative(outputs, targets)
            self.backward(loss_gradient)
            BackPropagation.update_parameters(self.layers, learning_rate, lambda_val)

    def evaluate(self, X, y):
        """Return the classification accuracy of the network on ``(X, y)``.

        Assumes binary or single-label multiclass classification:

        * Multi-column predictions: the predicted class of each row is the
          arg-max column. *y* may be one-hot (same shape as the predictions)
          or hold one class index per row. Accuracy is the fraction of rows
          whose class matches. Comparing rounded outputs element-wise would
          overstate accuracy, because most entries of a one-hot target are 0.
        * Single-column predictions: outputs are rounded to the nearest
          integer and compared with *y* element by element.
        """
        predictions = self.predict(X)
        y = np.asarray(y)
        if predictions.ndim == 2 and predictions.shape[1] > 1:
            predicted_classes = np.argmax(predictions, axis=1)
            if y.ndim == 2 and y.shape[1] == predictions.shape[1]:
                true_classes = np.argmax(y, axis=1)
            else:
                true_classes = y.reshape(-1)
            return np.mean(predicted_classes == true_classes)
        # Flatten both sides so an (n, 1) prediction and an (n,) target are
        # compared pairwise rather than broadcast against each other.
        return np.mean(np.round(predictions).reshape(-1) == y.reshape(-1))

    def predict(self, inputs):
        """Return the network output for *inputs* in inference mode."""
        return self.forward(inputs, training=False)


from sklearn.model_selection import train_test_split

dataset = pd.read_csv("train_x.csv")
labels = pd.read_csv("train_label.csv")

# Report the shapes of the dataset and labels. A bare expression is only
# displayed as the last line of a notebook cell, so print explicitly.
dataset_shape = dataset.shape
labels_shape = labels.shape
print(f"Dataset shape: {dataset_shape}, labels shape: {labels_shape}")

input_normalizer = InputNormalization(method='zscore')

# Split into 80% training, 10% validation and 10% test: the 20% hold-out
# from the first split is halved by the second.
X_train, X_temp, y_train, y_temp = train_test_split(dataset, labels, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Report the shapes of the splits.
print(f"Train: {(X_train.shape, y_train.shape)}, "
      f"validation: {(X_val.shape, y_val.shape)}, "
      f"test: {(X_test.shape, y_test.shape)}")

# Convert the pandas dataframes to numpy arrays for processing with the neural network.
# The normalizer is fitted on the training split only and then reused, so no
# statistics from the validation/test data leak into training.
X_train_np = input_normalizer.fit_transform(X_train.to_numpy())
X_val_np = input_normalizer.transform(X_val.to_numpy())
X_test_np = input_normalizer.transform(X_test.to_numpy())

y_train_np = y_train.to_numpy()
y_val_np = y_val.to_numpy()
y_test_np = y_test.to_numpy()

# Create the neural network
nn = NeuralNetwork()

# Take the input width from the data instead of hard-coding it (previously 784),
# so the first layer always matches the feature matrix.
input_size = X_train_np.shape[1]
output_size = 10

nn.add_layer(Layer(input_size=input_size, num_neurons=64, activation_type='relu'))
nn.add_layer(Dropout(rate=0.1))  # Dropout layer with a 10% dropout rate
nn.add_layer(Layer(input_size=64, num_neurons=output_size, activation_type='sigmoid'))

# Training parameters
epochs = 100
learning_rate = 0.1
lambda_val = 0.001

# Training the neural network
print("Training the Neural Network...")
nn.train(X_train_np, y_train_np, epochs, learning_rate, lambda_val)

# Evaluating the neural network
print("Evaluating on Validation Data...")
val_accuracy = nn.evaluate(X_val_np, y_val_np)
print(f"Validation Accuracy: {val_accuracy}")

print("Evaluating on Test Data...")
test_accuracy = nn.evaluate(X_test_np, y_test_np)
print(f"Test Accuracy: {test_accuracy}")


Training the Neural Network...
Epoch 1, Loss: 0.5906435743153546
Epoch 2, Loss: 0.5745391818120322
Epoch 3, Loss: 0.5637463283127535
Epoch 4, Loss: 0.5529279587746171
Epoch 5, Loss: 0.5411514950689724
Epoch 6, Loss: 0.5287311708470934
Epoch 7, Loss: 0.517508605619935
Epoch 8, Loss: 0.5073821888861842
Epoch 9, Loss: 0.4983736356737441
Epoch 10, Loss: 0.48947842585612167
Epoch 11, Loss: 0.4824712721361907
Epoch 12, Loss: 0.47235479937940783
Epoch 13, Loss: 0.46427635822070784
Epoch 14, Loss: 0.45810844813568785
Epoch 15, Loss: 0.45153322885712016
Epoch 16, Loss: 0.44502659187418747
Epoch 17, Loss: 0.4382080073924419
Epoch 18, Loss: 0.43342202717574896
Epoch 19, Loss: 0.42777992116934277
Epoch 20, Loss: 0.42340063572218756
Epoch 21, Loss: 0.41650189966210927
Epoch 22, Loss: 0.4122749804282665
Epoch 23, Loss: 0.409197142950604
Epoch 24, Loss: 0.404082608914082
Epoch 25, Loss: 0.4007703409979039
Epoch 26, Loss: 0.39659871442668115
Epoch 27, Loss: 0.3931550015902538
Epoch 28, Loss: 0.3908067