In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [19]:
class Neuron:
    def __init__(self, input_size):
        self.params = Parameters(input_size, 1)
class Parameters:
    def __init__(self, input_size, num_neurons):
        self.weights = np.random.randn(input_size, num_neurons) * 0.1
        self.bias = np.random.randn(1, num_neurons) * 0.1

    def get_weights(self):
        return self.weights

    def get_bias(self):
        return self.bias
class Layer:
    def __init__(self, input_size, num_neurons, activation_type):
        self.neurons = [Neuron(input_size) for _ in range(num_neurons)]
        self.activation_fn = Activation(activation_type)
        self.input_size = input_size
        self.num_neurons = num_neurons
        self.activation_type = activation_type
        self.inputs = None

In [20]:
class LossFunction:
    @staticmethod
    def mse(predicted, actual):
        return np.mean((predicted - actual) ** 2)

    @staticmethod
    def mse_derivative(predicted, actual):
        return 2 * (predicted - actual) / actual.size

    @staticmethod
    def binary_cross_entropy(predicted, actual):
        epsilon = 1e-15
        predicted = np.clip(predicted, epsilon, 1 - epsilon)
        return -np.mean(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))

    @staticmethod
    def binary_cross_entropy_derivative(predicted, actual):
        epsilon = 1e-15
        predicted = np.clip(predicted, epsilon, 1 - epsilon)
        return (predicted - actual) / (predicted * (1 - predicted))

class Activation:
    def __init__(self, type):
        self.type = type

    def forward(self, inputs):
        self.inputs = inputs
        if self.type == "relu":
            self.output = np.maximum(0, inputs)
        elif self.type == "sigmoid":
        # Clipping inputs to avoid overflow
            clipped_inputs = np.clip(inputs, -709, 709)  # np.exp(709) is close to the limit
            self.output = 1 / (1 + np.exp(-clipped_inputs))
        elif self.type == "linear":
            self.output = inputs
        elif self.type == "tanh":
            self.output = np.tanh(inputs)
        else:
            raise ValueError(f"Invalid activation function type: {self.type}")
        return self.output

    def derivative(self):
        if self.type == "relu":
            return np.where(self.inputs > 0, 1, 0)
        elif self.type == "sigmoid":
            return self.output * (1 - self.output)
        elif self.type == "linear":
            return np.ones_like(self.inputs)
        elif self.type == "tanh":
            return 1 - np.power(self.output, 2)
        else:
            raise ValueError(f"No derivative implemented for activation function type: {self.type}")


In [21]:
import numpy as np

class LossFunction:
    @staticmethod
    def mse(predicted, actual):
        return np.mean((predicted - actual) ** 2)

    @staticmethod
    def mse_derivative(predicted, actual):
        return 2 * (predicted - actual) / actual.size

    @staticmethod
    def binary_cross_entropy(predicted, actual):
        epsilon = 1e-15
        predicted = np.clip(predicted, epsilon, 1 - epsilon)
        return -np.mean(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))

    @staticmethod
    def binary_cross_entropy_derivative(predicted, actual):
        epsilon = 1e-15
        predicted = np.clip(predicted, epsilon, 1 - epsilon)
        return (predicted - actual) / (predicted * (1 - predicted))

class Activation:
    def __init__(self, type):
        self.type = type
        self.forward_funcs = {
            "relu": lambda x: np.maximum(0, x),
            "sigmoid": lambda x: 1 / (1 + np.exp(np.clip(-x, -709, 709))),
            "linear": lambda x: x,
            "tanh": lambda x: np.tanh(x)
        }
        self.derivative_funcs = {
            "relu": lambda x: np.where(x > 0, 1, 0),
            "sigmoid": lambda x: x * (1 - x),
            "linear": lambda _: np.ones_like(self.inputs),
            "tanh": lambda x: 1 - np.power(x, 2)
        }
        if type not in self.forward_funcs:
            raise ValueError(f"Invalid activation function type: {type}")

    def forward(self, inputs):
        self.inputs = inputs
        self.output = self.forward_funcs[self.type](inputs)
        return self.output

    def derivative(self):
        if self.type not in self.derivative_funcs:
            raise ValueError(f"No derivative implemented for activation function type: {self.type}")
        return self.derivative_funcs[self.type](self.output)



In [22]:
import numpy as np

class Dropout:
    def __init__(self, rate):
        self.rate = rate
        self.mask = None

class Regularization:
    @staticmethod
    def compute(reg_type, weights, lambda_val):
        if reg_type == "l1":
            return lambda_val * np.sum(np.abs(weights)), lambda_val * np.sign(weights)
        elif reg_type == "l2":
            return (lambda_val / 2) * np.sum(np.square(weights)), lambda_val * weights
        else:
            raise ValueError("Invalid regularization type. Expected 'l1' or 'l2'.")

class GradientDescent:
    @staticmethod
    def update_parameters(layers, learning_rate):
        for layer in layers:
            if hasattr(layer, 'neurons'):
                for neuron in layer.neurons:
                    neuron.params.weights -= learning_rate * neuron.d_weights
                    neuron.params.bias -= learning_rate * neuron.d_bias

class ForwardPropagation:
    @staticmethod
    def apply_dropout(inputs, rate, training=True):
        if training:
            mask = np.random.binomial(1, 1 - rate, size=inputs.shape) / (1 - rate)
            return inputs * mask
        return inputs

    @staticmethod
    def forward_layer(layer, inputs):
        layer.inputs = inputs
        neuron_outputs = np.hstack([np.dot(inputs, neuron.params.get_weights()) + neuron.params.get_bias() for neuron in layer.neurons])
        return layer.activation_fn.forward(neuron_outputs)

    @staticmethod
    def forward(network, inputs):
        for layer in network.layers:
            inputs = ForwardPropagation.forward_layer(layer, inputs)
        return inputs

class BackwardPropagation:
    @staticmethod
    def backward_layer(layer, d_output):
        d_output_activation = layer.activation_fn.derivative() * d_output
        d_inputs = np.zeros((d_output_activation.shape[0], layer.input_size))
        for i, neuron in enumerate(layer.neurons):
            d_output_neuron = d_output_activation[:, i:i+1]
            neuron.d_weights = np.dot(layer.inputs.T, d_output_neuron)
            neuron.d_bias = np.sum(d_output_neuron, axis=0, keepdims=True)
            d_inputs += np.dot(d_output_neuron, neuron.params.get_weights().T)
        layer.inputs = d_inputs

    @staticmethod
    def compute_gradients(network, loss_gradient):
        for layer in reversed(network.layers):
            BackwardPropagation.backward_layer(layer, loss_gradient)
            loss_gradient = layer.inputs

class InputNormalization:
    def __init__(self, method='zscore'):
        self.method = method
        self.params = {}

    def fit(self, data):
        if self.method == 'minmax':
            self.params['min'] = np.min(data, axis=0)
            self.params['max'] = np.max(data, axis=0)
        elif self.method == 'zscore':
            self.params['mean'] = np.mean(data, axis=0)
            self.params['std'] = np.std(data, axis=0)
            self.params['std'][self.params['std'] == 0] = 1
        elif self.method == 'max':
            self.params['max'] = np.max(np.abs(data), axis=0)
        else:
            raise ValueError(f"Unknown normalization method: {self.method}")

    def transform(self, data):
        if self.method == 'minmax':
            return (data - self.params['min']) / (self.params['max'] - self.params['min'])
        elif self.method == 'zscore':
            return (data - self.params['mean']) / self.params['std']
        elif self.method == 'max':
            return data / self.params['max']
        else:
            raise ValueError(f"Unknown normalization method: {self.method}")

    def fit_transform(self, data):
        self.fit(data)
        return self.transform(data)

class MiniBatchGenerator:
    def __init__(self, X, y, batch_size=32):
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.n_samples = X.shape[0]
        self.indices = np.arange(self.n_samples)
        np.random.shuffle(self.indices)

    def get_batches(self):
        for start_idx in range(0, self.n_samples, self.batch_size):
            end_idx = min(start_idx + self.batch_size, self.n_samples)
            batch_indices = self.indices[start_idx:end_idx]
            yield self.X[batch_indices], self.y[batch_indices]


Neural model

In [30]:
class NeuralNetwork:
    def __init__(self):
        self.layers = []

    def add_layer(self, layer):
        self.layers.append(layer)

    def forward(self, inputs):
        # Utilize the ForwardPropagation class for the forward pass
        return ForwardPropagation.forward(self, inputs)

    def backward(self, loss_gradient):
        # Utilize the BackwardPropagation class for computing gradients
        BackwardPropagation.compute_gradients(self, loss_gradient)

    def evaluate(self, X, y):
        predictions = self.predict(X)
        predictions = np.round(predictions)  # Adjust as needed for your case
        accuracy = np.mean(predictions == y)
        return accuracy

    def predict(self, inputs):
        return self.forward(inputs)

class NetworkTrainer:
    def __init__(self, neural_network, learning_rate, lambda_val, batch_size):
        self.neural_network = neural_network
        self.learning_rate = learning_rate
        self.lambda_val = lambda_val
        self.batch_size = batch_size

    def train(self, X_train, y_train, epochs):
        # Optionally, you could implement mini-batch training by integrating the MiniBatchGenerator here
        for epoch in range(epochs):
            # If using mini-batches, this loop would iterate over batches instead of the entire dataset at once
            outputs = self.neural_network.forward(X_train)
            loss = LossFunction.binary_cross_entropy(outputs, y_train) + self.regularization_loss()

            print(f"Epoch {epoch+1}, Loss: {loss}")
            loss_gradient = LossFunction.mse_derivative(outputs, y_train)

            self.neural_network.backward(loss_gradient)
            GradientDescent.update_parameters(self.neural_network.layers, self.learning_rate)  # Removed lambda_val

    def regularization_loss(self):
        # Calculate and return the regularization loss
        reg_loss = 0
        reg_type = "l2"  # or "l1" depending on the chosen regularization
        for layer in self.neural_network.layers:
            for neuron in layer.neurons:
                reg_loss_value, _ = Regularization.compute(reg_type, neuron.params.weights, self.lambda_val)
                reg_loss += reg_loss_value
        return reg_loss

    def evaluate(self, X, y):
        # Utilize the NeuralNetwork's evaluate method
        return self.neural_network.evaluate(X, y)


In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split

dataset = pd.read_csv("/content/train_x.csv")
labels = pd.read_csv("/content/train_label.csv")

# Analyzing the shapes of the dataset and labels
dataset_shape = dataset.shape
labels_shape = labels.shape

dataset_shape, labels_shape

# Splitting the dataset into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(dataset, labels, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Checking the shapes of the splits
(X_train.shape, y_train.shape), (X_val.shape, y_val.shape), (X_test.shape, y_test.shape)

input_normalizer = InputNormalization(method='zscore')
# Convert the pandas dataframes to numpy arrays for processing with the neural network
X_train_np = input_normalizer.fit_transform(X_train.to_numpy())
X_val_np = input_normalizer.transform(X_val.to_numpy())
X_test_np = input_normalizer.transform(X_test.to_numpy())

y_train_np = y_train.to_numpy()
y_val_np = y_val.to_numpy()
y_test_np = y_test.to_numpy()

input_size = 784
output_size = 10
epochs = 100
learning_rate = 0.1
lambda_val = 0.001
batch_size = 32

nn = NeuralNetwork()
nn.add_layer(Layer(input_size=input_size, num_neurons=10, activation_type='relu'))
nn.add_layer(Layer(input_size=10, num_neurons=8, activation_type='relu'))
nn.add_layer(Layer(input_size=8, num_neurons=8, activation_type='relu'))  # Second layer with 8 neurons and ReLU
nn.add_layer(Layer(input_size=8, num_neurons=4, activation_type='relu'))
nn.add_layer(Layer(input_size=4, num_neurons=1, activation_type='sigmoid'))

trainer = NetworkTrainer(nn, learning_rate, lambda_val, batch_size)

# Training the neural network using the trainer
print("Training the Neural Network...")
trainer.train(X_train_np, y_train_np, epochs)
# Evaluating the neural network
print("Evaluating on Validation Data...")
val_accuracy = nn.evaluate(X_val_np, y_val_np)
print(f"Validation Accuracy: {val_accuracy}")

print("Evaluating on Test Data...")
test_accuracy = nn.evaluate(X_test_np, y_test_np)
print(f"Test Accuracy: {test_accuracy}")

Training the Neural Network...
Epoch 1, Loss: 0.7110398254833417
Epoch 2, Loss: 0.7102794571204073
Epoch 3, Loss: 0.7095210856842761
Epoch 4, Loss: 0.7087647068086229
Epoch 5, Loss: 0.7080103161227821
Epoch 6, Loss: 0.707257909251894
Epoch 7, Loss: 0.7065074818170526
Epoch 8, Loss: 0.705759051400944
Epoch 9, Loss: 0.7050126561638832
Epoch 10, Loss: 0.704268273520327
Epoch 11, Loss: 0.7035259186143801
Epoch 12, Loss: 0.7027855200126766
Epoch 13, Loss: 0.702047073327456
Epoch 14, Loss: 0.7013105741755201
Epoch 15, Loss: 0.7005760318987774
Epoch 16, Loss: 0.6998434703564798
Epoch 17, Loss: 0.6991129588689761
Epoch 18, Loss: 0.6983845182343641
Epoch 19, Loss: 0.697658146977061
Epoch 20, Loss: 0.6969338375287688
Epoch 21, Loss: 0.6962115011822742
Epoch 22, Loss: 0.6954910988520582
Epoch 23, Loss: 0.6947727167891832
Epoch 24, Loss: 0.6940563697872331
Epoch 25, Loss: 0.693342025275244
Epoch 26, Loss: 0.6926297278302973
Epoch 27, Loss: 0.6919194783531398
Epoch 28, Loss: 0.691211277445414
Epoch