In [1]:
import math
import random

class Layer:
    """A fully connected layer implemented with plain Python lists.

    The layer owns an ``n_out x n_in`` weight matrix ``w`` (list of rows), one
    bias per output unit in ``b``, and caches the pre-activations
    (``z_values``) and activations (``output``) of the most recent
    ``forward`` call, one entry per input sample.
    """

    def __init__(self, learning_rate, n_out, n_in, activation):
        """Create the layer and randomly initialise its weights.

        Args:
            learning_rate: Step size stored on the layer for weight updates.
            n_out: Number of output units (rows of ``w``).
            n_in: Number of inputs per sample (columns of ``w``).
            activation: ``'sigmoid'``, ``'relu'`` or ``'softmax'``; any other
                value makes ``forward`` pass the pre-activations through
                unchanged.
        """
        self.learning_rate = learning_rate
        self.n_out = n_out
        self.n_in = n_in
        self.activation = activation

        # Glorot/Xavier uniform bound: sqrt(6 / (fan_in + fan_out)).
        limit = math.sqrt(6 / (n_in + n_out))
        self.w = [[random.uniform(-limit, limit) for _ in range(n_in)] for _ in range(n_out)]
        self.b = [0.0 for _ in range(n_out)]

        self.output = []
        self.z_values = []

    def sigmoid(self, x):
        """Return the logistic function of ``x`` without overflowing.

        ``math.exp`` raises ``OverflowError`` for large positive arguments, so
        the exponential is always taken of a non-positive number: ``exp(-x)``
        when ``x >= 0`` and ``exp(x)`` otherwise.
        """
        if x >= 0:
            return 1 / (1 + math.exp(-x))
        exp_x = math.exp(x)
        return exp_x / (1 + exp_x)

    def sigmoid_derivative(self, output):
        """Return the sigmoid derivative given the sigmoid *output* value."""
        return output * (1 - output)

    def relu(self, x):
        """Return ``x`` when positive, otherwise ``0``."""
        return max(0, x)

    def relu_derivative(self, z):
        """Return ``1`` when the pre-activation ``z`` is positive, else ``0``."""
        return int(z > 0)

    def softmax(self, logits):
        """Return the softmax of ``logits`` as a list of probabilities."""
        # Shifting by the maximum keeps every exponent <= 0, avoiding overflow.
        max_logit = max(logits)
        exps = [math.exp(x - max_logit) for x in logits]
        sum_exps = sum(exps)
        return [e / sum_exps for e in exps]

    def forward(self, X):
        """Run the layer on a batch and cache the intermediate values.

        Args:
            X: Sequence of samples; each sample is indexable and provides at
                least ``n_in`` values.

        Returns:
            ``self.output``: one list of ``n_out`` activations per sample.
        """
        self.z_values = []
        self.output = []

        for x in X:
            z = []
            for i in range(self.n_out):
                weighted_sum = sum(self.w[i][j] * x[j] for j in range(self.n_in)) + self.b[i]
                z.append(weighted_sum)
            self.z_values.append(z)

            if self.activation == 'sigmoid':
                out = [self.sigmoid(val) for val in z]
            elif self.activation == 'relu':
                out = [self.relu(val) for val in z]
            elif self.activation == 'softmax':
                out = self.softmax(z)
            else:
                # Unknown activation names fall back to the identity.
                out = [val for val in z]
            self.output.append(out)
        return self.output

    def compute_loss(self, y_true):
        """Return the mean loss of the cached ``output`` against ``y_true``.

        The loss depends on this layer's activation:
            * ``'softmax'``: categorical cross-entropy over all outputs.
            * ``'sigmoid'``: binary cross-entropy on the first output only.
            * anything else: sum of squared errors over the outputs.

        Args:
            y_true: One target vector per sample, aligned with the samples of
                the last ``forward`` call.

        Raises:
            ZeroDivisionError: If ``y_true`` is empty.
        """
        loss = 0
        # Probabilities are clamped into [epsilon, 1 - epsilon] so log() is finite.
        epsilon = 1e-15

        for y_pred, y_t in zip(self.output, y_true):
            if self.activation == 'softmax':
                for p, t in zip(y_pred, y_t):
                    p = min(max(p, epsilon), 1 - epsilon)
                    loss -= t * math.log(p)
            elif self.activation == 'sigmoid':
                p = min(max(y_pred[0], epsilon), 1 - epsilon)
                t = y_t[0]
                loss -= t * math.log(p) + (1 - t) * math.log(1 - p)
            else:
                loss += sum((p - t) ** 2 for p, t in zip(y_pred, y_t))

        return loss / len(y_true)



class NeuralNetwork:
    """A feed-forward stack of ``Layer`` objects trained by backpropagation."""

    def __init__(self, layer_sizes, learning_rate, activations):
        """Build one ``Layer`` per consecutive pair of entries in ``layer_sizes``.

        Args:
            layer_sizes: Unit counts, input size first; ``len(layer_sizes) - 1``
                layers are created.
            learning_rate: Learning rate passed to every layer.
            activations: Activation name for each created layer, in order.
        """
        self.layers = []
        for i in range(1, len(layer_sizes)):
            layer = Layer(learning_rate, n_out=layer_sizes[i], n_in=layer_sizes[i-1], activation=activations[i-1])
            self.layers.append(layer)

    def forward(self, X):
        """Feed the batch ``X`` through every layer and return the final output."""
        output = X
        for layer in self.layers:
            output = layer.forward(output)
        return output

    def compute_loss(self, y_true):
        """Return the last layer's loss for the most recent forward pass."""
        return self.layers[-1].compute_loss(y_true)

    def backprop(self, X, y_true):
        """Run one forward/backward pass on a batch and update all layers.

        The gradient fed into the last layer is ``prediction - target`` for
        every output unit.

        NOTE(review): ``_backprop_layer`` multiplies that seed by the
        activation derivative for 'sigmoid' and 'relu' layers, so with those
        output activations the update follows a half squared-error objective
        rather than the value reported by ``Layer.compute_loss`` - confirm
        which is intended.

        Args:
            X: Batch of input samples.
            y_true: One target vector per sample.
        """
        self.forward(X)
        m = len(y_true)
        last_layer = self.layers[-1]

        upstream_gradient = [
            [last_layer.output[i][j] - y_true[i][j] for j in range(last_layer.n_out)]
            for i in range(m)
        ]

        # Walk the layers back to front; each call returns the gradient with
        # respect to that layer's inputs, which feeds the layer before it.
        for idx in reversed(range(len(self.layers))):
            layer = self.layers[idx]
            inputs = X if idx == 0 else self.layers[idx - 1].output
            upstream_gradient = self._backprop_layer(layer, upstream_gradient, inputs)

    def _backprop_layer(self, layer, upstream_gradient, input_to_layer):
        """Back-propagate through one layer and apply a single batch update.

        Every sample's gradients are computed against the weights the forward
        pass used. The per-sample steps are accumulated and applied once, after
        the whole batch, divided by the batch size. For a batch of one sample
        this equals an immediate per-sample update.

        Args:
            layer: Layer to update; its cached ``output`` / ``z_values`` must
                come from the forward pass over ``input_to_layer``.
            upstream_gradient: Per sample, the loss gradient with respect to
                each of the layer's outputs.
            input_to_layer: Per sample, the inputs the layer received.

        Returns:
            Per sample, the loss gradient with respect to the layer's inputs.
        """
        m = len(upstream_gradient)
        if m == 0:
            # Nothing to learn from; also avoids dividing by zero below.
            return []

        # Learning-rate-scaled steps, accumulated over the batch.
        step_w = [[0.0] * len(row) for row in layer.w]
        step_b = [0.0] * len(layer.b)
        dL_dinput = []

        for sample_idx in range(m):
            x = input_to_layer[sample_idx]
            dL_dx_sample = [0] * len(x)

            for i in range(layer.n_out):
                if layer.activation == 'sigmoid':
                    do_dz = layer.sigmoid_derivative(layer.output[sample_idx][i])
                elif layer.activation == 'relu':
                    do_dz = layer.relu_derivative(layer.z_values[sample_idx][i])
                else:
                    # Softmax and identity layers receive the gradient as-is.
                    do_dz = 1

                dL_dz = upstream_gradient[sample_idx][i] * do_dz

                for j in range(len(x)):
                    dL_dx_sample[j] += layer.w[i][j] * dL_dz
                    step_w[i][j] += layer.learning_rate * dL_dz * x[j]

                step_b[i] += layer.learning_rate * dL_dz

            dL_dinput.append(dL_dx_sample)

        # Apply the averaged step only after every sample has been processed.
        for i, row_steps in enumerate(step_w):
            for j, step in enumerate(row_steps):
                layer.w[i][j] -= step / m
            layer.b[i] -= step_b[i] / m

        return dL_dinput


In [2]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# --------- Step 1: Load and prepare data ----------
# 200 noisy two-class "moons" points; the fixed random_state keeps the draw repeatable.
X, y = make_moons(
    n_samples=200,
    noise=0.2,
    random_state=42,
)

# Convert labels to one-hot encoding
def one_hot_encode(y, num_classes):
    """Turn a sequence of class labels into one-hot rows.

    Args:
        y: Iterable of labels, compared by equality against class indices.
        num_classes: Length of every produced row.

    Returns:
        A list with one row per label; the row holds ``1`` at the position
        equal to the label and ``0`` elsewhere (all zeros if nothing matches).
    """
    encoded = []
    for label in y:
        row = []
        for class_idx in range(num_classes):
            row.append(1 if class_idx == label else 0)
        encoded.append(row)
    return encoded

y_oh = one_hot_encode(y, 2)

# Split dataset: 60% train, 20% val, 20% test
# (0.25 of the remaining 80% gives the 20% validation share.)
X_temp, X_test, y_temp, y_test = train_test_split(X.tolist(), y_oh, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)

# --------- Step 2: Initialize neural network ----------
# Layer draws its initial weights from the stdlib `random` module, which the
# sklearn `random_state` arguments above do not control; seed it so that
# re-running the notebook reproduces the same training run.
random.seed(42)
nn = NeuralNetwork(layer_sizes=[2, 5, 2, 2], learning_rate=0.1, activations=['relu', 'relu', 'softmax'])

# --------- Step 3: Train the network ----------
def accuracy(y_pred, y_true):
    """Return the fraction of samples whose predicted class matches the target.

    The class of a vector is the index of its first maximum entry. Pairs are
    taken with ``zip``, and the count is divided by ``len(y_true)``.
    """
    hits = sum(
        1
        for scores, target in zip(y_pred, y_true)
        if scores.index(max(scores)) == target.index(max(target))
    )
    return hits / len(y_true)

epochs = 50
for epoch in range(epochs):
    # One pass over the training set, updating the weights after every sample.
    for x, y_t in zip(X_train, y_train):
        nn.backprop([x], [y_t])  # mini-batch size of 1

    # Score the updated network on the training split, then the validation split.
    train_preds = nn.forward(X_train)
    train_acc = accuracy(train_preds, y_train)

    val_preds = nn.forward(X_val)
    val_acc = accuracy(val_preds, y_val)

    print(f"Epoch {epoch+1}/{epochs} - Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

# --------- Step 4: Test accuracy ----------
# The held-out test split is evaluated once, after all epochs have finished.
test_preds = nn.forward(X_test)
test_acc = accuracy(test_preds, y_test)
print(f"Test Accuracy: {test_acc:.4f}")


Epoch 1/50 - Train Acc: 0.8250, Val Acc: 0.8000
Epoch 2/50 - Train Acc: 0.8917, Val Acc: 0.7750
Epoch 3/50 - Train Acc: 0.9000, Val Acc: 0.7750
Epoch 4/50 - Train Acc: 0.9167, Val Acc: 0.7750
Epoch 5/50 - Train Acc: 0.9167, Val Acc: 0.8250
Epoch 6/50 - Train Acc: 0.9333, Val Acc: 0.8250
Epoch 7/50 - Train Acc: 0.9417, Val Acc: 0.8250
Epoch 8/50 - Train Acc: 0.9333, Val Acc: 0.8500
Epoch 9/50 - Train Acc: 0.9417, Val Acc: 0.8500
Epoch 10/50 - Train Acc: 0.9500, Val Acc: 0.8500
Epoch 11/50 - Train Acc: 0.9417, Val Acc: 0.8500
Epoch 12/50 - Train Acc: 0.9417, Val Acc: 0.8500
Epoch 13/50 - Train Acc: 0.9417, Val Acc: 0.8750
Epoch 14/50 - Train Acc: 0.9500, Val Acc: 0.9000
Epoch 15/50 - Train Acc: 0.9417, Val Acc: 0.9000
Epoch 16/50 - Train Acc: 0.9833, Val Acc: 0.9250
Epoch 17/50 - Train Acc: 0.9833, Val Acc: 0.9000
Epoch 18/50 - Train Acc: 0.9833, Val Acc: 0.9250
Epoch 19/50 - Train Acc: 0.9833, Val Acc: 0.9250
Epoch 20/50 - Train Acc: 0.9833, Val Acc: 0.9250
Epoch 21/50 - Train Acc: 0.98