In [1]:
import math
import random

## Back_Prop

In [2]:
class Value:
    """Scalar node in a computation graph with reverse-mode autodiff.

    Every arithmetic operation returns a new ``Value`` that remembers its
    operands (``_prev``) and a closure (``_backward``) that adds the result's
    gradient contribution onto those operands. Calling :meth:`backward` on a
    node fills in ``grad`` for every node reachable from it.
    """

    def __init__(self, data, _children=(), _op=''):
        self.data = data  # the scalar held by this node
        self.grad = 0  # accumulated d(root)/d(self); filled in by backward()
        self._backward = lambda: None  # leaves have nothing to propagate
        self._prev = set(_children)  # operand nodes this value was built from
        self._op = _op  # label of the operation that produced this node

    def __repr__(self):
        return f"Value(data={self.data}, grad={self.grad})"

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a Value."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # Addition passes the gradient through unchanged to both operands.
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __radd__(self, other):
        """Support ``number + Value``."""
        return self + other

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a Value."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # Product rule: each operand receives the other operand's value.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __rmul__(self, other):
        """Support ``number * Value``."""
        return self * other

    def relu(self):
        """Return ``max(self, 0)`` as a new node."""
        out = Value(self.data if self.data > 0 else 0, (self,), 'ReLU')

        def _backward():
            # Gradient flows only where the output is strictly positive.
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward

        return out

    def tanh(self):
        """Return ``tanh(self)`` as a new node.

        ``self.data`` is a plain number, so the value is computed with
        ``math.tanh`` (the previous ``.exp()`` call on a float raised
        AttributeError).
        """
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh')

        def _backward():
            # d/dx tanh(x) = 1 - tanh(x)^2
            self.grad += (1 - t ** 2) * out.grad
        out._backward = _backward

        return out

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float))
        out = Value(self.data ** other, (self,), f'**{other}')

        def _backward():
            # Power rule: d/dx x^n = n * x^(n-1)
            self.grad += (other * self.data ** (other - 1)) * out.grad
        out._backward = _backward

        return out

    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        """Support ``number - Value``."""
        return Value(other) + (-self)

    def __truediv__(self, other):
        return self * other ** -1

    def __rtruediv__(self, other):
        """Support ``number / Value``."""
        return Value(other) * self ** -1

    def backward(self):
        """Backpropagate from this node through everything it depends on.

        Nodes are ordered topologically so that a node's ``_backward`` runs
        only after every node that consumes it has contributed its gradient.
        Gradients are accumulated with ``+=``, so leaf gradients must be reset
        by the caller between independent backward passes.
        """
        topo = []
        visited = set()

        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)

        build_topo(self)

        self.grad = 1  # d(self)/d(self)
        for node in reversed(topo):
            node._backward()


In [3]:
# Create a dataset for XNOR operation with 3 variables
def create_xnor_data():
    """Build the 3-input XNOR dataset as plain Python lists.

    Returns:
        (train_X, train_y, test_X, test_y) where the training split holds all
        8 input combinations and the test split holds 3 rows that repeat the
        last three training rows (so it is not a held-out set).
    """
    train_X = [
        [0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
        [1, 0, 0], [1, 0, 1], [1, 1, 1], [1, 1, 0],
    ]
    # The label is 1 exactly when the XOR of the three bits is 0.
    train_y = [1 - (a ^ b ^ c) for a, b, c in train_X]

    # The last three rows double as the test split; copy the rows so the two
    # splits do not share list objects.
    tail = slice(-3, None)
    test_X = [list(row) for row in train_X[tail]]
    test_y = train_y[tail]

    return train_X, train_y, test_X, test_y


In [4]:
# Initialize weights and biases
def init_weights(input_size, hidden_sizes, output_size):
    """Create randomly initialised parameters for a fully connected network.

    Args:
        input_size: number of input features.
        hidden_sizes: list with the width of each hidden layer.
        output_size: number of output units.

    Returns:
        (weights, biases). ``weights[k][i][j]`` connects unit ``i`` of layer
        ``k`` to unit ``j`` of layer ``k + 1``; ``biases[k][j]`` is the bias of
        unit ``j`` in layer ``k + 1``. Every entry is a ``Value`` drawn
        uniformly from [-1, 1].
    """
    dims = [input_size] + hidden_sizes + [output_size]

    def draw():
        return Value(random.uniform(-1, 1))

    weights, biases = [], []
    for fan_in, fan_out in zip(dims, dims[1:]):
        # Weights are drawn before biases for each layer, row by row.
        rows = []
        for _ in range(fan_in):
            rows.append([draw() for _ in range(fan_out)])
        weights.append(rows)
        biases.append([draw() for _ in range(fan_out)])
    return weights, biases


In [5]:
def forward_layer(inputs, weights, biases, activation="relu"):
    """Evaluate one fully connected layer.

    Args:
        inputs: list of Value activations coming from the previous layer.
        weights: ``weights[i][j]`` is the weight from input ``i`` to unit ``j``.
        biases: ``biases[j]`` is the bias of unit ``j``.
        activation: ``"relu"`` applies ``relu()``; ``"sigmoid"`` applies
            ``tanh()``; any other string returns the raw weighted sum.

    Returns:
        List with one output per unit (column of ``weights``).

    NOTE(review): the ``"sigmoid"`` key selects ``tanh()`` and the string
    ``"tanh"`` is not recognised (it falls through to "no activation").
    Confirm the intended mapping before renaming.
    """
    n_units = len(weights[0])
    outputs = []
    for j in range(n_units):
        # Accumulate left to right starting from a zero node, then add bias.
        z = Value(0)
        for i in range(len(inputs)):
            z = z + inputs[i] * weights[i][j]
        z = z + biases[j]

        if activation == "relu":
            z = z.relu()
        elif activation == "sigmoid":
            z = z.tanh()
        outputs.append(z)
    return outputs

In [7]:
# Forward pass through the network
def forward_pass(X, weights, biases):
    """Run one sample through every layer of the network.

    Works for any number of layers (previously exactly four were hard-coded):
    every layer except the last is evaluated with activation ``"relu"`` and
    the last one with activation ``"tanh"``.

    Args:
        X: iterable of raw input numbers for one sample.
        weights: per-layer weight matrices as produced by ``init_weights``.
        biases: per-layer bias lists, parallel to ``weights``.

    Returns:
        The first output of the final layer.

    Raises:
        IndexError: if ``weights`` is empty or ``biases`` has fewer layers
            than ``weights``.

    NOTE(review): the output activation name is passed through unchanged;
    how ``"tanh"`` is interpreted is decided by ``forward_layer``.
    """
    if not weights:
        raise IndexError("forward_pass needs at least one layer of weights")

    activations = [Value(x) for x in X]  # wrap raw inputs as graph leaves
    last = len(weights) - 1
    for idx, layer_weights in enumerate(weights):
        kind = "tanh" if idx == last else "relu"
        activations = forward_layer(activations, layer_weights, biases[idx], activation=kind)
    return activations[0]


In [8]:
# Training loop
def train_network(train_X, train_y, weights, biases, epochs=100000, lr=0.001):
    """Train the network in place with per-sample gradient descent.

    For every sample the squared error ``(pred - y) ** 2`` is backpropagated
    and every weight and bias is updated immediately, then its gradient is
    reset to 0. The summed loss of an epoch is printed every 100 epochs.

    Args:
        train_X: list of input rows.
        train_y: list of targets, parallel to ``train_X``.
        weights, biases: parameters from ``init_weights``; modified in place.
        epochs: number of passes over the training data.
        lr: learning rate.
    """
    # Flatten the parameters once, in the order they are updated:
    # for each layer, all weights row by row, then the biases.
    params = []
    for layer_weights, layer_biases in zip(weights, biases):
        for row in layer_weights:
            params.extend(row)
        params.extend(layer_biases)

    for epoch in range(epochs):
        total_loss = 0
        for sample, label in zip(train_X, train_y):
            prediction = forward_pass(sample, weights, biases)

            # Squared error for this single sample
            loss = (prediction - Value(label)) ** 2
            total_loss += loss.data

            loss.backward()

            # Gradient-descent step, then clear the gradient so the next
            # sample starts from zero.
            for param in params:
                param.data -= lr * param.grad
                param.grad = 0

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss}")


In [9]:
# Test the network
def test_network(test_X, test_y, weights, biases):
    """Test the network using the test dataset."""
    print("\nTesting the network:")
    for X, y in zip(test_X, test_y):
        pred = forward_pass(X, weights, biases)
        print(f"Input: {X}, Prediction: {round(pred.data)}, Actual: {y}")


In [10]:
# Main function
def main(seed=None):
    """Build the XNOR dataset, train the 3-5-5-5-1 network and report results.

    Args:
        seed: optional seed for the ``random`` module. When given, the weight
            initialisation (and therefore the whole run) is reproducible.
            The default ``None`` leaves the random state untouched.
    """
    if seed is not None:
        random.seed(seed)

    train_X, train_y, test_X, test_y = create_xnor_data()
    # 3 input nodes, 3 hidden layers with 5 nodes each, 1 output node
    weights, biases = init_weights(3, [5, 5, 5], 1)

    train_network(train_X, train_y, weights, biases, epochs=1000, lr=0.001)
    test_network(test_X, test_y, weights, biases)

if __name__ == "__main__":
    main()


Epoch 0, Loss: 21.288975705647882
Epoch 100, Loss: 2.0789132578239635
Epoch 200, Loss: 2.0105986246684417
Epoch 300, Loss: 2.0084255679300416
Epoch 400, Loss: 2.007507200932536
Epoch 500, Loss: 2.0069668073251163
Epoch 600, Loss: 2.006538438993442
Epoch 700, Loss: 2.006184499761303
Epoch 800, Loss: 2.005876161807622
Epoch 900, Loss: 2.0056201116431667

Testing the network:
Input: [1, 0, 1], Prediction: 0, Actual: 1
Input: [1, 1, 1], Prediction: 1, Actual: 0
Input: [1, 1, 0], Prediction: 0, Actual: 1


## FF

In [11]:
import numpy as np

# Three-input XNOR truth table: one row per input combination, with a
# column vector of labels in the same row order.
X = np.array([
    [0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
    [1, 0, 0], [1, 0, 1], [1, 1, 1], [1, 1, 0],
])
y = np.array([1, 0, 0, 1, 0, 1, 0, 1]).reshape(-1, 1)

# Network shape: 3 inputs -> 5 hidden units -> 1 output
input_size, hidden_size, output_size = 3, 5, 1

# Seed NumPy's global generator so the initial weights are reproducible.
# W1 is drawn before W2; biases start at zero.
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros(shape=(1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros(shape=(1, output_size))

# Activation functions
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Uses the identity ``sigmoid(x) = exp(-log(1 + exp(-x)))``, where
    ``np.logaddexp(0, -x)`` computes ``log(1 + exp(-x))`` without forming
    ``exp(-x)`` directly. The plain formula overflows (RuntimeWarning) for
    large negative ``x``.
    """
    return np.exp(-np.logaddexp(0, -x))

# Forward pass
def forward(X):
    """Run the two-layer network on ``X`` and cache the intermediates.

    Reads the module-level parameters ``W1, b1, W2, b2`` and stores the
    hidden pre-activation (``z1``), hidden activation (``a1``), output
    pre-activation (``z2``) and prediction (``y_pred``) as globals so that
    ``backward`` can reuse them.

    Returns:
        The sigmoid output ``y_pred``.
    """
    global z1, a1, z2, y_pred
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    output_pre = np.dot(hidden_act, W2) + b2
    z1, a1, z2 = hidden_pre, hidden_act, output_pre
    y_pred = sigmoid(output_pre)
    return y_pred

# Backward pass (Gradient Descent)
def backward(X, y, lr=0.01):
    """Apply one gradient-descent step using the values cached by ``forward``.

    Reads the globals ``y_pred``, ``a1`` and ``z1`` and updates the global
    parameters ``W1, b1, W2, b2`` in place.

    Args:
        X: input batch that was passed to the preceding ``forward`` call.
        y: targets with the same shape as ``y_pred``.
        lr: learning rate.

    NOTE(review): ``y_pred - y`` is used directly as the output-layer error,
    which is the form the gradient takes for sigmoid + cross-entropy; the
    training loop reports mean squared error instead. Confirm this is
    intended.
    """
    global W1, b1, W2, b2
    scale = 1 / len(y)  # average the gradients over the batch

    # Output layer
    err_out = y_pred - y
    grad_W2 = scale * np.dot(a1.T, err_out)
    grad_b2 = scale * np.sum(err_out, axis=0, keepdims=True)

    # Hidden layer: propagate through W2 and mask units where ReLU was off
    err_hidden = np.dot(err_out, W2.T) * (z1 > 0)
    grad_W1 = scale * np.dot(X.T, err_hidden)
    grad_b1 = scale * np.sum(err_hidden, axis=0, keepdims=True)

    # All gradients are computed before any parameter is changed.
    W1 -= lr * grad_W1
    b1 -= lr * grad_b1
    W2 -= lr * grad_W2
    b2 -= lr * grad_b2

# Training loop: full-batch gradient descent on the 8 XNOR rows.
epochs = 5000
for epoch in range(epochs):
    forward(X)
    backward(X, y, lr=0.1)
    if epoch % 500 == 0:
        # y_pred here comes from the forward pass above, i.e. from the
        # parameters before this epoch's update.
        loss = np.mean((y_pred - y) ** 2)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Re-run the forward pass so y_pred reflects the final parameter update;
# otherwise the reported predictions are one step stale.
forward(X)

# Test predictions
print("\nPredictions after training:")
print(y_pred.round())  # Round to 0 or 1 for binary classification

Epoch 0, Loss: 0.2805
Epoch 500, Loss: 0.1421
Epoch 1000, Loss: 0.0501
Epoch 1500, Loss: 0.0138
Epoch 2000, Loss: 0.0047
Epoch 2500, Loss: 0.0021
Epoch 3000, Loss: 0.0011
Epoch 3500, Loss: 0.0007
Epoch 4000, Loss: 0.0004
Epoch 4500, Loss: 0.0003

Predictions after training:
[[1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]]


In [12]:
import numpy as np

# Three-input XNOR truth table: one row per input combination, with a
# column vector of labels in the same row order.
X = np.array([
    [0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
    [1, 0, 0], [1, 0, 1], [1, 1, 1], [1, 1, 0],
])
y = np.array([1, 0, 0, 1, 0, 1, 0, 1]).reshape(-1, 1)

# Network shape: 3 inputs -> one hidden layer of 5 units -> 1 output
input_size, hidden_size, output_size = 3, 5, 1

# Seed NumPy's global generator so the initial weights are reproducible.
# W1 (input -> hidden) is drawn before W2 (hidden -> output); biases start
# at zero.
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros(shape=(1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros(shape=(1, output_size))

# Activation functions
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Uses the identity ``sigmoid(x) = exp(-log(1 + exp(-x)))``, where
    ``np.logaddexp(0, -x)`` computes ``log(1 + exp(-x))`` without forming
    ``exp(-x)`` directly. The plain formula overflows (RuntimeWarning) for
    large negative ``x``.
    """
    return np.exp(-np.logaddexp(0, -x))

# Forward pass
def forward(X):
    """Run the two-layer network on ``X`` and cache the intermediates.

    Reads the module-level parameters ``W1, b1, W2, b2`` and stores the
    hidden pre-activation (``z1``), hidden activation (``a1``), output
    pre-activation (``z2``) and prediction (``y_pred``) as globals so that
    ``backward`` can reuse them.

    Returns:
        The sigmoid output ``y_pred``.
    """
    global z1, a1, z2, y_pred
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    output_pre = np.dot(hidden_act, W2) + b2
    z1, a1, z2 = hidden_pre, hidden_act, output_pre
    y_pred = sigmoid(output_pre)
    return y_pred

# Backward pass (Gradient Descent)
def backward(X, y, lr=0.01):
    """Apply one gradient-descent step using the values cached by ``forward``.

    Reads the globals ``y_pred``, ``a1`` and ``z1`` and updates the global
    parameters ``W1, b1, W2, b2`` in place.

    Args:
        X: input batch that was passed to the preceding ``forward`` call.
        y: targets with the same shape as ``y_pred``.
        lr: learning rate.

    NOTE(review): ``y_pred - y`` is used directly as the output-layer error,
    which is the form the gradient takes for sigmoid + cross-entropy; the
    training loop reports mean squared error instead. Confirm this is
    intended.
    """
    global W1, b1, W2, b2
    scale = 1 / len(y)  # average the gradients over the batch

    # Output layer
    err_out = y_pred - y
    grad_W2 = scale * np.dot(a1.T, err_out)
    grad_b2 = scale * np.sum(err_out, axis=0, keepdims=True)

    # Hidden layer: propagate through W2 and mask units where ReLU was off
    err_hidden = np.dot(err_out, W2.T) * (z1 > 0)
    grad_W1 = scale * np.dot(X.T, err_hidden)
    grad_b1 = scale * np.sum(err_hidden, axis=0, keepdims=True)

    # All gradients are computed before any parameter is changed.
    W1 -= lr * grad_W1
    b1 -= lr * grad_b1
    W2 -= lr * grad_W2
    b2 -= lr * grad_b2

# Training loop: full-batch gradient descent on the 8 XNOR rows.
epochs = 5000
for epoch in range(epochs):
    forward(X)
    backward(X, y, lr=0.1)
    if epoch % 500 == 0:
        # y_pred here comes from the forward pass above, i.e. from the
        # parameters before this epoch's update.
        loss = np.mean((y_pred - y) ** 2)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Re-run the forward pass so y_pred reflects the final parameter update;
# otherwise the reported predictions are one step stale.
forward(X)

# Test predictions
print("\nPredictions after training:")
print(y_pred.round())  # Round to 0 or 1 for binary classification

# Pair every input row with its rounded prediction and its label
outputs = [
    {
        "Input": X[i],
        "Predicted Output": y_pred[i].round(),  # Rounded prediction
        "Actual Output": y[i],
    }
    for i in range(len(X))
]

# Display outputs
for output in outputs:
    print(f"Input: {output['Input']}, Predicted Output: {output['Predicted Output']}, Actual Output: {output['Actual Output']}")

Epoch 0, Loss: 0.2805
Epoch 500, Loss: 0.1421
Epoch 1000, Loss: 0.0501
Epoch 1500, Loss: 0.0138
Epoch 2000, Loss: 0.0047
Epoch 2500, Loss: 0.0021
Epoch 3000, Loss: 0.0011
Epoch 3500, Loss: 0.0007
Epoch 4000, Loss: 0.0004
Epoch 4500, Loss: 0.0003

Predictions after training:
[[1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]]
Input: [0 0 0], Predicted Output: [1.], Actual Output: [1]
Input: [0 0 1], Predicted Output: [0.], Actual Output: [0]
Input: [0 1 0], Predicted Output: [0.], Actual Output: [0]
Input: [0 1 1], Predicted Output: [1.], Actual Output: [1]
Input: [1 0 0], Predicted Output: [0.], Actual Output: [0]
Input: [1 0 1], Predicted Output: [1.], Actual Output: [1]
Input: [1 1 1], Predicted Output: [0.], Actual Output: [0]
Input: [1 1 0], Predicted Output: [1.], Actual Output: [1]


## MLP

In [13]:
import random
import math

class Neuron():
    '''
        A conceptual neuron that can be trained using a
        fit and predict methodology, without any library.
    '''

    def __init__(self, position_in_layer, is_output_neuron=False):
        self.weights = []
        self.inputs = []
        self.output = None

        # Weights computed during backpropagation; they replace `weights`
        # only when update_neuron() is called
        self.updated_weights = []
        # Selects which delta formula calculate_update() uses
        self.is_output_neuron = is_output_neuron
        # Error term of this neuron, set by calculate_update()
        self.delta = None
        # Index of this neuron in its layer; used to pick the matching
        # weight in each neuron of the next layer
        self.position_in_layer = position_in_layer

    def attach_to_output(self, neurons):
        '''
            Store a reference to the neurons of the next layer
            (used for backpropagation).
        '''

        self.output_neurons = neurons

    def sigmoid(self, x):
        '''
            Logistic sigmoid used as the activation.

            The branch on the sign of x keeps the argument of math.exp
            non-positive, so very negative activations no longer raise
            OverflowError.
        '''
        if x >= 0:
            return 1 / (1 + math.exp(-x))
        exp_x = math.exp(x)
        return exp_x / (1 + exp_x)

    def init_weights(self, num_input):
        '''
            Append num_input + 1 random weights (one per input plus one for
            the bias), each drawn uniformly from [0, 1].
        '''

        # Randomly initialize the weights
        for i in range(num_input+1):
            self.weights.append(random.uniform(0,1))

    def predict(self, row):
        '''
            Compute this neuron's output for a row of data. A neuron can have
            many inputs but only one output. The inputs that were used are
            remembered in self.inputs for the later weight update.
        '''

        # Reset the inputs
        self.inputs = []

        # Pair each weight with a feature; zip stops at the shorter of the two
        activation = 0
        for weight, feature in zip(self.weights, row):
            self.inputs.append(feature)
            activation = activation + weight*feature

        self.output = self.sigmoid(activation)
        return self.output

    def update_neuron(self):
        '''
            Replace the current weights with the ones computed by
            calculate_update(). This needs to be done at the end of the
            backpropagation, once every neuron has computed its update.
        '''

        self.weights = list(self.updated_weights)

    def calculate_update(self, learning_rate, target):
        '''
            Calculate the updated weights for this neuron. It first computes
            the delta (the formula depends on whether this is an output or a
            hidden neuron), then the new weights. The current weights are not
            overwritten yet because other neurons still need them during the
            same backpropagation pass.
        '''

        if self.is_output_neuron:
            # Output delta: (output - target) times the sigmoid derivative
            self.delta = (self.output - target)*self.output*(1-self.output)
        else:
            # Hidden delta: sum of the next layer's deltas, each weighted by
            # the weight that connects this neuron to that next-layer neuron
            delta_sum = 0
            cur_weight_index = self.position_in_layer
            for output_neuron in self.output_neurons:
                delta_sum = delta_sum + (output_neuron.delta * output_neuron.weights[cur_weight_index])

            # Multiply by the sigmoid derivative of this neuron's output
            self.delta = delta_sum*self.output*(1-self.output)

        # Reset the update weights
        self.updated_weights = []

        # One gradient-descent step per weight, using the inputs cached by
        # the last predict() call
        for cur_weight, cur_input in zip(self.weights, self.inputs):
            gradient = self.delta*cur_input
            new_weight = cur_weight - learning_rate*gradient
            self.updated_weights.append(new_weight)
         

In [14]:
class Layer():
    '''
        Layer models one layer of the fully connected feed-forward network.
        It creates the neurons, wires them to the next layer and evaluates
        them together.
    '''

    def __init__(self, num_neuron, is_output_layer = False):

        # Create num_neuron neurons; each one knows its index in the layer
        self.is_output_layer = is_output_layer
        self.neurons = []
        for i in range(num_neuron):
            # Create neuron
            neuron = Neuron(i,  is_output_neuron=is_output_layer)
            self.neurons.append(neuron)

    def attach(self, layer):
        '''
            Attach the neurons of this layer to the neurons of `layer`
            (the next layer). This is needed for the backpropagation
            algorithm.
        '''
        # Every neuron of this layer gets a reference to the next layer's
        # neurons
        for in_neuron in self.neurons:
            in_neuron.attach_to_output(layer.neurons)

    def init_layer(self, num_input):
        '''
            Initialize the weights of each neuron in the layer for
            num_input inputs coming from the previous layer.
        '''

        for neuron in self.neurons:
            neuron.init_weights(num_input)

    def predict(self, row):
        '''
            Calculate the activations of the full layer for one row of data.

            The bias input (a trailing 1) is added to a copy of `row`, so the
            caller's list is left untouched. Appending in place made dataset
            rows grow by one element on every call.
        '''
        row_with_bias = list(row) + [1]  # need to add the bias
        activations = [neuron.predict(row_with_bias) for neuron in self.neurons]
        return activations
            
        

In [15]:
class MultiLayerPerceptron():
    '''
        A multi-layer perceptron built from Layer objects. The architecture
        is assembled back to front: the output layer first, then the hidden
        layers, then the input layer. The network outputs a binary class
        (0.0 or 1.0) from its first output neuron.
    '''
    def __init__(self, learning_rate = 0.01, num_iteration = 100):

        # Layers, ordered from input (index 0) to output (last)
        self.layers = []

        # Training parameters
        self.learning_rate = learning_rate
        self.num_iteration = num_iteration

    def add_output_layer(self, num_neuron):
        '''
            Create a new output layer and add it to the architecture.
        '''
        self.layers.insert(0, Layer(num_neuron, is_output_layer = True))

    def add_hidden_layer(self, num_neuron):
        '''
            Create a new hidden layer, attach it to the layer that is
            currently at the front and put it at the front of the
            architecture.
        '''
        hidden_layer = Layer(num_neuron)
        # The layer currently at the front becomes this layer's next layer
        hidden_layer.attach(self.layers[0])
        self.layers.insert(0, hidden_layer)

    def add_input_layer(self, num_neuron):
        '''
            Create the first layer of the architecture. It is built and
            attached exactly like a hidden layer.
        '''
        self.add_hidden_layer(num_neuron)

    def update_layers(self, target):
        '''
            Update all the layers: first calculate the new weights for every
            neuron, then replace all the weights at once.
        '''
        # Walk the layers from output to input so that each neuron's delta
        # is available before the previous layer needs it
        for layer in reversed(self.layers):
            for neuron in layer.neurons:
                neuron.calculate_update(self.learning_rate, target)

        # Only now overwrite the weights
        for layer in self.layers:
            for neuron in layer.neurons:
                neuron.update_neuron()

    def fit(self, X, y):
        '''
            Main training function, based on backpropagation. It uses
            stochastic gradient descent: at each iteration one row is picked
            at random, a forward pass is run on it, the error is
            backpropagated and the new weights are calculated. Once all the
            new weights are calculated, the whole network is updated.
        '''
        num_row = len(X)
        num_feature = len(X[0]) # Here we assume that we have a rectangular matrix

        # Init the weights throughout each of the layers
        self.layers[0].init_layer(num_feature)

        for i in range(1, len(self.layers)):
            num_input = len(self.layers[i-1].neurons)
            self.layers[i].init_layer(num_input)

        # Launch the training algorithm
        for i in range(self.num_iteration):

            # Stochastic Gradient Descent
            r_i = random.randint(0,num_row-1)
            row = X[r_i] # take the random sample from the dataset
            # The forward pass caches each neuron's inputs and output, which
            # update_layers() needs; the returned class is not used here
            self.predict(row)
            target = y[r_i]

            # Update the layers using backpropagation
            self.update_layers(target)

            # Every 1000 iterations, calculate the error
            # on the whole training set
            if i % 1000 == 0:
                total_error = 0
                for r_i in range(num_row):
                    row = X[r_i]
                    yhat = self.predict(row)
                    error = (y[r_i] - yhat)
                    total_error = total_error + error**2
                mean_error = total_error/num_row
                print(f"Iteration {i} with error = {mean_error}")

    def predict(self, row):
        '''
            Take a row of input and give back the output of the whole
            neural network (0.0 or 1.0).

            A copy of `row` is handed to the first layer so that the caller's
            data (for example the training set inside fit) is never modified.
        '''

        # Feed the row through every layer in order
        activations = self.layers[0].predict(list(row))
        for i in range(1, len(self.layers)):
            activations = self.layers[i].predict(activations)

        # Threshold every output at 0.5
        outputs = [1.0 if activation >= 0.5 else 0.0 for activation in activations]

        # We currently have only one output allowed
        return outputs[0]

In [16]:
# XOR truth table: the label is 1 when exactly one of the two inputs is 1
X = [[a, b] for a in (0, 1) for b in (0, 1)]
y = [a ^ b for a, b in X]

# Training hyper-parameters for the network
clf = MultiLayerPerceptron(learning_rate=0.1, num_iteration=100000)

# The architecture is assembled back to front: output, hidden, then input
clf.add_output_layer(num_neuron=1)
clf.add_hidden_layer(num_neuron=3)
clf.add_input_layer(num_neuron=2)

# Train the network
clf.fit(X, y)

Iteration 0 with error = 0.5
Iteration 1000 with error = 0.5
Iteration 2000 with error = 0.5
Iteration 3000 with error = 0.5
Iteration 4000 with error = 0.5
Iteration 5000 with error = 0.5
Iteration 6000 with error = 0.5
Iteration 7000 with error = 0.5
Iteration 8000 with error = 0.5
Iteration 9000 with error = 0.5
Iteration 10000 with error = 0.5
Iteration 11000 with error = 0.5
Iteration 12000 with error = 0.5
Iteration 13000 with error = 0.5
Iteration 14000 with error = 0.5
Iteration 15000 with error = 0.5
Iteration 16000 with error = 0.5
Iteration 17000 with error = 0.5
Iteration 18000 with error = 0.5
Iteration 19000 with error = 0.5
Iteration 20000 with error = 0.5
Iteration 21000 with error = 0.5
Iteration 22000 with error = 0.25
Iteration 23000 with error = 0.5
Iteration 24000 with error = 0.5
Iteration 25000 with error = 0.5
Iteration 26000 with error = 0.5
Iteration 27000 with error = 0.5
Iteration 28000 with error = 0.25
Iteration 29000 with error = 0.5
Iteration 30000 with 

In [17]:
# Compare the trained classifier with the XOR truth table, one row at a time
for expected, sample in ((0.0, [0, 0]), (1.0, [0, 1]), (1.0, [1, 0]), (0.0, [1, 1])):
    print(f"Expected {expected}, got: ", clf.predict(sample))

Expected 0.0, got:  0.0
Expected 1.0, got:  1.0
Expected 1.0, got:  1.0
Expected 0.0, got:  0.0
