In [13]:
import math
import random
class Param:
    def __init__(self, val = 0, grad = 0):
        self.val = random.uniform(-5, 5)
        self.grad = 0
    

In [14]:
class Neuron:
    def __init__(self, nin):
        self.val = 0
        self.grad = 0
        self.w_params = [Param() for i in range(nin)]
        self.b_param = Param()
    
    def __repr__(self):
        return f"{self.val}"
    
    def __mul__(self, other):
        return self.val * other
    def __rmul__(self, other):
        return self.val * other 
        

In [15]:
class Layer:
    def __init__(self, nin, size):
        self.neurons = [Neuron(nin) for i in range(size)]
    
    def __repr__(self):
        return f"{self.neurons}"
    
    def __iter__(self):
        return iter(self.neurons)

In [16]:
def loss(y_pred, y):
    if(isinstance(y_pred, Layer)):
        return sum([(y_pred.neurons[i].val - y[i])**2 for i in range(len(y_pred.neurons))])
    
def dot(v1, v2):
    if(isinstance(v1, Layer)):
        return sum([v1.neurons[i].val*v2[i].val for i in range(len(v1.neurons))])
    else:
        return sum([v1[i]*v2[i].val for i in range(len(v1))])

def sigmoid(x):
    return 1./(1 + math.exp(-x))

class MLP:
    def __init__(self, sizes):
        self.layers = [Layer(sizes[i], sizes[i+1]) for i in range(len(sizes) - 1)]
        
    def forward(self, x):
        self.input = x
        for layer in self.layers:
            layer.prev_layer = x
            for neuron in layer.neurons:
                neuron.val = sigmoid(dot(x , neuron.w_params) + neuron.b_param.val)
            x = layer
        
        return x
        
    
    def backward(self, y, batch_size=1):
        #print(f"loss is {loss(self.layers[-1], y)}")
        
        # calc output grads & weight 
        for (neuron, y_val) in zip(self.layers[-1], y):
            neuron.grad += 2 * (neuron.val - y_val) * (1/batch_size) # implement mse loss 
            for w_param, x_val in zip(neuron.w_params, self.layers[-1].prev_layer):
                w_param.grad += (neuron.grad *
                               (neuron.val**2) * 
                                math.exp(-(dot(self.layers[-1].prev_layer, neuron.w_params) + neuron.b_param.val)) *
                                x_val)
            neuron.b_param.grad += (neuron.grad *
                               (neuron.val**2) * 
                                math.exp(-(dot(self.layers[-1].prev_layer, neuron.w_params) + neuron.b_param.val)))
        # calc rest of grads
        for i in range(len(self.layers) - 2, -1, -1):
            # calc neuron grads
            for idx, neuron in enumerate(self.layers[i]):
                # calc neuron grads
                for next_neuron in self.layers[i + 1]:
                    neuron.grad +=  (next_neuron.grad * 
                                     next_neuron.val**2 * 
                                     math.exp(-(dot(self.layers[i], next_neuron.w_params) + next_neuron.b_param.val))*
                                     next_neuron.w_params[idx].val
                                    )
                # calc param grads 
                for w_param, x_val in zip(neuron.w_params, self.layers[i].prev_layer):
                    w_param.grad += (neuron.grad *
                                   (neuron.val**2) * 
                                    math.exp(-(dot(self.layers[i].prev_layer, neuron.w_params) + neuron.b_param.val)) *
                                    x_val)
                neuron.b_param.grad += (neuron.grad *
                                   (neuron.val ** 2) *  
                                   math.exp(-(dot(self.layers[i].prev_layer, neuron.w_params) + neuron.b_param.val))
                                    )
    def zero_neurons_grads(self):
        for layer in self.layers:
            for neuron in layer.neurons:
                neuron.grad = 0
    def zero_params_grads(self):
        for layer in self.layers:
            for neuron in layer.neurons:
                #neuron.grad = 0
                for w_param in neuron.w_params:
                    w_param.grad = 0
                neuron.b_param.grad = 0
    
    def step(self, lr=1):
        for layer in self.layers:
            for neuron in layer.neurons:
                for w_param in neuron.w_params:
                    w_param.val -= w_param.grad * lr
                neuron.b_param.val -= neuron.b_param.val * lr

In [2]:
# x_data = [(0, 0), (0, 1), (1,0), (1, 1)]
# y_data = [(0, ), (1, ), (1, ), (0, )]
def train_MLP(mlp, x_data, y_data, num_iters = 1, lr=1):
    # calc grads
    for i in range(num_iters):
        total_loss = 0; 
        mlp.zero_params_grads()
        for sample in zip(x_data, y_data):
            mlp.zero_neurons_grads()
            ypred = mlp.forward(sample[0])
            total_loss += loss(ypred, sample[1])
            mlp.backward(sample[1], batch_size = len(x_data))
        
        if(i%100 == 0):
            print(f"Iteration {i}: Loss = {total_loss/len(x_data)}")

        # step 
        mlp.step()
        
    total_loss = 0; 
    #mlp.zero_params_grads()
    for sample in zip(x_data, y_data):
        mlp.zero_neurons_grads()
        ypred = mlp.forward(sample[0])
        total_loss += loss(ypred, sample[1])
        #mlp.backward(sample[1], batch_size = len(x_data))
            
    print(f"Final Loss = {total_loss/len(x_data)}")

def calc_loss(mlp, x_data, y_data):
    total_loss = 0
    for sample in zip(x_data, y_data):
            ypred = mlp.forward(sample[0])
            total_loss += loss(ypred, sample[1])
    
     print(f"Loss on test set: {total_loss/len(x_data)}")
    

In [12]:
# my_MLP = MLP([2, 10, 2, 1])

# train_MLP(my_MLP, x_data, y_data, num_iters = 500, lr = 1)

Iteration 0: Loss = 0.4967955422007997
Iteration 100: Loss = 0.04267504478950692
Iteration 200: Loss = 0.01256912775458454
Iteration 300: Loss = 0.007115362622937966
Iteration 400: Loss = 0.00488596913241005
Final Loss = 0.003691606765115635
