# Verification of micrograd

This notebook verifies that the forward pass (prediction and loss) and the backward pass (gradients) of micrograd yield the same results as PyTorch.

This is done by initializing the same simple neural network in both frameworks (same architecture, weights, biases and loss function) and verifying that they output the same prediction, loss and gradients.

In [152]:
from nn import MLP
from torch import nn
import torch
import random

### Initialize the networks

In [153]:
# Fix the seed of Python's `random` module before building the micrograd network
# (presumably MLP draws its initial weights from it, which makes them reproducible).
random.seed(42)

# micrograd network: 2 inputs feeding a layer of 2 neurons, then a layer of 1 neuron
mlp_layer_sizes = [2, 1]
mlp = MLP(2, mlp_layer_sizes)

# Initializing pytorch neural network
class NeuralNetwork(nn.Module):
    """PyTorch network: Linear(2, 2) -> Tanh -> Linear(2, 1) -> Tanh.

    The two linear layers live at indices 0 and 2 of ``linear_tanh_stack``.
    """

    def __init__(self):
        super().__init__()
        # Stages are listed in execution order; their positions in the
        # Sequential container are used to address the linear layers.
        stages = [
            nn.Linear(2, 2),
            nn.Tanh(),
            nn.Linear(2, 1),
            nn.Tanh(),
        ]
        self.linear_tanh_stack = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the linear/tanh stack and return the result."""
        output = self.linear_tanh_stack(x)
        return output

# PyTorch model, converted to double precision (float64) right away
model = NeuralNetwork().double()

# Mean-squared-error loss used on the PyTorch side
loss_fn = nn.MSELoss()

### Copy weights and biases from micrograd NN to pytorch NN

In [154]:
# Collect the micrograd parameters as nested lists: one entry per layer,
# containing one weight row (and one bias) per neuron.
nn_weights = [
    [[w.data for w in neuron.w] for neuron in layer.neurons]
    for layer in mlp.layers
]
nn_biases = [
    [neuron.b.data for neuron in layer.neurons]
    for layer in mlp.layers
]

# Overwrite the PyTorch parameters in place. no_grad() keeps the copies out of
# autograd; indices 0 and 2 are the two nn.Linear modules of the stack.
with torch.no_grad():
    for position, stack_index in enumerate((0, 2)):
        linear = model.linear_tanh_stack[stack_index]
        linear.weight.copy_(torch.tensor(nn_weights[position], dtype=torch.float64))
        linear.bias.copy_(torch.tensor(nn_biases[position], dtype=torch.float64))

### Verify that weights and biases are identical

In [155]:
# Parameters as read from the micrograd network
print("micrograd:")
for i, (layer_w, layer_b) in enumerate(zip(nn_weights, nn_biases)):
    print(f"\tLayer {i} weights: {layer_w} - biases: {layer_b}")

# Parameters as stored in the PyTorch model (linear layers sit at stack indices 0 and 2)
print("torch:")
torch.set_printoptions(precision=16)  # tensor reprs show up to 16 decimals; the values below are printed via .tolist()
for layer_no, stack_index in enumerate((0, 2)):
    linear = model.linear_tanh_stack[stack_index]
    print(f"\tLayer {layer_no} weights: {linear.weight.data.tolist()} - biases: {linear.bias.data.tolist()}")


micrograd:
	Layer 0 weights: [[0.2788535969157675, -0.9499784895546661], [-0.5535785237023545, 0.4729424283280248]] - biases: [-0.4499413632617615, 0.3533989748458226]
	Layer 1 weights: [[0.7843591354096908, -0.8261223347411677]] - biases: [-0.15615636062945915]
torch:
	Layer 0 weights: [[0.2788535969157675, -0.9499784895546661], [-0.5535785237023545, 0.4729424283280248]] - biases: [-0.4499413632617615, 0.3533989748458226]
	Layer 1 weights: [[0.7843591354096908, -0.8261223347411677]] - biases: [-0.15615636062945915]


### Define input and target

In [156]:
# Raw Python values: one input sample and its target
x = [1.0, 0.0]
y_target = 0.0

# The same values as float64 tensors for the PyTorch model
x_tensor = torch.tensor(x, dtype=torch.float64)
y_target_tensor = torch.tensor(y_target, dtype=torch.float64)

### Prediction (forward pass)

In [157]:
# Forward pass of the same input through both networks
pred = mlp(x)
torch_pred = model(x_tensor)

# Print the raw numbers side by side for comparison
print("micrograd: \t", pred.data)
print("torch: \t\t", torch_pred.item())

micrograd: 	 -0.12519737124741515
torch: 		 -0.12519737124741515


### Calculate loss

In [158]:
# micrograd: squared error of the single prediction
loss = (pred - y_target)**2
print("micrograd: \t", loss.data)

# torch: the prediction has shape (1,) while the target is a 0-dim scalar.
# Reshape the target to the prediction's shape so MSELoss compares like with
# like instead of warning about broadcasting; the loss value is unchanged.
torch_loss = loss_fn(torch_pred, y_target_tensor.reshape(torch_pred.shape))
print("torch: \t\t", torch_loss.item())

micrograd: 	 0.01567438176726309
torch: 		 0.01567438176726309


  return F.mse_loss(input, target, reduction=self.reduction)


### Backward pass (calculate gradients)

In [159]:
# Backpropagate through both computation graphs
loss.backward()
torch_loss.backward()

print("micrograd:")
for i in range(len(mlp.layers)):
    print(f"\tLayer {i}:")
    for n in mlp.layers[i].neurons:
        param_list = [str(v.grad) for v in n.w]
        print(f"\t\tWeight grad {param_list} - bias grad: {str(n.b.grad)}")

print("torch:")
for name, param in model.named_parameters():
    print(f"\t{name} (grad): {param.grad.tolist()}")

# Check the gradients programmatically instead of relying on eyeballing the
# printout. A tight tolerance is used rather than exact equality because the
# two frameworks may differ in the last floating-point digit.
linear_layers = [m for m in model.linear_tanh_stack if isinstance(m, nn.Linear)]
assert len(linear_layers) == len(mlp.layers), "layer count differs between micrograd and torch"
for layer_idx, (layer, linear) in enumerate(zip(mlp.layers, linear_layers)):
    # One row of weight gradients (and one bias gradient) per micrograd neuron
    weight_grads = torch.tensor([[w.grad for w in neuron.w] for neuron in layer.neurons], dtype=torch.float64)
    bias_grads = torch.tensor([neuron.b.grad for neuron in layer.neurons], dtype=torch.float64)
    assert torch.allclose(linear.weight.grad, weight_grads, rtol=1e-12, atol=1e-15), \
        f"weight gradients differ in layer {layer_idx}"
    assert torch.allclose(linear.bias.grad, bias_grads, rtol=1e-12, atol=1e-15), \
        f"bias gradients differ in layer {layer_idx}"

micrograd:
	Layer 0:
		Weight grad ['-0.18777088190078228', '0.0'] - bias grad: -0.18777088190078228
		Weight grad ['0.19566825636430066', '0.0'] - bias grad: 0.19566825636430066
	Layer 1:
		Weight grad ['0.04176132224435531', '0.04868961513469122'] - bias grad: -0.2464699597084508
torch:
	linear_tanh_stack.0.weight (grad): [[-0.18777088190078228, 0.0], [0.19566825636430066, 0.0]]
	linear_tanh_stack.0.bias (grad): [-0.18777088190078228, 0.19566825636430066]
	linear_tanh_stack.2.weight (grad): [[0.04176132224435531, 0.04868961513469123]]
	linear_tanh_stack.2.bias (grad): [-0.2464699597084508]
