In [25]:
import sys
from pathlib import Path

# Directory added to sys.path so that the `core` import below can be resolved.
# NOTE(review): this is a machine-specific absolute path - adjust it for your environment.
project_root = Path("/Users/dheerajkumar/Developer/AI/Bgrad")  # <-- adjust this

# Guard the append: re-running this cell must not pile up duplicate sys.path entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from core import MLP, Layer, Neuron

In [26]:
from core.nn_module import MLP

# Tiny training set: each entry pairs an input vector with the output expected for it.
_samples = [
    ([2.0, 3.0, -1.0], 1.0),
    ([3.0, -1.0, 0.5], -1.0),
    ([0.5, 1.0, 1.0], -1.0),
    ([1.0, 1.0, -1.0], 1.0),
]

# Split the pairs into the two parallel lists used by the rest of the notebook.
xs = [inputs for inputs, _ in _samples]
yexpected = [target for _, target in _samples]

# Build the network; its parameters (weights and biases) start out random.
# NOTE(review): presumably 3 inputs feeding layers of 4, 4 and 1 neurons - confirm against core.nn_module.
nn = MLP(3, [4, 4, 1])

# Display the freshly initialised parameters.
nn.parameters()

[Value(data=0.9172832611642332, grad=0),
 Value(data=0.9732894209471539, grad=0),
 Value(data=0.15447838456004526, grad=0),
 Value(data=0.15902144200615353, grad=0),
 Value(data=-0.2806854835092665, grad=0),
 Value(data=-0.46252997610972746, grad=0),
 Value(data=0.2968322365690579, grad=0),
 Value(data=0.35743267269281964, grad=0),
 Value(data=-0.8100832297239737, grad=0),
 Value(data=-0.9733086877116657, grad=0),
 Value(data=-0.7888068323239157, grad=0),
 Value(data=-0.24943795254346002, grad=0),
 Value(data=0.6833196631568894, grad=0),
 Value(data=-0.43419788330019937, grad=0),
 Value(data=0.5095567392400762, grad=0),
 Value(data=-0.7071475229029589, grad=0),
 Value(data=0.9108396227487516, grad=0),
 Value(data=0.8645257420737162, grad=0),
 Value(data=0.3624934755314242, grad=0),
 Value(data=-0.6098446680804972, grad=0),
 Value(data=0.11315491475020178, grad=0),
 Value(data=-0.5639976042498132, grad=0),
 Value(data=0.8806210953032967, grad=0),
 Value(data=-0.7297589692320643, grad=0)

In [27]:
# Let's train this custom NN on a tiny dataset to simulate the training and learning process of a neural network:
# forward pass, backward pass, gradient descent.

# Step 1 - Forward pass with the current parameters, then calculate the loss.
# Step 2 - Backward pass to evaluate the gradient of each parameter, i.e. the rate of change of the loss with respect to that parameter.
# Step 3 - Let the network learn by adjusting the parameters based on their gradients.
# Step 4 - Repeat the process for multiple iterations until the loss is very low, i.e. until the predictions are close enough to the expected outputs.

In [28]:
# Repeat Step 1 to Step 4 many times so the loss keeps shrinking; we stop after a fixed
# number of iterations, by which point the predictions should be close to the expected outputs.
iterations = 200
interations = iterations  # alias: keeps the original (misspelled) name defined for any cell that still reads it

# Step size of each gradient-descent update. It never changes, so it is set once
# here instead of being reassigned on every iteration.
learning_rate = 0.1

for k in range(iterations):
    # Forward pass: predictions and squared-error loss with the current parameters
    ypredicted = [nn(x) for x in xs]
    loss = sum((ypred - yexp)**2 for ypred, yexp in zip(ypredicted, yexpected))

    # Clear old grads right before backpropagating, so stale gradients left over from
    # an earlier pass (or an interrupted run of this cell) cannot leak into this one.
    nn.zero_grad()

    # Backward pass to calculate the gradient of the loss for each parameter
    loss.backward()

    # Adjust the parameters in the opposite direction of the gradient because:
    # if grad is +ve, increasing the parameter increases the loss, so we decrease the parameter;
    # if grad is -ve, increasing the parameter decreases the loss, so we increase the parameter.
    for p in nn.parameters():
        p.data -= learning_rate * p.grad

    # Printing each iteration for tracking
    print(k, loss.data)

0 6.236248109706986
1 2.3267802695278728
2 1.2191835799595179
3 0.6941541385089913
4 0.3222721292375048
5 0.062234464893389234
6 0.05134559291568403
7 0.04367208624491083
8 0.037940040115972386
9 0.033493015039957885
10 0.02994466201071131
11 0.0270500336253564
12 0.024645717524855774
13 0.022618393606693703
14 0.020886995548054243
15 0.019392011910069045
16 0.01808878520609186
17 0.016943163694071922
18 0.015928594389760933
19 0.01502412908962448
20 0.014213025669588823
21 0.013481747354162677
22 0.01281923399891867
23 0.012216362994552601
24 0.011665544716396171
25 0.011160414979009452
26 0.010695598453621894
27 0.010266524691110043
28 0.009869283619665853
29 0.009500510997794802
30 0.009157296835551353
31 0.008837111596554712
32 0.008537746288339133
33 0.008257263491329145
34 0.0079939570704486
35 0.007746318829015478
36 0.0075130107514721125
37 0.0072928417743771045
38 0.007084748248630288
39 0.006887777427871849
40 0.006701073451263742
41 0.006523865392861863
42 0.0063554570314820

In [29]:
# Run every input through the network once more and display the resulting predictions.
ypredicted = list(map(nn, xs))
ypredicted

[Value(data=0.9850107919124951, grad=0),
 Value(data=-0.9913966746941082, grad=0),
 Value(data=-0.9779805535764464, grad=0),
 Value(data=0.9801082030278561, grad=0)]