In [1]:
import sys
sys.path.append("src")

from core.populationNode import PopulationNode
from core.ops import _as_node, _broadcast_to_match, add, sub, mul, sum_pop, matvec
from core.parameter import Parameter
from models.activations import tanh
from core.optim import GD


In [2]:
# Build a two-element node from plain ints; the bare name on the last line
# makes the notebook display its repr (floats, zeroed grad, op='leaf').
x1 = PopulationNode([2,3])
x1

PopulationNode( data=[2.0, 3.0], grad=[0.0, 0.0], op='leaf')

In [3]:
# Second two-element leaf node, used as the other operand in the
# multiplication cells that follow.
x2 = PopulationNode([4,4])
x2

PopulationNode( data=[4.0, 4.0], grad=[0.0, 0.0], op='leaf')

In [4]:
# Plain Python ints used as scalar multipliers. Only w2 is used in the cells
# visible here (x2 * w2); NOTE(review): w1 appears unused - confirm or drop.
w1 = 3
w2 = 2

In [5]:
# Node times itself: the recorded result is the element-wise square of x1
# ([4.0, 9.0]) with op='*'.
l1 = x1*x1
l1

PopulationNode( data=[4.0, 9.0], grad=[0.0, 0.0], op='*')

In [6]:
# Node times a plain Python int: every element of x2 is scaled by w2
# (recorded result [8.0, 8.0]).
l2 = x2 * w2
l2

PopulationNode( data=[8.0, 8.0], grad=[0.0, 0.0], op='*')

In [7]:
# Element-wise product of two different nodes; the result is only displayed,
# not bound to a name.
x2 *x1

PopulationNode( data=[8.0, 12.0], grad=[0.0, 0.0], op='*')

In [8]:
# Apply tanh eight times in sequence to a single-element node, then
# backpropagate from the final output. debug=True prints one line per node,
# from y8 back to the leaf x; in the recorded run the printed grad shrinks at
# every step and is about 3e-05 by the time it reaches x.
x = PopulationNode([4.5])
y1 = x.tanh()
y2 = y1.tanh()
y3 = y2.tanh()
y4 = y3.tanh()
y5 = y4.tanh()
y6 = y5.tanh()
y7 = y6.tanh()
y8 = y7.tanh()
y8.backprop(debug=True)

[NODE] op=tanh, value=[0.4134608463957053], grad=[1.0] | <-- Parents=[[0.43977852653283767]]
[NODE] op=tanh, value=[0.43977852653283767], grad=[0.829050128497747] | <-- Parents=[[0.4719561929220392]]
[NODE] op=tanh, value=[0.4719561929220392], grad=[0.6687075620489031] | <-- Parents=[[0.5125841451937511]]
[NODE] op=tanh, value=[0.5125841451937511], grad=[0.519757868915453] | <-- Parents=[[0.5662285757646344]]
[NODE] op=tanh, value=[0.5662285757646344], grad=[0.3831953839732797] | <-- Parents=[[0.6419540521719098]]
[NODE] op=tanh, value=[0.6419540521719098], grad=[0.26033727257499406] | <-- Parents=[[0.7614904913621627]]
[NODE] op=tanh, value=[0.7614904913621627], grad=[0.15305097953277294] | <-- Parents=[[0.9997532108480275]]
[NODE] op=tanh, value=[0.9997532108480275], grad=[0.06430164957431488] | <-- Parents=[[4.5]]
[NODE] op=leaf, value=[4.5], grad=[3.1733982853148134e-05] | <-- Parents=[]


In [9]:
# Inputs placed symmetrically around zero, from the saturated tails (+/-6)
# to the centre (0).
xs = [-6, -3.0, -0.5, 0, .5, 3.0, 6]
x = PopulationNode(xs)
# Printed before backprop, so the displayed grad is still all zeros.
print("x: ", x)
y = x.tanh()

# Reduce to a single value with sum_pop and backpropagate from it. In the
# recorded run each tanh output receives grad 1.0, and the leaf grads are
# symmetric, peaking at 1.0 for the input 0 and nearly vanishing at +/-6.
sum_pop(y).backprop(debug=True)

x:  PopulationNode( data=[-6.0, -3.0, -0.5, 0.0, 0.5, 3.0, 6.0], grad=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], op='leaf')
[NODE] op=sum, value=[0.0], grad=[1.0] | <-- Parents=[[-0.9999877116507956, -0.9950547536867305, -0.46211715726000974, 0.0, 0.46211715726000974, 0.9950547536867305, 0.9999877116507956]]
[NODE] op=tanh, value=[-0.9999877116507956, -0.9950547536867305, -0.46211715726000974, 0.0, 0.46211715726000974, 0.9950547536867305, 0.9999877116507956], grad=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] | <-- Parents=[[-6.0, -3.0, -0.5, 0.0, 0.5, 3.0, 6.0]]
[NODE] op=leaf, value=[-6.0, -3.0, -0.5, 0.0, 0.5, 3.0, 6.0], grad=[2.4576547405286142e-05, 0.009866037165440211, 0.7864477329659274, 1.0, 0.7864477329659274, 0.009866037165440211, 2.4576547405286142e-05] | <-- Parents=[]


In [10]:
# Same tanh check, but the node is built from a bare float instead of a list.
x_scalar = PopulationNode(0.5)
y_scalar = x_scalar.tanh()
y_scalar.backprop()
# grad is iterable here, so sum() collapses it to a plain float for printing.
print("sum of unit gradient", sum(x_scalar.grad))

sum of unit gradient 0.7864477329659274


In [11]:
# Print the gradient of tanh at a handful of inputs. A fresh node is built on
# every iteration, so no gradient carries over between values. The recorded
# values are symmetric in v and equal 1.0 at v = 0.
for v in [-3, -1, 0, 1, 3]:
    x = PopulationNode(v)
    y = x.tanh()
    y.backprop()
    print(v, x.grad)

-3 [0.009866037165440211]
-1 [0.41997434161402614]
0 [1.0]
1 [0.41997434161402614]
3 [0.009866037165440211]


In [13]:


# Three-component parameter, every entry starting at 5.0, handed to the GD
# optimizer with learning rate 0.1. The recorded history (5.0 -> 4.4 with
# grad 6.0) is consistent with the update w <- w - lr * grad.
w = Parameter([5.0, 5.0, 5.0])
opt = GD([w], lr=0.1)

def pop_loss(w):
    """Return the sum of squared distances of every entry of ``w.data`` from 2.0.

    Args:
        w: Any object exposing an iterable ``data`` attribute of numbers.

    Returns:
        The accumulated squared error; ``0`` when ``w.data`` is empty.
    """
    total = 0
    for value in w.data:
        deviation = value - 2.0
        total = total + deviation ** 2
    return total

# Gradient-descent demo on pop_loss. `history` keeps one snapshot of the
# parameter values per step; `loss_history` keeps the loss measured just
# before each update, so the loss computed in the loop is no longer discarded.
history = []
loss_history = []

for _ in range(15):
    L = pop_loss(w)
    loss_history.append(L)
    # Hand-written gradient of sum((w_i - 2)^2): d/dw_i = 2 * (w_i - 2).
    for i in range(len(w.data)):
        w.grad[i] = 2 * (w.data[i] - 2.0)
    opt.step()
    # Store a copy so the snapshot is independent of w.data afterwards.
    history.append(w.data.copy())
    opt.zero_grad()

history

     


[[4.4, 4.4, 4.4],
 [3.9200000000000004, 3.9200000000000004, 3.9200000000000004],
 [3.5360000000000005, 3.5360000000000005, 3.5360000000000005],
 [3.2288000000000006, 3.2288000000000006, 3.2288000000000006],
 [2.9830400000000004, 2.9830400000000004, 2.9830400000000004],
 [2.7864320000000005, 2.7864320000000005, 2.7864320000000005],
 [2.6291456, 2.6291456, 2.6291456],
 [2.50331648, 2.50331648, 2.50331648],
 [2.402653184, 2.402653184, 2.402653184],
 [2.3221225472, 2.3221225472, 2.3221225472],
 [2.25769803776, 2.25769803776, 2.25769803776],
 [2.206158430208, 2.206158430208, 2.206158430208],
 [2.1649267441664, 2.1649267441664, 2.1649267441664],
 [2.13194139533312, 2.13194139533312, 2.13194139533312],
 [2.105553116266496, 2.105553116266496, 2.105553116266496]]

In [12]:


# Repeat the gradient-descent demo from slightly different starting values
# per component. Note that this rebinds the notebook-level names `w` and
# `opt`.
w = Parameter([5.0, 5.1, 4.9])
opt = GD([w], lr=0.1)

history_asym = []

for _ in range(15):
    # Hand-written gradient of sum((w_i - 2)^2): d/dw_i = 2 * (w_i - 2).
    for i in range(len(w.data)):
        w.grad[i] = 2 * (w.data[i] - 2.0)
    opt.step()
    # Store a copy of the current values as this step's snapshot.
    history_asym.append(w.data.copy())
    opt.zero_grad()

# In the recorded output every component moves toward 2.0 step by step.
history_asym

     


[[4.4, 4.4799999999999995, 4.32],
 [3.9200000000000004, 3.9839999999999995, 3.8560000000000003],
 [3.5360000000000005, 3.5871999999999997, 3.4848000000000003],
 [3.2288000000000006, 3.2697599999999998, 3.1878400000000005],
 [2.9830400000000004, 3.015808, 2.9502720000000004],
 [2.7864320000000005, 2.8126463999999998, 2.7602176000000003],
 [2.6291456, 2.65011712, 2.6081740800000004],
 [2.50331648, 2.520093696, 2.486539264],
 [2.402653184, 2.4160749568, 2.3892314112],
 [2.3221225472, 2.33285996544, 2.31138512896],
 [2.25769803776, 2.266287972352, 2.249108103168],
 [2.206158430208, 2.2130303778816, 2.1992864825344],
 [2.1649267441664, 2.17042430230528, 2.15942918602752],
 [2.13194139533312, 2.136339441844224, 2.127543348822016],
 [2.105553116266496, 2.109071553475379, 2.1020346790576125]]

In [23]:
import numpy as np


def _almost_equal_list(a, b, tol=1e-6):
    assert len(a) == len(b)
    for x, y in zip(a, b):
        assert abs(x - y) < tol


def test_matvec_forward_backward_diagonal():
    """Check matvec's forward values and input gradient on a diagonal matrix.

    With A = diag(2, 3) and x = [1, 1]:
        y = A @ x = [2, 3], loss = sum(y) = 5, dL/dx = A^T @ [1, 1] = [2, 3].

    Raises:
        AssertionError: If the forward values or the gradient of ``x`` differ
            from the values above.
    """
    # The off-diagonal entries must be zero: the expected values asserted
    # below (and the test's name) only hold for a diagonal matrix.
    A = [[2.0, 0.0],
         [0.0, 3.0]]

    x = PopulationNode([1.0, 1.0], requires_grad=True)
    y = matvec(A, x)          # [2, 3]
    loss = sum_pop(y)         # 5

    # Reset gradients over the graph (per the method name) before the pass.
    loss.zero_grad_graph()
    # debug=True prints one line per node visited during backprop.
    loss.backprop(debug=True)

    assert y.data == [2.0, 3.0]
    # dL/dx = A^T @ [1, 1] = [2, 3]
    _almost_equal_list(x.grad, [2.0, 3.0])


def test_matvec_forward_backward_random():
    """Compare matvec's input gradient with the closed form on seeded random data.

    For loss = sum(A @ x) the gradient with respect to x is A^T @ ones(rows).

    Raises:
        AssertionError: If any gradient component is off by 1e-6 or more.
    """
    n_rows, n_cols = 3, 2
    generator = np.random.default_rng(0)
    # Draw order is part of the fixture: the matrix first, then the vector.
    matrix = generator.normal(size=(n_rows, n_cols))
    start = generator.normal(size=(n_cols,))

    node = PopulationNode(start.tolist(), requires_grad=True)
    projected = matvec(matrix, node)
    total = sum_pop(projected)

    total.zero_grad_graph()
    total.backprop()

    # Closed-form gradient: A^T @ ones(n_rows).
    analytic = matrix.T @ np.ones(n_rows)
    measured = np.array(node.grad)
    worst_error = np.max(np.abs(measured - analytic))
    assert worst_error < 1e-6


In [25]:

# Run the diagonal matvec check; its debug backprop prints the graph below.
# NOTE(review): test_matvec_forward_backward_random is not called in the
# cells visible here - confirm it is exercised elsewhere.
test_matvec_forward_backward_diagonal()

[NODE] op=sum, value=[5.0], grad=[1.0] | <-- Parents=[[2.0, 3.0]]
[NODE] op=matvec, value=[2.0, 3.0], grad=[1.0, 1.0] | <-- Parents=[[1.0, 1.0]]
[NODE] op=leaf, value=[1.0, 1.0], grad=[2.0, 3.0] | <-- Parents=[]
