In [1]:
from autograd import Value, Tensor
import random
random.seed(42)

from nn import mse_loss

In [6]:
# Build a tiny scalar expression graph: d = (a * b) + b.
# b reaches d along two paths: through c and directly.
a = Value(3)
b = Value(4)
c = a * b
d = c + b

In [7]:
# Backpropagate from d; presumably this fills in gradients on a, b and c
# (see autograd.Value.backward — not visible from this notebook).
d.backward()

In [2]:
import matplotlib.pyplot as plt
from IPython.display import clear_output
def smooth(scalars: list[float], weight: float) -> list[float]:
    """Exponentially smooth a series of values for plotting.

    Each output is ``previous_output * weight + (1 - weight) * point``, seeded
    with the first input value.

    Args:
        scalars: Raw values in time order. May be empty.
        weight: Smoothing factor, expected to lie between 0 and 1. ``0``
            reproduces the input; values closer to 1 smooth more heavily.

    Returns:
        A new list with one smoothed value per input value. An empty input
        yields an empty list instead of raising ``IndexError``.
    """
    smoothed = []
    if len(scalars) == 0:
        # Nothing to anchor the running average on.
        return smoothed

    last = scalars[0]  # Seed the running average with the first timestep
    for point in scalars:
        last = last * weight + (1 - weight) * point
        smoothed.append(last)

    return smoothed

In [3]:
# Check Tensor autograd on c = (a @ b) * 3 by inspecting d c[0][0] / d a.
a = Tensor([[1, 2]])
b = Tensor([[1, 2], [3, 4]])
c = a @ b * 3  # `@` and `*` share precedence and associate left: (a @ b) * 3
c[0][0].backward()
a.grad_tensor()  # saved output is [[3.0, 9.0]], i.e. 3 * the first column of b

[[Value(3.0), Value(9.0)]]

In [4]:
# Same computation in PyTorch, as a reference for the custom Tensor result.
import torch
a_ = torch.tensor([[1., 2]], requires_grad=True)
b_ = torch.tensor([[1., 2], [3, 4]], requires_grad=True)
c_ = a_ @ b_ * 3
c_[0][0].backward()
print(a_.grad)  # saved output: tensor([[3., 9.]])

tensor([[3., 9.]])


In [11]:
# Compare nn.softmax with torch's softmax on the same logits.
x = Tensor([[1, 2, 0, 0]])
from nn import softmax
print(softmax(x).sum())  # prints only the sum of the outputs (Value(1.0) in the saved output)
# NOTE(review): x is rebound from a custom Tensor to a torch tensor here, and
# the torch line prints the full distribution rather than its sum, so the two
# printed values are not directly comparable.
x = torch.tensor([[1.0, 2, 0, 0]])
print(torch.nn.functional.softmax(x, dim=-1))

Value(1.0)
tensor([[0.2245, 0.6103, 0.0826, 0.0826]])


In [6]:
# NOTE(review): the trailing comma turns this expression into a 1-tuple, which
# is why the saved output is wrapped in (...,). Drop the comma if only the
# gradient tensor itself should be displayed.
c.grad_tensor(),

([[Value(1.0), Value(0.0)]],)

In [7]:
def gen_data_point():
    """Draw one XOR example uniformly at random.

    Returns:
        A two-element list ``[inputs, target]`` where ``inputs`` is a pair of
        bits and ``target`` is a one-element list holding their XOR.
    """
    # Truth table in the order (0,0), (0,1), (1,0), (1,1).
    xor_table = [
        [[bit_a, bit_b], [bit_a ^ bit_b]]
        for bit_a in (0, 1)
        for bit_b in (0, 1)
    ]
    return random.choice(xor_table)



# Sample 1000 XOR examples (with repetition) to serve as the training set.
ds = [gen_data_point() for _ in range(1000)]

In [8]:
from nn import Module, LinearLayer, LeakyReLu


class Net(Module):
    """Two LinearLayers (2, 4) and (4, 1), each followed by a LeakyReLu."""

    def __init__(self):
        super().__init__()

        # Sub-modules are created in the order the forward pass uses them.
        self.l1 = LinearLayer(2, 4)
        self.a1 = LeakyReLu()
        self.l2 = LinearLayer(4, 1)
        self.a2 = LeakyReLu()

        # Register every sub-module with the parent Module, in the same order.
        for layer in (self.l1, self.a1, self.l2, self.a2):
            self.register_module(layer)

    def forward(self, x):
        """Apply l1 -> a1 -> l2 -> a2 to x and return the result."""
        hidden = self.a1(self.l1(x))
        return self.a2(self.l2(hidden))


In [9]:
from nn import SGD

# Fresh network and optimizer; `losses` collects the values plotted by the
# training loop in the next cell.
net = Net()

optim = SGD(net, learning_rate=0.1)
losses = []
net.l2.bias  # display the second layer's bias before training

[Value(0.1919632703488401)]

In [10]:

# Train on random mini-batches of 16 examples, recording the loss and
# redrawing the loss curve every 10th step.
# NOTE(review): unlike the torch loop later in this notebook, nothing resets
# gradients between steps. Confirm that nn.SGD.step() clears them; otherwise
# they accumulate across iterations.
for i in range(1000):
    xs, ys = [], []
    for j in range(16):
        x, y = random.choice(ds)
        xs.append(x)
        ys.append(y)
    x, y = Tensor(xs), Tensor(ys)
    y_ = net(x)
    loss = mse_loss(y_, y)
    loss.backward()
    optim.step()

    # The original `if i % 10:` is truthy on 9 of every 10 steps, so the
    # (slow) redraw ran almost every iteration; compare against 0 explicitly.
    if i % 10 == 0:
        losses.append(loss.item())
        clear_output(wait=True)
        x_labels = range(len(losses))
        # Raw losses and their smoothed version on the same axes.
        plt.plot(x_labels, losses, x_labels, smooth(losses, .9))
        plt.show()

KeyboardInterrupt: 

In [None]:
# Second layer's bias again, for comparison with the value displayed before
# the training loop (meaningful only if that loop has run in this kernel).
net.l2.bias

In [None]:

# Spot-check the network on one random example: prints input, prediction, target.
x, y = random.choice(ds)
x, y = Tensor([x]), Tensor([y])  # wrap each in an outer list, i.e. a batch of one
y_ = net(x)
print(x, y_, y)

In [None]:
# Display the first layer's weights.
net.l1.weights

In [None]:
# Evaluate mse_loss on a small hand-made pair of tensors.
# NOTE: this rebinds a_ and b_ (torch tensors in an earlier cell) to custom
# Tensors, and c to the loss value.
a_ = Tensor([[1, 0, 1]])
b_ = Tensor([[3, 1, 1]])
c = mse_loss(a_, b_)
c

In [None]:
# NOTE(review): if a_ and b_ are still the Tensors built in the previous cell,
# both hold a single row of three values, so the inner dimensions of this
# matrix product do not match (1x3 @ 1x3). One operand probably needs
# transposing — confirm intent.
c_ = a_ @ b_
c_

In [None]:
# The backward call below is commented out, so this displays whatever gradient
# state b_ currently holds.
#c_[0][0].backward()
b_.grad_tensor()

In [None]:
import torch
import torch.functional

In [None]:
# PyTorch reference model mirroring the custom Net defined earlier:
# Linear -> LeakyReLU -> Linear -> LeakyReLU.
# Without the activations the two Linear layers collapse into a single affine
# map, which cannot represent XOR.
# NOTE(review): torch's default negative slope may differ from nn.LeakyReLu's
# — confirm if the two models are meant to match numerically.
net = torch.nn.Sequential(
    torch.nn.Linear(2, 4, bias=True),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(4, 1, bias=True),
    torch.nn.LeakyReLU(),
)
optim = torch.optim.SGD(net.parameters(), lr=0.01)

In [None]:
# Train the PyTorch reference model one example at a time.
for i in range(1000):
    optim.zero_grad()

    x, y = random.choice(ds)
    # ds holds Python ints, so torch.tensor would infer int64, which the
    # float32 Linear weights reject. Build float tensors explicitly.
    x = torch.tensor([x], dtype=torch.float32)
    # Wrap the target so it has the same (1, 1) shape as the network output
    # instead of relying on broadcasting inside mse_loss.
    y = torch.tensor([y], dtype=torch.float32)
    y_ = net(x)
    loss = torch.nn.functional.mse_loss(y_, y)
    loss.backward()
    optim.step()
    # Print the plain float rather than the tensor repr with its grad_fn.
    print(loss.item())

In [None]:
# NOTE(review): depends on which `c` is live in the kernel. `c` is assigned
# both in the Tensor matmul cell and in the mse_loss cell; only the former is
# shown being indexed as c[0][0] elsewhere in this notebook.
c[0][0].backward()

In [None]:
from sortedcontainers import SortedDict


class BackwardContext:
    """Runs backward callbacks level by level instead of recursing.

    Every callback receives the context as its first argument and may call
    ``register_node`` to queue further callbacks. Queued callbacks run only
    after every callback at the current depth has finished.
    """

    def __init__(self):
        self.current_depth = None
        self.execution_order = None

        self.reset()

    def reset(self):
        """Drop all pending callbacks and return to depth 0."""
        # Maps depth -> list of (callback, args), kept sorted by depth.
        self.execution_order = SortedDict()
        self.current_depth = 0

    def register_node(self, callback, *args):
        """Queue ``callback(ctx, *args)`` to run one level below the current depth.

        Args:
            callback: Callable invoked as ``callback(ctx, *args)``.
            *args: Positional arguments forwarded to the callback.
        """
        # Queue at current_depth + 1 rather than current_depth. Queuing at the
        # current depth appended to the very list execute() is iterating over
        # and left current_depth stuck at 0, so the depth ordering never took
        # effect. Callback order is unchanged (still breadth-first).
        depth = self.current_depth + 1
        self.execution_order.setdefault(depth, []).append((callback, args))

    def execute(self, first_call, *args):
        """Run ``first_call(ctx, *args)``, then drain queued callbacks by depth.

        Args:
            first_call: Root callable, invoked at depth 0.
            *args: Positional arguments forwarded to ``first_call``.
        """
        self.reset()
        first_call(self, *args)
        while len(self.execution_order):
            # Smallest pending depth first.
            depth, calls = self.execution_order.peekitem(0)
            self.current_depth = depth
            # call_args avoids shadowing this method's own *args.
            for call, call_args in calls:
                call(self, *call_args)
            self.execution_order.pop(depth)

In [None]:
class A:
    """Toy graph node with up to two children, ``v`` and ``u``."""

    def __init__(self, v=None, u=None):
        self.v, self.u = v, u

    def backward(self, ctx, x):
        """Print x at a leaf; otherwise queue each existing child on ctx.

        The ``v`` child is queued with ``x + 1`` and the ``u`` child with
        ``x + 2``, in that order.
        """
        if self.v is None and self.u is None:
            print(x)
            return

        for child, offset in ((self.v, 1), (self.u, 2)):
            if child is not None:
                ctx.register_node(child.backward, x + offset)

# Graph under test (a is the root; b and d are leaves):
#
#   a
#   | \
#   b  c
#      |
#      d
#
# Starting from x = 1, leaf b receives 2 and leaf d receives 4, so the run
# prints 2 and then 4.
a = A(A(), A(A()))
ctx = BackwardContext()
ctx.execute(a.backward, 1)