#Tensors

##numpy

In [2]:
import numpy as np
import math

x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss= np.square(y_pred - y).sum()
    if t % 100 ==99:
        print(t, loss)

    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f"Result: y = {a} + {b} x + {c} x^2 + {d} x^3")

99 428.9545967016273
199 306.11153675952147
299 219.18774792625283
399 157.67950743537722
499 114.15541593230532
599 83.35704353326886
699 61.56352509784823
799 46.14196299858067
899 35.22930052925478
999 27.507219947635104
1099 22.042862445048197
1199 18.17612242366566
1299 15.439897069446978
1399 13.503654757908382
1499 12.133503728547483
1599 11.163936317680914
1699 10.477834837355859
1799 9.992323438517236
1899 9.648756642374654
1999 9.405635025924445
Result: y = -0.02567417777548198 + 0.857105891816554 x + 0.004429222886432891 x^2 + -0.09338231404538541 x^3


##pytorch

In [6]:
import torch
import math


dtype = torch.float
device = torch.device("cpu")

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1331.86572265625
199 922.0556640625
299 639.8554077148438
399 445.3105773925781
499 311.0487060546875
599 218.29234313964844
699 154.14358520507812
799 109.7339859008789
899 78.95923614501953
999 57.61223220825195
1099 42.79090118408203
1199 32.490966796875
1299 25.326831817626953
1399 20.339479446411133
1499 16.864604949951172
1599 14.441632270812988
1699 12.750835418701172
1799 11.570089340209961
1899 10.744953155517578
1999 10.167937278747559
Result: y = -0.03603760153055191 + 0.8432844281196594 x + 0.00621708482503891 x^2 + -0.09141633659601212 x^3


#Autograd

In [7]:
import torch
import math

dtype = torch.float
device = torch.device("cpu")
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss = (y_pred - y).pow(2).sum()
    if t & 100 == 99:
        print(t, loss.item())

        loss.backward()

        with torch.no_grad():
            a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

Result: y = -2.1234354972839355 + 0.06477495282888412 x + 1.2188695669174194 x^2 + -0.8172118663787842 x^3


##Defining new autograd functions

In [10]:
import torch
import math

class LegendrePolynomial3(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 -1)

dtype = torch.float
device = torch.device("cpu")

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    P3 = LegendrePolynomial3.apply

    y_pred = a + b * P3(c + d * x)

    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    loss.backward()

    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 209.95834350585938
199 144.66018676757812
299 100.70249938964844
399 71.03519439697266
499 50.97850799560547
599 37.403133392333984
699 28.206867218017578
799 21.97318458557129
899 17.7457275390625
999 14.877889633178711
1099 12.93176555633545
1199 11.610918998718262
1299 10.71425724029541
1399 10.10548210144043
1499 9.692106246948242
1599 9.411375045776367
1699 9.220745086669922
1799 9.091285705566406
1899 9.003360748291016
1999 8.943639755249023
Result: y = -5.394172664097141e-09 + -2.208526849746704 * P3(1.367587154632588e-09 + 0.2554861009120941 x)


#nn module

In [13]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch. nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    model.zero_grad()

    loss.backward()
    
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 1318.9122314453125
199 875.4879760742188
299 582.1654663085938
399 388.1294250488281
499 259.7687683105469
599 174.85171508789062
699 118.67330169677734
799 81.50595092773438
899 56.9155387878418
999 40.64556121826172
1099 29.880138397216797
1199 22.7567195892334
1299 18.042926788330078
1399 14.923539161682129
1499 12.85916519165039
1599 11.492829322814941
1699 10.588505744934082
1799 9.989925384521484
1899 9.593668937683105
1999 9.331364631652832
Result: y = 0.0035378476604819298 + 0.834938645362854 x + -0.0006103383493609726 x^2 + -0.09022922068834305 x^3


#optim

In [15]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch. nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()
    
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 294.07843017578125
199 197.4935760498047
299 133.6110382080078
399 91.35411071777344
499 63.4030647277832
599 44.91334915161133
699 32.68270492553711
799 24.59278106689453
899 19.24228286743164
999 15.704054832458496
1099 13.36469841003418
1199 11.818257331848145
1299 10.796329498291016
1399 10.121207237243652
1499 9.675444602966309
1599 9.381251335144043
1699 9.18726921081543
1799 9.059471130371094
1899 8.975378036499023
1999 8.920125007629395
Result: y = 0.0016373625257983804 + 0.8469933867454529 x + -0.00027856716769747436 x^2 + -0.09194497019052505 x^3


#Custom nn Modules

In [21]:
import torch
import math


class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'


x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 940.2351684570312
199 625.1121826171875
299 416.62213134765625
399 278.6761474609375
499 187.4015350341797
599 127.00518035888672
699 87.0390625
799 60.590999603271484
899 43.0877685546875
999 31.50351333618164
1099 23.836124420166016
1199 18.760936737060547
1299 15.401325225830078
1399 13.177189826965332
1499 11.70461654663086
1599 10.729615211486816
1699 10.083984375
1799 9.656415939331055
1899 9.373233795166016
1999 9.185643196105957
Result: y = 0.003655795007944107 + 0.8383854627609253 x + -0.0006306851282715797 x^2 + -0.09071949124336243 x^3


#Control Flow + Weight Sharing

In [22]:
import random
import torch
import math

class DynamicNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp
        return y

    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = DynamicNet()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

1999 351.6754455566406
3999 175.56256103515625
5999 87.30636596679688
7999 47.77751922607422
9999 28.418149948120117
11999 19.36927604675293
13999 13.313549041748047
15999 11.239903450012207
17999 9.952739715576172
19999 9.142263412475586
21999 9.143969535827637
23999 8.970033645629883
25999 8.934967994689941
27999 8.570767402648926
29999 8.880867004394531
Result: y = 0.005023995880037546 + 0.8569324612617493 x + -0.0014479807578027248 x^2 + -0.09367156773805618 x^3 + 0.00013109577412251383 x^4 ? + 0.00013109577412251383 x^5 ?
