## Tensors


### Warm-up:numpy

In [1]:
import numpy as np
import math

# Create random input and output data
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# Randomly initialize weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
for t in range(2000):
    # forward pass: compute predicted y
    # y= a + b x + c x^2 + d x^3
    y_pred = a + b*x + c*x**2 + d*x**3

    # compute and print loss
    loss = np.square(y_pred - y).sum() #제곱합
    if t % 100 == 99:
        print(t, loss)
    
    # Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2 * (y_pred - y) 
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x**2).sum()
    grad_d = (grad_y_pred * x**3).sum()

    # Update weights
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result y = {a} + {b} x + {c} x^2 + {d} x^3')

99 292.30575244863275
199 197.51938317673148
299 134.4778416008033
399 92.53396385975952
499 64.61624301054233
599 46.02662318818138
699 33.642931220611175
799 25.38960230866069
899 19.886365435793778
999 16.214994050184977
1099 13.7644029652506
1199 12.127746693546108
1299 11.034041716703364
1399 10.302715354832628
1499 9.813383949178863
1599 9.485750326102597
1699 9.266227238328907
1799 9.119032845040127
1899 9.02026065539507
1999 8.953928597941486
Result y = 0.006491503952927997 + 0.8470612893989192 x + -0.0011198924509721138 x^2 + -0.09195355682064425 x^3


### PyTorch: Tensors

In [2]:
import torch
import math


dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Randomly initialize weights
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 6605.0751953125
199 4411.7978515625
299 2949.44775390625
399 1973.9442138671875
499 1322.8597412109375
599 888.06103515625
699 597.529541015625
799 403.2779235839844
899 273.3160705566406
999 186.308349609375
1099 128.01702880859375
1199 88.93572235107422
1299 62.71382522583008
1399 45.1061897277832
1499 33.273170471191406
1599 25.31415557861328
1699 19.956092834472656
1799 16.345714569091797
1899 13.910737991333008
1999 12.266877174377441
Result: y = -0.037339773029088974 + 0.8110913038253784 x + 0.006441730540245771 x^2 + -0.08683713525533676 x^3


## Autograd

### PyTorch: Tensors and autograd

In [5]:
import torch
import math


dtype = torch.float
device = torch.device("cpu")

# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Randomly initialize weights
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss
    loss.backward()

    # Update weights using gradient descent
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad
    
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 tensor(3758.8455, grad_fn=<SumBackward0>)
199 tensor(2497.3796, grad_fn=<SumBackward0>)
299 tensor(1660.6471, grad_fn=<SumBackward0>)
399 tensor(1105.5250, grad_fn=<SumBackward0>)
499 tensor(737.1539, grad_fn=<SumBackward0>)
599 tensor(492.6509, grad_fn=<SumBackward0>)
699 tensor(330.3240, grad_fn=<SumBackward0>)
799 tensor(222.5258, grad_fn=<SumBackward0>)
899 tensor(150.9190, grad_fn=<SumBackward0>)
999 tensor(103.3390, grad_fn=<SumBackward0>)
1099 tensor(71.7139, grad_fn=<SumBackward0>)
1199 tensor(50.6865, grad_fn=<SumBackward0>)
1299 tensor(36.7007, grad_fn=<SumBackward0>)
1399 tensor(27.3948, grad_fn=<SumBackward0>)
1499 tensor(21.2004, grad_fn=<SumBackward0>)
1599 tensor(17.0755, grad_fn=<SumBackward0>)
1699 tensor(14.3275, grad_fn=<SumBackward0>)
1799 tensor(12.4959, grad_fn=<SumBackward0>)
1899 tensor(11.2745, grad_fn=<SumBackward0>)
1999 tensor(10.4596, grad_fn=<SumBackward0>)
Result: y = -0.017310447990894318 + 0.820694625377655 x + 0.002986340783536434 x^2 + -0.088203124

### PyTorch: Defining new autograd functions



In [7]:
import torch
import math

class LegendrePolynomial3(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)
    
    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)

In [9]:
dtype = torch.float
device = torch.device("cpu")

# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Randomly initialize weights
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    # To apply our function, we use Function.apply method. We alias this as 'P3'.
    P3 = LegendrePolynomial3.apply

    # Forward pass: compute predicted y
    y_pred = a + b * P3(c + d * x)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss
    loss.backward()

    # Update weights using gradient descent
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad
    
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 tensor(797.4432, grad_fn=<SumBackward0>)
199 tensor(648.5474, grad_fn=<SumBackward0>)
299 tensor(528.5093, grad_fn=<SumBackward0>)
399 tensor(420.1289, grad_fn=<SumBackward0>)
499 tensor(332.4771, grad_fn=<SumBackward0>)
599 tensor(283.4982, grad_fn=<SumBackward0>)
699 tensor(253.5729, grad_fn=<SumBackward0>)
799 tensor(231.1326, grad_fn=<SumBackward0>)
899 tensor(212.5706, grad_fn=<SumBackward0>)
999 tensor(196.3892, grad_fn=<SumBackward0>)
1099 tensor(181.8755, grad_fn=<SumBackward0>)
1199 tensor(168.6630, grad_fn=<SumBackward0>)
1299 tensor(156.5443, grad_fn=<SumBackward0>)
1399 tensor(145.3870, grad_fn=<SumBackward0>)
1499 tensor(135.0951, grad_fn=<SumBackward0>)
1599 tensor(125.5920, grad_fn=<SumBackward0>)
1699 tensor(116.8127, grad_fn=<SumBackward0>)
1799 tensor(108.6998, grad_fn=<SumBackward0>)
1899 tensor(101.2012, grad_fn=<SumBackward0>)
1999 tensor(94.2693, grad_fn=<SumBackward0>)
Result: y = 0.00034738899557851255 + -1.5705084800720215 * P3(4.448800154932542e-06 + 0.2500

## nn.Module

### PyTorch: nn

In [10]:
# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p) # 2000 x 3

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1) # torch.nn.Flatten(start_dim=0, end_dim=1) -> 1D tensor로 바뀜

loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-6

for t in range(2000):

    # Forward pass: compute predicted y
    y_pred = model(xx)

    # Compute and print loss
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backprop to compute gradients of a, b, c, d with respect to loss
    loss.backward()

    # Update weights using gradient descent
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]

# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 441.8584289550781
199 302.73828125
299 208.5313720703125
399 144.67039489746094
499 101.33369445800781
599 71.8927993774414
699 51.869876861572266
799 38.23670196533203
899 28.943798065185547
999 22.60218048095703
1099 18.26947593688965
1199 15.305959701538086
1299 13.276647567749023
1399 11.8854398727417
1499 10.930571556091309
1599 10.274443626403809
1699 9.8231201171875
1799 9.512300491333008
1899 9.298022270202637
1999 9.150129318237305
Result: y = -0.01600782759487629 + 0.8468155860900879 x + 0.0027616159059107304 x^2 + -0.09191860258579254 x^3


### PyTorch: optim

In [36]:
# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p) # 2000 x 3

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1) # torch.nn.Flatten(start_dim=0, end_dim=1) -> 1D tensor로 바뀜
)

loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-6
optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)
for t in range(2000):

    # Forward pass: compute predicted y
    y_pred = model(xx)

    # Compute and print loss
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]

# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 2723.814697265625
199 2716.7783203125
299 2710.35888671875
399 2704.13427734375
499 2697.9677734375
599 2691.850341796875
699 2685.740966796875
799 2679.64013671875
899 2673.547119140625
999 2667.46728515625
1099 2661.4072265625
1199 2655.35546875
1299 2649.31201171875
1399 2643.277099609375
1499 2637.25
1599 2631.23095703125
1699 2625.220458984375
1799 2619.21826171875
1899 2613.22412109375
1999 2607.23828125
Result: y = -0.08636999875307083 + 0.29382213950157166 x + -0.07492367178201675 x^2 + 0.07046401500701904 x^3


### PyTorch: Custom nn Modules

In [15]:
import torch
import math

class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
    
    def forward(self, x):
        return self.a + self.b * x + self.c * x ** 2 + self.d * x **3
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'

In [35]:
# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-8, momentum=0.9)
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = model(x)

    # Compute and print loss
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    

# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: {model.string()}')

99 8069.37060546875
199 7598.2109375
299 7265.4375
399 6966.8251953125
499 6683.89990234375
599 6413.07080078125
699 6153.33056640625
799 5904.1435546875
899 5665.064453125
999 5435.68359375
1099 5215.60400390625
1199 5004.44970703125
1299 4801.85791015625
1399 4607.4833984375
1499 4420.99072265625
1599 4242.060546875
1699 4070.38671875
1799 3905.6748046875
1899 3747.642822265625
1999 3596.0185546875
Result: y = 0.034619301557540894 + -0.987536609172821 x + -0.005972400773316622 x^2 + 0.1690019965171814 x^3


### PyTorch: Control Flow + Weight Sharing

In [22]:
import torch
import math
import random

class DynamicNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(())) #weight sharing
    
    def forward(self, x):
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp
        return y
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'

In [33]:
# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = DynamicNet()
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-8, momentum=0.9)
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = model(x)

    # Compute and print loss
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    

# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: {model.string()}')


99 5301.6708984375
199 3124.14501953125
299 1017.3937377929688
399 683.411865234375
499 960.5345458984375
599 491.486083984375
699 452.1693420410156
799 443.3489685058594
899 463.5733337402344
999 411.8963317871094
1099 413.3352355957031
1199 386.67694091796875
1299 362.6190185546875
1399 348.4512023925781
1499 348.0978698730469
1599 327.8086242675781
1699 331.91448974609375
1799 305.2432556152344
1899 304.955322265625
1999 294.56170654296875
Result: y = -0.5461016893386841 + 0.7269793748855591 x + 0.09858264774084091 x^2 + -0.0732816532254219 x^3 + -0.008473627269268036 x^4 ? + -0.008473627269268036 x^5 ?
