https://tutorials.pytorch.kr/beginner/pytorch_with_examples.html

In [None]:
import numpy as np
import math

# Training data: 2000 evenly spaced points on [-pi, pi] and the sine of each one
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x are identical on every iteration, so evaluate them once
x_squared = x ** 2
x_cubed = x ** 3

# Start from four random coefficients, drawn in the order a, b, c, d
a, b, c, d = (np.random.randn() for _ in range(4))

learning_rate = 1e-6
for t in range(2000):
  # Forward pass: y_pred = a + b x + c x^2 + d x^3
  y_pred = a + b * x + c * x_squared + d * x_cubed

  # Sum-of-squares loss, reported on every 100th iteration
  residual = y_pred - y
  loss = np.square(residual).sum()
  if (t + 1) % 100 == 0:
    print(t, loss)

  # Backward pass: apply the chain rule through y_pred to get the gradient
  # of the loss with respect to each coefficient
  grad_y_pred = 2.0 * residual
  grad_a = grad_y_pred.sum()
  grad_b = (grad_y_pred * x).sum()
  grad_c = (grad_y_pred * x_squared).sum()
  grad_d = (grad_y_pred * x_cubed).sum()

  # Gradient-descent step on all four coefficients
  a, b, c, d = (
    a - learning_rate * grad_a,
    b - learning_rate * grad_b,
    c - learning_rate * grad_c,
    d - learning_rate * grad_d,
  )

print(f'Result : y = {a} + {b} x + {c} x^2 + {d} x^3')


99 2925.306170856854
199 1998.8177004175175
299 1368.2065699536292
399 938.4974623665523
499 645.3527262284896
599 445.1421735434525
699 308.24618225999734
799 214.53447807516284
899 150.31074162200264
999 106.24571075189287
1099 75.9775249230035
1199 55.16290732374333
1299 40.83327382993528
1399 30.957294221960858
1499 24.143380418804668
1599 19.43711487691226
1699 16.18316604241915
1799 13.931054447857244
1899 12.370770304251756
1999 11.28873562040335
Result : y = 0.04560869543490431 + 0.8326347986221017 x + -0.007868258894488906 x^2 + -0.08990151413462892 x^3


In [None]:
import torch
import math

dtype = torch.float
device = torch.device("cpu")
# Use torch.device("cuda:0") here instead to run on a GPU

# Inputs and targets are created without requires_grad (it defaults to False),
# so autograd does not track gradients for them
x = torch.linspace(-math.pi, math.pi, 2000, device = device, dtype = dtype)
y = torch.sin(x)

# The four coefficients are the only tensors autograd has to track;
# they are drawn in the order a, b, c, d
weight_options = dict(device = device, dtype = dtype, requires_grad = True)
a = torch.randn((), **weight_options)
b = torch.randn((), **weight_options)
c = torch.randn((), **weight_options)
d = torch.randn((), **weight_options)

learning_rate = 1e-6
for t in range(2000):
  # Forward pass: cubic polynomial in x
  y_pred = a + b * x + c * x ** 2 + d * x ** 3

  # Sum-of-squares loss, reported on every 100th iteration
  loss = torch.sum((y_pred - y) ** 2)
  if (t + 1) % 100 == 0:
    print(t, loss.item())

  # Autograd fills in .grad on a, b, c and d
  loss.backward()

  # The update itself must not be recorded in the autograd graph
  with torch.no_grad():
    for weight in (a, b, c, d):
      weight.sub_(learning_rate * weight.grad)
      # Drop the gradient so the next backward() starts fresh instead of accumulating
      weight.grad = None

print(f'Result : y = {a.item()}  + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')


99 4224.29296875
199 2796.306640625
299 1852.0491943359375
399 1227.6573486328125
499 814.7772827148438
599 541.7596435546875
699 361.2262268066406
799 241.84835815429688
899 162.9095001220703
999 110.71099853515625
1099 76.19458770751953
1199 53.37056350708008
1299 38.27825927734375
1399 28.298442840576172
1499 21.699230194091797
1599 17.335481643676758
1699 14.44990062713623
1799 12.541831970214844
1899 11.280084609985352
1999 10.44577407836914
Result : y = 0.00036133499816060066  + 0.8175127506256104 x + -6.233552994672209e-05 x^2 + -0.08775053173303604 x^3


autograd Function

In [4]:
import torch
import numpy
import math


class LegendrePolyomial3(torch.autograd.Function):
  '''
  Custom autograd Function for P3(x) = 0.5 * (5 x^3 - 3 x).

  Subclasses torch.autograd.Function and supplies both the forward
  computation and the matching backward (gradient) computation as
  static methods.
  '''

  @staticmethod
  def forward(ctx, input):
    # Stash the input on ctx; backward needs it to evaluate the derivative
    ctx.save_for_backward(input)
    cubic_term = 5 * input ** 3
    linear_term = 3 * input
    return 0.5 * (cubic_term - linear_term)

  @staticmethod
  def backward(ctx, grad_output):
    # Chain rule: incoming gradient times P3'(x) = 1.5 * (5 x^2 - 1)
    (saved_input,) = ctx.saved_tensors
    scaled_grad = grad_output * 1.5
    return scaled_grad * (5 * saved_input ** 2 - 1)


dtype = torch.float
device = torch.device('cpu')
# Use torch.device("cuda:0") here instead to run on a GPU

x = torch.linspace(-math.pi, math.pi, 2000, device = device, dtype=dtype)
y = torch.sin(x)

# Model: y = a + b * P3(c + d * x).
# The four weights start from fixed values rather than random draws.
weight_options = dict(device = device, dtype = dtype, requires_grad = True)
a = torch.full((), 0.0, **weight_options)
b = torch.full((), -1.0, **weight_options)
c = torch.full((), 0.0, **weight_options)
d = torch.full((), 0.3, **weight_options)

# A custom Function is invoked through its .apply method; the alias is the
# same on every iteration, so bind it once before the loop
P3 = LegendrePolyomial3.apply

learning_rate = 5e-6
for t in range(2000):
  # Forward pass through the custom autograd Function
  y_pred = a + b * P3(c + d * x)

  # Sum-of-squares loss, reported on every 100th iteration
  loss = torch.sum((y_pred - y) ** 2)
  if (t + 1) % 100 == 0:
    print(t, loss.item())

  # Autograd calls LegendrePolyomial3.backward as part of this
  loss.backward()

  # Gradient-descent step, kept out of the autograd graph
  with torch.no_grad():
    for weight in (a, b, c, d):
      weight.sub_(learning_rate * weight.grad)

  # Clear the gradients so the next backward() does not accumulate onto them
  for weight in (a, b, c, d):
    weight.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 209.95834350585938
199 144.66018676757812
299 100.70249938964844
399 71.03519439697266
499 50.97850799560547
599 37.403133392333984
699 28.206867218017578
799 21.97318458557129
899 17.7457275390625
999 14.877889633178711
1099 12.93176555633545
1199 11.610918998718262
1299 10.71425724029541
1399 10.10548210144043
1499 9.692106246948242
1599 9.411375045776367
1699 9.220745086669922
1799 9.091285705566406
1899 9.003360748291016
1999 8.943639755249023
Result: y = -5.394172664097141e-09 + -2.208526849746704 * P3(1.367587154632588e-09 + 0.2554861009120941 x)


nn module

In [7]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# The prediction is a linear function of the features (x, x^2, x^3), so a
# single linear layer is enough. Build those features by raising a column
# of x to the exponents 1, 2 and 3.
p = torch.tensor([1, 2, 3])
xx = torch.pow(x.unsqueeze(-1), p)

# x.unsqueeze(-1) is (2000, 1) and p is (3,), so broadcasting gives xx the shape (2000, 3)

# Describe the model as a sequence of layers from the nn package.
# nn.Sequential holds other modules and applies them one after another.
layers = [
    torch.nn.Linear(3, 1),
    # Flatten the linear layer's output to 1D so it matches the shape of y
    torch.nn.Flatten(0, 1),
]
model = torch.nn.Sequential(*layers)

loss_fn = torch.nn.MSELoss(reduction = 'sum')

learning_rate = 1e-6
for t in range(2000):
  # Forward pass
  y_pred = model(xx)

  # Summed squared error, reported on every 100th iteration
  loss = loss_fn(y_pred, y)
  if (t + 1) % 100 == 0:
    print(t, loss.item())

  # Clear the previous gradients before computing new ones
  model.zero_grad()

  # Compute the gradient of the loss for every model parameter
  loss.backward()

  # Manual gradient-descent step, kept out of the autograd graph
  with torch.no_grad():
    for param in model.parameters():
      param.sub_(learning_rate * param.grad)

linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')


99 856.858642578125
199 576.7860717773438
299 389.48284912109375
399 264.13873291015625
499 180.2006072998047
599 123.95048522949219
699 86.22711181640625
799 60.908931732177734
899 43.90278244018555
999 32.47048568725586
1099 24.778440475463867
1199 19.598384857177734
1299 16.10674476623535
1399 13.751002311706543
1499 12.160022735595703
1599 11.08452033996582
1699 10.35669994354248
1799 9.863639831542969
1899 9.529302597045898
1999 9.30233097076416
Result: y = -0.015610501170158386 + 0.8408371210098267 x + 0.002693071262910962 x^2 + -0.09106822311878204 x^3


optim

In [9]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Feature matrix whose columns are x, x^2 and x^3
p = torch.tensor([1, 2, 3])
xx = torch.pow(x.unsqueeze(-1), p)

layers = [
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1),
]
model = torch.nn.Sequential(*layers)
loss_fn = torch.nn.MSELoss(reduction='sum')

# The optimizer's first argument is the collection of tensors it should update
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)

for t in range(2000):
  # Clear gradients left over from the previous iteration
  optimizer.zero_grad()

  # Forward pass
  y_pred = model(xx)

  # Summed squared error, reported on every 100th iteration
  loss = loss_fn(y_pred, y)
  if (t + 1) % 100 == 0:
    print(t, loss.item())

  # Backward pass, then let the optimizer update the parameters
  loss.backward()
  optimizer.step()

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 5490.78857421875
199 1112.47705078125
299 178.72592163085938
399 78.5672378540039
499 67.14202880859375
599 56.2918586730957
699 43.62319564819336
799 30.328166961669922
899 19.13492202758789
999 12.201615333557129
1099 9.418639183044434
1199 8.854554176330566
1299 8.817550659179688
1399 8.817341804504395
1499 8.839642524719238
1599 8.91847038269043
1699 8.887210845947266
1799 8.873652458190918
1899 8.9132661819458
1999 8.942415237426758
Result: y = -0.0005039672832936049 + 0.8561848402023315 x + -0.0005039726966060698 x^2 + -0.09388662129640579 x^3


Custom nn.Module

In [11]:
import torch
import math

class Polynomial3(torch.nn.Module):
  '''
  Cubic polynomial y = a + b x + c x^2 + d x^3 with four learnable scalars.
  '''

  def __init__(self):
    super().__init__()
    # Register a, b, c, d (in that order) as scalar parameters drawn from
    # a standard normal distribution
    for coefficient in ('a', 'b', 'c', 'd'):
      setattr(self, coefficient, torch.nn.Parameter(torch.randn(())))

  def forward(self, x):
    '''Evaluate the polynomial element-wise on the tensor x.'''
    constant = self.a
    linear = self.b * x
    quadratic = self.c * x ** 2
    cubic = self.d * x ** 3
    return constant + linear + quadratic + cubic

  def string(self):
    '''Return the polynomial, with its current coefficient values, as text.'''
    return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'


# Training data: 2000 evenly spaced points on [-pi, pi] and the sine of each one
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()

# Summed squared error, minimised with plain SGD over the model's parameters
criterion = torch.nn.MSELoss(reduction = 'sum')
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-6)
for t in range(2000):
  # Clear gradients left over from the previous iteration
  optimizer.zero_grad()

  # Forward pass and loss, reported on every 100th iteration
  y_pred = model(x)
  loss = criterion(y_pred, y)
  if (t + 1) % 100 == 0:
    print(t, loss.item())

  # Backward pass, then the parameter update
  loss.backward()
  optimizer.step()

print(f'Result: {model.string()}')

99 97.65093231201172
199 68.128173828125
299 48.440940856933594
399 35.30442428588867
499 26.534313201904297
599 20.675945281982422
699 16.760162353515625
799 14.141233444213867
899 12.388460159301758
999 11.214608192443848
1099 10.427906036376953
1199 9.900266647338867
1299 9.546106338500977
1399 9.308185577392578
1499 9.14823055267334
1599 9.040600776672363
1699 8.968099594116211
1799 8.919228553771973
1899 8.88625431060791
1999 8.863980293273926
Result: y = -0.0043967105448246 + 0.8514556288719177 x + 0.0007585037383250892 x^2 + -0.09257861226797104 x^3


Control Flow + Weight Sharing

In [14]:
import torch
import math
import random

class DynamicNet(torch.nn.Module):
  '''
  Cubic polynomial plus a randomly chosen number of higher-order terms.

  Each forward pass adds no extra term, an x^4 term, or both x^4 and x^5
  terms. Every extra term is weighted by the same parameter e.
  '''

  def __init__(self):
    super().__init__()
    # Register a, b, c, d, e (in that order) as scalar parameters drawn
    # from a standard normal distribution
    for coefficient in ('a', 'b', 'c', 'd', 'e'):
      setattr(self, coefficient, torch.nn.Parameter(torch.randn(())))

  def forward(self, x):
    '''Evaluate the model on x, drawing a fresh random term count each call.'''
    y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
    # randint(4, 6) returns 4, 5 or 6, and the bound is exclusive, so the
    # exponents used are none, 4, or 4 and 5
    exclusive_bound = random.randint(4, 6)
    exp = 4
    while exp < exclusive_bound:
      # The same parameter e is reused for every extra term
      y = y + self.e * x ** exp
      exp += 1
    return y

  def string(self):
    '''Return the model as text; the optional terms are marked with "?".'''
    return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'



# Training data: 2000 evenly spaced points on [-pi, pi] and the sine of each one
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)


model = DynamicNet()

# Summed squared error, minimised with SGD.
# Momentum is enabled because this model is hard to train with plain SGD.
criterion = torch.nn.MSELoss(reduction = 'sum')
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-8, momentum = 0.9)

for t in range(30000):
  # Clear gradients left over from the previous iteration
  optimizer.zero_grad()

  # Forward pass and loss, reported on every 2000th iteration
  y_pred = model(x)
  loss = criterion(y_pred, y)
  if (t + 1) % 2000 == 0:
    print(t, loss.item())

  # Backward pass, then the parameter update
  loss.backward()
  optimizer.step()

print(f"Result: {model.string()}")


1999 1668.876953125
3999 743.2471313476562
5999 337.8854675292969
7999 158.88058471679688
9999 77.25334167480469
11999 39.827362060546875
13999 573.9304809570312
15999 15.176431655883789
17999 11.811789512634277
19999 10.145736694335938
21999 9.276951789855957
23999 8.9099760055542
25999 8.955068588256836
27999 8.888725280761719
29999 8.633642196655273
Result: y = 0.005159955006092787 + 0.8541369438171387 x + -0.0014319702750071883 x^2 + -0.09319084882736206 x^3 + 0.00011997557885479182 x^4 ? + 0.00011997557885479182 x^5 ?
