In [2]:
import torch
import math
import glob
import os.path as osp
import random
import numpy as np
import json
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

dtype = torch.float
device = torch.device("cpu")

In [4]:
x = torch.linspace(-math.pi, math.pi, 1000, device=device, dtype=dtype)
y = torch.sin(x)

In [5]:
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

In [8]:
lr = 1e-6
for t in range(1000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    loss = (y_pred-y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)
    grad_y_pred = 2.0 * (y_pred-y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()
    a -= lr*grad_a
    b -= lr*grad_b
    c -= lr*grad_c
    d -= lr*grad_d

99 26.802072525024414
199 19.511363983154297
299 17.116107940673828
399 15.102049827575684
499 13.407673835754395
599 11.982234954833984
699 10.783051490783691
799 9.774195671081543
899 8.925474166870117
999 8.211463928222656


In [14]:
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)
lr = 1e-6
for t in range(1000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    loss = (y_pred-y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss)
    loss.backward()
    with torch.no_grad():
        a -= lr*grad_a
        b -= lr*grad_b
        c -= lr*grad_c
        d -= lr*grad_d
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

99 tensor(42348012., grad_fn=<SumBackward0>)
199 tensor(1.7590e+08, grad_fn=<SumBackward0>)
299 tensor(4.0070e+08, grad_fn=<SumBackward0>)
399 tensor(7.1677e+08, grad_fn=<SumBackward0>)
499 tensor(1.1241e+09, grad_fn=<SumBackward0>)
599 tensor(1.6227e+09, grad_fn=<SumBackward0>)
699 tensor(2.2126e+09, grad_fn=<SumBackward0>)
799 tensor(2.8937e+09, grad_fn=<SumBackward0>)
899 tensor(3.6661e+09, grad_fn=<SumBackward0>)
999 tensor(4.5298e+09, grad_fn=<SumBackward0>)


In [15]:
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction="sum")
for t in  range(1000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    model.zero_grad()
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            param -= lr * param.grad
linear_layer = model[0]

99 184.34371948242188
199 153.2508544921875
299 127.59709167480469
399 106.39508819580078
499 88.86720275878906
599 74.37242889404297
699 62.38239669799805
799 52.46149826049805
899 44.250213623046875
999 37.4520378112793


In [18]:
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
for t in range(1000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
linear_layer = model[0]

99 37.31327819824219
199 37.27070617675781
299 37.23216247558594
399 37.19500732421875
499 37.15821838378906
599 37.12188720703125
699 37.08576965332031
799 37.049842834472656
899 37.01408386230469
999 36.97846221923828


In [21]:
class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
    def forward(self, x):
        return self.a+self.b*x+self.c*x**2 + self.d*x**3
model = Polynomial3()
criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
for t in range(1000):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

99 74695.265625
199 68656.8671875
299 63208.81640625
399 58249.265625
499 53711.16796875
599 49546.5625
699 45718.43359375
799 42196.30859375
899 38954.03125
999 35968.4765625


In [1]:
class BaseTransform():
    def __init__(self, resize, mean, std):
        self.base_transform = transform.Compose([
            transforms.Resize(resize),
            transforms.CenterCrop(resize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
    def __call_(self, img):
        return self.base_transform(img)