In [2]:
import numpy as np
import torch
import torch.optim as optim

In [6]:
## Data

In [3]:
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[2], [4], [6]])

In [4]:
print(x_train)
print(x_train.shape)

tensor([[1.],
        [2.],
        [3.]])
torch.Size([3, 1])


In [5]:
print(y_train)
print(y_train.shape)

tensor([[2.],
        [4.],
        [6.]])
torch.Size([3, 1])


## Torch is NCHW

N: batch size
C: channel
H: height
W: width

## Weight Initialization

In [11]:
# Is True if gradients need to b e computed for this Tensor, False otherwise
W = torch.zeros(1, requires_grad = True)
print(W)

tensor([0.], requires_grad=True)


In [13]:
b = torch.zeros(1, requires_grad = True)
b

tensor([0.], requires_grad=True)

## Hypothesis
$$ H(x) = Wx + b $$

In [14]:
hypothesis = x_train * W - b; hypothesis

tensor([[0.],
        [0.],
        [0.]], grad_fn=<SubBackward0>)

## Cost
$$ cost(W, b) = \frac{1}{m} \sum^m_{i=1} \left( H(x^{(i)}) - y^{(i)} \right)^2 $$

In [16]:
y_train

tensor([[2.],
        [4.],
        [6.]])

In [17]:
hypothesis - y_train

tensor([[-2.],
        [-4.],
        [-6.]], grad_fn=<SubBackward0>)

In [18]:
(hypothesis - y_train) ** 2

tensor([[ 4.],
        [16.],
        [36.]], grad_fn=<PowBackward0>)

In [27]:
cost = torch.mean((hypothesis - y_train) ** 2)

## Gradient Descent

In [22]:
optimizer = optim.SGD([W, b], lr = 0.01); optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [28]:
# Clears the gradients of all optimized torch.Tensor s.
optimizer.zero_grad()

In [29]:
# gradient 계산
cost.backward()

In [30]:
# closure (callable) – A closure that reevaluates the model and returns
# the loss. Optional for most optimizers.
optimizer.step()

In [34]:
print(W, b)

tensor([0.1867], requires_grad=True) tensor([-0.0800], requires_grad=True)


In [35]:
hypothesis

tensor([[0.],
        [0.],
        [0.]], grad_fn=<SubBackward0>)

In [36]:
nb_epochs = 1000
for epoch in range(1, nb_epochs + 1):
    hypothesis  = x_train * W + b
    cost  = torch.mean((hypothesis - y_train) ** 2)
    optimizer.zero_grad() # 이걸로 gradient를 0으로 초기화
    cost.backward() # gradient 계산
    optimizer.step() # 경사하강
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs, W.item(), b.item(), cost.item()
        ))

Epoch  100/1000 W: 1.792, b: 0.473 Cost: 0.032209
Epoch  200/1000 W: 1.837, b: 0.372 Cost: 0.019903
Epoch  300/1000 W: 1.872, b: 0.292 Cost: 0.012299
Epoch  400/1000 W: 1.899, b: 0.230 Cost: 0.007600
Epoch  500/1000 W: 1.921, b: 0.180 Cost: 0.004696
Epoch  600/1000 W: 1.938, b: 0.142 Cost: 0.002902
Epoch  700/1000 W: 1.951, b: 0.112 Cost: 0.001793
Epoch  800/1000 W: 1.961, b: 0.088 Cost: 0.001108
Epoch  900/1000 W: 1.970, b: 0.069 Cost: 0.000685
Epoch 1000/1000 W: 1.976, b: 0.054 Cost: 0.000423


## high-level

In [37]:
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

In [43]:
import torch.nn as nn
import torch.nn.functional as F

In [40]:
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)
        
    def forward(self, x):
        return self.linear(x)

In [41]:
model = LinearRegressionModel()

## Hypothesis

In [42]:
hypothesis = model(x_train); hypothesis

tensor([[-0.3106],
        [-0.5781],
        [-0.8456]], grad_fn=<AddmmBackward>)

## Cost

In [45]:
cost = F.mse_loss(hypothesis, y_train); cost

tensor(7.7175, grad_fn=<MseLossBackward>)

## Gradient Descent

In [47]:
optimizer = optim.SGD(model.parameters(), lr = 0.01)
optimizer.zero_grad()
cost.backward()
optimizer.step()

## Training with Full Code

In [69]:
# 이렇게 하는 건 별로인듯
# 데이터
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])
# 모델 초기화
model = LinearRegressionModel()
# optimizer 설정
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    
    # H(x) 계산
    prediction = model(x_train)
    
    # cost 계산
    cost = F.mse_loss(prediction, y_train)
    
    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 100번마다 로그 출력
    if epoch % 100 == 0:
        params = list(model.parameters())
        W = params[0].item()
        b = params[1].item()
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs, W, b, cost.item()
        ))


Epoch    0/1000 W: -0.662, b: 1.010 Cost: 9.028584
Epoch  100/1000 W: 0.495, b: 1.147 Cost: 0.189596
Epoch  200/1000 W: 0.603, b: 0.902 Cost: 0.117159
Epoch  300/1000 W: 0.688, b: 0.709 Cost: 0.072397
Epoch  400/1000 W: 0.755, b: 0.557 Cost: 0.044737
Epoch  500/1000 W: 0.807, b: 0.438 Cost: 0.027645
Epoch  600/1000 W: 0.849, b: 0.344 Cost: 0.017083
Epoch  700/1000 W: 0.881, b: 0.271 Cost: 0.010556
Epoch  800/1000 W: 0.906, b: 0.213 Cost: 0.006523
Epoch  900/1000 W: 0.926, b: 0.167 Cost: 0.004031
Epoch 1000/1000 W: 0.942, b: 0.131 Cost: 0.002491
