# lab03 Deeper Look at GD

In [7]:
import torch

## Hypothesis function
$$
H(x) = \mathbf{W} x + b
$$
```py
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
hypothesis = x_train * W + b
```

## Simpler Hypothesis function
$$
H(x) = \mathbf{W} x
$$
```py
W = torch.zeros(1, requires_grad=True)
# b = torch.zeros(1, requires_grad=True)
hypothesis = x_train * W
```


In [8]:
# Single learnable weight, initialised to zero and tracked by autograd.
# The simplified hypothesis H(x) = W * x has no bias term, so no `b` is created.
W = torch.zeros(1).requires_grad_()

## Dummy Data

In [9]:
# Three (x, y) samples lying on the line y = x, stored as float32 column vectors.
samples = [[1], [2], [3]]
x_train = torch.tensor(samples, dtype=torch.float32)
y_train = torch.tensor(samples, dtype=torch.float32)

## Cost function
Mean Squared Error (MSE)
$$
cost(\mathbf{W}, b) = \frac{1}{m} \sum_{i=1}^{m} (H(x^{(i)}) - y^{(i)})^2
$$

## Gradient Descent
$$
\frac{\partial cost}{\partial W} = \nabla W
$$

$$
cost(\mathbf{W}, b) = \frac{1}{m} \sum_{i=1}^{m} (H(x^{(i)}) - y^{(i)})^2
$$

$$
\nabla W = \frac{\partial cost}{\partial W} = \frac{2}{m} \sum_{i=1}^{m} (W x^{(i)} - y^{(i)})x^{(i)}
$$

$$
W := W - \alpha \nabla W
$$

In [11]:
# One manual gradient-descent step: W := W - lr * dcost/dW,
# where dcost/dW = (2/m) * sum((W * x - y) * x) for the MSE cost.
lr = 0.1
residual = W * x_train - y_train
gradient = 2 * torch.mean(residual * x_train)
W = W - lr * gradient

## Full code

In [13]:
# Training data: three samples on the line y = x, so the optimal weight is W = 1
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

# Model parameter (no bias term). requires_grad is not set because the
# gradient is computed by hand inside the loop.
W = torch.zeros(1)

# Learning rate. With the mean-based gradient below, 0.15 yields the same
# update size as the previous sum-based gradient with 0.1 (0.15 * 2/3 = 0.1).
lr = 0.15

nb_epochs = 10
for epoch in range(nb_epochs+1):
    # Hypothesis H(x) = W * x
    hypothesis = x_train * W

    # MSE cost and its analytic gradient:
    #   cost     = (1/m) * sum((W*x - y)^2)
    #   dcost/dW = (2/m) * sum((W*x - y) * x)
    cost = torch.mean((hypothesis - y_train) ** 2)
    gradient = 2 * torch.mean((hypothesis - y_train) * x_train)

    print(f'Epoch {epoch:4d}/{nb_epochs} W: {W.item():.3f}, Cost: {cost.item():.6f}')

    # Gradient-descent update: W := W - lr * gradient
    W = W - lr * gradient

Epoch    0/10 W: 0.000, Cost: 4.666667
Epoch    1/10 W: 1.400, Cost: 0.746666
Epoch    2/10 W: 0.840, Cost: 0.119467
Epoch    3/10 W: 1.064, Cost: 0.019115
Epoch    4/10 W: 0.974, Cost: 0.003058
Epoch    5/10 W: 1.010, Cost: 0.000489
Epoch    6/10 W: 0.996, Cost: 0.000078
Epoch    7/10 W: 1.002, Cost: 0.000013
Epoch    8/10 W: 0.999, Cost: 0.000002
Epoch    9/10 W: 1.000, Cost: 0.000000
Epoch   10/10 W: 1.000, Cost: 0.000000


## Full code with torch.optim

In [15]:
# Training data: three samples on the line y = x, so the optimal weight is W = 1
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

# Model parameter (no bias term); requires_grad=True lets autograd
# populate W.grad when cost.backward() is called.
W = torch.zeros(1, requires_grad=True)

# Optimizer: plain SGD over the single parameter W
optimizer = torch.optim.SGD([W], lr=0.15)

nb_epochs = 10
for epoch in range(nb_epochs+1):
    # Hypothesis H(x) = W * x
    hypothesis = x_train * W

    # MSE cost; its gradient is obtained by autograd, so no manual
    # gradient expression is needed here.
    cost = torch.mean((hypothesis - y_train) ** 2)

    print(f'Epoch {epoch:4d}/{nb_epochs} W: {W.item():.3f}, Cost: {cost.item():.6f}')

    # Update step: clear stale gradients, backpropagate the cost, apply SGD
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

Epoch    0/10 W: 0.000, Cost: 4.666667
Epoch    1/10 W: 1.400, Cost: 0.746667
Epoch    2/10 W: 0.840, Cost: 0.119467
Epoch    3/10 W: 1.064, Cost: 0.019115
Epoch    4/10 W: 0.974, Cost: 0.003058
Epoch    5/10 W: 1.010, Cost: 0.000489
Epoch    6/10 W: 0.996, Cost: 0.000078
Epoch    7/10 W: 1.002, Cost: 0.000013
Epoch    8/10 W: 0.999, Cost: 0.000002
Epoch    9/10 W: 1.000, Cost: 0.000000
Epoch   10/10 W: 1.000, Cost: 0.000000
