# **Import**

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Fix the global RNG seed so randomly initialised parameters are the same on every run
SEED = 1
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fd69d789850>

# **Data**

In [2]:
# Toy dataset for y = x: three samples with one feature each.
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor and yields the same float32 tensors.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
print(x_train.shape, y_train.shape)

torch.Size([3, 1]) torch.Size([3, 1])


### Weight Initialization

In [3]:
# Slope W and intercept b both start at zero and are tracked by autograd
W = torch.zeros(1).requires_grad_()
b = torch.zeros(1).requires_grad_()
print(W, b, sep="\n")

tensor([0.], requires_grad=True)
tensor([0.], requires_grad=True)


# **Hypothesis**

In [4]:
# Linear hypothesis H(x) = x * W + b
hypothesis = torch.add(x_train * W, b)
print(hypothesis)  # W and b are both 0 here, so y = 0*x + 0 and every prediction is 0

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)


# **Cost (=loss)**

In [5]:
# Per-sample error: prediction minus target
print(torch.sub(hypothesis, y_train))

tensor([[-1.],
        [-2.],
        [-3.]], grad_fn=<SubBackward0>)


In [6]:
# Mean squared error: square each per-sample error, then average
cost = (hypothesis - y_train).pow(2).mean()
print(cost)

tensor(4.6667, grad_fn=<MeanBackward0>)


# Stochastic Gradient Descent

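gradient descent:            한 번의 step마다 모든 data 확인\
stochastic gradient descent: 한 번의 step마다 하나의 mini batch 확인\
이 노트북에서는 매 step마다 3개의 data 전체를 사용하므로, `optim.SGD`를 사용하더라도 실제로는 (full-batch) gradient descent와 동일하게 동작한다.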

In [7]:
# Plain SGD over the two parameters; building the optimizer leaves W and b unchanged
optimizer = optim.SGD(params=[W, b], lr=0.01)
print(W, b, sep="\n")

tensor([0.], requires_grad=True)
tensor([0.], requires_grad=True)


In [8]:
# One optimisation step on the cost computed above. The order matters:
# 1) clear any gradients left over on W and b from a previous backward pass
optimizer.zero_grad()
# 2) backpropagate to compute the gradient of cost with respect to W and b
cost.backward()
# 3) update W and b in place from those gradients, scaled by the learning rate
optimizer.step()

In [9]:
# W and b after the single SGD step
print(W, b, sep="\n")

tensor([0.0933], requires_grad=True)
tensor([0.0400], requires_grad=True)


In [10]:
# Recompute the predictions with the updated W and b
hypothesis = torch.add(x_train * W, b)
print(hypothesis)   # closer to y_train than the all-zero predictions before the step

tensor([[0.1333],
        [0.2267],
        [0.3200]], grad_fn=<AddBackward0>)


In [11]:
# Mean squared error of the updated predictions
cost = (hypothesis - y_train).pow(2).mean()
print(cost)  # the cost dropped from 4.6667 to 3.6927 after one step

tensor(3.6927, grad_fn=<MeanBackward0>)


# **Training with Full Code**

In [12]:
# Data: three samples of y = x.
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor constructor.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
# Model parameters, initialised to zero and tracked by autograd
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# Optimizer: plain SGD over W and b
optimizer = optim.SGD([W, b], lr=0.01)

nb_epochs = 3000
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    hypothesis = x_train * W + b

    # Compute the cost (mean squared error)
    cost = torch.mean((hypothesis - y_train) ** 2)

    # Improve H(x) using the cost
    optimizer.zero_grad()
    cost.backward()     # compute the gradients of cost with respect to W and b
    optimizer.step()    # move W and b by gradient * lr

    # Log every 300 epochs. The cost shown was computed before this epoch's
    # update, while W and b are the values after the update.
    if epoch % 300 == 0:
        print(f'Epoch {epoch:4d}/{nb_epochs} W: {W.item():.3f}, b: {b.item():.3f} Cost: {cost.item():.6f}')

Epoch    0/3000 W: 0.093, b: 0.040 Cost: 4.666667
Epoch  300/3000 W: 0.921, b: 0.179 Cost: 0.004598
Epoch  600/3000 W: 0.962, b: 0.087 Cost: 0.001085
Epoch  900/3000 W: 0.981, b: 0.042 Cost: 0.000256
Epoch 1200/3000 W: 0.991, b: 0.020 Cost: 0.000060
Epoch 1500/3000 W: 0.996, b: 0.010 Cost: 0.000014
Epoch 1800/3000 W: 0.998, b: 0.005 Cost: 0.000003
Epoch 2100/3000 W: 0.999, b: 0.002 Cost: 0.000001
Epoch 2400/3000 W: 0.999, b: 0.001 Cost: 0.000000
Epoch 2700/3000 W: 1.000, b: 0.001 Cost: 0.000000
Epoch 3000/3000 W: 1.000, b: 0.000 Cost: 0.000000


# **High-level Implementation with nn.Module**

In [13]:
# Same toy dataset (y = x), built with torch.tensor and an explicit dtype
# instead of the legacy torch.FloatTensor constructor.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

In [14]:
class LinearRegressionModel(nn.Module):
    """Linear regression as a single fully connected layer.

    Args:
        input_dim: number of features per input sample (default 1).
        output_dim: number of values predicted per sample (default 1).

    With the defaults this is the same one-input, one-output model as
    ``nn.Linear(1, 1)``.
    """

    def __init__(self, input_dim=1, output_dim=1):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)   # (input_dim, output_dim)

    def forward(self, x):
        """Return the layer's predictions for ``x`` (last dimension must be ``input_dim``)."""
        return self.linear(x)

In [15]:
# Create the model; its nn.Linear layer starts with non-zero, randomly initialised weight and bias
model = LinearRegressionModel()

In [16]:
# Forward pass through the freshly initialised (untrained) model
hypothesis = model(x_train)
print(hypothesis)

tensor([[0.0739],
        [0.5891],
        [1.1044]], grad_fn=<AddmmBackward0>)


In [17]:
# Mean squared error between the model's predictions and the targets
cost = F.mse_loss(input=hypothesis, target=y_train)
print(cost)

tensor(2.1471, grad_fn=<MseLossBackward0>)


In [18]:
# Plain SGD over every learnable parameter of the model
optimizer = optim.SGD(model.parameters(), lr=0.01)

# One optimisation step on the cost computed in the previous cell:
# clear old gradients, backpropagate, then update the parameters
optimizer.zero_grad()
cost.backward()
optimizer.step()

# Forward pass again, now with the updated parameters
hypothesis = model(x_train)
print(hypothesis)

tensor([[0.1650],
        [0.7432],
        [1.3213]], grad_fn=<AddmmBackward0>)


# **Training with Full Code**

In [19]:
# Data: three samples of y = x.
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor constructor.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
# Fresh model instance
model = LinearRegressionModel()
# Optimizer: plain SGD over the model's parameters
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 3000
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    prediction = model(x_train)

    # Compute the cost (mean squared error)
    cost = F.mse_loss(prediction, y_train)

    # Improve H(x) using the cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 300 epochs. The cost shown was computed before this epoch's
    # update, while the weight and bias are the values after the update.
    if epoch % 300 == 0:
        # Read the scalars into their own names so the tensors W and b from
        # the manual-training cells are not overwritten with plain floats.
        w_value = model.linear.weight.item()
        b_value = model.linear.bias.item()
        print(f'Epoch {epoch:4d}/{nb_epochs} W: {w_value:.3f}, b: {b_value:.3f} Cost: {cost.item():.6f}')

Epoch    0/3000 W: -0.101, b: 0.508 Cost: 4.630286
Epoch  300/3000 W: 0.822, b: 0.404 Cost: 0.023505
Epoch  600/3000 W: 0.914, b: 0.196 Cost: 0.005546
Epoch  900/3000 W: 0.958, b: 0.095 Cost: 0.001309
Epoch 1200/3000 W: 0.980, b: 0.046 Cost: 0.000309
Epoch 1500/3000 W: 0.990, b: 0.022 Cost: 0.000073
Epoch 1800/3000 W: 0.995, b: 0.011 Cost: 0.000017
Epoch 2100/3000 W: 0.998, b: 0.005 Cost: 0.000004
Epoch 2400/3000 W: 0.999, b: 0.003 Cost: 0.000001
Epoch 2700/3000 W: 0.999, b: 0.001 Cost: 0.000000
Epoch 3000/3000 W: 1.000, b: 0.001 Cost: 0.000000
