In [2]:
import torch 
import numpy as np
import pandas as pd 
import torch.nn as nn 
import torch.optim as optim 
import torch.functional as F 

In [9]:
# Toy dataset following y = 2x: three samples with a single feature each.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[2], [4], [6]], dtype=torch.float32)

# Trainable parameters, both starting at zero and tracked by autograd.
w = torch.zeros(1).requires_grad_()
b = torch.zeros(1).requires_grad_()

# Linear model H(x) = W x + b evaluated on the training inputs.
hypothesis = w * x_train + b

# Mean squared error between the predictions and the targets.
cost = (hypothesis - y_train).pow(2).mean()

In [10]:
# Plain stochastic gradient descent over the two trainable parameters.
optimizer = optim.SGD(params=[w, b], lr=0.01)


In [12]:
nb_epochs = 1000

for epoch in range(nb_epochs + 1):

    # Forward pass: linear prediction for every training sample.
    hypothesis = w * x_train + b

    # Mean squared error between prediction and target.
    cost = (hypothesis - y_train).pow(2).mean()

    # Backward pass and update: clear the gradients left from the previous
    # iteration, backpropagate the cost, then let the optimizer take a step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report every 100th epoch. w and b are read after the step,
    # while cost is the value computed before it.
    if not epoch % 100:
        print(
            "Epoch {:4d}/{} W{:.3f}m b:{:.3f} Cost : {:.6f}".format(
                epoch, nb_epochs, w.item(), b.item(), cost.item()
            )
        )


Epoch    0/1000 W0.353m b:0.151 Cost : 14.770963
Epoch  100/1000 W1.746m b:0.577 Cost : 0.047939
Epoch  200/1000 W1.801m b:0.453 Cost : 0.029624
Epoch  300/1000 W1.843m b:0.356 Cost : 0.018306
Epoch  400/1000 W1.877m b:0.280 Cost : 0.011312
Epoch  500/1000 W1.903m b:0.220 Cost : 0.006990
Epoch  600/1000 W1.924m b:0.173 Cost : 0.004319
Epoch  700/1000 W1.940m b:0.136 Cost : 0.002669
Epoch  800/1000 W1.953m b:0.107 Cost : 0.001649
Epoch  900/1000 W1.963m b:0.084 Cost : 0.001019
Epoch 1000/1000 W1.971m b:0.066 Cost : 0.000630


#### Deeper Look at Gradient Descent 
- Hypothesis function 
$$\text{Hypothesis (Linear Regression)} \\\\
 H(x) = Wx + b \\\\
 W : \text{Weight} \\\\
 b : \text{Bias} $$
 
- Data
    - Create dummy data 
- Cost function
    - Mean Squared Error(MSE) 

$$ cost(W) = \frac{1}{m}\sum_{i=1}^{m} \left(H(x^{(i)}) - y^{(i)}\right)^2$$
- Gradient descent 
$$ \nabla W = \frac{\partial cost}{\partial W} = \frac{2}{m}\sum_{i=1}^{m}\left(Wx^{(i)} - y^{(i)}\right)x^{(i)}$$
$$W := W - \alpha \nabla W \\\\
\alpha : \text{Learning Rate} \\\\
\nabla W : \text{Gradient} $$


In [33]:
# Toy dataset following y = x, so the optimal weight is 1.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
w = torch.zeros(1)  # plain tensor without autograd; the gradient is derived by hand below
lr = 0.1
nb_epochs = 10

for epoch in range(nb_epochs + 1):
    # Bias-free linear model H(x) = W x and its mean squared error.
    hypothesis = w * x_train
    cost = (hypothesis - y_train).pow(2).mean()

    # Analytic derivative of the MSE with respect to w:
    # (2/m) * sum((W x - y) * x).
    gradient = 2 * ((hypothesis - y_train) * x_train).mean()

    # Log before the update so that W and the cost describe the same state.
    print("Epoch {:4d}/{} W: {:.3f}, Cost {:.6f}".format(epoch, nb_epochs, w.item(), cost.item()))

    # Gradient descent step, applied in place.
    w.sub_(lr * gradient)


Epoch    0/10 W: 0.000, Cost 4.666667
Epoch    1/10 W: 0.933, Cost 0.020741
Epoch    2/10 W: 0.996, Cost 0.000092
Epoch    3/10 W: 1.000, Cost 0.000000
Epoch    4/10 W: 1.000, Cost 0.000000
Epoch    5/10 W: 1.000, Cost 0.000000
Epoch    6/10 W: 1.000, Cost 0.000000
Epoch    7/10 W: 1.000, Cost 0.000000
Epoch    8/10 W: 1.000, Cost 0.000000
Epoch    9/10 W: 1.000, Cost 0.000000
Epoch   10/10 W: 1.000, Cost 0.000000


In [34]:
# Same toy problem as the manual version (y = x, optimal w = 1), but the
# gradient comes from autograd and the update from torch.optim.SGD, so no
# hand-written gradient formula is needed here.
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])
w = torch.zeros(1, requires_grad=True)  # tracked by autograd so backward() fills w.grad
optimizer = optim.SGD([w], lr=0.15)
nb_epochs = 10

for epoch in range(nb_epochs + 1):
    # Bias-free linear model H(x) = W x and its mean squared error.
    hypothesis = x_train * w
    cost = torch.mean((hypothesis - y_train) ** 2)

    # Log before the update so that W and the cost describe the same state.
    print("Epoch {:4d}/{} W: {:.3f}, Cost {:.6f}".format(epoch, nb_epochs, w.item(), cost.item()))

    # Gradient descent: reset the accumulated gradient, backpropagate the
    # cost into w.grad, then let the optimizer apply the update to w.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

Epoch    0/10 W: 0.000, Cost 4.666667
Epoch    1/10 W: 1.400, Cost 0.746667
Epoch    2/10 W: 0.840, Cost 0.119467
Epoch    3/10 W: 1.064, Cost 0.019115
Epoch    4/10 W: 0.974, Cost 0.003058
Epoch    5/10 W: 1.010, Cost 0.000489
Epoch    6/10 W: 0.996, Cost 0.000078
Epoch    7/10 W: 1.002, Cost 0.000013
Epoch    8/10 W: 0.999, Cost 0.000002
Epoch    9/10 W: 1.000, Cost 0.000000
Epoch   10/10 W: 1.000, Cost 0.000000
