In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

In [2]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)

In [3]:
w = torch.tensor(1.0, requires_grad = True)
y_hat = w * x
loss = (y - y_hat)**2
print(loss)

tensor(1., grad_fn=<PowBackward0>)


In [4]:
# backward step
loss.backward()
print(w.grad)

tensor(-2.)


In [5]:
X = np.array([1., 2., 3., 4.])

# we take the true value for weight equals 2, so:
Y = np.array([2., 4., 6., 8.])

In [6]:
# start value for w
w = 0.0

# model prediction
def forward(x):
    return w * x

# loss
def loss(y, y_hat):
    return ((y - y_hat)**2).mean()


# gradient
# MSE = 1/N * (w*x - y)**2
# dJ/dw = 1/N * 2*(w*x - y) * x

def gradient(x, y, y_hat):
    return np.dot(2*x, y_hat-y).mean()


print(f'prediction before training: f(5) = {forward(5):.3f}')

prediction before training: f(5) = 0.000


In [7]:
# Trainig
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction
    y_pred = forward(X)
    
    #loss
    l = loss(Y, y_pred)
    
    # gradient  
    dw = gradient(X, Y, y_pred)
    
    # update
    w -= learning_rate * dw 
    
    if epoch % 2 == 0:
        print(f'epoch: {epoch + 1}; weight = {w:.3f}, loss = {l:.8f}')
print()        
print(f'prediction after training: f(5) = {forward(5):.3f}')

epoch: 1; weight = 1.200, loss = 30.00000000
epoch: 3; weight = 1.872, loss = 0.76800000
epoch: 5; weight = 1.980, loss = 0.01966080
epoch: 7; weight = 1.997, loss = 0.00050332
epoch: 9; weight = 1.999, loss = 0.00001288
epoch: 11; weight = 2.000, loss = 0.00000033
epoch: 13; weight = 2.000, loss = 0.00000001
epoch: 15; weight = 2.000, loss = 0.00000000
epoch: 17; weight = 2.000, loss = 0.00000000
epoch: 19; weight = 2.000, loss = 0.00000000

prediction after training: f(5) = 10.000


### With torch

In [8]:
X = torch.tensor([1., 2., 3., 4.])

# we take the true value for weight equals 2, so:
Y = torch.tensor([2., 4., 6., 8.])

In [9]:
# start value for w
w = torch.tensor(0.0, requires_grad = True)

# Trainig
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction
    y_pred = forward(X)
    
    #loss
    l = loss(Y, y_pred)
    
    # gradient  
    l.backward()
    
    # update
    with torch.no_grad():
        w -= learning_rate * w.grad
        
    # zero grad
    w.grad.zero_()
    
    if epoch % 10 == 0:
        print(f'epoch: {epoch}; weight = {w:.3f}, loss = {l:.8f}')
print()        
print(f'prediction after training: f(5) = {forward(5):.3f}')

epoch: 0; weight = 0.300, loss = 30.00000000
epoch: 10; weight = 1.665, loss = 1.16278565
epoch: 20; weight = 1.934, loss = 0.04506890
epoch: 30; weight = 1.987, loss = 0.00174685
epoch: 40; weight = 1.997, loss = 0.00006770
epoch: 50; weight = 1.999, loss = 0.00000262
epoch: 60; weight = 2.000, loss = 0.00000010
epoch: 70; weight = 2.000, loss = 0.00000000
epoch: 80; weight = 2.000, loss = 0.00000000
epoch: 90; weight = 2.000, loss = 0.00000000

prediction after training: f(5) = 10.000


### Training Pipeline: Model, Loss, and Optimizer

In [12]:
X = torch.tensor([1., 2., 3., 4.])
Y = torch.tensor([2., 4., 6., 8.])
w = torch.tensor(0.0, requires_grad = True)

In [13]:
def forward(x):
    return w * x
print(f'prediction before training: f(5) = {forward(5):.3f}')

prediction before training: f(5) = 0.000


In [17]:
learning_rate = 0.01
loss = torch.nn.MSELoss()
optimizer = torch.optim.SGD([w], lr = learning_rate)

In [18]:
n_iters = 100

for epoch in range(n_iters):
    # prediction
    y_pred = forward(X)
    
    #loss
    l = loss(Y, y_pred)
    
    # gradient  
    l.backward()
    
    # update
    optimizer.step()
        
    # zero grad
    optimizer.zero_grad()
    
    if epoch % 10 == 0:
        print(f'epoch: {epoch}; weight = {w:.3f}, loss = {l:.8f}')
print()        
print(f'prediction after training: f(5) = {forward(5):.3f}')

epoch: 0; weight = 0.300, loss = 30.00000000
epoch: 10; weight = 1.665, loss = 1.16278565
epoch: 20; weight = 1.934, loss = 0.04506890
epoch: 30; weight = 1.987, loss = 0.00174685
epoch: 40; weight = 1.997, loss = 0.00006770
epoch: 50; weight = 1.999, loss = 0.00000262
epoch: 60; weight = 2.000, loss = 0.00000010
epoch: 70; weight = 2.000, loss = 0.00000000
epoch: 80; weight = 2.000, loss = 0.00000000
epoch: 90; weight = 2.000, loss = 0.00000000

prediction after training: f(5) = 10.000


In [32]:
X = torch.tensor([[1.], 
                  [2.],
                  [3.], 
                  [4.]])
Y = torch.tensor([[2.], 
                  [4.],
                  [6.], 
                  [8.]])
n_samples, n_features = X.shape
print(f"n_samples: {n_samples}")
print(f"n_features: {n_features}")

n_samples: 4
n_features: 1


In [33]:
input_size = n_features
output_size = n_features
model = torch.nn.Linear(input_size, output_size)

X_test = torch.tensor([[5.]])
print(f'prediction before training: f(5) = {model(X_test).item():.3f}')

prediction before training: f(5) = -1.351


In [34]:
learning_rate = 0.1
loss = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [35]:
n_iters = 200

for epoch in range(n_iters):
    # prediction
    y_pred = model(X)
    
    #loss
    l = loss(Y, y_pred)
    
    # gradient  
    l.backward()
    
    # update
    optimizer.step()
        
    # zero grad
    optimizer.zero_grad()
    
    if epoch % 20 == 0:
        [w, b] = model.parameters()
        print(f'epoch: {epoch}; weight = {w[0][0].item():.3f}, loss = {l:.8f}')
print()        
print(f'prediction after num_epochs = {n_iters}: f(5) = {model(X_test).item():.3f}')

epoch: 0; weight = 3.028, loss = 37.46440887
epoch: 20; weight = 1.830, loss = 0.04450297
epoch: 40; weight = 1.907, loss = 0.01319198
epoch: 60; weight = 1.950, loss = 0.00391085
epoch: 80; weight = 1.973, loss = 0.00115939
epoch: 100; weight = 1.985, loss = 0.00034371
epoch: 120; weight = 1.992, loss = 0.00010190
epoch: 140; weight = 1.996, loss = 0.00003021
epoch: 160; weight = 1.998, loss = 0.00000896
epoch: 180; weight = 1.999, loss = 0.00000265

prediction after num_epochs = 200: f(5) = 9.998


In [36]:
# the same with the new class:

class Linear_Regression(torch.nn.Module):
    
    def __init__(self, input_size, output_size):
        super(Linear_Regression, self).__init__()
        self.lin = torch.nn.Linear(input_size, output_size)
        
    def forward(self, x):
        return self.lin(x)

In [37]:
X = torch.tensor([[1.], 
                  [2.],
                  [3.], 
                  [4.]])
Y = torch.tensor([[2.], 
                  [4.],
                  [6.], 
                  [8.]])
n_samples, n_features = X.shape
print(f"n_samples: {n_samples}")
print(f"n_features: {n_features}")

n_samples: 4
n_features: 1


In [38]:
model = Linear_Regression(n_features, n_features)

learning_rate = 0.1
loss = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

n_iters = 200

for epoch in range(n_iters):
    # prediction
    y_pred = model(X)
    
    #loss
    l = loss(Y, y_pred)
    
    # gradient  
    l.backward()
    
    # update
    optimizer.step()
        
    # zero grad
    optimizer.zero_grad()
    
    if epoch % 20 == 0:
        [w, b] = model.parameters()
        print(f'epoch: {epoch}; weight = {w[0][0].item():.3f}, loss = {l:.8f}')
print()        
print(f'prediction after num_epochs = {n_iters}: f(5) = {model(X_test).item():.3f}')

epoch: 0; weight = 3.288, loss = 40.66133118
epoch: 20; weight = 1.940, loss = 0.00563611
epoch: 40; weight = 1.967, loss = 0.00166953
epoch: 60; weight = 1.982, loss = 0.00049494
epoch: 80; weight = 1.990, loss = 0.00014673
epoch: 100; weight = 1.995, loss = 0.00004350
epoch: 120; weight = 1.997, loss = 0.00001290
epoch: 140; weight = 1.998, loss = 0.00000382
epoch: 160; weight = 1.999, loss = 0.00000113
epoch: 180; weight = 2.000, loss = 0.00000034

prediction after num_epochs = 200: f(5) = 9.999
