# PyTorch Tutorial 06 - Training Pipeline: Model, Loss, and Optimizer

In [2]:
import torch
import torch.nn as nn

In [None]:
# Steps:
# 1: Design the model (input size, output size, forward pass)
# 2: Construct the loss and the optimizer
# 3: Training loop, repeated for several epochs:
#    - forward pass: compute the prediction
#    - backward pass: calculate the gradients
#    - update the weights


In [8]:
# Linear regression without a bias term:
#     f(x) = w * x
# The training targets are generated with the true weight w = 2.

X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = 2 * X  # targets: [2, 4, 6, 8]

# Trainable weight, starting at zero; requires_grad=True makes autograd track it
w = torch.tensor(
    0.0,
    dtype=torch.float32,
    requires_grad=True,
)


def forward(x):
    """Model prediction: scale the input ``x`` by the current weight ``w``."""
    return x * w

# The loss function is no longer written by hand (nn.MSELoss is used instead).
# For reference, the manual version was:
# def loss(y, y_predicted):
#     return ((y_predicted - y)**2).mean()



In [9]:
# Training with a built-in loss function and a built-in optimizer

learning_rate = 0.01
n_iters = 80
loss = nn.MSELoss()
# SGD is given the list of tensors it should update; here that is only w
optimizer = torch.optim.SGD([w], lr=learning_rate)

print(f"Prediction before training: f(5) = {forward(5):.3f}")
print("******************\n")

for epoch in range(n_iters):
    # forward pass: compute the predictions for all training inputs
    y_pred = forward(X)

    # loss: nn.MSELoss is called as loss(input, target), i.e. prediction first
    l = loss(y_pred, Y)

    # backward pass: autograd computes dl/dw and adds it to w.grad
    l.backward()

    # weight update; the optimizer replaces the manual version:
    #     with torch.no_grad():
    #         w -= learning_rate * w.grad
    optimizer.step()

    # reset the gradients; backward() accumulates into w.grad, so without this
    # the next epoch would add its gradient on top of the current one.
    # This replaces the manual w.grad.zero_()
    optimizer.zero_grad()

    if epoch%10 == 0:
        # l was computed before this epoch's update; w is printed after it
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")
        print(f"Prediction during training: f(5) = {forward(5):.3f}")
        print("\n")

print("******************\n")
print(f"Prediction after training: f(5) = {forward(5):.3f}")




Prediction before training: f(5) = 0.000
******************

epoch 1: w = 0.300, loss = 30.00000000
Prediction during training: f(5) = 1.500


epoch 11: w = 1.665, loss = 1.16278565
Prediction during training: f(5) = 8.327


epoch 21: w = 1.934, loss = 0.04506890
Prediction during training: f(5) = 9.671


epoch 31: w = 1.987, loss = 0.00174685
Prediction during training: f(5) = 9.935


epoch 41: w = 1.997, loss = 0.00006770
Prediction during training: f(5) = 9.987


epoch 51: w = 1.999, loss = 0.00000262
Prediction during training: f(5) = 9.997


epoch 61: w = 2.000, loss = 0.00000010
Prediction during training: f(5) = 10.000


epoch 71: w = 2.000, loss = 0.00000000
Prediction during training: f(5) = 10.000


******************

Prediction after training: f(5) = 10.000


# Implementation of a PyTorch Model

In [16]:
# Linear regression with a built-in PyTorch layer.
# The data still follows f = w * x with w = 2, but nn.Linear also learns a
# bias term by default, so the model being fitted is f = w * x + b.

# nn.Linear expects 2-D inputs of shape (n_samples, n_features)
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

n_samples, n_features = X.shape

print("n samples", n_samples)
print("n features", n_features)

# With nn.Linear the weight tensor and the forward pass are no longer written
# by hand. The manual version it replaces was:
# w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)
#
# def forward(x):
#     return w * x

input_size = n_features
# The output size is the number of target values per sample. Take it from Y
# rather than from the number of input features (both happen to be 1 here).
output_size = Y.shape[1]

model = nn.Linear(input_size, output_size)


n samples 4
n features 1


In [17]:
# Training the nn.Linear model

learning_rate = 0.01
n_iters = 100
loss = nn.MSELoss()
# instead of a hand-made weight tensor, the optimizer receives model.parameters()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# The model is called with tensors, not plain Python numbers, so the test
# input 5 is wrapped in a float tensor holding one feature value.
x_test = torch.tensor([5], dtype=torch.float32)

print(f"Prediction before training: f(5) = {model(x_test).item():.3f}")
print("******************\n")

for epoch in range(n_iters):
    # forward pass: calling the model replaces the manual forward(X)
    y_pred = model(X)

    # loss: nn.MSELoss is called as loss(input, target), i.e. prediction first
    l = loss(y_pred, Y)

    # backward pass: autograd computes the gradients of l w.r.t. the parameters
    l.backward()

    # parameter update; the optimizer replaces the manual version:
    #     with torch.no_grad():
    #         w -= learning_rate * w.grad
    optimizer.step()

    # reset the gradients; backward() accumulates into .grad, so without this
    # the next epoch would add its gradients on top of the current ones.
    # This replaces the manual w.grad.zero_()
    optimizer.zero_grad()

    if epoch%10 == 0:
        # model.parameters() yields the weight first, then the bias.
        # Use a fresh name instead of `w` so the scalar weight used by the
        # earlier forward() example is not overwritten.
        weight, _ = model.parameters()

        print(f"epoch {epoch+1}: w = {weight[0][0].item():.3f}, loss = {l:.8f}")
        print(f"Prediction during training: f(5) = {model(x_test).item():.3f}")
        print("\n")

print("******************\n")
print(f"Prediction after training: f(5) = {model(x_test).item():.3f}")




Prediction before training: f(5) = 0.793
******************

epoch 1: w = 0.343, loss = 23.94909668
Prediction during training: f(5) = 2.221


epoch 11: w = 1.470, loss = 0.76037621
Prediction during training: f(5) = 8.206


epoch 21: w = 1.659, loss = 0.15223551
Prediction during training: f(5) = 9.185


epoch 31: w = 1.697, loss = 0.12878561
Prediction during training: f(5) = 9.358


epoch 41: w = 1.711, loss = 0.12091216
Prediction during training: f(5) = 9.401


epoch 51: w = 1.720, loss = 0.11386467
Prediction during training: f(5) = 9.422


epoch 61: w = 1.728, loss = 0.10723686
Prediction during training: f(5) = 9.440


epoch 71: w = 1.736, loss = 0.10099515
Prediction during training: f(5) = 9.457


epoch 81: w = 1.744, loss = 0.09511670
Prediction during training: f(5) = 9.473


epoch 91: w = 1.752, loss = 0.08958043
Prediction during training: f(5) = 9.488


******************

Prediction after training: f(5) = 9.502


## Another example: creating a custom model class

In [18]:
# Linear regression with a custom nn.Module.
# The data follows f = w * x with w = 2; the wrapped nn.Linear layer also
# learns a bias term, so the model being fitted is f = w * x + b.

# nn.Linear expects 2-D inputs of shape (n_samples, n_features)
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

n_samples, n_features = X.shape

print("n samples", n_samples)
print("n features", n_features)


input_size = n_features
# The output size is the number of target values per sample. Take it from Y
# rather than from the number of input features (both happen to be 1 here).
output_size = Y.shape[1]

class LinearRegression(nn.Module):
    """Linear regression model that wraps a single ``nn.Linear`` layer."""

    def __init__(self, input_dim, output_dim):
        """Create the layer mapping ``input_dim`` features to ``output_dim`` outputs."""
        super().__init__()
        # define layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the linear layer's prediction for the input ``x``."""
        return self.lin(x)

# The custom class is now used in place of the plain layer:
# model = nn.Linear(input_size, output_size)
model = LinearRegression(input_size, output_size)



n samples 4
n features 1


In [19]:
# Training the LinearRegression model

learning_rate = 0.01
n_iters = 100
loss = nn.MSELoss()
# instead of a hand-made weight tensor, the optimizer receives model.parameters()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# The model is called with tensors, not plain Python numbers, so the test
# input 5 is wrapped in a float tensor holding one feature value.
x_test = torch.tensor([5], dtype=torch.float32)

print(f"Prediction before training: f(5) = {model(x_test).item():.3f}")
print("******************\n")

for epoch in range(n_iters):
    y_pred = model(X)  # forward pass
    l = loss(y_pred, Y)  # loss computation; nn.MSELoss takes (input, target)
    l.backward()  # backward pass: compute the gradients
    optimizer.step()  # parameter update
    optimizer.zero_grad()  # reset the gradients so they do not accumulate

    if epoch%10 == 0:
        # model.parameters() yields the weight first, then the bias.
        # Use a fresh name instead of `w` so the scalar weight used by the
        # earlier forward() example is not overwritten.
        weight, _ = model.parameters()

        print(f"epoch {epoch+1}: w = {weight[0][0].item():.3f}, loss = {l:.8f}")
        print(f"Prediction during training: f(5) = {model(x_test).item():.3f}")
        print("\n")

print("******************\n")
print(f"Prediction after training: f(5) = {model(x_test).item():.3f}")




Prediction before training: f(5) = -3.390
******************

epoch 1: w = -0.384, loss = 51.22496033
Prediction during training: f(5) = -1.301


epoch 11: w = 1.263, loss = 1.57860565
Prediction during training: f(5) = 7.454


epoch 21: w = 1.539, loss = 0.27939278
Prediction during training: f(5) = 8.884


epoch 31: w = 1.593, loss = 0.23189408
Prediction during training: f(5) = 9.135


epoch 41: w = 1.612, loss = 0.21758850
Prediction during training: f(5) = 9.196


epoch 51: w = 1.624, loss = 0.20490284
Prediction during training: f(5) = 9.225


epoch 61: w = 1.635, loss = 0.19297586
Prediction during training: f(5) = 9.249


epoch 71: w = 1.646, loss = 0.18174365
Prediction during training: f(5) = 9.271


epoch 81: w = 1.657, loss = 0.17116506
Prediction during training: f(5) = 9.293


epoch 91: w = 1.667, loss = 0.16120249
Prediction during training: f(5) = 9.314


******************

Prediction after training: f(5) = 9.332


In [23]:
# Compact, self-contained version of the class-based example, run with a
# larger learning rate and fewer epochs.
# The data follows f = w * x with w = 2; the wrapped nn.Linear layer also
# learns a bias term, so the model being fitted is f = w * x + b.
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

n_samples, n_features = X.shape
input_size = n_features
# number of target values per sample, taken from Y rather than from the inputs
output_size = Y.shape[1]

class LinearRegression(nn.Module):
    """Linear regression model that wraps a single ``nn.Linear`` layer."""

    def __init__(self, input_dim, output_dim):
        """Create the layer mapping ``input_dim`` features to ``output_dim`` outputs."""
        super().__init__()
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the linear layer's prediction for the input ``x``."""
        return self.lin(x)

model = LinearRegression(input_size, output_size)
learning_rate = 0.1
n_iters = 10
log_every = 1  # print the progress every `log_every` epochs
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# the model is called with tensors, so the test input 5 is wrapped in one
x_test = torch.tensor([5], dtype=torch.float32)

print(f"Prediction before training: f(5) = {model(x_test).item():.3f}")
print("******************\n")

for epoch in range(n_iters):
    y_pred = model(X)  # forward pass
    l = loss(y_pred, Y)  # loss computation; nn.MSELoss takes (input, target)
    l.backward()  # backward pass: compute the gradients
    optimizer.step()  # parameter update
    optimizer.zero_grad()  # reset the gradients so they do not accumulate

    if epoch % log_every == 0:
        # model.parameters() yields the weight first, then the bias.
        # Use a fresh name instead of `w` so the scalar weight used by the
        # earlier forward() example is not overwritten.
        weight, _ = model.parameters()

        print(f"epoch {epoch+1}: w = {weight[0][0].item():.3f}, loss = {l:.8f}")
        print(f"Prediction during training: f(5) = {model(x_test).item():.3f}")
        print("\n")

print("******************\n")
print(f"Prediction after training: f(5) = {model(x_test).item():.3f}")




Prediction before training: f(5) = -3.158
******************

epoch 1: w = 3.658, loss = 56.66290283
Prediction during training: f(5) = 18.836


epoch 2: w = 0.898, loss = 25.44063568
Prediction during training: f(5) = 4.098


epoch 3: w = 2.747, loss = 11.42240047
Prediction during training: f(5) = 13.973


epoch 4: w = 1.508, loss = 5.12846947
Prediction during training: f(5) = 7.356


epoch 5: w = 2.338, loss = 2.30260992
Prediction during training: f(5) = 11.789


epoch 6: w = 1.782, loss = 1.03384876
Prediction during training: f(5) = 8.818


epoch 7: w = 2.154, loss = 0.46419698
Prediction during training: f(5) = 10.808


epoch 8: w = 1.904, loss = 0.20843235
Prediction during training: f(5) = 9.474


epoch 9: w = 2.072, loss = 0.09359787
Prediction during training: f(5) = 10.368


epoch 10: w = 1.959, loss = 0.04203819
Prediction during training: f(5) = 9.768


******************

Prediction after training: f(5) = 9.768
