In [1]:
# Import dependency 
import numpy as np 
import torch 
import torch.optim as optim
import torch.nn as nn 

In [2]:
# Data generation 
true_b = 1
true_w = 2 
N = 100

# set the random seed for numpy 
np.random.seed(43)

x= np.random.rand(N,1)
epsilon = (.1 * np.random.rand(N,1))

y = true_b + true_w *x + epsilon

In [3]:
#Generate training and validating sets
idx = np.arange(N)

# Use first 80 random indices for train 
train_idx = idx[:int(N*.8)]
val_idx = idx[int(N*.8):]

# Generate train and validation sets
x_train, y_train = x[train_idx], y[train_idx]


In [4]:
# Data preparation 

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Transform data from numpy array to torch tensor
x_train_tensor = torch.as_tensor(x_train).float().to(device)
y_train_tensor = torch.as_tensor(y_train).float().to(device)

In [37]:
#Define the train step

def make_train_step(model, loss_fn, optimizer):
    # Builds function that performs a step in the train loop 
    def perform_train_step(x,y):
        
        # Set the model to TRAIN mode
        model.train()
        
        # Step1: Compute the model's predicition - forward pass
        yhat = model(x)
        
        # Step2: Compute the loss
        loss = loss_fn(yhat, y)
        
        # Step3: Compute gradients for "b" and "w" parameters
        loss.backward()
        
        # Step4: Updates parameters using gradients and the learning rate
        optimizer.step()
        optimizer.zero_grad()
        
        print(model.state_dict())
        # Return the loss 
        return loss.item()
    #Return the function that will be called inside the train loop
    return perform_train_step

In [38]:
# Model config 

# Define the model 
class ManualLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1,1)
    def forward(self,x):
        return self.linear(x)


# Set learning rate 
lr = 0.1

torch.manual_seed(42)

# Create a model and send it to the device 
model = ManualLinearRegression().to(device)
print(model.state_dict())

# Define a SGD optimizer to update the parameters 
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# Define a MSE loss function 
loss_fn = nn.MSELoss(reduction="mean")


# Create a train_step 
train_step = make_train_step(model, loss_fn, optimizer)

OrderedDict([('linear.weight', tensor([[0.7645]])), ('linear.bias', tensor([0.8300]))])


In [41]:
# Model training

n_epochs = 1000
losses = []
for epoch in range(n_epochs):
    model.train()  #2)
    # Step 1: Computes the model's predicted output - forward pass
    # No more manula prediction 
    #yhat = b + w*x_train_tensor 
    yhat = model(x_train_tensor)  #3)
    
    # Step 2: Computes the loss
    # No more manual loss
    # error = (yhat - y_train_tensor)
    # loss = (error**2).mean()
    loss = loss_fn(yhat, y_train_tensor) #2
    
    # Step 3: Computes gradients for both 'b' and 'w' parameters
    loss.backward()
    
    # Step 4: Updates parameters using gradients and the learning rate
    # No more manual update
    # with torch.no_grade():
    #    b-=lr*b.grad
    #    w-=lr*w.grad
    optimizer.step()
    
    
    # Graident Zeroing
    # No more telling pytorch to let gradients go 
    #b.grad.zero()
    #w.grad.zero()
    optimizer.zero_grad()
    print(model.state_dict())
   

In [33]:
# Model's parameter before training 
print(model.state_dict())

OrderedDict([('linear.weight', tensor([[1.9994]])), ('linear.bias', tensor([1.0510]))])


In [30]:
# Check model's parameters
#print(model.state_dict())

In [31]:
print(list(model.parameters()))

[Parameter containing:
tensor([[1.9994]], requires_grad=True), Parameter containing:
tensor([1.0510], requires_grad=True)]
