## Imports

In [97]:
import numpy as np
from sklearn.linear_model import LinearRegression

from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split
from torch.utils.tensorboard import SummaryWriter

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')

## Higher Order functions

In [98]:
# series of functions performing exponentiation to given power
def square(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)

def cube(x):
    """Return ``x`` raised to the power of three."""
    return pow(x, 3)

# make the exponent an explicit argument
def generic_exp(x, exponent):
    """Return ``x`` raised to ``exponent``.

    The exponent has to be specified on every call, which is the
    inconvenience a function builder removes.
    """
    return pow(x, exponent)



We need a higher-order function (a function builder) to build functions such as `square`, `cube`, and so on.

In [99]:
def skeleton_exponentiation(x):
    """Raise ``x`` to ``exponent``.

    ``exponent`` is not a parameter of this function: it is looked up as a
    free name when the function is called, so the call fails with a
    ``NameError`` if no such name is defined.
    """
    return pow(x, exponent)

In [100]:
# skeleton_exponentiation(2)

<span style="color:red">---------------------------------------------------------------------------</span>  
<span style="color:red">NameError</span>                                 Traceback (most recent call last)  
Cell In[9], line 1  
<span style="color:green">----> 1 skeleton_exponentiation(2)</span>  
  
Cell In[8], line 2  
      1 def skeleton_exponentiation(x):  
<span style="color:green">----> 2     return x**exponent</span>  
  
<span style="color:red">NameError</span>: name 'exponent' is not defined  

In [101]:
def exponentiation_builder(exponent):
    """Build a one-argument function that raises its input to ``exponent``.

    Parameters
    ----------
    exponent
        The power the returned function applies to its argument.

    Returns
    -------
    function
        ``skeleton_exponentiation(x)``, which returns ``x**exponent``.
    """
    def skeleton_exponentiation(x):
        # `exponent` comes from the enclosing builder call (closure)
        return pow(x, exponent)

    # hand back the function itself, not the result of calling it
    return skeleton_exponentiation

In [102]:
# the builder returns a function object; here it is one that squares its input
returned_function = exponentiation_builder(2)
returned_function

<function __main__.exponentiation_builder.<locals>.skeleton_exponentiation(x)>

In [103]:
exponentiation_builder(2)(4)

16

In [104]:
returned_function(5)

25

In [105]:
# rebind the name `cube` to a function produced by the builder
# (this replaces the hand-written `def cube` from earlier in the notebook)
cube = exponentiation_builder(3)
cube(5)

125

In [106]:
# Helper function 1
def make_train_step(model, loss_fn, optimizer):
    """Build the function that performs a single step of the training loop.

    Parameters
    ----------
    model
        Callable model; its ``train()`` method is invoked on every step.
    loss_fn
        Callable taking ``(predictions, targets)`` and returning a loss
        that supports ``backward()`` and ``item()``.
    optimizer
        Object exposing ``step()`` and ``zero_grad()``.

    Returns
    -------
    function
        ``perform_train_step_fn(x, y)``, which runs one update and returns
        the loss of that step as given by ``loss.item()``.
    """
    def perform_train_step_fn(x, y):
        # make sure the model is in training mode
        model.train()

        # forward pass, then the loss of the predictions against the targets
        batch_loss = loss_fn(model(x), y)

        # backpropagate to obtain the gradients
        batch_loss.backward()

        # apply the update, then clear the gradients for the next step
        optimizer.step()
        optimizer.zero_grad()

        return batch_loss.item()

    # this is the function the training loop will call
    return perform_train_step_fn

Run: Data preparation

In [107]:
%run -i data_generation/simple_linear_regression.py

In [108]:
%run -i data_preparation/v0.py

In [109]:
%%writefile model_configuration/v1.py

# This script defines no imports of its own: torch, nn, optim and
# make_train_step are expected to already exist in the namespace it runs in.

# use the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# learning rate
lr = 0.1

# fix the seed before the model is created
torch.manual_seed(42)

# build the model (one linear layer, 1 input -> 1 output) and send it to the device
model = nn.Sequential(nn.Linear(1, 1))
model = model.to(device)

# SGD optimizer over the model's parameters
optimizer = optim.SGD(model.parameters(), lr=lr)

# mean squared error loss
loss_fn = nn.MSELoss(reduction='mean')

# train step function bound to this model, loss and optimizer
train_step_fn = make_train_step(model, loss_fn, optimizer)


Overwriting model_configuration/v1.py


In [110]:
%run -i model_configuration/v1.py

In [111]:
train_step_fn

<function __main__.make_train_step.<locals>.perform_train_step_fn(x, y)>

In [112]:
%%writefile model_training/v1.py

# number of epochs to train for
n_epochs = 1000

# loss of every epoch, in order
losses = []

for epoch in tqdm(range(n_epochs)):
    # one training step on the whole training set; the step returns its loss
    loss = train_step_fn(X_train_tensor, y_train_tensor)

    # keep track of the loss
    losses.append(loss)

Overwriting model_training/v1.py


In [113]:
%run -i model_training/v1.py

  0%|          | 0/1000 [00:00<?, ?it/s]

In [114]:
model.state_dict()

OrderedDict([('0.weight', tensor([[1.9690]])), ('0.bias', tensor([1.0235]))])

## Dataset

In [115]:
class CustomDataset(Dataset):
    """Dataset that pairs a feature tensor with a label tensor.

    Indexing returns a ``(features, label)`` tuple built from the same
    index applied to both tensors.
    """

    def __init__(self, x_tensor, y_tensor):
        # keep references to the two tensors; items are looked up on demand
        self.x, self.y = x_tensor, y_tensor

    def __len__(self):
        # size of the dataset, taken from the features
        return len(self.x)

    def __getitem__(self, index):
        # tuple corresponding to the requested index
        features = self.x[index]
        label = self.y[index]
        return features, label

# no device is given here: we don't want to send the whole training set
# to the GPU, since that would eat up VRAM
X_train_tensor = torch.as_tensor(X_train).float()
y_train_tensor = torch.as_tensor(y_train).float()

# wrap both tensors in the dataset and look at a few entries
train_data = CustomDataset(X_train_tensor, y_train_tensor)
print(train_data[[1, 2, 4]])

(tensor([[0.0636],
        [0.8631],
        [0.7320]]), tensor([[1.1928],
        [2.9128],
        [2.4732]]))


## Tensor Dataset

In [116]:
# when the dataset is just a couple of tensors, TensorDataset can be used directly
train_data = TensorDataset(X_train_tensor, y_train_tensor)
print(train_data[[1, 2, 4]])

(tensor([[0.0636],
        [0.8631],
        [0.7320]]), tensor([[1.1928],
        [2.9128],
        [2.4732]]))


## Dataloader

In [117]:
# Dataloader is an iterator that will load data on demand
train_loader=DataLoader(dataset=train_data,batch_size=16,shuffle=True)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x2298606f370>

In [118]:
next(iter(train_loader)) # this will load the first batch of data

[tensor([[0.2809],
         [0.3253],
         [0.1560],
         [0.5924],
         [0.0651],
         [0.8872],
         [0.4938],
         [0.0055],
         [0.1409],
         [0.0885],
         [0.1849],
         [0.7290],
         [0.8662],
         [0.3117],
         [0.6842],
         [0.1987]]),
 tensor([[1.5846],
         [1.8057],
         [1.2901],
         [2.1687],
         [1.1559],
         [2.8708],
         [1.9060],
         [1.0632],
         [1.1211],
         [1.0708],
         [1.5888],
         [2.4927],
         [2.6805],
         [1.7637],
         [2.3492],
         [1.2654]])]

In [119]:
list(train_loader) # this will load all the data

[[tensor([[0.7852],
          [0.8022],
          [0.6075],
          [0.1997],
          [0.3309],
          [0.6376],
          [0.4722],
          [0.2809],
          [0.4938],
          [0.5427],
          [0.1560],
          [0.1987],
          [0.3745],
          [0.0885],
          [0.7320],
          [0.8872]]),
  tensor([[2.5283],
          [2.6229],
          [2.4037],
          [1.3651],
          [1.5427],
          [2.1930],
          [1.9857],
          [1.5846],
          [1.9060],
          [2.2161],
          [1.2901],
          [1.2654],
          [1.7578],
          [1.0708],
          [2.4732],
          [2.8708]])],
 [tensor([[0.6842],
          [0.0452],
          [0.1159],
          [0.0055],
          [0.3664],
          [0.7751],
          [0.7608],
          [0.7132],
          [0.8631],
          [0.6011],
          [0.1705],
          [0.0344],
          [0.9869],
          [0.0651],
          [0.2921],
          [0.9696]]),
  tensor([[2.3492],
          [0.

We need to add the Dataset and DataLoader elements to the data preparation file.

In [120]:
%%writefile data_preparation/v1.py

# convert the training data to float tensors (no device is specified here)
X_train_tensor = torch.as_tensor(X_train).float()
y_train_tensor = torch.as_tensor(y_train).float()

# build the dataset from the two tensors
train_data = TensorDataset(X_train_tensor, y_train_tensor)

# build the data loader: shuffled mini-batches of 16
train_loader = DataLoader(
    dataset=train_data,
    batch_size=16,
    shuffle=True,
)


Overwriting data_preparation/v1.py


In [121]:
%run -i data_preparation/v1.py

Now we need to introduce mini-batch gradient descent into the model training part.

In [122]:
%run -i model_configuration/v1.py

In [123]:
%%writefile model_training/v2.py

# Define number of epochs
n_epochs=1000

losses=[]

for epoch in tqdm(range(n_epochs)):
    # inner loop
    mini_batch_losses=[]
    for x_batch,y_batch in train_loader:
        # the dataset lives on CPU, we need to send mini-batches to the device where our model lives
        x_batch=x_batch.to(device)
        y_batch=y_batch.to(device)
        
        # perform training step and return corresponding loss
        mini_batch_loss=train_step_fn(x_batch,y_batch)
        mini_batch_losses.append(mini_batch_loss)

    # compute average loss over all mini batches
    loss=np.mean(mini_batch_losses)
    losses.append(loss)


Overwriting model_training/v2.py


In [124]:
%run -i model_training/v2.py

  0%|          | 0/1000 [00:00<?, ?it/s]

In [125]:
model.state_dict()

OrderedDict([('0.weight', tensor([[1.9696]])), ('0.bias', tensor([1.0243]))])

Full pipeline

In [126]:
%run -i data_preparation/v1.py
%run -i model_configuration/v1.py
%run -i model_training/v2.py

  0%|          | 0/1000 [00:00<?, ?it/s]

## Mini Batch inner loop  
The mini-batch inner loop depends on three elements:
1. device
2. dataloader
3. step function

In [127]:
# Helper function 2
def mini_batch(device, dataloader, step_fn):
    """Run ``step_fn`` once per mini-batch and return the average loss.

    Parameters
    ----------
    device
        Target passed to ``.to(...)`` for both halves of every batch.
    dataloader
        Iterable yielding ``(x_batch, y_batch)`` pairs.
    step_fn
        Callable taking ``(x_batch, y_batch)`` and returning that batch's loss.

    Returns
    -------
    The ``np.mean`` of the losses returned by ``step_fn``.
    """
    # move each batch to the device, run the step, and collect its loss
    batch_losses = [
        step_fn(features.to(device), labels.to(device))
        for features, labels in dataloader
    ]

    return np.mean(batch_losses)

Now we only need 200 epochs to perform 1000 updates, because the mini-batch inner loop performs several updates per epoch.



In [128]:
%run -i data_preparation/v1.py
%run -i model_configuration/v1.py

Define model training v3

In [129]:
%%writefile model_training/v3.py

# Define number of epochs
n_epochs=200

losses=[]

for epoch in tqdm(range(n_epochs)):
    loss=mini_batch(device,train_loader,train_step_fn)
    losses.append(loss)


Overwriting model_training/v3.py


In [130]:
%run -i model_training/v3.py

  0%|          | 0/200 [00:00<?, ?it/s]

In [131]:
model.state_dict()

OrderedDict([('0.weight', tensor([[1.9684]])), ('0.bias', tensor([1.0219]))])