# Putting everything together


### 0.1. Importing pytorch

In [None]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import time
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to echo the value of every bare expression in a cell rather than
# only the final one (see the IPython docs for `ast_node_interactivity`).
InteractiveShell.ast_node_interactivity = "all"

# The Colab helper package is only imported when the user answers "y"
# (case-insensitive) to the prompt below.
is_in_colab = input("Is the model being trained in a colab environment(y/n)?\n:")
if is_in_colab.lower() == "y":
  import google.colab

# Confirm which PyTorch build was picked up.
print(f"Imported Torch v{torch.__version__}")

### 0.2. Device Agnostic code

The major advantage of using a system equipped with a GPU is that tensors can be stored on the GPU and operated on there. GPUs have a large number of cores which, albeit individually low-powered, can crunch through large amounts of data rapidly. However, it is not guaranteed that the system on which we train our model is equipped with a GPU. In that case we write device-agnostic code that sets the device of our tensors according to availability.

While it is recommended to train our model on a system equipped with a dedicated GPU (usually with CUDA cores), we may have to test our model's trainability or framework before we send it off to be trained. In such situations, a preliminary training run on a small amount of data to gauge efficiency can eliminate the need to repeatedly attempt long, failed runs.

In [None]:
# Device-agnostic setup: use the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
# Bare expression so the notebook displays the chosen device.
device

## 1. Data

### 1.1. Preparing data for training and testing

In [None]:
# Known parameters of the straight line the data is generated from
# (Y = weight * X + bias).
weight = 0.6
bias = 0.2

# Range and spacing of the input samples.
start = 0
end = 1
step = 0.01

# Features as a column: one sample per row, one feature per sample.
# Leaving X one-dimensional can cause errors further on, since the model's
# nn.Linear layer is built with in_features=1.
X = torch.unsqueeze(torch.arange(start, end, step), dim=1)
Y = X * weight + bias

### 1.2. Splitting our data into training and testing sets

In [None]:
# Cut points along the sample axis:
# first 60 % -> training, next 20 % -> validation, last 20 % -> test.
train_split = int(len(X) * 0.6)
train_val_split = int(len(X) * 0.8)

# Training portion
X_train, Y_train = X[:train_split], Y[:train_split]
# Validation portion
X_val, Y_val = X[train_split:train_val_split], Y[train_split:train_val_split]
# Test portion
X_test, Y_test = X[train_val_split:], Y[train_val_split:]

### 1.3. Visualization

In [None]:
def plot_predictions(train_data = X_train.to("cpu"),
                     train_labels = Y_train.to("cpu"),
                     val_data = X_val.to("cpu"),
                     val_labels = Y_val.to("cpu"),
                     test_data = X_test.to("cpu"),
                     test_labels = Y_test.to("cpu"),
                     predictions = None):
  """Scatter-plot the training, validation and test splits, plus optional predictions.

  Args:
    train_data, train_labels: x/y values drawn in blue as "Training Data".
    val_data, val_labels: x/y values drawn in red as "Validation Data".
    test_data, test_labels: x/y values drawn in orange as "Test Data".
    predictions: optional y values drawn in black against ``test_data``;
      nothing extra is drawn when this is None.

  The defaults are CPU copies of the notebook's splits, captured when this
  function is defined. Any tensor argument is detached and copied to the CPU
  before plotting, so predictions coming straight from a model on the GPU
  (or still attached to the autograd graph) can be passed in directly.
  """
  def _plottable(values):
    # Matplotlib converts its inputs to NumPy, which fails for tensors that
    # live on an accelerator or that require grad.
    if torch.is_tensor(values):
      return values.detach().cpu()
    return values

  plt.figure(figsize=(10, 7)) # Figure dimensions in inches

  # (<data-x>, <data-y>, <color>, <marker size>, <label>)
  plt.scatter(_plottable(train_data), _plottable(train_labels), c="b", s=4, label="Training Data")
  plt.scatter(_plottable(val_data), _plottable(val_labels), c="r", s=4, label="Validation Data")
  plt.scatter(_plottable(test_data), _plottable(test_labels), c="orange", s=4, label="Test Data")

  if predictions is not None:
    # Predictions are drawn over the test inputs they were computed from
    plt.scatter(_plottable(test_data), _plottable(predictions), c="black", s=4, label="Predictions")

  plt.legend(prop={"size" : 14}) # (<property dictionary>)

In [None]:
# Show the training / validation / test splits on their own (no predictions passed).
plot_predictions()

## 2. Building the Model

### 2.1. Defining the model by inheriting torch.nn.Module class

In [None]:
class LinearRegressionModel(nn.Module):
  """Linear regression on a single feature, backed by one nn.Linear layer."""

  def __init__(self):
    super().__init__()
    # nn.Linear creates and owns the model parameters:
    # one input feature mapped to one output feature.
    self.linear_layer = nn.Linear(1, 1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Pass ``x`` through the linear layer and return its output."""
    prediction = self.linear_layer(x)
    return prediction

### 2.2. Creating a model instance, seeding the RNG for reproducibility

In [None]:
# Seed the RNG first so the randomly initialised parameters are reproducible
torch.manual_seed(69)
model = LinearRegressionModel()
model.state_dict()

# Checking the device the model's parameters currently live on
# (.device shows the placement itself instead of the parameter values)
next(model.parameters()).device

# Sending the model to the available device (cuda preferred)
model.to(device)

### 2.3. Loss Function and Optimizer

In [None]:
# L1 loss between predictions and labels
loss_function = nn.L1Loss()
# Stochastic gradient descent over this model's parameters
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

### 2.4. Training Loop and Testing Loop

In [None]:
def train_model(tr_model = model, tr_features = X_train, tr_labels = Y_train, va_features = X_val, va_labels = Y_val, epochs = 10, opt = optimizer, loss = loss_function):
  """Train ``tr_model`` with full-batch gradient steps and track the validation loss.

  Args:
    tr_model: module to train. It must expose ``linear_layer.weight`` and
      ``linear_layer.bias`` holding one element each, because they are printed
      in the progress log.
    tr_features, tr_labels: training inputs and targets.
    va_features, va_labels: validation inputs and targets.
    epochs: number of optimisation steps; epochs are numbered from 1.
    opt: optimizer to step. It must have been built over ``tr_model``'s
      parameters (the default one is bound to the notebook's ``model``).
    loss: callable ``loss(predictions, labels)`` returning a scalar tensor.

  Returns:
    Tuple ``(epoch_c, loss_val, test_loss_val)``: the epochs that were logged
    (every 10th one) and the training / validation loss recorded at each.
  """
  # Keep the data on the same device as the model, otherwise the forward pass
  # fails with a device mismatch once the model has been moved to the GPU.
  first_param = next(tr_model.parameters(), None)
  if first_param is not None:
    target_device = first_param.device
    tr_features, tr_labels = tr_features.to(target_device), tr_labels.to(target_device)
    va_features, va_labels = va_features.to(target_device), va_labels.to(target_device)

  # Training metrics
  epoch_c = []
  loss_val = []
  test_loss_val = []

  # Training
  time_start = time.perf_counter()
  for epoch in range(1, epochs + 1):
    # Operate on the model that was passed in, not the notebook-level one
    tr_model.train()
    Y_preds_tr = tr_model(tr_features)
    tr_loss = loss(Y_preds_tr, tr_labels)
    opt.zero_grad()
    tr_loss.backward()
    opt.step()

    # Validation (every 10th epoch)
    if epoch % 10 == 0:
      # Evaluation mode and no autograd tracking: this pass only measures the
      # loss, it must not influence training. train() is restored at the top
      # of the next iteration.
      tr_model.eval()
      with torch.inference_mode():
        Y_preds_va = tr_model(va_features)
        va_loss = loss(Y_preds_va, va_labels)
      epoch_c.append(epoch)
      loss_val.append(tr_loss.item())
      test_loss_val.append(va_loss.item())
      print(f"Epoch: {epoch} | Weight: {tr_model.linear_layer.weight.item()} | Bias: {tr_model.linear_layer.bias.item()}\nLoss: {loss_val[-1]} | Validation Loss: {test_loss_val[-1]}")
  time_end = time.perf_counter()
  print(f"Time Elapsed: {time_end - time_start}")
  return epoch_c, loss_val, test_loss_val
  
  

In [None]:
# Train for 670 epochs; the three returned lists hold the logged epochs and the
# training / validation loss recorded at each of them.
epoch_count, loss_v, test_loss_v = train_model(epochs=670)

In [None]:
# Predict on the held-out test split. The inputs have to sit on the same
# device as the model, which may have been moved to the GPU.
model.eval()
with torch.inference_mode():
  y_preds_te = model(X_test.to(device))
# Matplotlib works on host memory, so bring the predictions back to the CPU.
plot_predictions(predictions=y_preds_te.cpu())
