### Import the needed Libraries

#### Documentation: https://pytorch.org/docs/stable/index.html
#### Youtube Tutorial: https://www.learnpytorch.io/

In [None]:
# pytorch
import torch
from torch import nn
from torch.utils.data import DataLoader

# torchvision
import torchvision
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchvision import datasets

# torchtext (module name was misspelled as "tochtext", which fails on a fresh kernel)
import torchtext

# torchaudio
import torchaudio

# standard data handling
import pandas as pd
import numpy as np

# plotting
import matplotlib.pyplot as plt
import seaborn as sns

# machine learning
import sklearn
from sklearn.model_selection import train_test_split

# system
from pathlib import Path
import requests

# timing and printing
from tqdm.auto import tqdm
from timeit import default_timer as timer

### Check the available pytorch and Cuda (GPU) Version

In [None]:
# report the installed pytorch and torchvision versions, one per line
# (the version string may carry a CUDA build suffix such as "+cu118")
for library in (torch, torchvision):
  print(library.__version__)

2.0.1+cu118
0.15.2+cu118


### Check the available device

1. CPU (Default)
2. CUDA (used when GPU acceleration is accessible)

In [None]:
# device agnostic setup: use the GPU ("cuda") when torch reports one, otherwise fall back to the cpu
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

print(f"Available device is: {device}")

Available device is: cuda


### Import Helper files

In [None]:
# helper modules to fetch from the repository
filenames = ["pytorch_helper_functions.py"]

# raw-content base url of the repository that hosts the helper files
HELPER_BASE_URL = "https://raw.githubusercontent.com/sl2000stat/PytorchIntroduction/master"

for filename in filenames:
  # download helper functions from repo (files that already exist locally are kept as they are)
  if Path(filename).is_file():
    print(f"{filename} already exists. Skipping download")
  else:
    print(f"Downlading {filename}.")
    # build the url from the filename so that every entry in `filenames` fetches its own file
    request = requests.get(f"{HELPER_BASE_URL}/{filename}", timeout=30)
    # fail loudly on an HTTP error instead of saving the error page as a python module
    request.raise_for_status()
    with open(filename, "wb") as f:
      f.write(request.content)

pytorch_helper_functions.py already exists. Skipping download


### General Pytorch Workflow

![](https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/images/01_a_pytorch_workflow.png)

1. **Getting data ready:** Data can be almost anything; in this notebook we use the FashionMNIST image dataset.
2. **Building a model:**	Here we'll create a model to learn patterns in the data, we'll also choose a loss function, optimizer and build a training loop.
3. **Fitting the model to data (training):** We've got data and a model, now let's let the model (try to) find patterns in the (training) data.
4. **Making predictions and evaluating a model (inference):**	Our model's found patterns in the data, let's compare its findings to the actual (testing) data.
5. **Tune the model:** Fine-tune the hyperparameters and select the best model.
6. **Saving and loading a model:**	You may want to use your model elsewhere, or come back to it later, here we'll cover that.


### 1. Get the Data

### We take the FashionMNIST Dataset for our example

In [None]:
# get the data: the train split (train=True) is built first, then the test split (train=False)
# both splits are stored under "data", downloaded if missing and converted with ToTensor()
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
        target_transform=None,
    )
    for is_train_split in (True, False)
)

### 1.3 Check the data shapes

In [None]:
# check the dimensions of one sample: the transformed image tensor and its class label
image, label = training_data[0]
print(f"Input shape: {image.shape} | Label: {label} ({training_data.classes[label]})")
print(f"Number of classes: {len(training_data.classes)}")

### 1.4 Visualize the data

### 1.5 Prepare the DataLoader

The DataLoader turns our data into a python iterable and allows us to divide the data into mini batches. (https://pytorch.org/tutorials/beginner/basics/data_tutorial.html)

Why? It is computationally more efficient, and mini-batches (of size k) give the network more chances to update its weights per epoch.

In [None]:
# batch size hyperparameter
BATCH_SIZE = 32

# construct mini batches for the train and test data
# - training batches are reshuffled on every pass over the data
# - evaluation batches keep a fixed order so the loss is computed over identical batches on every pass
#   (the last batch is smaller, so a reshuffled split would change the average of the per-batch losses)
train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# check the dimensions
print(f"Length of the train DataLoader: {len(train_dataloader)} batches of {BATCH_SIZE}. (Orignially {len(training_data)})")
print(f"Length of the test DataLoader: {len(test_dataloader)} batches of {BATCH_SIZE}.  (Orignially {len(test_data)})")

Length of the train DataLoader: 1875 batches of 32. (Orignially 60000)
Length of the test DataLoader: 313 batches of 32.  (Orignially 10000)


### 2. Build and train your model

1. Build your own model or use an existing architecture
2. Pick a loss function and optimizer
3. Train the model

### 2.1 Build your own model or use an existing architecture

1. Build the model
2. Create a model instance

In [None]:
# build the model (start with a baseline model and increase the complexity or use an existing model architecture)
class PytorchModel(nn.Module):
  """
  Small convolutional classifier: two convolutional blocks followed by a linear output layer.

  Since it inherits from nn.Module we have to override the forward() method.
  """

  def __init__(self, input_shape:int,hidden_units:int,output_shape:int,image_size:int=28):
    """
    Constructor Initialization. Calls the super constructor and initializes our model.

    :param input_shape: int: number of channels of the input images
    :param hidden_units: int: number of channels produced by every convolution
    :param output_shape: int: number of output values per sample (one per class)
    :param image_size: int: height and width of the (square) input images, defaults to 28
    """

    # call the super constructor
    super().__init__()

    # create the model in several blocks

    # block 1: two 3x3 convolutions (padding keeps height/width) and a 2x2 max pool (halves height/width)
    self.block_1 = nn.Sequential(
        nn.Conv2d(in_channels=input_shape,out_channels=hidden_units, kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units,out_channels=hidden_units, kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    )

    # block 2: same layout as block 1, operating on the hidden channels
    self.block_2 = nn.Sequential(
        nn.Conv2d(in_channels=hidden_units,out_channels=hidden_units, kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units,out_channels=hidden_units, kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    )

    # each of the two max pools halves height and width (rounding down): 28 -> 14 -> 7
    pooled_size = image_size // 2 // 2

    # the flattened feature count depends on hidden_units (10 * 7 * 7 = 490 for the default setup),
    # so it is derived here instead of being hard coded
    self.fully_connceted_layer = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_units * pooled_size * pooled_size, out_features=output_shape),
    )

  def forward(self,X):
    """
    This function is mandatory in each pytorch model and calculates the forward pass.

    :param X: tensor: The X data (batch of images)
    :return y: tensor: The y data (prediction), one row of output_shape raw scores per sample
    """

    # perform the calculations
    X = self.block_1(X)
    # print(f"Dimension of model output: {X.shape}")
    X = self.block_2(X)
    # print(f"Dimension of model output: {X.shape}")
    X = self.fully_connceted_layer(X)
    # print(f"Dimension of model output: {X.shape}")

    return X


In [None]:
# instantiate the network and move it to the selected device in one step
model = PytorchModel(input_shape=1, hidden_units=10, output_shape=10).to(device)

# show the layer structure of the model
print(model)

PytorchModel(
  (block_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fully_connceted_layer): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=490, out_features=10, bias=True)
  )
)


In [None]:
# create a dummy tensor with the same dimensions as your data, add a batch dimension and send it to your device
dummy_tensor = torch.randn(size=(1,28,28)).unsqueeze(0).to(device)

# pass the data through your model
X_dummy = model(dummy_tensor)

# display the raw model output (one score per class) as the cell result
X_dummy

tensor([[ -57.7645, -150.5729, -114.3710, -110.9344, -146.6168,   -8.3703,
          -76.7071, -246.3527,  -50.3308, -107.6575]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


### 2.2 Pick a loss function & optimizer

1. Available loss functions: https://pytorch.org/docs/stable/nn.html#loss-functions
2. Available optimizer: https://pytorch.org/docs/stable/optim.html#algorithms



In [None]:
# learning rate handed to the optimizer
LEARNING_RATE = 0.01

# loss function: cross entropy between the model outputs and the class labels
loss_function = nn.CrossEntropyLoss()

# optimizer: Adam working on all parameters of the model
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

### 2.3 Creating a training loop and train a model on batches of data.

1. Loop through the epochs
2. Loop through the training batches, perform training steps and calculate the train loss.
3. Loop through testing batches, perform testing steps and calculate the test loss.

In [None]:
# timing: remember the start timestamp; the train time is later computed relative to this value,
# so run this cell directly before the training cell
train_time_start = timer()

In [None]:
# number of epochs
EPOCHS = 3

# create a training and test loop
for epoch in tqdm(range(EPOCHS)):

  # printing
  print(f"Epoch: {epoch}\n-------")

  # train loss counter
  batch_train_loss = 0

  # model to train mode
  model.train()

  # training: loop through the training batches
  for batch, (X_train,y_train) in enumerate(train_dataloader):

    # put data on device
    X_train,y_train = X_train.to(device), y_train.to(device)

    # calculate the forward pass
    y_pred_train = model(X_train)

    # calculate the training loss and add (accumulate) the loss to the counter
    # detach() adds only the value: accumulating the graph-attached loss would keep the
    # autograd history of every batch alive until the end of the epoch
    training_loss = loss_function(y_pred_train,y_train)
    batch_train_loss += training_loss.detach()

    # optimizer zero grad
    optimizer.zero_grad()

    # calculate the loss backwards (backpropagation)
    training_loss.backward()

    # optimizer step
    optimizer.step()

    # per batch printing every 1000
    if batch % 1000 == 0:
      print(f"Looked at {batch * len(X_train)} / {len(train_dataloader.dataset)} samples.")

  # divide total train loss by length of train dataloader: Average training loss per batch
  batch_train_loss /= len(train_dataloader)

  # validation loss counter
  batch_val_loss = 0

  # model to validation mode
  model.eval()

  # inference mode disables gradient tracking
  with torch.inference_mode():

    # validation: loop through the validation batches (the batch index is not needed here)
    for X_val,y_val in test_dataloader:

      # put data on device
      X_val,y_val = X_val.to(device), y_val.to(device)

      # calculate the forward pass
      y_pred_val = model(X_val)

      # calculate the validation loss and add (accumulate) the loss to the counter
      val_loss = loss_function(y_pred_val,y_val)
      batch_val_loss += val_loss

    # divide total validation loss by length of val dataloader: Average validation loss per batch
    batch_val_loss /= len(test_dataloader)

  # print
  print(f"Train Loss: {batch_train_loss:.5f} | Validation Loss:{batch_val_loss:.5f}\n")

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
-------
Looked at 0 / 60000 samples.
Looked at 32000 / 60000 samples.
Train Loss: 0.45611 | Validation Loss:0.38335

Epoch: 1
-------
Looked at 0 / 60000 samples.
Looked at 32000 / 60000 samples.
Train Loss: 0.37083 | Validation Loss:0.39210

Epoch: 2
-------
Looked at 0 / 60000 samples.
Looked at 32000 / 60000 samples.
Train Loss: 0.35588 | Validation Loss:0.39481



In [None]:
# the helper module is fetched by the download cell, so it is imported here rather than at the top
from pytorch_helper_functions import print_train_time

# stop the clock
train_time_end = timer()

# name of the device the model parameters live on (taken from the first parameter)
model_device = str(next(model.parameters()).device)

# report the time between the start and the end timestamp
total_train_time = print_train_time(train_time_start, train_time_end, device=model_device)


Train time on cuda:0: 42.149 seconds
