<a href="https://colab.research.google.com/github/KonradGonrad/PyTorch-deep-learning/blob/main/03_PyTorch_computer_vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Computer Vision

## 0. Computer vision libraries in PyTorch

* [`torchvision`](https://pytorch.org/vision/stable/index.html) - base domain library for PyTorch computer vision
* `torchvision.datasets` - datasets and data loading functions for computer vision
* `torchvision.models` - pretrained computer vision models that you can leverage for your own problems
* `torchvision.transforms` - functions for manipulating your vision data (images) to be suitable for use with an ML model
* `torch.utils.data.Dataset` - base dataset class for PyTorch
* `torch.utils.data.DataLoader` - creates a Python iterable over a dataset


In [None]:
# Import PyTorch
import torch
from torch import nn

# Import torchvision
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

# Import matplotlib
import matplotlib.pyplot as plt

# Import pandas (maybe will be needed)
import pandas as pd

# Check version
print(torch.__version__)
print(torchvision.__version__)

## 1. Getting a dataset

FashionMNIST is a dataset of greyscale images of clothing.

In [None]:
# The training and test splits share every setting except `train`, so both
# are built from the same call; the training split is created first.
# Each split gets its own ToTensor() transform instance.
train_data, test_data = (
    datasets.FashionMNIST(
        root='data',
        train=is_train,
        download=True,
        transform=ToTensor(),
        target_transform=None,
    )
    for is_train in (True, False)
)

In [None]:
print(f"length of training data: {len(train_data)}")
print(f"length of testing data: {len(test_data)}")

In [None]:
data, label = train_data[0]
print(data.shape, label)

In [None]:
target_classes = train_data.classes
target_classes

In [None]:
classes_to_idx = train_data.class_to_idx
classes_to_idx

In [None]:
train_data.targets

### 1.1 Check Input and output shapes of data

In [None]:
# The last dimension of the image tensor is its width (the message used to say "Weight").
print(f"data of shape {data.shape} -> [Color, Height, Width] equals {target_classes[label]}")

### 1.2 Visualizing our data


In [None]:
torch.manual_seed(42) # Fix the random seed so the same images are picked on every run
fig = plt.figure(figsize=(9, 9)) # Create the figure that will hold the subplots
rows, cols = [4, 4] # A 4x4 grid, which gives us 16 images
for i in range(1, rows*cols + 1): # Subplot positions start at 1, hence the 1-based range
  random_idx = torch.randint(0, len(train_data), size=[1]).item() # Random index from 0 up to (not including) len(train_data)
  img, label = train_data[random_idx] # Each sample unpacks into an image and its label
  fig.add_subplot(rows, cols, i) # Add a subplot to the figure at position i
  plt.imshow(img.squeeze(), cmap='gray') # squeeze() drops size-1 dimensions before the image is drawn
  plt.title(target_classes[label]) # Use the class name as the title
  plt.axis(False) # Hide the axes, because they are not useful here

Do you think these items of clothing (images) could be modelled with purely straight (linear) lines? Or do you think we'll need non-linearities?

## 2. Prepare dataloader

Right now, our data is in the form of PyTorch datasets

DataLoader turns our dataset into Python iterable

More specifically, we want to turn our data into batches (or mini-batches).

Why would we do this?

1. It is more computationally efficient: your computing hardware may not be able to hold all 60000 images in memory at once, so we break the data down into 32 images at a time (a batch size of 32).
2. It gives our neural network more chances to update its gradients per epoch

For more, see this resource on [mini-batches](https://www.bilibili.com/video/BV1RE411Z7YW/).

In [None]:
train_data, test_data

In [None]:
from torch.utils.data import DataLoader

# Number of samples per batch
BATCH_SIZE = 32

# Training data is shuffled (shuffle=True) so batches are not drawn in dataset order
train_dataloader = DataLoader(dataset=train_data,
                              batch_size=BATCH_SIZE,
                              shuffle=True)

# Test data keeps its original order (shuffle=False)
test_dataloader = DataLoader(dataset=test_data,
                             batch_size=BATCH_SIZE,
                             shuffle=False)

# Display both dataloader objects as the cell output
train_dataloader, test_dataloader

In [None]:
print(f"train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}...")
print(f"test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}...")

In [None]:
train_features_batch, train_labels_batch = next(iter(train_dataloader))
print(f"train_feature_batch shape: {train_features_batch.shape} and train_labels_batch shape: {train_labels_batch.shape}")

In [None]:
# Pick one random sample from the current batch and show it with its own label.
# The label is bound to `label` so the title and the printout describe this
# sample rather than a value left over from an earlier cell.
random_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
img, label = train_features_batch[random_idx], train_labels_batch[random_idx]
plt.imshow(img.squeeze(), cmap='gray')
plt.title(target_classes[label])
plt.axis(False)
print(f"image shape: {img.shape}")
print(f"Label: {label}, label size: {label.shape}")

## 3.0 Model 0: Build a baseline model

In [None]:
# Create a flatten layer
flatten_layer = nn.Flatten()

# Get a single sample
x = train_features_batch[0]
x

# Flatten the sample
output = flatten_layer(x)
output

In [None]:
from torch import nn
class FashionMNISTModelV0(nn.Module):
  """Baseline classifier: flatten the input, then apply two linear layers.

  There is no activation between the two linear layers.

  Args:
    input_features: Size of each sample once flattened.
    hidden_layers: Number of units produced by the first linear layer.
    output_features: Number of values produced per sample.
  """

  def __init__(self,
               input_features: int,
               hidden_layers: int,
               output_features: int):
    super().__init__()
    # Layers are created in the same order and stored under the same attribute
    # name, so parameter initialisation and state_dict keys are unchanged.
    to_vector = nn.Flatten()
    to_hidden = nn.Linear(in_features=input_features, out_features=hidden_layers)
    to_output = nn.Linear(in_features=hidden_layers, out_features=output_features)
    self.layer_1 = nn.Sequential(to_vector, to_hidden, to_output)

  def forward(self, x):
    """Run `x` through the layer stack and return the result."""
    out = self.layer_1(x)
    return out




In [None]:
model_0 = FashionMNISTModelV0(input_features=784,
                              hidden_layers=10,
                              output_features=len(target_classes))

In [None]:
model_0(x).shape

In [None]:
x_dummy = torch.rand([1, 1, 28, 28])
model_0(x_dummy).shape

In [None]:
model_0.state_dict()

### 3.1 Setup loss function, optimizer and evaluation metrics

* Loss function - since we're working with multi-class data, our loss function will be `nn.CrossEntropyLoss()`
* Optimizer - our optimizer will be `torch.optim.SGD()` (stochastic gradient descent)
* Evaluation metric - since we're working on a classification problem, let's use accuracy as our evaluation metric



In [None]:
from pathlib import Path
import requests

# Download the helper module only when no local copy exists yet.
if Path('helper_functions.py').is_file():
  print('helper_functions.py is already downloaded')
else:
  print("Downloading helper_functions.py")
  # A timeout keeps the cell from hanging forever on a stalled connection.
  request = requests.get('https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py',
                         timeout=30)
  # Fail loudly on an HTTP error instead of saving an error page as a .py file
  # (which the is_file() check above would then treat as a valid download).
  request.raise_for_status()
  with open('helper_functions.py', 'wb') as f:
    f.write(request.content)

In [None]:
# Import accuracy_fn
from helper_functions import accuracy_fn

# Loss fn and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr=0.1)

### 3.2 Creating a function to time our experiments

Machine learning is very experimental.

Two of the main things you'll often want to track are:
1. Model's performance(loss and accuracy values etc)
2. How fast it runs

In [None]:
from timeit import default_timer as timer

def time_start_end(start_time: float,
                   end_time: float,
                   device: torch.device = None):
  """Print and return the time elapsed between two timestamps.

  Args:
    start_time: Timestamp taken before the timed code ran.
    end_time: Timestamp taken after the timed code ran.
    device: Device label; it is only interpolated into the printed message.

  Returns:
    The difference `end_time - start_time`.
  """
  elapsed = end_time - start_time
  message = f"Took about {elapsed:.3f} seconds to execute the code on {device}"
  print(message)
  return elapsed

In [None]:
start = timer()
# break
end = timer()

time_start_end(start_time=start,
               end_time=end,
               device='cpu')

### 3.3 Creating a training loop and training a model on batches of data

1. Loop through epochs.
2. Loop through training batches, perform training steps, calculate the train loss *per batch*
3. Loop through testing batches, perform testing steps, calculate the test loss *per batch*.
4. Print out what's happening.
5. Time it all

In [None]:
# Import tqdm for progress bar
from tqdm.auto import tqdm

# Set the seed and start the timer
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
train_loop_start_time = timer()

# Set the number of epochs (we'll keep this small for faster training time)
epochs = 3

# Create training and test loop
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n-----")

  ### Training
  train_loss, train_acc = 0, 0
  # Enter training mode once per epoch: the testing phase below switches the
  # model to eval mode, so training mode has to be restored here.
  model_0.train()
  # Loop through the training batches
  for batch, (X, y) in enumerate(train_dataloader):
    # 1. Forward pass
    y_pred = model_0(X)

    # 2. Calculate loss and accuracy
    loss = loss_fn(y_pred, y)
    # Accumulate a plain Python float: adding the loss tensors themselves
    # would keep each batch's autograd history attached to the running total.
    train_loss += loss.item()

    acc = accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))
    train_acc += acc

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    # Print out what's happening
    if batch % 400 == 0:
      print(f"Progress: {batch * len(X)}/{len(train_dataloader.dataset)} samples")

  # Divide the accumulated totals by the number of batches to get per-batch averages
  train_loss /= len(train_dataloader)
  train_acc /= len(train_dataloader)

  ### Testing
  test_loss, test_acc = 0, 0
  model_0.eval()
  with torch.inference_mode():

    for X, y in test_dataloader:
      # 1. Forward pass
      test_pred = model_0(X)

      # 2. Calculate the loss
      test_loss += loss_fn(test_pred, y)

      # 3. Calculate accuracy
      test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))

    # Calculate the test loss average per batch
    test_loss /= len(test_dataloader)

    # Calculate the test acc average per batch
    test_acc /= len(test_dataloader)

  # Print out what's happening
  print(f"Train_loss: {train_loss:.5f}, Train_acc: {train_acc:.5f} | Test_loss: {test_loss:.5f}, Test_acc: {test_acc:.5f}")

# Calculate training time
train_loop_end_time = timer()
total_train_time_model_0 = time_start_end(start_time=train_loop_start_time,
                                          end_time=train_loop_end_time,
                                          device=str(next(model_0.parameters()).device))


In [None]:
next(model_0.parameters()).device

### 4. Make predictions and get model 0 results

In [None]:
torch.manual_seed(42)

def eval_mode(data: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              acc_fn,
              device: torch.device
              ):
  """Evaluate `model` over every batch of `data` and summarise the results.

  Each batch is moved to `device` before the forward pass. The model itself
  is not moved by this function.

  Args:
    data: Iterable of (X, y) batches that also supports `len()`.
    model: Model to evaluate; it is switched to eval mode.
    loss_fn: Called as `loss_fn(model_output, y)`.
    acc_fn: Called as `acc_fn(y, predicted_labels)`.
    device: Device the batches are moved to.

  Returns:
    A dict with the model's class name ("model"), the per-batch mean loss
    ("loss") and the per-batch mean accuracy ("acc"), both rounded to two
    decimals.
  """
  model.eval()
  total_loss = 0
  total_acc = 0
  with torch.inference_mode():
    for inputs, targets in tqdm(data):
      inputs = inputs.to(device)
      targets = targets.to(device)
      logits = model(inputs)

      total_loss += loss_fn(logits, targets)
      total_acc += acc_fn(targets, logits.argmax(dim=1))
    # Average the accumulated totals over the number of batches.
    mean_loss = total_loss / len(data)
    mean_acc = total_acc / len(data)
  summary = {"model": model.__class__.__name__,
             "loss": round(mean_loss.item(), 2),
             "acc": round(mean_acc, 2)}
  return summary

In [None]:
model_0_results = eval_mode(data=test_dataloader,
          model = model_0,
          loss_fn=loss_fn,
          acc_fn=accuracy_fn,
          device='cpu')

## 5. Setup device agnostic-code (for using a gpu if there is one)


In [None]:
# Device agnostic code
DEVICE_DESTINATION = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE_DESTINATION

In [None]:
if torch.cuda.is_available():
  !nvidia-smi
else:
  print('no gpu found')

## 6. Model 1: Building a better model with non-linearity

We learned about the power of non-linearity

In [None]:
class FashionMNISTModelV1(nn.Module):
  """Classifier with a ReLU non-linearity between two linear layers.

  The final linear layer's output is returned as raw logits. No activation is
  applied after it: a trailing ReLU would clamp every negative logit to zero
  before the loss is computed, and `nn.CrossEntropyLoss` is meant to receive
  unnormalized logits.

  Args:
    input_layers: Size of each sample once flattened.
    hidden_layers: Number of units produced by the first linear layer.
    output_layers: Number of logits produced per sample (one per class).
  """

  def __init__(self,
               input_layers: int,
               hidden_layers: int,
               output_layers: int):
    super().__init__()
    # The linear layers keep their positions (1 and 3) in the Sequential, so
    # state_dict keys are the same as before the output ReLU was removed.
    self.layer_1 = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=input_layers, out_features=hidden_layers),
        nn.ReLU(),
        nn.Linear(in_features=hidden_layers, out_features=output_layers),
    )

  def forward(self, x):
    """Return the raw logits for `x`."""
    return self.layer_1(x)

In [None]:
model_1 = FashionMNISTModelV1(input_layers=784,
                              hidden_layers=10,
                              output_layers=len(target_classes)).to(DEVICE_DESTINATION)
model_1

In [None]:
next(model_1.parameters()).device

### 6.1 Setup loss and evaluation metrics

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(),
                            lr=0.1)

### 6.2 Functionizing training and evaluation/testing loops
Let's create a function for:
* training loop - train_step()
* testing loop - test_step

In [None]:
def train_step(model: torch.nn.Module,
               data: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               acc_fn,
               optimizer: torch.optim.Optimizer,
               device: torch.device = DEVICE_DESTINATION):
  """Train `model` for one pass over `data` and print the averaged metrics.

  Args:
    model: Model to train. It is switched to training mode but not moved to
      `device` by this function.
    data: Iterable of (X, y) batches that also supports `len()`.
    loss_fn: Called as `loss_fn(logits, y)`; the result is backpropagated.
    acc_fn: Called as `acc_fn(y, predicted_labels)`.
    optimizer: Optimizer that is zeroed and stepped once per batch.
    device: Device each batch is moved to before the forward pass.

  Returns:
    A tuple `(train_loss, train_acc)` with the per-batch mean loss and
    per-batch mean accuracy.
  """
  train_loss, train_acc = 0, 0
  model.train()
  for batch, (X, y) in enumerate(data):
    # 0. Data on target device
    X, y = X.to(device), y.to(device)

    # 1. Forward pass
    y_pred = model(X) # Logits

    # 2. Calculate the loss and accuracy
    loss = loss_fn(y_pred, y)
    acc = acc_fn(y, y_pred.argmax(dim=1))

    # Accumulate a plain Python float: adding the loss tensors themselves
    # would keep each batch's autograd history attached to the running total.
    train_loss += loss.item()
    train_acc += acc

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

  # Average the accumulated totals over the number of batches
  train_loss /= len(data)
  train_acc /= len(data)

  print(f"Train loss: {train_loss:.4f} | Train acc: {train_acc:.4f}%\n")
  return train_loss, train_acc

In [None]:
def test_step(model: torch.nn.Module,
              data: torch.utils.data.DataLoader,
              acc_fn,
              loss_fn: torch.nn.Module,
              device: torch.device = DEVICE_DESTINATION):
  """Evaluate `model` for one pass over `data` and print the averaged metrics.

  Args:
    model: Model to evaluate. It is switched to eval mode but not moved to
      `device` by this function.
    data: Iterable of (X, y) batches that also supports `len()`.
    acc_fn: Called as `acc_fn(y, predicted_labels)`, the same argument order
      that `train_step` and `eval_mode` use.
    loss_fn: Called as `loss_fn(model_output, y)`.
    device: Device each batch is moved to before the forward pass.

  Returns:
    A tuple `(loss, acc)` with the per-batch mean loss (as a float) and the
    per-batch mean accuracy.
  """
  model.eval()
  loss, acc = 0, 0
  with torch.inference_mode():
    for X, y in data:
      # 0. Data on target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      y_pred = model(X)

      # 2. Calculate the loss and accuracy (true labels first, then predictions)
      loss += loss_fn(y_pred, y)
      acc += acc_fn(y, y_pred.argmax(dim=1))

    # Average the accumulated totals over the number of batches
    loss /= len(data)
    acc /= len(data)
    print(f"Test loss: {loss:.4f} | Test acc: {acc:.4f}%\n")
  return float(loss), acc

In [None]:
# Seed both the CPU and CUDA random number generators
torch.manual_seed(42)
torch.cuda.manual_seed(42)

from timeit import default_timer as timer
# NOTE: despite the "_on_gpu" names, this times the run on whatever device
# DEVICE_DESTINATION was set to, which is 'cpu' when no GPU is available.
train_time_start_on_gpu = timer()

# Train and test model_1 for a small number of epochs
epochs = 3
for epoch in tqdm(range(epochs)):
  print(f'Epoch {epoch}\n -----')
  # One pass over the training data
  train_step(model=model_1,
             data=train_dataloader,
             loss_fn=loss_fn,
             acc_fn=accuracy_fn,
             optimizer=optimizer,
             device=DEVICE_DESTINATION)

  # One pass over the test data
  test_step(model=model_1,
            data=test_dataloader,
            acc_fn=accuracy_fn,
            loss_fn=loss_fn,
            device=DEVICE_DESTINATION)

train_time_end_on_gpu = timer()
# Report the elapsed time together with the device model_1's parameters live on
total_train_time_model_1 = time_start_end(start_time = train_time_start_on_gpu,
               end_time=train_time_end_on_gpu,
                                          device=next(model_1.parameters()).device)

In [None]:
print(f"GPU time: {total_train_time_model_1} seconds")

In [None]:
# model_0 was never moved off the CPU, so its training time is a CPU time.
print(f"CPU time: {total_train_time_model_0} seconds")

In [None]:
# Evaluate on the device model_1 was moved to; a hard-coded 'cuda' fails on
# machines without a GPU.
model_1_results = eval_mode(data=test_dataloader,
                            model=model_1,
                            loss_fn=loss_fn,
                            acc_fn=accuracy_fn,
                            device=DEVICE_DESTINATION)

In [None]:
model_0_results

In [None]:
model_1_results

## 7. Model 2: Building a Convolutional Neural Network (CNN)

CNNs are also known as ConvNets.
CNNs are known for their capabilities to find patterns in visual data.
To find out what's happening inside a CNN, see this [website](https://poloclub.github.io/cnn-explainer/).

In [None]:
# Create a convolutional neural network
# Create a convolutional neural network
class FashionMNISTModelV2(nn.Module):
  """
  Model architecture that replicates the TinyVGG
  model from CNN explainer website.

  Two convolutional blocks (Conv -> ReLU -> Conv -> ReLU -> MaxPool) are
  followed by a flatten + linear classifier.

  Args:
    input_shape: Number of channels in the input images.
    hidden_layers: Number of channels produced by every convolution.
    output_shape: Number of values produced per sample (one per class).
    image_size: Height and width of the (square) input images. Defaults to
      28, which gives the original 7x7 feature map before the classifier.

  Raises:
    ValueError: If `image_size` is smaller than 4, because two rounds of 2x2
      pooling would leave no spatial positions.
  """
  def __init__(self, input_shape: int, hidden_layers: int, output_shape: int,
               image_size: int = 28):
    super().__init__()
    if image_size < 4:
      raise ValueError(f"image_size must be at least 4, got {image_size}")
    self.layer_1 = self._conv_block(in_channels=input_shape, # Number of color channels
                                    out_channels=hidden_layers)
    self.layer_2 = self._conv_block(in_channels=hidden_layers,
                                    out_channels=hidden_layers)
    # The 3x3 / stride 1 / padding 1 convolutions keep height and width, and
    # each MaxPool2d(kernel_size=2) halves them (rounding down), so the
    # classifier sees a (image_size // 2 // 2)-sided feature map.
    pooled_size = image_size // 2 // 2
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_layers * pooled_size * pooled_size,
                  out_features=output_shape)
    )

  @staticmethod
  def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
    """Build one Conv -> ReLU -> Conv -> ReLU -> MaxPool feature block."""
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=out_channels,
                  out_channels=out_channels,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

  def forward(self, x):
    """Run `x` through both feature blocks and the classifier."""
    x = self.layer_1(x)
    x = self.layer_2(x)
    x = self.classifier(x)
    return x

In [None]:
model_2 = FashionMNISTModelV2(input_shape = 1, # number of color channels
                              hidden_layers=10,
                              output_shape=len(target_classes))

In [None]:
img.shape # FashionMNIST image shape
img_dummy = torch.randn(size=(1,28,28))
img_dummy.shape

In [None]:
model_2(img_dummy.unsqueeze(0))

### 7.1 Stepping through `nn.Conv2d()`

Documentation: [Conv2d()](https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html)

In [None]:
torch.manual_seed(42)

# Create a batch of images
images = torch.randn(size = [1, 3, 64, 64])
image = images[0]
print(f"images shape: {images.shape}")
print(f"image shape: {image.shape}")
print(f"image: \n{image}")

In [None]:
# Create a single conv2d layer
# (3 input channels, 10 output channels, 3x3 kernel, stride 1, no padding)
conv_layer = nn.Conv2d(in_channels=3,
                       out_channels=10,
                       kernel_size=3,
                       stride=1,
                       padding=0)

# Pass the data through the convolutional layer.
# unsqueeze(dim=0) adds a leading dimension of size 1 (the batch dimension).
conv_output = conv_layer(image.unsqueeze(dim=0))
# With a 3x3 kernel, stride 1 and no padding, each spatial side shrinks by 2.
conv_output.shape

### 7.2 Stepping through `nn.MaxPool2d()`

In [None]:
image.shape

In [None]:
# Print out the original image shape, without and with an added leading dimension
print(f"test image original shape: {image.shape}")
print(f"Test image with unsqueezed dimension: {image.unsqueeze(0).shape}")

# Create a sample nn.MaxPool2d layer (2x2 pooling window)
max_pool_layer = nn.MaxPool2d(kernel_size=2)

# Pass data through just the conv_layer
test_image_through_conv = conv_layer(image.unsqueeze(dim=0))
print(f"Shape after going through conv_layer(): {test_image_through_conv.shape}")

# Pass the conv_layer output through the max pool layer
test_image_through_conv_and_max_pool = max_pool_layer(test_image_through_conv)
print(f"Shape after going through conv_layer() and max_pool_layer(): {test_image_through_conv_and_max_pool.shape}")

In [None]:
torch.manual_seed(42)
random_tensor = torch.randn(size=(1,1,2,2))
print(f"Random tensor: \n{random_tensor}")
print(f"Random tensor shape: {random_tensor.shape}")

max_pool_tensor = max_pool_layer(random_tensor)
print(f"max pool tensor: \n{max_pool_tensor}")
print(f"max pool tensor shape: {max_pool_tensor.shape}")

### 7.3 Setup a loss function and optimizer for `model_2`

In [None]:
# Setup loss function/eval metrics/optimzier
from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_2.parameters(),
                            lr=0.1)

### 7.4 Training and testing `model_2` using our training and test functions

In [None]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Move the model to the target device once, up front, instead of re-issuing
# the same .to() call twice per epoch inside the loop. Module.to() modifies
# the module in place, so no reassignment is needed.
model_2.to(DEVICE_DESTINATION)

# Measure time
from timeit import default_timer as timer
train_time_start_model_2 = timer()

# Train and test model
epochs = 3
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n-----")
  train_step(model=model_2,
             data=train_dataloader,
             loss_fn=loss_fn,
             acc_fn=accuracy_fn,
             optimizer=optimizer,
             device=DEVICE_DESTINATION)
  test_step(model=model_2,
            data=test_dataloader,
            loss_fn=loss_fn,
            acc_fn=accuracy_fn,
            device=DEVICE_DESTINATION)

train_time_end_model_2 = timer()
total_train_time_model_2 = time_start_end(start_time=train_time_start_model_2,
                                          end_time=train_time_end_model_2,
                                          device=DEVICE_DESTINATION)

In [None]:
# Get model_2 results

model_2_results = eval_mode(
    data=test_dataloader,
    model=model_2.to(DEVICE_DESTINATION),
    loss_fn=loss_fn,
    acc_fn=accuracy_fn,
    device=DEVICE_DESTINATION
)
model_2_results