<a href="https://colab.research.google.com/github/Kasrakko/PyTorch-Deep-Learning/blob/main/CNN_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Important imports
import torch 
from torch import nn
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.datasets import make_circles
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
torch.__version__

'1.13.1+cu116'

In [207]:
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
from timeit import default_timer as timer
from tqdm.auto import tqdm
torchvision.__version__

'0.14.1+cu116'

## Defien Device 

In [4]:
device = torch.device("cuda" if torch.cuda.is_available else "cpu")
device

device(type='cuda')

## Geting Dataset

In [5]:
# Setup training data
train_data = datasets.FashionMNIST(
    root="data", # where to download data to?
    train=True, # get training data
    download=True, # download data if it doesn't exist on disk
    transform=ToTensor(), # images come as PIL format, we want to turn into Torch tensors
    target_transform=None # you can transform labels as well
)

# Setup testing data
test_data = datasets.FashionMNIST(
    root="data",
    train=False, # get test data
    download=True,
    transform=ToTensor()
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



## Prepare a DataLoader

In [6]:
from torch.utils.data import DataLoader

# Setup the batch size hyperparameter
BATCH_SIZE = 32

# Turn datasets into iterables (batches)
train_dataloader = DataLoader(train_data, # dataset to turn into iterable
    batch_size=BATCH_SIZE, # how many samples per batch? 
    shuffle=True # shuffle data every epoch?
)

test_dataloader = DataLoader(test_data,
    batch_size=BATCH_SIZE,
    shuffle=False # don't necessarily have to shuffle the testing data
)

# Let's check out what we've created
print(f"Dataloaders: {train_dataloader, test_dataloader}") 
print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")
     

Dataloaders: (<torch.utils.data.dataloader.DataLoader object at 0x7f1261ad0dc0>, <torch.utils.data.dataloader.DataLoader object at 0x7f1261ad05b0>)
Length of train dataloader: 1875 batches of 32
Length of test dataloader: 313 batches of 32


In [7]:
# Checkout wats inside the training dataloader 
train_features_batch, train_label_batch = next(iter(train_dataloader))
train_features_batch.shape, train_label_batch.shape

(torch.Size([32, 1, 28, 28]), torch.Size([32]))

## Creating convolutional neural network model

In [215]:
# Create Model
class FashionMNISTModelV2(nn.Module):
  """
  Model architecture that replicates TinyVGG
  Model from CNN explainer website
  """
  def __init__(self, 
               input_shape: int,
               hidden_units: int,
               output_shape: int):
    super().__init__()
    self.conv_block_1 = nn.Sequential(
        # Create a con layer
        nn.Conv2d(in_channels = input_shape, 
                  out_channels = hidden_units,
                  kernel_size = 3,
                  stride = 1,
                  padding = 1),
        nn.ReLU(),
        nn.Conv2d(in_channels = hidden_units,
                  out_channels = hidden_units,
                  kernel_size = 3,
                  stride = 1,
                  padding = 1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 2)
    )
    self.conv_block_2 = nn.Sequential(
        nn.Conv2d(in_channels = hidden_units,
                  out_channels = hidden_units,
                  kernel_size = 3,
                  stride = 1,
                  padding = 1),
        nn.ReLU(),
        nn.Conv2d(in_channels = hidden_units,
                  out_channels = hidden_units,
                  kernel_size = 3,
                  stride = 1,
                  padding = 1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 2)
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features = hidden_units*7*7, ## The trich part is here, before the 7*7 it was only *0
                  out_features = output_shape)
    )

  def forward(self, x):
    x = self.conv_block_1(x)
    #print(f"Output of conv_block_1: {x.shape}")
    x = self.conv_block_2(x)
    #print(f"Output of conv_block_2: {x.shape}")
    x = self.classifier(x)
    #print(f"Output of classifier: {x.shape}")
    return x

### Create an instance

In [216]:
# what are our model classes for output
class_names = train_data.classes
class_names

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [217]:
torch.manual_seed(42)

model_2 = FashionMNISTModelV2(input_shape = 1,
                              hidden_units = 10,
                              output_shape = len(class_names)).to(device)

In [218]:
# showing state dictionary
model_2.state_dict

<bound method Module.state_dict of FashionMNISTModelV2(
  (conv_block_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=490, out_features=10, bias=True)
  )
)>

## Stepping through the model

### Stepping through `nn.Conv2d()`



#### Create some Dummy data

In [219]:
torch.manual_seed(42)
# Create a batch of images 
images = torch.randn(size=(32, 3, 64, 64))
test_image = images[0]

print(f"Image batch shape: {images.shape}")
print(f"Single image shape: {test_image.shape}")
#print(f"Test image:\n {test_image}")

Image batch shape: torch.Size([32, 3, 64, 64])
Single image shape: torch.Size([3, 64, 64])


In [220]:
# Create a single conv2d laayer
conv_layer = nn.Conv2d(in_channels=3, 
                       out_channels=10,
                       kernel_size=3,
                       stride=1,
                       padding=0)

# Pass the data through the convolutional layer 
conv_output = conv_layer(test_image)
# conv_output,
conv_output.shape

torch.Size([10, 62, 62])

In [221]:
# Create a single conv2d laayer
conv_layer_2 = nn.Conv2d(in_channels=3, 
                       out_channels=10,
                       kernel_size=5,
                       stride=2,
                       padding=0)

# Pass the data through the convolutional layer 
conv_output_2 = conv_layer_2(test_image)
# conv_output_2, 
conv_output_2.shape

torch.Size([10, 30, 30])

### Stepping through `nn.MaxPool2d()`

In [222]:
# Create a sample maxpool layer
maxPool_layer = nn.MaxPool2d(kernel_size=2)

# Pass data through conv
test_image_through_conv = conv_layer(test_image) 

# Pass it through the maxpool
test_image_through_conv_and_maxPool = maxPool_layer(test_image_through_conv)

test_image_through_conv.shape, test_image_through_conv_and_maxPool.shape

(torch.Size([10, 62, 62]), torch.Size([10, 31, 31]))

In [223]:
# Create a sample maxpool layer
maxPool_layer_2 = nn.MaxPool2d(kernel_size=4)

# Pass data through conv
test_image_through_conv = conv_layer(test_image) 

# Pass it through the maxpool
test_image_through_conv_and_maxPool = maxPool_layer_2(test_image_through_conv)

test_image_through_conv.shape, test_image_through_conv_and_maxPool.shape

(torch.Size([10, 62, 62]), torch.Size([10, 15, 15]))

In [224]:
# Create a sample maxpool layer
maxPool_layer_2 = nn.MaxPool2d(kernel_size=4)

# Pass data through conv
test_image_through_conv = conv_layer_2(test_image) 

# Pass it through the maxpool
test_image_through_conv_and_maxPool = maxPool_layer_2(test_image_through_conv)

test_image_through_conv.shape, test_image_through_conv_and_maxPool.shape

(torch.Size([10, 30, 30]), torch.Size([10, 7, 7]))

In [225]:
# Lets back to the modeltorch.manual_seed(42)

model_2 = FashionMNISTModelV2(input_shape = 1,
                              hidden_units = 10,
                              output_shape = len(class_names)).to(device)

## Tricky part

***The Mystery of our model that we lefted there***

 We are going to fix this error:

 /usr/local/lib/python3.8/dist-packages/torch/nn/init.py:405: UserWarning: Initializing zero-element tensors is a no-op
  warnings.warn("Initializing zero-element tensors is a no-op")


In [226]:
model_2.to(device)

FashionMNISTModelV2(
  (conv_block_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=490, out_features=10, bias=True)
  )
)

In [227]:
# See first training sample
image, label = train_data[0]
image.shape, label  

(torch.Size([1, 28, 28]), 9)

In [228]:
# and a random image
rand_image_tensor = torch.randn(size=(1, 28, 28))
rand_image_tensor.shape

torch.Size([1, 28, 28])

In [229]:
model_2(image.unsqueeze(0).to(device))

tensor([[ 0.0543,  0.0287,  0.0495,  0.0641, -0.0159, -0.0149, -0.0198,  0.0537,
          0.0062,  0.0311]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [230]:
# Test the image in model_2
model_2(rand_image_tensor.unsqueeze(0).to(device))

tensor([[ 0.0679,  0.0144,  0.0486,  0.0745, -0.0155,  0.0074, -0.0231,  0.0546,
          0.0044,  0.0307]], device='cuda:0', grad_fn=<AddmmBackward0>)

***As we see we have an Error so we are going to fix it

For this: 

we have to set atrich after the `nn.Flatten()` is called by our model_2.
Becuase the flatten make a multiplication we have to multiply **in_feature** of the `nn.Linear()` of the **classifier** function at "7*7" which is the **out_feature** of **conv_block_2** function in model_2.

## Setup a loss function and optimizer and accuracy function

In [231]:
# setup loss functions
loss_fn = nn.CrossEntropyLoss()

In [232]:
### Optimizer
optimizer = torch.optim.SGD(params = model_2.parameters(),
                            lr = 0.1)

In [233]:
# Accuracy
def accuracy_fn(y_true, y_preds):
  correct = torch.eq(y_true, y_preds).sum().item()
  acc = (correct/len(y_preds))*100
  return acc

## Train and test

### `train_step()`

In [234]:
def train_step(
    model : torch.nn.Module,
    data_loader : torch.utils.data.DataLoader,
    loss_fn : torch.nn.Module,
    optimizer : torch.optim.Optimizer,
    accuracy_fn,
    device : torch.device = device):

  train_loss, train_acc = 0, 0
  # Put model on training loop 
  model.train()
  # Add a loop to loop through training batches
  for batch, (X, y) in enumerate(data_loader):
      # Put data on target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      y_preds = model(X)

      # 2. Calculate loss (per batch)
      loss = loss_fn(y_preds, y)
      train_loss += loss # accumulatively add up the loss per epoch 
      train_acc += accuracy_fn(y_true=y,
                              y_preds = y_preds.argmax(dim=1))

      # 3. Optimizer zero grad
      optimizer.zero_grad()

      # 4. Loss backward
      loss.backward()

      # 5. Optimizer step
      optimizer.step()

  # Divide total train loss by length of train dataloader (average loss per batch per epoch)
  train_loss /= len(data_loader)
  train_acc /= len(data_loader)
  print(f"Train loss: {train_loss:.5f}, | Train acc: {train_acc:.5f}%")

### `test_step()`

In [235]:
def test_step(model : torch.nn.Module,
              data_loader : torch.utils.data.DataLoader,
              loss_fn : torch.nn.Module,
              accuracy_fn,
              device : torch.device=device):
  ### Testing
  # Setup variables for accumulatively adding up loss and accuracy 
  test_loss, test_acc = 0, 0 
    
  # Put model on test mode
  model.eval()

  # Turn on inference mode
  with torch.inference_mode():
    for X, y in data_loader:
      # Send data to target device
      X, y = X.to(device), y.to(device)
      # 1. Forward pass
      test_preds = model(X)
      
      # 2. Calculate loss (accumatively)
      test_loss += loss_fn(test_preds, y) # accumulatively add up the loss per epoch
      # 3. Calculate accuracy (preds need to be same as y_true)
      test_acc += accuracy_fn(y_true=y,
                              y_preds=test_preds.argmax(dim=1))
    
    ## Print out what's happening
    test_loss /= len(data_loader)
    test_acc /= len(data_loader)
    print(f"Test loss: {test_loss:.5f} | Test acc:{test_acc:.5f}%")

### Print Time function (Optional)

In [236]:
def print_train_time(start: float, end: float, device: torch.device = None):
    """Prints difference between start and end time.

    Args:
        start (float): Start time of computation (preferred in timeit format). 
        end (float): End time of computation.
        device ([type], optional): Device that compute is running on. Defaults to None.

    Returns:
        float: time between start and end in seconds (higher is longer).
    """
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

### Train and Test loop

In [238]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Set start for timer
START_TIME = timer()

# Train and Test model
epochs = 5
for epoch in tqdm(range(epochs)):
  print(f"Epochs: {epoch}\n----------")
  train_step(model = model_2,
             data_loader = train_dataloader,
             loss_fn = loss_fn,
             optimizer = optimizer,
             accuracy_fn = accuracy_fn,
             device = device)
  test_step(model = model_2,
            data_loader = test_dataloader,
            loss_fn = loss_fn,
            accuracy_fn = accuracy_fn,
            device = device)
  
# Set end for timer
END_TIME = timer()
total_train_time_model_2 = print_train_time(start = START_TIME,
                                            end = END_TIME,
                                            device = device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epochs: 0
----------
Train loss: 0.28551, | Train acc: 89.61667%
Test loss: 0.30023 | Test acc:88.98762%
Epochs: 1
----------
Train loss: 0.27456, | Train acc: 90.06500%
Test loss: 0.29369 | Test acc:89.51677%
Epochs: 2
----------
Train loss: 0.26784, | Train acc: 90.29667%
Test loss: 0.28442 | Test acc:89.78634%
Epochs: 3
----------
Train loss: 0.25965, | Train acc: 90.52500%
Test loss: 0.29071 | Test acc:89.54673%
Epochs: 4
----------
Train loss: 0.25653, | Train acc: 90.68500%
Test loss: 0.28791 | Test acc:89.60663%
Train time on cuda: 57.507 seconds


### Make predcition of model_2

In [250]:
# Move values to device
torch.manual_seed(42)
def eval_model(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               accuracy_fn, 
               device: torch.device = device):
    """Evaluates a given model on a given dataset.

    Args:
        model (torch.nn.Module): A PyTorch model capable of making predictions on data_loader.
        data_loader (torch.utils.data.DataLoader): The target dataset to predict on.
        loss_fn (torch.nn.Module): The loss function of model.
        accuracy_fn: An accuracy function to compare the models predictions to the truth labels.
        device (str, optional): Target device to compute on. Defaults to device.

    Returns:
        (dict): Results of model making predictions on data_loader.
    """
    loss, acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            # Send data to the target device
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss += loss_fn(y_pred, y)
            acc += accuracy_fn(y_true=y, y_preds=y_pred.argmax(dim=1))
        
        # Scale loss and acc
        loss /= len(data_loader)
        acc /= len(data_loader)
        #print(classification_report(y.to("cpu"), y_pred.to("cpu")))
    return {"model_name": model.__class__.__name__, # only works when model was created with a class
            "model_loss": loss.item(),
            "model_acc": acc}

In [251]:
# Get model_2 results 
model_2_results = eval_model(
    model=model_2,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)
model_2_results

{'model_name': 'FashionMNISTModelV2',
 'model_loss': 0.28791090846061707,
 'model_acc': 89.60662939297124}