<a href="https://colab.research.google.com/github/PJunior17/MNIST-Fashion/blob/main/MNIST_Fashion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.3.1-py3-none-any.whl (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.4/840.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.10.1 torchmetrics-1.3.1


In [None]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import torchmetrics
from torchmetrics import Accuracy

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

## Prepare and Load Data

In [None]:
# Training split: stored under 'data' (downloaded if needed); every image is
# converted with ToTensor() and the labels are left untransformed.
train_data = datasets.FashionMNIST('data',
                                   train=True,
                                   transform=ToTensor(),
                                   target_transform=None,
                                   download=True)

# Test split, prepared the same way.
test_data = datasets.FashionMNIST('data',
                                  train=False,
                                  transform=ToTensor(),
                                  download=True)

# Keep the first training sample around as a single (image, label) example.
image, label = train_data[0]

In [None]:
BATCH_SIZE = 32

# Wrap both datasets in DataLoaders so they can be iterated in mini-batches.
# Only the training split is shuffled; the test split keeps its original order.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Pull a single batch from the training loader so one sample can be inspected below.
train_features_batch, train_labels_batch = next(iter(train_dataloader))

## Create the Model

In [None]:
# Try nn.Flatten() on one sample to see what the classifier head will receive.
flatten_model = nn.Flatten()
x = train_features_batch[0] # first sample of the batch fetched above
# nn.Flatten() keeps dim 0 and flattens the rest (start_dim=1 by default);
# presumably this turns a [1, 28, 28] image into [1, 784] - TODO confirm with output.shape
output = flatten_model(x)

### Train and Test functions

In [None]:
def train_step(model:torch.nn.Module,
               data_loader:torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,
               optimizer:torch.optim.Optimizer,
               accuracy_fn,
               device:torch.device = device):
  """Train `model` for one pass over `data_loader` and print the epoch averages.

  Args:
    model: Network to optimise. It is moved to `device` and put in training mode.
    data_loader: Iterable of `(X, y)` batches that also supports `len()`.
    loss_fn: Called as `loss_fn(y_pred, y)`; must return a tensor supporting `.backward()`.
    optimizer: Optimizer that updates the model's parameters.
    accuracy_fn: Called as `accuracy_fn(y_pred, y)`; returns the accuracy of one batch.
    device: Device the model and every batch are moved to.

  The printed loss and accuracy are the unweighted mean of the per-batch values
  (sum over batches divided by `len(data_loader)`).
  """
  train_loss, train_acc = 0.0, 0.0
  model.to(device)
  # test_step() switches the model to eval mode, so training mode has to be
  # restored here or every epoch after the first would train in eval mode.
  model.train()
  for X, y in data_loader:
    X, y = X.to(device), y.to(device) #send data to the target device
    y_pred = model(X) #forward propagation
    loss = loss_fn(y_pred, y) #calculate the loss

    # Accumulate a detached value: summing `loss` itself keeps the running total
    # attached to the autograd graph (it showed up as grad_fn=... when printed).
    train_loss += loss.detach()
    with torch.no_grad(): #the metric is for reporting only, never for gradients
      train_acc += accuracy_fn(y_pred, y)

    optimizer.zero_grad() #optimizer zero grad
    loss.backward() #backward propagation
    optimizer.step() #optimizer step

  #average the per-batch values over the epoch and convert them to plain floats
  num_batches = len(data_loader)
  train_loss = float(train_loss / num_batches)
  train_acc = float(train_acc / num_batches)
  print('Train Loss: %s | Train Accuracy: %s' % (round(train_loss, 4), round(train_acc, 4)))

def test_step(model:torch.nn.Module,
              data_loader:torch.utils.data.DataLoader,
              loss_fn:torch.nn.Module,
              accuracy_fn,
              device:torch.device = device):
  """Evaluate `model` on every batch of `data_loader` and print the averages.

  Args:
    model: Network to evaluate. It is moved to `device` and left in eval mode.
    data_loader: Iterable of `(X, y)` batches that also supports `len()`.
    loss_fn: Called as `loss_fn(test_pred, y)`; returns the loss of one batch.
    accuracy_fn: Called as `accuracy_fn(test_pred, y)`; returns the accuracy of one batch.
    device: Device the model and every batch are moved to.

  The printed loss and accuracy are the unweighted mean of the per-batch values
  (sum over batches divided by `len(data_loader)`).
  """
  test_loss, test_acc = 0.0, 0.0
  model.to(device)
  model.eval() #put the model in eval mode

  #no gradients are needed for evaluation
  with torch.inference_mode():
    for X, y in data_loader:
      X, y = X.to(device), y.to(device)
      test_pred = model(X) #forward propagation
      test_loss += loss_fn(test_pred, y)
      test_acc += accuracy_fn(test_pred, y)

  #report plain floats instead of tensor reprs such as tensor(0.4019, device='cuda:0')
  num_batches = len(data_loader)
  test_loss = float(test_loss / num_batches)
  test_acc = float(test_acc / num_batches)
  print('Test Loss: %s | Test Accuracy: %s' % (round(test_loss, 4), round(test_acc, 4)))


Use the [CNN Explainer](https://poloclub.github.io/cnn-explainer/) to understand more about convolutional neural networks.

### Convolutional Neural Network

In [None]:
class Model(nn.Module):
  """Small convolutional classifier: two conv blocks followed by a linear layer.

  Each block is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d(2). The 3x3
  convolutions use stride 1 and padding 1, so they keep the height and width;
  each pooling layer halves them (rounding down).

  Args:
    input_shape: Number of channels in the input images.
    hidden_units: Number of channels produced by every convolution.
    output_shape: Number of output features of the classifier (one per class).
    image_size: Height/width of the input images, either a single int for
      square images or a `(height, width)` pair. Defaults to 28, which gives
      the 7x7 feature map the classifier was originally hard-coded for.

  Raises:
    ValueError: If the image is too small to survive the two pooling layers.
  """
  def __init__(self,
               input_shape:int,
               hidden_units:int,
               output_shape:int,
               image_size=28):
    super().__init__()
    if isinstance(image_size, int):
      height, width = image_size, image_size
    else:
      height, width = image_size

    # Two stride-2 poolings with floor rounding: floor(floor(s / 2) / 2) == s // 4.
    pooled_height, pooled_width = height // 4, width // 4
    if pooled_height < 1 or pooled_width < 1:
      raise ValueError('image_size %s is too small for two 2x2 pooling layers' % (image_size,))

    # Layers are created in the same order as before (block1, block2, classifier),
    # so a fixed seed still yields the same initial weights.
    self.block1 = self._conv_block(input_shape, hidden_units)
    self.block2 = self._conv_block(hidden_units, hidden_units)
    self.classifier = nn.Sequential(nn.Flatten(),
                                    nn.Linear(in_features=hidden_units*pooled_height*pooled_width,
                                              out_features=output_shape))

  @staticmethod
  def _conv_block(in_channels:int, out_channels:int) -> nn.Sequential:
    """Build one Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d(2) block."""
    return nn.Sequential(nn.Conv2d(in_channels=in_channels,
                                   out_channels=out_channels,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1),
                         nn.ReLU(),
                         nn.Conv2d(in_channels=out_channels,
                                   out_channels=out_channels,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1),
                         nn.ReLU(),
                         nn.MaxPool2d(kernel_size=2,
                                      stride=2))

  def forward(self, x):
    """Map a batch of images (N, input_shape, H, W) to scores (N, output_shape)."""
    x = self.block1(x)
    x = self.block2(x)
    x = self.classifier(x)
    return x

torch.manual_seed(17) # seed before building the model so the initial weights are repeatable
class_names = train_data.classes
# one input channel, 10 hidden channels and one output per class name
model = Model(input_shape=1, hidden_units=10, output_shape=len(class_names)).to(device)

model

Model(
  (block1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=490, out_features=10, bias=True)
  )
)

### Stepping through nn.Conv2d()

nn.Conv2d() is known as the convolutional layer

nn.MaxPool2d() is known as the pooling layer

In [None]:
torch.manual_seed(17)

# A batch of random "images" laid out as (batch size, color channels, height, width),
# plus its first element as a single test image to watch convolution in action.
images = torch.randn(32, 3, 64, 64)
test_image = images[0]
images.shape, test_image.shape

(torch.Size([32, 3, 64, 64]), torch.Size([3, 64, 64]))

In [None]:
torch.manual_seed(17)
# 3 input channels -> 10 output channels, 3x3 kernel, no padding
conv_layer = nn.Conv2d(3, 10, kernel_size=3, stride=1, padding=0)
# pass the single (unbatched) test image through the layer
conv_layer(test_image)


tensor([[[ 0.2188,  0.1430, -0.1163,  ...,  0.5358,  0.5758,  0.6098],
         [-0.3033, -0.2936,  0.3397,  ...,  0.1682,  0.4830,  0.2618],
         [-0.2086, -0.9499, -0.1853,  ..., -0.3184,  0.2549, -0.6586],
         ...,
         [-0.3269,  0.2665,  0.0794,  ..., -1.3813, -1.3286, -0.6917],
         [-0.1882,  0.1220, -0.1316,  ..., -0.8738, -1.0345, -0.9743],
         [-0.5635, -0.0239, -0.6342,  ..., -0.3326,  0.2546,  0.5386]],

        [[-0.4617, -0.9907, -0.6248,  ..., -0.2021, -0.7061,  0.3379],
         [-0.0382, -0.3555, -0.0692,  ..., -0.6126,  0.2066,  0.3084],
         [ 0.7285,  0.1415,  0.5137,  ..., -1.3283, -0.5452,  0.1768],
         ...,
         [ 0.5590,  0.2779,  0.0404,  ..., -0.5979,  0.6375, -0.0293],
         [-0.6820,  0.5112,  0.4071,  ..., -0.2713, -1.0799, -0.1871],
         [ 0.1917, -0.1398, -0.3813,  ...,  0.1202,  0.1425, -0.8015]],

        [[ 0.9488, -0.1764, -0.2073,  ..., -0.2424,  0.8406,  0.3455],
         [ 0.1788, -0.0309, -0.6543,  ...,  0

On PyTorch 1.11.0 or below, the call above would raise an error because the input has no batch dimension, but we don't have to worry about that here. Right now our test image has only 3 dimensions: color channels, height and width. If we needed to add the missing batch dimension, we could unsqueeze at dimension 0.

In [None]:
# add a leading batch dimension and look at the resulting shape
test_image.unsqueeze(0).shape

torch.Size([1, 3, 64, 64])

In [None]:
#notice how the shape changes once the batched image goes through the conv layer
conv_layer(test_image.unsqueeze(0)).shape

torch.Size([1, 10, 62, 62])

In [None]:
#same input, but this layer uses a 5x5 kernel (instead of 3x3) and a stride of 2 (instead of 1)
conv_layer_2 = nn.Conv2d(3, 10, kernel_size=(5,5), stride=2, padding=0)
conv_layer_2(test_image.unsqueeze(0)).shape

torch.Size([1, 10, 30, 30])

Behind the scenes, our nn.Conv2d() is compressing the information stored in the image.

It does this by performing operations on the input (our test image) against its internal parameters.

The goal of this is similar to all of the other neural networks we've been building.

Data goes in and the layers try to update their internal parameters (patterns) to lower the loss function thanks to some help of the optimizer.

The only difference is how the different layers calculate their parameter updates or in PyTorch terms, the operation present in the layer forward() method.

In [None]:
# weight is (out_channels, in_channels, kernel height, kernel width); bias has one value per output channel
conv_layer_2.weight.shape, conv_layer_2.bias.shape

(torch.Size([10, 3, 5, 5]), torch.Size([10]))

### Stepping through nn.MaxPool2d()

In [None]:
# shapes before anything is applied
print('Test Image original shape: %s' % str(test_image.shape))
print('Test Image with unsqueezed dimension: %s' % str(test_image.unsqueeze(0).shape))

#stride defaults to the kernel size, it is spelled out here for practice
maxpool_layer = nn.MaxPool2d(2, stride=2)

#step 1: conv layer on the batched image
test_image_conv = conv_layer(test_image.unsqueeze(0))
print('Shape after Conv Layer: %s' % str(test_image_conv.shape))

#step 2: pooling layer on the conv output
test_image_conv_pool = maxpool_layer(test_image_conv)
print('Shape after Pooling Layer: %s' % str(test_image_conv_pool.shape))

Test Image original shape: torch.Size([3, 64, 64])
Test Image with unsqueezed dimension: torch.Size([1, 3, 64, 64])
Shape after Conv Layer: torch.Size([1, 10, 62, 62])
Shape after Pooling Layer: torch.Size([1, 10, 31, 31])


With a kernel size of 2 and a stride of 2, the pooling layer halves the height and width of the image (62 → 31 above). If we change the kernel size or the stride, the output shape changes accordingly.

Essentially, every layer of a neural network is trying to compress data from a higher dimensional space to a lower dimensional space

## Loss Function and Optimizer

In [None]:
# CrossEntropyLoss is used as the loss function because this is multi-class classification
loss_fn = nn.CrossEntropyLoss()
# Plain SGD with a learning rate of 0.1; torch.optim.Adam is a common alternative.
# NOTE(review): the original rule of thumb here was "SGD for large, simpler datasets
# where you want to compute quickly; Adam for large, complex datasets, since it does
# more work per step (e.g. it adapts the step size automatically)". Treat that as a
# heuristic rather than a guarantee and compare both on your own data.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-1)



## Training the Model using the Train and Test Functions we made earlier

In [None]:
from timeit import default_timer as timer
def print_train_time(start: float, end: float, device: torch.device = None):
    """Print and return the elapsed time between two timestamps.

    Args:
        start (float): Timestamp taken before the computation (e.g. from timeit's default_timer).
        end (float): Timestamp taken after the computation.
        device ([type], optional): Device the computation ran on; only used in the
            printed message. Defaults to None.

    Returns:
        float: Elapsed seconds, i.e. `end - start`.
    """
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

In [None]:
torch.manual_seed(17)

from tqdm.auto import tqdm
# multiclass accuracy metric, kept on the same device as the model and data
accuracy_fn = Accuracy(task='multiclass', num_classes=len(class_names)).to(device)

epochs = 3
train_time_start_time = timer()

# every epoch: one pass over the training data, then one pass over the test data
for epoch in tqdm(range(epochs)):
  print('Epoch: %s \n-----------' % (epoch))
  train_step(model, train_dataloader, loss_fn, optimizer, accuracy_fn, device)
  test_step(model, test_dataloader, loss_fn, accuracy_fn, device)

train_time_end_time = timer()
total_train_time = print_train_time(train_time_start_time, train_time_end_time, device)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0 
-----------
Train Loss: tensor(0.6041, device='cuda:0', grad_fn=<DivBackward0>) | Train Accuracy: tensor(0.7841, device='cuda:0')
Test Loss: tensor(0.4019, device='cuda:0') | Test Accuracy: tensor(0.8582, device='cuda:0')
Epoch: 1 
-----------
Train Loss: tensor(0.3530, device='cuda:0', grad_fn=<DivBackward0>) | Train Accuracy: tensor(0.8740, device='cuda:0')
Test Loss: tensor(0.3866, device='cuda:0') | Test Accuracy: tensor(0.8605, device='cuda:0')
Epoch: 2 
-----------
Train Loss: tensor(0.3157, device='cuda:0', grad_fn=<DivBackward0>) | Train Accuracy: tensor(0.8861, device='cuda:0')
Test Loss: tensor(0.3441, device='cuda:0') | Test Accuracy: tensor(0.8787, device='cuda:0')
Train time on cuda: 41.647 seconds
