<a href="https://colab.research.google.com/github/Sovit1056/Pytorch_practice/blob/main/03_v2_Computervision_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Model 2: Building a Convolutional Neural Network (CNN)

CNNs are also known as ConvNets.

CNNs are known for their ability to find patterns in visual data.

To see what's happening inside a CNN, visit the CNN Explainer website: https://poloclub.github.io/cnn-explainer/

In [1]:
# Import PyTorch
import torch
from torch import nn

# import torch vision
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

# import Matplotlib for visualization
import matplotlib.pyplot as plt

In [2]:
# Load the FashionMNIST training and test splits (downloaded on first run).
# ToTensor docs: https://docs.pytorch.org/vision/stable/generated/torchvision.transforms.ToTensor.html

train_data = datasets.FashionMNIST(
    root="data",            # directory the dataset is downloaded to
    train=True,             # select the training split
    download=True,          # download the files when requested
    transform=ToTensor(),   # transform applied to each image
    target_transform=None,  # labels/targets are left untransformed
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,            # select the test split
    download=True,
    transform=ToTensor(),
    target_transform=None,
)

100%|██████████| 26.4M/26.4M [00:01<00:00, 14.4MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 230kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.81MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 8.99MB/s]


In [3]:
# Human-readable class names, taken from the dataset's `classes` attribute
class_names= train_data.classes
class_names

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [4]:
# Mapping from class name to integer label index, as exposed by the dataset
class_to_idx = train_data.class_to_idx
class_to_idx

{'T-shirt/top': 0,
 'Trouser': 1,
 'Pullover': 2,
 'Dress': 3,
 'Coat': 4,
 'Sandal': 5,
 'Shirt': 6,
 'Sneaker': 7,
 'Bag': 8,
 'Ankle boot': 9}

In [5]:
from torch.utils.data import DataLoader

# Batch size hyperparameter: number of samples per batch
BATCH_SIZE = 32

# Turn the datasets into iterables of batches.
# Only the training loader is shuffled; the test loader keeps dataset order.
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

train_dataloader, test_dataloader

(<torch.utils.data.dataloader.DataLoader at 0x7c9ad1918410>,
 <torch.utils.data.dataloader.DataLoader at 0x7c9bd5df8e50>)

In [6]:
# Create a convolutional neural network

class FashionMNISTV2(nn.Module):
    """
    Model architecture that replicates the TinyVGG
    model from the CNN Explainer website
    (https://poloclub.github.io/cnn-explainer/).

    The network is two convolutional blocks followed by a linear classifier.
    Each block is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d. A "block" is
    simply a group of layers; the overall architecture is a stack of blocks.

    Args:
        input_shape: Number of channels in the input images
            (used as `in_channels` of the first convolution).
        hidden_units: Number of output channels of every convolution.
        output_shape: Number of output features of the classifier
            (one per class).
        image_size: Height/width of the (square) input images. Defaults to
            28, which yields the `hidden_units * 7 * 7` classifier input
            used for FashionMNIST.

    Raises:
        ValueError: If `image_size` is too small to survive the two 2x2
            max-pool layers (i.e. smaller than 4).
    """

    def __init__(self,
                 input_shape: int,
                 hidden_units: int,
                 output_shape: int,
                 image_size: int = 28):
        super().__init__()

        # The 3x3 convolutions use stride=1 and padding=1, so they keep the
        # spatial size unchanged; each MaxPool2d(kernel_size=2, stride=2)
        # halves it (rounding down). Two blocks => two halvings.
        pooled_size = image_size // 2 // 2
        if pooled_size < 1:
            raise ValueError(
                f"image_size must be at least 4 to pass through two "
                f"max-pool layers, got {image_size}"
            )

        # Layers are created in the same order as before
        # (block 1, block 2, classifier).
        self.conv_block_1 = self._make_conv_block(input_shape, hidden_units)
        self.conv_block_2 = self._make_conv_block(hidden_units, hidden_units)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # in_features must equal channels * height * width of the tensor
            # leaving conv_block_2 (hidden_units * 7 * 7 for 28x28 inputs).
            nn.Linear(in_features=hidden_units * pooled_size * pooled_size,
                      out_features=output_shape)
        )

    @staticmethod
    def _make_conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d block."""
        # kernel_size, stride and padding are hyperparameters: values we
        # choose ourselves. kernel_size=3 is equivalent to (3, 3).
        # Conv2d docs: https://docs.pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                         stride=2)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run `x` through both conv blocks and the classifier.

        No shapes are printed here: a print inside `forward` would fire on
        every batch. To inspect intermediate shapes, call the blocks
        individually (e.g. `model.conv_block_1(x).shape`).
        """
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        return self.classifier(x)


In [7]:
# Seed first so the model's randomly initialised weights are reproducible.
torch.manual_seed(42)

# One input channel, 10 hidden units, and one output per class name.
model_2 = FashionMNISTV2(input_shape=1,
                         hidden_units=10,
                         output_shape=len(class_names))

In [8]:
# Inspect the model's parameter tensors, keyed by layer name (e.g. 'conv_block_1.0.weight')
model_2.state_dict()

OrderedDict([('conv_block_1.0.weight',
              tensor([[[[ 0.2548,  0.2767, -0.0781],
                        [ 0.3062, -0.0730,  0.0673],
                        [-0.1623,  0.1958,  0.2938]]],
              
              
                      [[[-0.2445,  0.2897,  0.0624],
                        [ 0.2463,  0.0451,  0.1607],
                        [-0.0471,  0.2570,  0.0493]]],
              
              
                      [[[-0.1556,  0.0850, -0.1536],
                        [-0.0391, -0.1354,  0.2211],
                        [-0.2631, -0.1537, -0.0941]]],
              
              
                      [[[-0.2004,  0.0315, -0.3292],
                        [ 0.3010, -0.2832,  0.2573],
                        [ 0.0555, -0.1082,  0.2060]]],
              
              
                      [[[ 0.0520,  0.2693,  0.0364],
                        [-0.1051,  0.0896, -0.0904],
                        [ 0.1403,  0.2976,  0.1927]]],
              
              
      

### 1.1 Stepping through `nn.Conv2d()`

Docs - https://docs.pytorch.org/docs/stable/generated/torch.nn.Conv2d.html



In [9]:
torch.manual_seed(42)

# Random batch of 32 images, each with 3 channels and 64x64 pixels
images = torch.rand(32, 3, 64, 64)

# First image of the batch, used as the single test image
test_image = images[0]

print(f"Image batch shape: {images.shape}\nTest or single image shape: {test_image.shape}")
test_image

Image batch shape: torch.Size([32, 3, 64, 64])
Test or single image shape: torch.Size([3, 64, 64])


tensor([[[0.8823, 0.9150, 0.3829,  ..., 0.1587, 0.6542, 0.3278],
         [0.6532, 0.3958, 0.9147,  ..., 0.2083, 0.3289, 0.1054],
         [0.9192, 0.4008, 0.9302,  ..., 0.5535, 0.4117, 0.3510],
         ...,
         [0.1457, 0.1499, 0.3298,  ..., 0.9624, 0.6400, 0.7409],
         [0.1709, 0.5797, 0.6340,  ..., 0.6885, 0.2405, 0.5956],
         [0.9199, 0.1247, 0.3573,  ..., 0.6752, 0.2058, 0.5027]],

        [[0.1458, 0.9024, 0.9217,  ..., 0.1868, 0.6352, 0.8431],
         [0.9549, 0.4435, 0.6924,  ..., 0.1168, 0.7160, 0.5462],
         [0.1616, 0.1054, 0.8614,  ..., 0.4531, 0.4736, 0.9448],
         ...,
         [0.4309, 0.3986, 0.1907,  ..., 0.9444, 0.2848, 0.3776],
         [0.7948, 0.6855, 0.1009,  ..., 0.6147, 0.7747, 0.2323],
         [0.5840, 0.9795, 0.3277,  ..., 0.3549, 0.1263, 0.1280]],

        [[0.5027, 0.4195, 0.8893,  ..., 0.3084, 0.1567, 0.7860],
         [0.7310, 0.9307, 0.2847,  ..., 0.8432, 0.8307, 0.0897],
         [0.7021, 0.5967, 0.7744,  ..., 0.8485, 0.4520, 0.

In [10]:
torch.manual_seed(42)

# A single, randomly initialised Conv2d layer: 3 input channels -> 10 output channels
conv_layer1 = nn.Conv2d(
    in_channels=3,
    out_channels=10,
    kernel_size=3,
    stride=1,
    padding=0,
)

# Add a leading dimension of size 1 at position 0, then pass the image through the layer
conv_layer1(test_image.unsqueeze(0))


tensor([[[[ 0.6552,  0.5381,  0.4456,  ...,  0.3679,  0.1734,  0.2343],
          [ 0.4011,  0.2772,  0.4309,  ...,  0.0474,  0.3547,  0.4406],
          [ 0.5072,  0.6963,  0.3967,  ...,  0.5034,  0.3260,  0.4359],
          ...,
          [ 0.3767,  0.4173,  0.7551,  ...,  0.5548,  0.5815,  0.2547],
          [ 0.4143,  0.6770,  0.5922,  ...,  0.7285,  0.3320,  0.5752],
          [ 0.1725,  0.3980,  0.5999,  ...,  0.4316,  0.5133,  0.3385]],

         [[ 0.0954, -0.0688, -0.1089,  ...,  0.0185, -0.4996, -0.0184],
          [-0.0188, -0.1703, -0.1344,  ..., -0.2991, -0.0339,  0.1632],
          [-0.1585, -0.0837, -0.0159,  ...,  0.0026,  0.1540, -0.1372],
          ...,
          [-0.0297, -0.0980, -0.0731,  ...,  0.0623, -0.1080, -0.0946],
          [-0.1508, -0.1934,  0.2011,  ...,  0.0415, -0.0989,  0.2556],
          [ 0.0447,  0.1020, -0.1685,  ...,  0.0908, -0.0682, -0.2266]],

         [[ 0.0098,  0.1790,  0.3092,  ...,  0.2502,  0.2292, -0.0542],
          [-0.0979,  0.1598,  

### 1.2 Stepping through `nn.MaxPool2d()`

Docs - https://docs.pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html#maxpool2d

In [11]:
# Check the single image's shape before a leading dimension is added with unsqueeze
test_image.shape

torch.Size([3, 64, 64])

In [12]:
# Shape of the image as-is, and with a leading dimension added at position 0
print(f"Test image orginal shape: {test_image.shape}\n")
print(f"Test image unsqueezed shape: {test_image.unsqueeze(dim=0).shape}\n")

# Example max-pool layer with a 2x2 window
max_pool_layer = nn.MaxPool2d(kernel_size=2)

# Max pool applied directly to the unsqueezed image
print(max_pool_layer(test_image.unsqueeze(0)).shape)

# Convolution only
test_image_through_conv = conv_layer1(test_image.unsqueeze(0))
print(f"\nShape after going through conv_layer(): {test_image_through_conv.shape}")

# Convolution followed by max pooling
test_image_through_conv_and_max_pool = max_pool_layer(test_image_through_conv)
print(f"\nShape after going through conv_layer() and max_pool_layer(): {test_image_through_conv_and_max_pool.shape}")

Test image orginal shape: torch.Size([3, 64, 64])

Test image unsqueezed shape: torch.Size([1, 3, 64, 64])

torch.Size([1, 3, 32, 32])

Shape after going through conv_layer(): torch.Size([1, 10, 62, 62])

Shape after going through conv_layer() and max_pool_layer(): torch.Size([1, 10, 31, 31])
