# CNN architectures

In [39]:
import torch
import numpy as np
import PIL
import torch, torchvision, torchvision.models
import sklearn.datasets
import cv2
import bokeh, bokeh.plotting, bokeh.palettes, bokeh.transform
import ipywidgets
import IPython.display
import matplotlib.pyplot as plt


# Convolutional layer
<img src="plots/cnn_architectures/cnn_layer.png" alt="drawing" height="300"/>

<img src="plots/cnn_architectures/cnn_layer2.png" alt="drawing" width="600"/>

source: https://www.researchgate.net/figure/Overview-of-convolutional-layers-A-convolutional-layer-consists-of-one-or-more-channels_fig4_339170912

https://anhreynolds.com/blogs/cnn.html

In [3]:
# 10x10 float32 "ramp" image: every entry of row i holds the value i.
# ToTensor prepends a channel axis, so x ends up with shape (1, 10, 10).
x = torchvision.transforms.ToTensor()(
    np.repeat(np.arange(10, dtype=np.float32)[:, np.newaxis], 10, axis=1)
)

In [4]:
# 3x3 convolution that turns the single input channel into 100 feature maps.
# With stride=1 and padding=1 the 10x10 spatial size of the input is preserved.
layer = torch.nn.Conv2d(1, 100, kernel_size=3, stride=1, padding=1, dilation=1, bias=True)

# Apply the (randomly initialised) layer to the ramp image and compare shapes.
layer_output = layer(x)
print(f"Shape of the input: {x.shape}")
print(f"Shape of the output: {layer_output.shape}")

Shape of the input: torch.Size([1, 10, 10])
Shape of the output: torch.Size([100, 10, 10])


# Pooling layer
<img src="plots/cnn_architectures/pooling_layer.png" alt="drawing" width="500"/>

source: https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451

In [5]:
# 2x2 max pooling with stride 2: windows do not overlap, so the height and
# width of the input are halved while the channel count stays the same.
layer = torch.nn.MaxPool2d(kernel_size=2, stride=2)

layer_output = layer(x)
print(f"Shape of the input: {x.shape}")
print(f"Shape of the output: {layer_output.shape}")

Shape of the input: torch.Size([1, 10, 10])
Shape of the output: torch.Size([1, 5, 5])


# Fully-connected layer
<img src="plots/cnn_architectures/fully_connected_layer.png" alt="drawing" width="400"/>
<img src="plots/cnn_architectures/fully_connected_equation.png" alt="drawing" width="500"/>


source: https://www.oreilly.com/library/view/tensorflow-for-deep/9781491980446/ch04.html

In [6]:
# Keep the leading axis and merge the remaining ones: (1, 10, 10) -> (1, 100).
x_flattened = torch.flatten(x, start_dim=1)

# Fully-connected layer mapping the 100 flattened values to 2 outputs.
layer = torch.nn.Linear(in_features=100, out_features=2, bias=True)
layer_output = layer(x_flattened)

print(f"Shape of the input: {x.shape}")
print(f"Shape of the flattened input: {x_flattened.shape}")
print(f"Shape of the output: {layer_output.shape}")

Shape of the input: torch.Size([1, 10, 10])
Shape of the flattened input: torch.Size([1, 100])
Shape of the output: torch.Size([1, 2])


# LeNet
<img src="plots/cnn_architectures/lenet.png" alt="drawing" width="1000"/>

<img src="plots/cnn_architectures/lenet2.png" alt="drawing" width="500"/>

In [7]:
class OurLenNet(torch.nn.Module):
    """LeNet-style CNN for single-channel 28x28 images with 10 output scores.

    Layout: conv(5x5, padding 2) -> sigmoid -> avg-pool -> conv(5x5) -> sigmoid
    -> avg-pool -> three fully-connected layers (400 -> 120 -> 84 -> 10).
    The last layer returns raw scores; no softmax is applied inside the model.
    """

    def __init__(self):
        super().__init__()
        # 1 -> 6 channels, 5x5 kernel, stride 1; padding 2 keeps 28x28.
        self.conv1 = torch.nn.Conv2d(1, 6, 5, 1, 2)
        # 2x2 average pooling with stride 2, reused after both convolutions.
        self.pool = torch.nn.AvgPool2d(2, 2)
        # 6 -> 16 channels, 5x5 kernel, no padding: 14x14 -> 10x10.
        self.conv2 = torch.nn.Conv2d(6, 16, 5)
        # After the second pooling the feature map holds 16 * 5 * 5 = 400 values.
        self.fc1 = torch.nn.Linear(16 * 5 * 5, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        """Compute the 10 class scores.

        Args:
            x: Tensor of shape (1, 28, 28) for a single image, or
                (N, 1, 28, 28) for a batch of N images.

        Returns:
            Tensor of shape (10,) for a single image, or (N, 10) for a batch.
        """
        # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid.
        x = self.pool(torch.sigmoid(self.conv1(x)))
        x = self.pool(torch.sigmoid(self.conv2(x)))
        # Flatten only the (channel, height, width) axes. A bare
        # torch.flatten(x) would also merge the batch axis, which breaks fc1
        # for any batched input.
        x = torch.flatten(x, start_dim=-3)
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x

# Total number of scalar weights over all parameters that have requires_grad set.
num_trainable_params = sum(
    map(torch.numel, filter(lambda param: param.requires_grad, OurLenNet().parameters()))
)
print(f'Number of parameters: {num_trainable_params}')

Number of parameters: 61706


In [8]:
# Resize the 10x10 ramp image to the 28x28 input size OurLenNet is built for.
x28 = torchvision.transforms.Resize((28, 28), antialias=False)(x)

# Freshly constructed, i.e. untrained, network: the scores below come from
# randomly initialised weights.
model = OurLenNet()

# Call the module itself instead of .forward() so that any registered hooks
# are run as well.
model_output = model(x28)

# Softmax over the last (class) axis; this is correct for both a single
# score vector and a batch of score vectors.
probabilities = torch.nn.functional.softmax(model_output, dim=-1).detach().numpy()
print(probabilities)

[0.06747673 0.09090362 0.0965177  0.12375745 0.08202932 0.11759167
 0.13666739 0.0929094  0.08581796 0.10632885]


# AlexNet

- stacked convolutional layers
- dropout

<img src="plots/cnn_architectures/alexnet2.png" alt="drawing" width="800"/>

<img src="plots/cnn_architectures/alexnet.png" alt="drawing" width="500"/>

In [9]:
# Load AlexNet with pretrained weights through torch.hub, using the
# torchvision repository pinned to the v0.10.0 tag.
model = torch.hub.load(
    repo_or_dir='pytorch/vision:v0.10.0',
    model='alexnet',
    pretrained=True,
)
print(f'Number of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

# Switch to inference mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

Using cache found in C:\Users\jbilski/.cache\torch\hub\pytorch_vision_v0.10.0


Number of parameters: 61100840


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

# VGG

- 3x3 filters

In [14]:
# `pretrained=True` is deprecated since torchvision 0.13; the weights enum
# names the same ImageNet checkpoint explicitly and avoids the warning.
model = torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights.IMAGENET1K_V1)
print(f'Number of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

# Switch to inference mode; as the last expression of the cell this also
# displays the module structure.
model.eval()



Number of parameters: 138357544


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

<img src="plots/cnn_architectures/vgg3x3.png" alt="drawing" width="500"/>

source: https://towardsdatascience.com/the-w3h-of-alexnet-vggnet-resnet-and-inception-7baaaecccc96#:~:text=VGG16%20has%20a%20total%20of,with%20a%20stride%20of%20two.

# ResNet

- residual connections
- batch normalization

In [79]:
# `pretrained=True` is deprecated since torchvision 0.13; the weights enum
# names the same ImageNet checkpoint explicitly and avoids the warning.
model = torchvision.models.resnet152(weights=torchvision.models.ResNet152_Weights.IMAGENET1K_V1)
print(f'Number of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

# Switch to inference mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to C:\Users\jbilski/.cache\torch\hub\checkpoints\resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:10<00:00, 22.7MB/s] 


Number of parameters: 60192808


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# Inception

- multiple filter sizes in a single layer

<img src="plots/cnn_architectures/inception.png" alt="drawing" width="700"/>

<img src="plots/cnn_architectures/googlenet.png" alt="drawing" width="1200"/>

In [71]:
# `pretrained=True` is deprecated since torchvision 0.13; the weights enum
# names the same ImageNet checkpoint explicitly and avoids the warning.
model = torchvision.models.googlenet(weights=torchvision.models.GoogLeNet_Weights.IMAGENET1K_V1)
print(f'Number of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

# Switch to inference mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

Number of parameters: 6624904




GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

<img src="plots/cnn_architectures/acccomparison.png" alt="drawing" width="900"/>

# DeepDream

<img src="plots/cnn_architectures/deepdream.jpg" alt="drawing" width="600"/>

source: https://github.com/PJ-Finlay/pytorch-deepdream