In [1]:
import torch
from torch import nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from torch.optim import SGD, Adam
from torchvision import datasets
from torchsummary import summary
import numpy as np
import matplotlib.pyplot as plt

# Global Variables

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# Sample dataset

In [3]:
X_train = torch.tensor([[[[1,2,3,4],[2,3,4,5], [5,6,7,8],[1,3,4,5]]],
                        [[[-1,2,3,-4],[2,-3,4,5], [-5,6,-7,8],[-1,-3,-4,-5]]]
                       ]).to(device).float()
X_train.shape

torch.Size([2, 1, 4, 4])

Input shape is **N×C×H×W** such that  

**N:** Batch size  
**C:** Channel numbers  
**H:** Height  
**W:** Width  

In [4]:
X_train /= X_train.max() # Scale to the range [-1, 1]
y_train = torch.tensor([0, 1]).to(device).float()

# Model Architecture

In [5]:
def get_model():
    model = nn.Sequential(
        nn.Conv2d(1, 1, kernel_size=3),
        nn.MaxPool2d(2),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(1, 1),
        nn.Sigmoid()
    ).to(device)
    loss_function = nn.BCELoss()
    optimizer = Adam(model.parameters(), lr=1e-3)
    return model, loss_function, optimizer

Summarize the architecture

In [6]:
model, loss_function, optimizer = get_model()
summary(model, X_train);

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 1, 2, 2]             10
├─MaxPool2d: 1-2                         [-1, 1, 1, 1]             --
├─ReLU: 1-3                              [-1, 1, 1, 1]             --
├─Flatten: 1-4                           [-1, 1]                   --
├─Linear: 1-5                            [-1, 1]                   2
├─Sigmoid: 1-6                           [-1, 1]                   --
Total params: 12
Trainable params: 12
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


**Conv2d has 10 parameters**: 3×3 = 9 weights + 1 bias.  
**MaxPool2d, ReLU, Flattern do not have any parameters**: Because these are just operations that we perform on top of the output of convolution layer

# Train the model

In [7]:
def train_batch(X, y, model, optimizer, loss_function):
    model.train()
    prediction = model(X)
    batch_loss = loss_function(prediction.squeeze(0), y)
    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return batch_loss.item()

In [8]:
train_dataloader = DataLoader(TensorDataset(X_train, y_train))

for epoch in range(2000):
    for batch in train_dataloader:
        X, y = batch
        batch_loss = train_batch(X, y, model, optimizer, loss_function)

In [9]:
model(X_train[:1])

tensor([[0.5310]], device='cuda:0', grad_fn=<SigmoidBackward>)

# Forward propagating the output
Note that **We don't need to perform the following steps in a real-world scenario**

In [10]:
# Extract the various layers of the model
list(model.children())

[Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1)),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 ReLU(),
 Flatten(start_dim=1, end_dim=-1),
 Linear(in_features=1, out_features=1, bias=True),
 Sigmoid()]

In [11]:
(cnn_w, cnn_b), (linear_w, linear_b) = [(layer.weight.data, layer.bias.data) for layer in list(model.children())
                                        if hasattr(layer, 'weight')]

In [12]:
h_im, w_im = X_train.shape[2:] # 0 is for batch size and 1 is for channel numbers
h_conv, w_conv  = cnn_w.shape[2:]
sumprod = torch.zeros((h_im - h_conv + 1, w_im - w_conv + 1))

Dimension of `sumprod` comes from the following formula:
$$n_{out} = [\frac{n_{in} + 2p - k}{s}] + 1$$
$n_{in}$: Number of input features  
$n_{out}$: Number of output features  
**k**: Convolution kernel size  
**p**: Convolution padding size  
**s**: Convolution stride size  

In [13]:
# Filling sumprod by convolving the filter accross the input and summing up the filter bias term

kernel_size = 3

for i in range(h_im - h_conv + 1):
    for j in range(w_im - w_conv + 1):
        img_subset = X_train[0, 0, i:(i+kernel_size), j:(j+kernel_size)]
        model_filter = cnn_w.reshape(3, 3)
        val = torch.sum(img_subset * model_filter) + cnn_b
        sumprod[i, j] = val

sumprod

tensor([[-0.2691, -0.3445],
        [-0.4178, -0.4846]])

In [14]:
# ReLU
sumprod.clamp_min_(0)

sumprod

tensor([[0., 0.],
        [0., 0.]])

In [15]:
pooling_layer_output = torch.max(sumprod) # Since it's just 2×2
pooling_layer_output

tensor(0.)

In [16]:
intermediate_output_value = pooling_layer_output * linear_w * linear_b
intermediate_output_value

tensor([[-0.]], device='cuda:0')

In [17]:
# pass the output through sigmoid
torch.sigmoid(intermediate_output_value)

tensor([[0.5000]], device='cuda:0')