# Convolutional Neural Network (CNN)

A convolutional neural network (CNN) is a regularized type of feed-forward neural network that learns features by itself via filter (or kernel) optimization. Vanishing gradients and exploding gradients, seen during backpropagation in earlier neural networks, are prevented by using regularized weights over fewer connections.

For example, for each neuron in a fully-connected layer, 10,000 weights would be required to process an image sized 100 × 100 pixels. However, by applying cascaded convolution (or cross-correlation) kernels, only 25 shared weights per kernel are required to process 5 × 5-sized tiles. Higher-layer features are extracted from wider context windows than lower-layer features.

Some documentation
- [Wikipedia - CNN](https://en.wikipedia.org/wiki/Convolutional_neural_network)


# Imports

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Model

In [2]:
# Create the model
class CNN(nn.Module):
  """Small two-stage convolutional classifier.

  Each stage is a 3x3 same-padded convolution followed by ReLU and a 2x2
  max-pool that halves the spatial size. The flattened result is mapped to
  class scores by a single linear layer.

  The linear layer is sized for 16 feature maps of 7x7, i.e. for inputs whose
  height and width are 28 (28 -> 14 -> 7 after the two pooling steps).
  """

  def __init__(self, in_channels = 1, num_classes = 10):
    """Build the layers.

    Args:
      in_channels: number of channels in the input images (1 for grayscale).
      num_classes: number of output scores produced per example.
    """
    super().__init__()
    # Honour the in_channels argument instead of hard-coding a single channel
    self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=8, kernel_size=(3,3), stride=(1,1), padding=(1,1)) # keeps H x W
    self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)) # halves H and W; reused after both convolutions
    self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3,3), stride=(1,1), padding=(1,1)) # keeps H x W
    self.fc1 = nn.Linear(16*7*7, num_classes)

  def forward(self, x):
    """Return raw class scores of shape (batch, num_classes) for a batch of images."""
    x = F.relu(self.conv1(x))
    x = self.pool(x)
    x = F.relu(self.conv2(x))
    x = self.pool(x)
    # Flatten everything except the batch dimension before the linear layer
    x = x.reshape(x.shape[0], -1)
    x = self.fc1(x)
    return x

# Sanity check: push a random mini-batch through an untrained model
model = CNN()
x = torch.rand(64, 1, 28, 28)  # (batch, channels, height, width)
print(model(x).shape)  # we expect (number of examples, number of classes) = (64, 10)

# Device: use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print(device)

# Hyperparameters
input_size, num_classes = 784, 10
learning_rate = 1e-3
batch_size = 64
num_epochs = 5

torch.Size([64, 10])
cpu


$$n_{out}=
\lfloor
\frac{n_{in}+2p-k}{s}+1
\rfloor
=
\lfloor
\frac{28+2-3}{1}+1
\rfloor
=
28
$$
where:

$n_{in}$ : number of input features along one spatial dimension (here the image height or width, 28)

$n_{out}$ : number of output features along the same spatial dimension

$k$ : convolution kernel size

$p$ : convolution padding size

$s$ : convolution stride size

# Load Data

In [7]:
# MNIST splits stored under dataset/ (download=True requests a download);
# ToTensor converts each image to a tensor
train_dataset = datasets.MNIST(
  root='dataset/',
  train=True,
  transform=transforms.ToTensor(),
  download=True,
)
test_dataset = datasets.MNIST(
  root='dataset/',
  train=False,
  transform=transforms.ToTensor(),
  download=True,
)

# Mini-batch iterators over both splits; both are created with shuffling enabled
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Training

In [4]:
# Initialize the network on the selected device
model = CNN().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the network: one full pass over train_loader per epoch
for epoch in range(num_epochs):
  for batch_idx, (data, targets) in enumerate(train_loader):
    # Move the mini-batch to the same device as the model
    data, targets = data.to(device), targets.to(device)

    # Clear the gradients left over from the previous batch
    optimizer.zero_grad()

    # Forward propagation
    scores = model(data)
    loss = criterion(scores, targets)

    # Backward propagation, then the Adam parameter update
    loss.backward()
    optimizer.step()

# Performance

In [5]:
# Check accuracy on training and test sets
def check_accuracy(loader, model):
  """Print how many examples in `loader` the model classifies correctly.

  The model is switched to eval mode for the duration of the check and put
  back into whichever mode (train or eval) it was in when called. Batches are
  moved to the device of the model's first parameter; a model without
  parameters receives the batches unchanged. An empty loader is reported as
  0 / 0 with accuracy 0.00.

  Args:
    loader: iterable of (inputs, labels) batches whose `dataset` exposes a
      boolean `train` attribute, used only to pick the header message.
    model: module mapping a batch of inputs to per-class scores of shape
      (batch, num_classes).

  Returns:
    None. The result is printed.
  """
  if loader.dataset.train:
    print("Checking accuracy on training data")
  else:
    print("Checking accuracy on test data")

  # Follow the model's own device rather than relying on a notebook-level global
  first_param = next(model.parameters(), None)
  target_device = None if first_param is None else first_param.device

  num_correct = 0
  num_samples = 0
  was_training = model.training
  model.eval()

  try:
    with torch.no_grad():
      for x, y in loader:
        if target_device is not None:
          x = x.to(device=target_device)
          y = y.to(device=target_device)

        scores = model(x)
        _, predictions = scores.max(1)  # index of the highest score along the class dimension (dim=1)
        num_correct += (predictions == y).sum()
        num_samples += predictions.size(0)
  finally:
    # Restore the caller's mode even if evaluation fails part-way through
    model.train(was_training)

  num_correct = int(num_correct)  # tensor -> plain int, converted once after the loop
  accuracy = num_correct / num_samples * 100 if num_samples else 0.0
  print(f'got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')

In [6]:
# Report accuracy on the training loader first, then on the test loader
for loader in (train_loader, test_loader):
  check_accuracy(loader, model)

Checking accuracy on training data
got 58966 / 60000 with accuracy 98.28
Checking accuracy on test data
got 9817 / 10000 with accuracy 98.17
