# Convolutional Neural Network

In this notebook you will compare the performance of a fully connected network with that of simple convolutional neural networks.

In [None]:
import os
import time
import tqdm
import torch
import functools
import numpy as np
import torchvision
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader

In [None]:
!pip install pytorch_lightning

In [None]:
import pytorch_lightning as pl
import torchmetrics
# `seed_everything` is exported from the package root; the older
# `pytorch_lightning.utilities.seed` path is deprecated in newer releases.
from pytorch_lightning import seed_everything

# Fix the global random seed so that weight initialisation and data shuffling
# are repeatable between runs.
seed_everything(123)

# Start

#### Download dataset

In this notebook you are going to work with [Fashion-MNIST dataset](https://github.com/zalandoresearch/fashion-mnist). `Fashion-MNIST` is a dataset of [Zalando](https://jobs.zalando.com/tech/)'s article images—consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes.


In [None]:
# Directory the Fashion-MNIST files are downloaded to and loaded from.
DATA_DIR = './data'

In [None]:
from torchvision import datasets
import torchvision.transforms as transforms


class FashionMNISTDataModule(pl.LightningDataModule):
    """Lightning data module providing Fashion-MNIST train/val/test loaders.

    The 60,000 training images are split into 50,000 training and 10,000
    validation samples. The split is drawn from a dedicated, seeded generator
    so that every call to ``setup('fit')`` produces the same partition; this
    keeps validation results comparable between the models trained in this
    notebook.

    Args:
        data_dir: Directory the dataset is downloaded to and loaded from.
        batch_size: Mini-batch size used by all three dataloaders.
        num_workers: Number of worker processes used by each dataloader.
        split_seed: Seed of the generator that draws the train/val split.
    """

    def __init__(self, data_dir=DATA_DIR, batch_size=32, num_workers=2, split_seed=123):
        super().__init__()

        # Dataset specific preprocessing: convert the image to a tensor and
        # normalise its single channel with mean 0.5 and std 0.5.
        self.transform = transforms.Compose([transforms.ToTensor(),
                                             transforms.Normalize((0.5,), (0.5,)),
                                             ])

        # Set our init args as class attributes
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.split_seed = split_seed

    def prepare_data(self):
        """Download the train and test parts of the dataset if they are missing."""
        datasets.FashionMNIST(self.data_dir, train=True, download=True)
        datasets.FashionMNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for ``stage`` ('fit', 'test' or None for both)."""

        # Assign train/val datasets for use in dataloaders
        if stage == 'fit' or stage is None:
            data_full = datasets.FashionMNIST(self.data_dir, train=True, transform=self.transform)
            # A fresh generator with a fixed seed makes the split identical on
            # every call, independent of the state of the global RNG.
            split_generator = torch.Generator().manual_seed(self.split_seed)
            self.fmnist_train, self.fmnist_val = torch.utils.data.random_split(
                data_full, [50000, 10000], generator=split_generator)

        # Assign test dataset for use in dataloader
        if stage == 'test' or stage is None:
            self.fmnist_test = datasets.FashionMNIST(self.data_dir, train=False, transform=self.transform)

    def _make_dataloader(self, dataset, shuffle):
        """Build a dataloader over ``dataset`` with the configured batch size and workers."""
        return torch.utils.data.DataLoader(dataset, batch_size=self.batch_size,
                                           shuffle=shuffle, num_workers=self.num_workers)

    def train_dataloader(self):
        """Return the shuffled dataloader over the training split."""
        return self._make_dataloader(self.fmnist_train, shuffle=True)

    def val_dataloader(self):
        """Return the dataloader over the validation split (fixed order)."""
        return self._make_dataloader(self.fmnist_val, shuffle=False)

    def test_dataloader(self):
        """Return the dataloader over the test set (fixed order)."""
        return self._make_dataloader(self.fmnist_test, shuffle=False)

In [None]:
# Human-readable class names; the position in the list is the integer class
# label used to index it when predictions are displayed.
label2description_list = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot"
]


# Create a data module with the default data directory and download the
# train and test files up front.
fmnist_datamodule = FashionMNISTDataModule()
fmnist_datamodule.prepare_data()

Let's display a few dataset items:

In [None]:
# helper function to show an image
def matplotlib_imshow(img, one_channel=False):
    """Draw an image tensor on the current matplotlib axes.

    Args:
        img: Tensor with the channel axis first. The values are shifted by
            ``x / 2 + 0.5`` before plotting, which undoes a
            ``Normalize((0.5,), (0.5,))`` transform.
        one_channel: When true, the channel axis is averaged away and the
            result is drawn with the "Greys" colormap; otherwise the channel
            axis is moved last and the image is drawn as is.
    """
    tensor = img.mean(dim=0) if one_channel else img
    pixels = (tensor / 2 + 0.5).cpu().numpy()  # unnormalize

    if not one_channel:
        # matplotlib expects (height, width, channels)
        plt.imshow(np.transpose(pixels, (1, 2, 0)))
        return
    plt.imshow(pixels, cmap="Greys")

In [None]:
# Build the train/val datasets so that `train_dataloader()` can be used.
fmnist_datamodule.setup('fit')

# Fetch a single mini-batch. The built-in `next()` is used because the
# iterator's `.next()` method is a Python 2 style alias that recent PyTorch
# versions no longer provide.
dataiter = iter(fmnist_datamodule.train_dataloader())
images, labels = next(dataiter)

# create grid of images
img_grid = torchvision.utils.make_grid(images)

# show images
matplotlib_imshow(img_grid, one_channel=True)

#### Checking for available GPU device.

In [None]:
def get_free_gpu():
    """Return the NVML index of the GPU that reports the most free memory.

    The index is the result of ``np.argmax`` over the free-memory values of
    all devices NVML enumerates.
    """
    # Imported lazily so that pynvml is only needed when this is called.
    import pynvml

    pynvml.nvmlInit()

    free_memory = []
    for gpu_index in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_index)
        free_memory.append(pynvml.nvmlDeviceGetMemoryInfo(handle).free)

    return np.argmax(free_memory)


# Pick the GPU with the most free memory when CUDA is available, otherwise
# fall back to the CPU.
if torch.cuda.is_available():
    # Query the GPUs once and reuse the result for the device string.
    cuda_id = get_free_gpu()
    device = 'cuda:%d' % (cuda_id,)
    print('Selected %s' % (device,))
else:
    device = 'cpu'
    print('WARNING: using cpu!')

### please, don't remove the following line
x = torch.tensor([1], dtype=torch.float32).to(device)

# Building a baseline FC-network

## Network

Let's start with a simple baseline Fully Connected network to compare with:

In [None]:
class FashionMNISTModelFC(pl.LightningModule):
    """Fully connected baseline classifier (784 -> 512 -> 100 -> 10).

    The class also provides the training, validation and test steps and the
    optimizer configuration, so subclasses only need to define their layers
    and ``forward``.

    Args:
        learning_rate: Learning rate of the SGD optimizer.
    """

    def __init__(self, learning_rate=0.01):

        super(FashionMNISTModelFC, self).__init__()
        self.save_hyperparameters()

        # Set our init args as class attributes
        self.hparams.learning_rate = learning_rate

        # Define PyTorch model layers
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        """Flatten every sample and return one score per class."""
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def training_step(self, batch, batch_idx):
        """Return the cross-entropy loss of one training batch."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        return loss

    def _evaluation_step(self, batch, stage):
        """Compute and log loss and accuracy of a batch as ``{stage}_loss`` / ``{stage}_acc``."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        # Fraction of correct predictions, computed with plain tensor ops so
        # the result does not depend on the installed torchmetrics version.
        acc = (preds == y).float().mean()

        # Calling self.log will surface up scalars for you in TensorBoard
        self.log(f'{stage}_loss', loss, prog_bar=True)
        self.log(f'{stage}_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and ``val_acc`` for one validation batch."""
        return self._evaluation_step(batch, 'val')

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and ``test_acc`` for one test batch.

        The computation is the same as in validation; only the metric names
        differ, so test results are not reported under validation names.
        """
        return self._evaluation_step(batch, 'test')

    def configure_optimizers(self):
        """Return a plain SGD optimizer over all parameters."""
        optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer
    

By using the `Trainer` you automatically get:
1. Tensorboard logging
2. Model checkpointing
3. Training and validation loop
4. early-stopping

As in our basic tutorial, we train our model with negative log-likelihood aka crossentropy.

In [None]:
# Init our datamodule
dm = FashionMNISTDataModule()
dm.prepare_data()

# Init our model
fmnist_model = FashionMNISTModelFC().to(device)

# Initialize a trainer. A GPU is requested only when CUDA is available, so
# the cell also runs on the CPU-only path handled by the device cell above.
trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else 0,
                     max_epochs=10, progress_bar_refresh_rate=20)

# Train the model ⚡
dm.setup(stage='fit')
trainer.fit(fmnist_model, dm)

## Testing

To test a model, call `trainer.test(model)`.

Or, if you've just trained a model, you can just call `trainer.test()` and Lightning will automatically test using the best saved checkpoint (conditioned on val_loss).

In [None]:
# Build the test dataset, then run the test loop on it. No model is passed
# to `trainer.test`, only the data module.
dm.setup(stage='test')
trainer.test(datamodule=dm)

In [None]:
from scipy.special import softmax

# helper function to show an image
def matplotlib_imshow_apply_model(img, true_label, prediction):
    """Show an image and print its true label and the three most likely classes.

    Args:
        img: Image passed to ``plt.imshow`` with the "Greys" colormap.
        true_label: Index into ``label2description_list`` of the true class.
        prediction: Tensor of per-class scores for this image; a softmax over
            all of its elements turns them into the printed probabilities.
    """
    plt.imshow(img, cmap="Greys")
    plt.show()

    true_name = label2description_list[true_label]
    print(f'True label: {true_name}')
    print('Top 3 predictions:')

    probs = softmax(prediction.cpu().detach().numpy(), axis=None)
    # Class indices ordered from most to least probable, cut to the best three.
    best_first = np.argsort(probs)[::-1][:3]
    for class_idx in best_first:
        class_name = label2description_list[class_idx]
        print(f'\t{class_name}: {probs[class_idx]:.3f}')

# Take the first test batch (built-in `next()`; the iterator's `.next()`
# alias is not available in recent PyTorch versions).
dataiter = iter(dm.test_dataloader())
images, labels = next(dataiter)

# Inference only: use eval mode, skip gradient tracking and feed the batch
# from the device the model currently lives on.
fmnist_model.eval()
with torch.no_grad():
    predictions = fmnist_model(images.to(fmnist_model.device))

# show images
for idx in range(images.size()[0]):
    img = images[idx, 0]
    label = labels[idx]
    prediction = predictions[idx]
    matplotlib_imshow_apply_model(img, label, prediction)

# Building a baseline CNN-network

## Network

Let's change our network to the CNN:

In [None]:
class FashionMNISTModelCNN(FashionMNISTModelFC):
    """CNN baseline: one 3x3 convolution followed by two linear layers.

    Training, validation and optimizer logic are inherited from
    ``FashionMNISTModelFC``; only the layers and ``forward`` differ.

    NOTE(review): the parent constructor also creates ``fc3``, which this
    ``forward`` never uses. It is left in place here because it still counts
    towards the model's parameter total.

    Args:
        data_dir: Accepted for signature compatibility; not used by the model.
        learning_rate: Learning rate forwarded to the parent class.
    """

    def __init__(self, data_dir=DATA_DIR, learning_rate=0.01):

        super(FashionMNISTModelCNN, self).__init__(learning_rate=learning_rate)

        # Convolution keeps the 28x28 resolution (3x3 kernel, padding 1) and
        # produces 4 feature maps.
        self.conv1 = nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=1)
        # The linear layers replace the parent's fc1/fc2.
        self.fc1 = nn.Linear(28 * 28 * 4, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        feature_maps = F.relu(self.conv1(x))
        hidden = F.relu(self.fc1(torch.flatten(feature_maps, 1)))
        scores = self.fc2(hidden)
        return F.log_softmax(scores, dim=1)

In [None]:
# Init our model
fmnist_model = FashionMNISTModelCNN().to(device)

# Initialize a trainer. A GPU is requested only when CUDA is available, so
# the cell also runs on CPU-only machines.
trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else 0,
                     max_epochs=10, progress_bar_refresh_rate=20)

# Train the model ⚡
dm.setup(stage='fit')
trainer.fit(fmnist_model, dm)

## Testing

In [None]:
# Build the test dataset and run the test loop on it.
dm.setup(stage='test')
trainer.test(datamodule=dm)

# Take the first test batch (built-in `next()`; the iterator's `.next()`
# alias is not available in recent PyTorch versions).
dataiter = iter(dm.test_dataloader())
images, labels = next(dataiter)

# Inference only: use eval mode, skip gradient tracking and feed the batch
# from the device the model currently lives on.
fmnist_model.eval()
with torch.no_grad():
    predictions = fmnist_model(images.to(fmnist_model.device))

# show images
for idx in range(images.size()[0]):
    img = images[idx, 0]
    label = labels[idx]
    prediction = predictions[idx]
    matplotlib_imshow_apply_model(img, label, prediction)

## Result

We used only one convolutional layer and got better accuracy than with the fully connected network (even though the convolutional network has fewer parameters than the fully connected one - **315K** versus **454K**).

## Task 1

Now it's your turn. Improve your CNN network to achieve **0.895 accuracy** on validation.

1. add one more CNN layer (with ReLU);
2. increase the number of out channels in the second CNN layer up to 16 *(so: 4 in and 16 out)*; 
3. add a fully connected layer. You should have three consecutive FC-layers like:
  * nn.Linear(12544, 1024)
  * nn.Linear(1024, 100)
  * nn.Linear(100, 10)

In [None]:
class MySuperFashionMNISTModelCNN(FashionMNISTModelFC):
    # Exercise template for Task 1: the layer definitions and the body of
    # `forward` are intentionally left blank and must be filled in before
    # this cell can run. Training and evaluation steps are inherited from
    # FashionMNISTModelFC.

    def __init__(self, data_dir=DATA_DIR, learning_rate=0.01):

        super(MySuperFashionMNISTModelCNN, self).__init__(learning_rate=learning_rate)

        # Define PyTorch model layers
        # Per the task description: two convolutions (the second one mapping
        # 4 -> 16 channels) followed by three linear layers
        # (12544 -> 1024 -> 100 -> 10).
        self.conv1 = # your code here
        self.conv2 = # your code here
        self.fc1 = # your code here
        self.fc2 = # your code here
        self.fc3 = # your code here

    def forward(self, x):
        # your code here
        # `x` must hold the output of the last linear layer at this point.
        
        return F.log_softmax(x, dim=1)


The number of learnable parameters has increased significantly, so we will increase the number of training epochs to 20:

In [None]:
# Init our model
fmnist_model = MySuperFashionMNISTModelCNN().to(device)

# Initialize a trainer. A GPU is requested only when CUDA is available, so
# the cell also runs on CPU-only machines.
trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else 0,
                     max_epochs=20, progress_bar_refresh_rate=20)

# Train the model ⚡
dm.setup(stage='fit')
trainer.fit(fmnist_model, dm)

## Task 1 testing

In [None]:
# Build the test dataset and run the test loop on it.
dm.setup(stage='test')
trainer.test(datamodule=dm)

# Take the first test batch (built-in `next()`; the iterator's `.next()`
# alias is not available in recent PyTorch versions).
dataiter = iter(dm.test_dataloader())
images, labels = next(dataiter)

# Inference only: use eval mode, skip gradient tracking and feed the batch
# from the device the model currently lives on.
fmnist_model.eval()
with torch.no_grad():
    predictions = fmnist_model(images.to(fmnist_model.device))

# show images
for idx in range(images.size()[0]):
    img = images[idx, 0]
    label = labels[idx]
    prediction = predictions[idx]
    matplotlib_imshow_apply_model(img, label, prediction)

# Add Pooling, Batchnorm and Dropout

## Network

Let's add pooling, batchnorm and dropout layers. Adding a batchnorm layer allows us to increase the learning rate without losing stability. Take, for example, a learning rate of 0.03 instead of 0.01 (you can check for yourself what happens if you increase the learning rate without using a batchnorm layer).

In [None]:
class FashionMNISTModelCNNPoolingBN(FashionMNISTModelFC):
    """CNN with batch normalisation, 2x2 max pooling and dropout.

    Training, validation and optimizer logic are inherited from
    ``FashionMNISTModelFC``; only the layers and ``forward`` differ.

    NOTE(review): the parent constructor also creates ``fc3``, which this
    ``forward`` never uses. It is left in place here because it still counts
    towards the model's parameter total.

    Args:
        data_dir: Accepted for signature compatibility; not used by the model.
        learning_rate: Learning rate forwarded to the parent class.
    """

    def __init__(self, data_dir=DATA_DIR, learning_rate=0.03):

        super(FashionMNISTModelCNNPoolingBN, self).__init__(learning_rate=learning_rate)

        # Convolutional part: 1 -> 4 channels, resolution preserved by padding.
        self.conv1 = nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=1)
        self.conv1_bn = nn.BatchNorm2d(4)
        # Classifier part: 784 inputs = 4 channels x 14 x 14 after pooling.
        self.fc1 = nn.Linear(784, 100)
        self.fc1_bn = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100, 10)
        # A single dropout module is shared by both dropout positions.
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        feature_maps = F.relu(self.conv1_bn(self.conv1(x)))
        pooled = F.max_pool2d(feature_maps, 2)
        flat = self.dropout(torch.flatten(pooled, 1))
        hidden = self.dropout(F.relu(self.fc1_bn(self.fc1(flat))))
        scores = self.fc2(hidden)
        return F.log_softmax(scores, dim=1)

In [None]:
# Init our model
fmnist_model = FashionMNISTModelCNNPoolingBN().to(device)

# Initialize a trainer. A GPU is requested only when CUDA is available, so
# the cell also runs on CPU-only machines.
trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else 0,
                     max_epochs=10, progress_bar_refresh_rate=20)

# Train the model ⚡
dm.setup(stage='fit')
trainer.fit(fmnist_model, dm)

## Testing

In [None]:
# Build the test dataset and run the test loop on it.
dm.setup(stage='test')
trainer.test(datamodule=dm)

# Take the first test batch (built-in `next()`; the iterator's `.next()`
# alias is not available in recent PyTorch versions).
dataiter = iter(dm.test_dataloader())
images, labels = next(dataiter)

# Inference only: eval mode disables dropout and makes batch-norm use its
# running statistics; gradients are not needed, and the batch is fed from the
# device the model currently lives on.
fmnist_model.eval()
with torch.no_grad():
    predictions = fmnist_model(images.to(fmnist_model.device))

# show images
for idx in range(images.size()[0]):
    img = images[idx, 0]
    label = labels[idx]
    prediction = predictions[idx]
    matplotlib_imshow_apply_model(img, label, prediction)

## Result

Admire how much more efficient convolutional networks are! We used only one convolutional layer with pooling, batchnorm and dropout. As a result, we got much better accuracy than with the fully connected network, with significantly fewer parameters (**80.8K** versus **454K**).

## Task 2

Now it's your turn again. Improve your CNN network with pooling and batchnorm to achieve **0.91 accuracy** on validation.

1. add one more CNN layer with Batchnorm, ReLU and max pooling;
2. add one more FC layer with Batchnorm and ReLU; 
3. add Batchnorm to the penultimate FC-layer;
4. add Dropout layers after Flatten and FC layers (see example above).

Eventually, the network should look like this:

```
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1            [-1, 4, 28, 28]              40
            Conv2d-2           [-1, 16, 28, 28]             592
       BatchNorm2d-3           [-1, 16, 28, 28]              32
           Dropout-4                 [-1, 3136]               0
            Linear-5                 [-1, 1024]       3,212,288
       BatchNorm1d-6                 [-1, 1024]           2,048
           Dropout-7                 [-1, 1024]               0
            Linear-8                  [-1, 100]         102,500
       BatchNorm1d-9                  [-1, 100]             200
          Dropout-10                  [-1, 100]               0
           Linear-11                   [-1, 10]           1,010
================================================================
Total params: 3,318,710
Trainable params: 3,318,710
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.27
Params size (MB): 12.66
Estimated Total Size (MB): 12.93
----------------------------------------------------------------
```

In [None]:
class MySuperFashionMNISTModelCNNPoolingBN(FashionMNISTModelFC):
    # Exercise template for Task 2: the layer definitions and the body of
    # `forward` are intentionally left blank and must be filled in before
    # this cell can run. Training and evaluation steps are inherited from
    # FashionMNISTModelFC.

    def __init__(self, data_dir=DATA_DIR, learning_rate=0.03):

        super(MySuperFashionMNISTModelCNNPoolingBN, self).__init__(learning_rate=learning_rate)

        # Define PyTorch model layers
        # The expected layer sizes are listed in the summary table of the
        # task description.
        self.conv1 = # your code here
        self.conv2 = # your code here
        self.conv2_bn = # your code here
        self.fc1 = # your code here
        self.fc1_bn = # your code here
        self.fc2 = # your code here
        self.fc2_bn = # your code here
        self.fc3 = # your code here
        self.dropout = # your code here

    def forward(self, x):
        # your code here
        # `x` must hold the output of the last linear layer at this point.
        
        return F.log_softmax(x, dim=1)


In [None]:
from torchsummary import summary

# Instantiate the Task 2 model and print its per-layer summary for a single
# 1x28x28 input.
fmnist_model = MySuperFashionMNISTModelCNNPoolingBN().to(device)
summary(fmnist_model, (1, 28, 28))

In [None]:
# Init our model
fmnist_model = MySuperFashionMNISTModelCNNPoolingBN().to(device)

# Initialize a trainer. A GPU is requested only when CUDA is available, so
# the cell also runs on CPU-only machines.
trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else 0,
                     max_epochs=20, progress_bar_refresh_rate=20)

# Train the model ⚡
dm.setup(stage='fit')
trainer.fit(fmnist_model, dm)

## Task 2 testing

In [None]:
# Build the test dataset and run the test loop on it.
dm.setup(stage='test')
trainer.test(datamodule=dm)

# Take the first test batch (built-in `next()`; the iterator's `.next()`
# alias is not available in recent PyTorch versions).
dataiter = iter(dm.test_dataloader())
images, labels = next(dataiter)

# Inference only: eval mode disables dropout and makes batch-norm use its
# running statistics; gradients are not needed, and the batch is fed from the
# device the model currently lives on.
fmnist_model.eval()
with torch.no_grad():
    predictions = fmnist_model(images.to(fmnist_model.device))

# show images
for idx in range(images.size()[0]):
    img = images[idx, 0]
    label = labels[idx]
    prediction = predictions[idx]
    matplotlib_imshow_apply_model(img, label, prediction)

# Conclusion

In this notebook, we switched from a fully-connected architecture to a convolutional one, which significantly increased the efficiency of the neural network. We also saw that adding batchnorm, pooling and dropout layers can increase the training speed and reduce the number of network weights.