# Refining covnet structure

From [exploration](./2-ja-covnet.ipynb) we have found a structure:

**784 - 32C5-32C5S2-64C5-64C5S2 - 128 - 10**

which performs reasonably well on the validation data set (>99%).

This notebook explores variations on this structure in search of improvements.

## Setup

In [5]:
import os
import sys
from pathlib import Path

sys.path.append("../")
from dotenv import find_dotenv, load_dotenv
from torch.utils.data import DataLoader
import torchvision.transforms as T
import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard.writer import SummaryWriter

import logging

# Send INFO-and-above records from the root logger to ./mylog.log (append mode).
#
# The root logger outlives a single cell execution, so re-running this cell must
# not attach a second FileHandler for the same file: every additional handler
# would write each record to mylog.log one more time. An already-attached
# handler for this file is therefore reused instead of creating a new one.
logger = logging.getLogger()
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# FileHandler stores its target as an absolute path in `baseFilename`.
_log_path = os.path.abspath("mylog.log")
fhandler = next(
    (
        handler
        for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename == _log_path
    ),
    None,
)
if fhandler is None:
    fhandler = logging.FileHandler(filename="mylog.log", mode="a")
    logger.addHandler(fhandler)

# (Re)apply the formatter and level so a re-run still picks up edits made here.
fhandler.setFormatter(formatter)
logger.setLevel(logging.INFO)
logger.info("Setting up logging...")


from src.utils import CustomMnistDataset, train_covnet

# Load the variables from the .env file located by find_dotenv() into the
# process environment before any of them is read below.
load_dotenv(find_dotenv())

# Paths and seed come from the environment; MODEL_DIR is created on demand.
DATA_DIR = os.getenv("DATA_DIR")
SEED = int(os.getenv("SEED"))  # type: ignore
TENSORBOARD_DIR = Path(os.getenv("TENSORBOARD_DIR"))
MODEL_DIR = Path(os.getenv("MODEL_DIR"))
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Hyper-parameters shared by every experiment in this notebook.
BATCH_SIZE = 10
SHUFFLE = True
EPOCHS = 50

# Random affine jitter: rotation up to 10 degrees, shift up to 10% of the image
# size in each direction, zoom between 0.9x and 1.1x.
augmentor = nn.Sequential(
    T.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
)

# Only the training split receives the (scripted) augmentation transform.
train_dataset = CustomMnistDataset(
    img_dir=DATA_DIR,
    type="train",
    transform=torch.jit.script(augmentor),
)
val_dataset = CustomMnistDataset(img_dir=DATA_DIR, type="validation")

# Both loaders use the same batch size and shuffle flag.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=SHUFFLE)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=SHUFFLE)


def optimizer_wrapper(x):
    """Return an SGD optimizer (lr=0.01, momentum=0.90) over the parameters ``x``."""
    return optim.SGD(x, lr=0.01, momentum=0.90)


## **784 - 32C5-32C5S2-64C5-64C5S2- 128 - 10** with Batch normalization and Dropout. 


In [None]:
exp_name = "32C5-32C5S2-64C5-64C5S2-128-10-BN-DO"


class Net(nn.Module):
    """32C5-32C5S2-64C5-64C5S2-128-10 network with batch norm and one shared dropout.

    Down-sampling is done by the strided 5x5 convolutions ``pool1``/``pool2``.
    Every convolution and the hidden linear layer is followed by batch
    normalisation and ReLU; the same ``Dropout(p=0.4)`` module is applied after
    each of the two convolutional stages and after the hidden linear layer.

    Shape comments assume a single-channel 28x28 input (784 pixels).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1x28x28 -> 32x24x24 -> 32x12x12
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.pool1 = nn.Conv2d(
            in_channels=32, out_channels=32, kernel_size=5, stride=2, padding=2
        )
        # Stage 2: 32x12x12 -> 64x8x8 -> 64x4x4
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.pool2 = nn.Conv2d(
            in_channels=64, out_channels=64, kernel_size=5, stride=2, padding=2
        )
        # Classifier head on the flattened 64x4x4 feature map.
        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)
        # One batch-norm layer per convolution / hidden linear layer.
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.bn1b = nn.BatchNorm2d(num_features=32)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.bn2b = nn.BatchNorm2d(num_features=64)
        self.bn3 = nn.BatchNorm1d(num_features=128)
        self.dropout = nn.Dropout(p=0.4)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        # Stage 1 -> (32, 12, 12)
        for conv, norm in ((self.conv1, self.bn1), (self.pool1, self.bn1b)):
            x = F.relu(norm(conv(x)))
        x = self.dropout(x)

        # Stage 2 -> (64, 4, 4)
        for conv, norm in ((self.conv2, self.bn2), (self.pool2, self.bn2b)):
            x = F.relu(norm(conv(x)))
        x = self.dropout(x)

        # Keep the batch dimension, flatten everything else.
        features = torch.flatten(x, start_dim=1)
        hidden = self.dropout(F.relu(self.bn3(self.fc1(features))))
        return self.fc2(hidden)
  
# Run the BN + shared-dropout experiment: hand a fresh Net to train_covnet
# together with a SummaryWriter rooted at TENSORBOARD_DIR/<exp_name>, then save
# the state_dict of the object returned under the "model" key to
# MODEL_DIR/<exp_name>.
train_dict = train_covnet(
    net=Net(),
    dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=EPOCHS,
    optimizer_wrapper=optimizer_wrapper,
    criterion=nn.CrossEntropyLoss(),
    writer=SummaryWriter(TENSORBOARD_DIR / exp_name),
)
torch.save(train_dict.get("model").state_dict(), MODEL_DIR / exp_name)

## **784 - 32C3-32C3-32C5S2-64C3-64C3-64C5S2- 128 - 10** with Batch normalization and Dropout. 

Replacing each non-strided 5x5 convolution layer with two stacked 3x3 convolution layers.


In [None]:
exp_name = "32C3-32C3-32C5S2-64C3-64C3-64C5S2-128-10-BN-DO"


class Net(nn.Module):
    """32C3-32C3-32C5S2-64C3-64C3-64C5S2-128-10 network with batch norm and dropout.

    Each non-strided 5x5 convolution of the baseline is expressed as two
    consecutive 3x3 convolutions; the strided 5x5 convolutions ``pool1`` and
    ``pool2`` still perform the down-sampling. Every convolution and the hidden
    linear layer is followed by batch normalisation and ReLU, and one shared
    ``Dropout(p=0.4)`` module is applied after each stage.

    Shape comments assume a single-channel 28x28 input (784 pixels).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1x28x28 -> 32x26x26 -> 32x24x24 -> 32x12x12
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv1a = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.pool1 = nn.Conv2d(
            in_channels=32, out_channels=32, kernel_size=5, stride=2, padding=2
        )
        # Stage 2: 32x12x12 -> 64x10x10 -> 64x8x8 -> 64x4x4
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv2a = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.pool2 = nn.Conv2d(
            in_channels=64, out_channels=64, kernel_size=5, stride=2, padding=2
        )
        # Classifier head on the flattened 64x4x4 feature map.
        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)
        # One batch-norm layer per convolution / hidden linear layer.
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.bn1a = nn.BatchNorm2d(num_features=32)
        self.bn1b = nn.BatchNorm2d(num_features=32)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.bn2a = nn.BatchNorm2d(num_features=64)
        self.bn2b = nn.BatchNorm2d(num_features=64)
        self.bn3 = nn.BatchNorm1d(num_features=128)
        self.dropout = nn.Dropout(p=0.4)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        stage1 = (
            (self.conv1, self.bn1),
            (self.conv1a, self.bn1a),
            (self.pool1, self.bn1b),
        )
        stage2 = (
            (self.conv2, self.bn2),
            (self.conv2a, self.bn2a),
            (self.pool2, self.bn2b),
        )

        # Stage 1 -> (32, 12, 12)
        for conv, norm in stage1:
            x = F.relu(norm(conv(x)))
        x = self.dropout(x)

        # Stage 2 -> (64, 4, 4)
        for conv, norm in stage2:
            x = F.relu(norm(conv(x)))
        x = self.dropout(x)

        # Keep the batch dimension, flatten everything else.
        features = torch.flatten(x, start_dim=1)
        hidden = self.dropout(F.relu(self.bn3(self.fc1(features))))
        return self.fc2(hidden)
  
# Run the stacked-3x3 experiment: hand a fresh Net to train_covnet together
# with a SummaryWriter rooted at TENSORBOARD_DIR/<exp_name>, then save the
# state_dict of the object returned under the "model" key to
# MODEL_DIR/<exp_name>.
train_dict = train_covnet(
    net=Net(),
    dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=EPOCHS,
    optimizer_wrapper=optimizer_wrapper,
    criterion=nn.CrossEntropyLoss(),
    writer=SummaryWriter(TENSORBOARD_DIR / exp_name),
)
torch.save(train_dict.get("model").state_dict(), MODEL_DIR / exp_name)

## **784 - 32C5-32C5S2-64C5-64C5S2- 128 - 10** with Batch normalization and Dropout per layer

In [7]:
exp_name = "32C5-32C5S2-D1-64C5-64C5S2-D2-128-D3-10-BN"


class Net(nn.Module):
    """32C5-32C5S2-64C5-64C5S2-128-10 network with batch norm and per-stage dropout.

    Same layout as the shared-dropout baseline, but each stage has its own
    dropout module with an increasing drop probability: 0.1 after the first
    convolutional stage, 0.25 after the second and 0.4 after the hidden linear
    layer.

    Shape comments assume a single-channel 28x28 input (784 pixels).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1x28x28 -> 32x24x24 -> 32x12x12
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.pool1 = nn.Conv2d(
            in_channels=32, out_channels=32, kernel_size=5, stride=2, padding=2
        )
        # Stage 2: 32x12x12 -> 64x8x8 -> 64x4x4
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.pool2 = nn.Conv2d(
            in_channels=64, out_channels=64, kernel_size=5, stride=2, padding=2
        )
        # Classifier head on the flattened 64x4x4 feature map.
        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)
        # One batch-norm layer per convolution / hidden linear layer.
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.bn1b = nn.BatchNorm2d(num_features=32)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.bn2b = nn.BatchNorm2d(num_features=64)
        self.bn3 = nn.BatchNorm1d(num_features=128)
        # Dropout strength grows with depth.
        self.d1 = nn.Dropout(p=0.1)
        self.d2 = nn.Dropout(p=0.25)
        self.d3 = nn.Dropout(p=0.4)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        # Stage 1 -> (32, 12, 12)
        for conv, norm in ((self.conv1, self.bn1), (self.pool1, self.bn1b)):
            x = F.relu(norm(conv(x)))
        x = self.d1(x)

        # Stage 2 -> (64, 4, 4)
        for conv, norm in ((self.conv2, self.bn2), (self.pool2, self.bn2b)):
            x = F.relu(norm(conv(x)))
        x = self.d2(x)

        # Keep the batch dimension, flatten everything else.
        features = torch.flatten(x, start_dim=1)
        hidden = self.d3(F.relu(self.bn3(self.fc1(features))))
        return self.fc2(hidden)
  
# Run the per-stage-dropout experiment: hand a fresh Net to train_covnet
# together with a SummaryWriter rooted at TENSORBOARD_DIR/<exp_name>, then save
# the state_dict of the object returned under the "model" key to
# MODEL_DIR/<exp_name>.
train_dict = train_covnet(
    net=Net(),
    dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=EPOCHS,
    optimizer_wrapper=optimizer_wrapper,
    criterion=nn.CrossEntropyLoss(),
    writer=SummaryWriter(TENSORBOARD_DIR / exp_name),
)
torch.save(train_dict.get("model").state_dict(), MODEL_DIR / exp_name)

## **784 - 32C5-32C5S2-64C5-64C5S2- 128C4 - 10** with Batch normalization and Dropout per layer with Test Time Augmentation.


In [9]:
exp_name = "32C5-32C5S2-D1-64C5-64C5S2-D2-128C4-D3-10-BN-TTA"


class Net(nn.Module):
    """32C5-32C5S2-64C5-64C5S2-128C4-10 network with test-time augmentation (TTA).

    The hidden linear layer of the earlier variants is replaced by a 4x4
    convolution (``conv3``) that collapses the 64x4x4 feature map to 128x1x1.
    Every convolution is followed by batch normalisation and ReLU, and each
    stage has its own dropout (p = 0.1, 0.25, 0.4).

    In training mode ``forward`` is a single pass through the network. In
    evaluation mode the logits of the unmodified input are averaged with the
    logits of ``n_augs`` augmented copies of the input.

    Shape comments assume a single-channel 28x28 input (784 pixels).

    Args:
        tta_augmentor: Callable applied to the input batch to produce each
            augmented copy. Defaults to the notebook-level ``augmentor``.
        n_augs: Number of augmented passes averaged with the clean pass in
            evaluation mode. Must be non-negative.

    Raises:
        ValueError: If ``n_augs`` is negative.
    """

    def __init__(self, tta_augmentor=None, n_augs=8):
        super().__init__()
        if n_augs < 0:
            raise ValueError(f"n_augs must be non-negative, got {n_augs}")
        # Stage 1: 1x28x28 -> 32x24x24 -> 32x12x12
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.pool1 = nn.Conv2d(32, 32, 5, 2, padding=2)
        # Stage 2: 32x12x12 -> 64x8x8 -> 64x4x4
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.pool2 = nn.Conv2d(64, 64, 5, 2, padding=2)
        # Stage 3: 64x4x4 -> 128x1x1 (convolutional stand-in for a dense layer)
        self.conv3 = nn.Conv2d(64, 128, 4)
        self.bn3 = nn.BatchNorm2d(128)
        self.fc2 = nn.Linear(128, 10)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn1b = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn2b = nn.BatchNorm2d(64)
        self.d1 = nn.Dropout(p=0.1)
        self.d2 = nn.Dropout(p=0.25)
        self.d3 = nn.Dropout(p=0.4)
        # Fall back to the module-level `augmentor` when none is supplied.
        self.augmentor = augmentor if tta_augmentor is None else tta_augmentor
        self.n_augs = n_augs

    def forward_model(self, x):
        """Run one pass through the network and return the 10 class logits."""
        # layer 1
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn1b(self.pool1(x)))
        x = self.d1(x)  # (32, 12, 12)
        # layer 2
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn2b(self.pool2(x)))
        x = self.d2(x)  # (64, 4, 4)
        # layer 3
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.d3(x)  # (128, 1, 1)
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = self.fc2(x)
        return x

    def forward(self, x):
        """Return logits; in evaluation mode, average them over augmented copies.

        The mode is read from ``self.training``. ``self.train`` is the bound
        ``nn.Module.train`` method and is always truthy, so testing it would
        skip the augmentation branch in every mode.
        """
        out = self.forward_model(x)
        if self.training:
            return out
        for _ in range(self.n_augs):
            # Out-of-place add keeps the first pass's output tensor untouched.
            out = out + self.forward_model(self.augmentor(x))
        return out / (self.n_augs + 1)

  
# Run the test-time-augmentation experiment: hand a fresh Net to train_covnet
# together with a SummaryWriter rooted at TENSORBOARD_DIR/<exp_name>, then save
# the state_dict of the object returned under the "model" key to
# MODEL_DIR/<exp_name>.
train_dict = train_covnet(
    net=Net(),
    dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=EPOCHS,
    optimizer_wrapper=optimizer_wrapper,
    criterion=nn.CrossEntropyLoss(),
    writer=SummaryWriter(TENSORBOARD_DIR / exp_name),
)
torch.save(train_dict.get("model").state_dict(), MODEL_DIR / exp_name)