# Arquitecture Experiments 

En este notebook probaremos varias arquitecturas  y haremos experimentos para encontrar la mejor arquitectura.

Para estos esperimentos nos basaremos en el articulo de [25 Million Images! [0.99757] MNIST](https://www.kaggle.com/code/cdeotte/25-million-images-0-99757-mnist/notebook) que nos muestra algunos ejemplos de arquitectura y nos referencia a otras fuentes con modelos que funcionan muy bien para la tarea de clasificación. Para los experimentos nos basaremos en el articulo [How to choose CNN Architecture MNIST](https://www.kaggle.com/code/cdeotte/how-to-choose-cnn-architecture-mnist/) que se encuentra referenciado en el primer articulo; usaremos las conclusiones que llegarón, los replicaremos con nuestro conjunto de datos y plantearemos nuevos experimentos.


Las arquitecturas las plantearemos en base a los experimentos y las arquitecturas que ya han sido construidas.

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Packages-and-Functions" data-toc-modified-id="Packages-and-Functions-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Packages and Functions</a></span><ul class="toc-item"><li><span><a href="#Packages" data-toc-modified-id="Packages-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Packages</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Functions</a></span></li></ul></li><li><span><a href="#Project-Config" data-toc-modified-id="Project-Config-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Project Config</a></span><ul class="toc-item"><li><span><a href="#Configuring-GPU-or-CPU-usage-based-on-available-resources" data-toc-modified-id="Configuring-GPU-or-CPU-usage-based-on-available-resources-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Configuring GPU or CPU usage based on available resources</a></span></li><li><span><a href="#Seed-Config" data-toc-modified-id="Seed-Config-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Seed Config</a></span></li></ul></li><li><span><a href="#Creating-Dataset-Structure" data-toc-modified-id="Creating-Dataset-Structure-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Creating Dataset Structure</a></span><ul class="toc-item"><li><span><a href="#Load-dataset" data-toc-modified-id="Load-dataset-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Load dataset</a></span></li><li><span><a href="#Constantes-del-proyecto" data-toc-modified-id="Constantes-del-proyecto-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Constantes del proyecto</a></span></li><li><span><a href="#Manejo-de-imagenes" data-toc-modified-id="Manejo-de-imagenes-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Manejo de imagenes</a></span></li></ul></li><li><span><a href="#Experiments" data-toc-modified-id="Experiments-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Experiments</a></span><ul class="toc-item"><li><span><a href="#How-many-convolution-subsambling-pairs" data-toc-modified-id="How-many-convolution-subsambling-pairs-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>How many convolution-subsambling pairs</a></span></li></ul></li></ul></div>

## Packages and Functions

In [1]:
CUDA_LAUNCH_BLOCKING="1"

### Packages

In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import typing as ty
from numpy.typing import NDArray
import os

from skimage import io
from torchvision.transforms.functional import resize
import cv2
import torchvision



from sklearn.model_selection import train_test_split


import torch
from torch import nn
from torch.nn import CrossEntropyLoss
from torch.utils.data import Dataset
from torchsummary import summary
from torch import Tensor
import torch.nn.functional as F




### Functions

In [30]:
sys.path.append("custom_utils/")

In [31]:
from bounding_boxes import MNISTDataset, normalize_bbox, draw_predictions, draw_bbox
from models import FeatureExtractor, ClassificationHead, RegressionHead, train
from constants import load_config

In [32]:
import importlib

importlib.reload(sys.modules["bounding_boxes"])
importlib.reload(sys.modules["models"])
importlib.reload(sys.modules["constants"])

<module 'constants' from '/workspace/study/datascience_master/Analytics Fundamentals 2/challenges/second_challange/constants.py'>

## Project Config

### Configuring GPU or CPU usage based on available resources

In [33]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device}')

Using cuda


In [34]:
test = torch.ones((100, 100)).to(device)
del test
torch.cuda.empty_cache()

### Seed Config

In [35]:
seed = 42

torch.backends.cudnn.enabled = True
torch.manual_seed(seed)
np.random.seed(seed)

if(device == 'cuda'):
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

## Creating Dataset Structure

### Load dataset

In [36]:
df = pd.read_csv("datasets/train.csv")

In [37]:
df.rename(columns={"class": "class_id"}, inplace=True)

In [38]:
train_df, test_df = train_test_split(
    df, stratify=df['class_id'], test_size=0.15, random_state=seed
)

### Constantes del proyecto

In [39]:
config_dict = load_config()

### Manejo de imagenes

In [40]:
transform_func_inp_signature = ty.Dict[str, NDArray[np.float_]]

transform_func_signature = ty.Callable[
    [transform_func_inp_signature],
    transform_func_inp_signature
]

In [41]:
class MNISTDataset(Dataset):
    """
    Location MNIST dataset.

    This class is used to load the MNIST dataset and to apply the transformations to the images.

    Source: https://www.kaggle.com/code/sebastingarcaacosta/tutoria-1
    
    Args:
        df (pandas.DataFrame): DataFrame with the data.
        root_dir (string): Root directory of dataset where directory
            ``train`` and  ``test`` exist.
        labeled (bool): If True, the dataset is labeled.
        transform (callable, optional): A function/transform that  takes in an PIL image
            and returns a transformed version.
    """
    def __init__(
        self, 
        df: pd.DataFrame, 
        root_dir: str, 
        labeled: bool = True,
        transform: ty.Optional[ty.List[transform_func_signature]] = None,
        gray: bool= False
    ) -> None:
        self.df = df
        self.root_dir = root_dir
        self.transform = transform
        self.labeled = labeled
        self.gray = gray
        
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx: int) -> transform_func_signature: 
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        # Read image
        img_name = os.path.join(self.root_dir, self.df.filename.iloc[idx])
        image = io.imread(img_name)
        
        # Convert from gray to RGB
        if not self.gray:
            image = cv2.cvtColor(image,cv2.COLOR_GRAY2RGB)

        sample = {'image': image}
        
        if self.labeled:
            # Read labels
            img_class = self.df.class_id.iloc[idx]
            img_bbox = self.df.iloc[idx, 2:]

            img_bbox = np.array([img_bbox]).astype('float')
            img_class = np.array([img_class]).astype('int')
            sample.update({'bbox': img_bbox, 'class_id': img_class})
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample

In [42]:
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""
    
    def __init__(self, img_size, gray: False):
        self.img_size = img_size
        self.gray = gray

    def __call__(self, sample):
        image = sample['image']
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        if(self.gray):
            image = np.expand_dims(image, axis=2)
            
        image = image.transpose((2, 0, 1))
        image = torch.from_numpy(image).float()
        image = resize(image, (self.img_size, self.img_size))
        sample.update({'image': image})
        return sample


In [43]:
gray_images = True

In [44]:
train_ds = MNISTDataset(train_df, root_dir=config_dict['train_images_dir'], gray=gray_images, transform=ToTensor(img_size=config_dict['img_size'], gray=gray_images))
train_data = torch.utils.data.DataLoader(train_ds, batch_size=config_dict['batch_size'])

test_ds = MNISTDataset(test_df, root_dir=config_dict['train_images_dir'], gray=gray_images, transform=ToTensor(img_size=config_dict['img_size'], gray=gray_images))
test_data = torch.utils.data.DataLoader(test_ds, batch_size=config_dict['batch_size'])

for x in train_data:
    print(x['image'].size())
    break

torch.Size([32, 1, 28, 28])


In [45]:
train_data

<torch.utils.data.dataloader.DataLoader at 0x7f605ffbbd00>

## Experiments

### How many convolution-subsambling pairs

In [46]:
nets = 3
models = [0] *nets

for j in range(nets): 
    modules = []
    modules.append(nn.Conv2d(1, 24, kernel_size=5, padding=(2,2)))
    modules.append(nn.ReLU())
    modules.append(nn.MaxPool2d(kernel_size=5))
    out_shape = 600

    if(j > 0):
        modules.append(nn.Conv2d(24, 48, kernel_size=5, padding=(2,2)))
        modules.append(nn.ReLU())
        modules.append(nn.MaxPool2d(kernel_size=5))
        out_shape = 48
    if(j > 1):
        modules.append(nn.Conv2d(48, 64, kernel_size=5, padding=(2,2)))
        modules.append(nn.ReLU())
        modules.append(nn.MaxPool2d(kernel_size=5, padding=(2,2)))
        out_shape = 64
    modules.append(nn.Flatten()) 
    model = nn.Sequential(*modules)    
    models[j] = {'model': model, 'output_shape': out_shape}


    
#     model[j] = Sequential()
#     model[j].add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
#             input_shape=(28,28,1)))
#     model[j].add(MaxPool2D())
#     if j>0:
#         model[j].add(Conv2D(48,kernel_size=5,padding='same',activation='relu'))
#         model[j].add(MaxPool2D())
#     if j>1:
#         model[j].add(Conv2D(64,kernel_size=5,padding='same',activation='relu'))
#         model[j].add(MaxPool2D(padding='same'))
#     model[j].add(Flatten())
#     model[j].add(Dense(256, activation='relu'))
#     model[j].add(Dense(10, activation='softmax'))
#     model[j].compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [47]:
names = ["(C-P)x1","(C-P)x2","(C-P)x3"]

for j in range(nets):
    print(names[j])
    summary(models[j]['model'].to(device), input_size=(1,28,28))

(C-P)x1
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 24, 28, 28]             624
              ReLU-2           [-1, 24, 28, 28]               0
         MaxPool2d-3             [-1, 24, 5, 5]               0
           Flatten-4                  [-1, 600]               0
Total params: 624
Trainable params: 624
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.30
Params size (MB): 0.00
Estimated Total Size (MB): 0.30
----------------------------------------------------------------
(C-P)x2
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 24, 28, 28]             624
              ReLU-2           [-1, 24, 28, 28]               0
         MaxPool2d-3             

In [48]:
def loss_fn(
    y_true, 
    y_preds, 
    cls_loss_fn: CrossEntropyLoss, 
    bbox_loss_fn: ty.Callable[[ty.Dict[str, torch.Tensor]], torch.Tensor],  
    alpha: float = 0.5
):
    cls_y_true, cls_y_pred = y_true['class_id'].long(), y_preds['class_id'].float().unsqueeze(-1)
    reg_y_true, reg_y_pred = y_true['bbox'].float().squeeze(), y_preds['bbox'].float().squeeze()
    
    cls_loss = F.cross_entropy(cls_y_pred, cls_y_true)
    reg_loss = F.mse_loss(reg_y_pred, reg_y_true)
    
    # Adds weights to both tasks
    total_loss = (1 - alpha) * cls_loss + alpha * reg_loss
    return dict(loss=total_loss, reg_loss=reg_loss,cls_loss=cls_loss)

In [49]:
def iou(y_true: Tensor, y_pred: Tensor):
    pairwise_iou = torchvision.ops.box_iou(y_true.squeeze(), y_pred.squeeze())
    result = torch.trace(pairwise_iou) / pairwise_iou.size()[0]
    return result

In [50]:
def accuracy(y_true: Tensor, y_pred: Tensor):
    pred = torch.argmax(y_pred, axis=-1)
    y_true = y_true.squeeze()
    correct = torch.eq(pred, y_true).float()
    total = torch.ones_like(correct)
    result = torch.divide(torch.sum(correct), torch.sum(total))
    return result

In [51]:
def printer(logs: ty.Dict[str, ty.Any]):
    # print every 5 steps
    if logs['iters'] % 5 != 0:
        return
    print('Iteration #: ',logs['iters'])
    for name, value in logs.items():
        if name == 'iters':
            continue
        
        if type(value) in [float, int]:
            value = round(value, 4)
        elif type(value) is torch.Tensor:
            value = value.detach().cpu().numpy()
            value = np.round(value, 4)
            value = torch.from_numpy(np.asarray(value))
            value.to(device)
            
#             value = torch.round(value, decimals=4)
        
        print(f'\t{name} = {value}')
    print()

In [52]:
class RegressionHead(nn.Module):
    """Regression head for the model."""
    def __init__(self, input_size: int):
        super().__init__()
        self.input_size = input_size
        self.model = nn.Sequential(
#             nn.Linear(self.input_size, 768),
#             nn.ReLU(),
            nn.Linear(self.input_size, 256),
            nn.ReLU(),
            nn.Linear(256, 4)
        )
    
    def forward(self, x):
        return self.model(x)

In [53]:
class ClassificationHead(nn.Module):
    """Classification head for the model."""
    def __init__(self, input_size: int, n_classes: int):
        super().__init__()
        self.input_size = input_size
        self.n_classes = n_classes
        
        self.model = nn.Sequential(
            nn.Linear(in_features=input_size, out_features=256),
            nn.ReLU(),
            nn.Linear(256, config_dict['num_classes']),
            nn.Softmax()
        )
    
    def forward(self, x):
        return self.model(x)

In [54]:
class Model(nn.Module):
    def __init__(self, backbone: FeatureExtractor, classifier: ClassificationHead, regressor: RegressionHead):
        super().__init__()
        self.backbone = backbone
        self.cls_head = classifier
        self.reg_head = regressor
        
    def forward(self, x):
        features = self.backbone(x)
        cls_logits = self.cls_head(features)
        pred_bbox = self.reg_head(features)
        predictions = {'bbox': pred_bbox, 'class_id': cls_logits}
        return predictions

In [55]:
trained_models = [0] * nets
for j in range(nets):
    feature_extractor_model =  models[j]['model'].to(device)
    classifier = ClassificationHead(input_size=models[j]['output_shape'], n_classes=config_dict['num_classes']).to(device)
    regressor = RegressionHead(input_size=models[j]['output_shape']).to(device)
    model = Model(feature_extractor_model, classifier, regressor).to(device)
    
    optimizer = torch.optim.Adam(lr=config_dict['learning_rate'], params=models[j]['model'].parameters(), weight_decay=config_dict['weight_decay'])

    trained_models[j] =  train(
        model,
        optimizer,
        train_data,
        eval_datasets=[('val', test_data)],
        loss_fn=loss_fn,
        metrics={
            'bbox': [('iou', iou)],
            'class_id': [('accuracy', accuracy)]
        },
        callbacks=[printer],
        device=device,
        train_steps=30,
        eval_steps=1,
        alpha=0.5
    ) 

  input = module(input)


Iteration #:  0
	train_loss = 19.970699310302734
	train_reg_loss = 37.71860122680664
	train_cls_loss = 2.2228000164031982
	train_iou = 0.0
	train_accuracy = 0.21879999339580536
	val_loss = 18.432100296020508
	val_reg_loss = 34.496700286865234
	val_cls_loss = 2.3673999309539795
	val_iou = 0.0
	val_accuracy = 0.0625

Iteration #:  5
	train_loss = 20.339500427246094
	train_reg_loss = 38.38479995727539
	train_cls_loss = 2.2941999435424805
	train_iou = 0.0001
	train_accuracy = 0.15620000660419464
	val_loss = 18.41670036315918
	val_reg_loss = 34.46609878540039
	val_cls_loss = 2.3673999309539795
	val_iou = 0.0
	val_accuracy = 0.0625

Iteration #:  10
	train_loss = 22.706300735473633
	train_reg_loss = 43.1161003112793
	train_cls_loss = 2.2964000701904297
	train_iou = 0.0011
	train_accuracy = 0.15620000660419464
	val_loss = 18.394399642944336
	val_reg_loss = 34.42129898071289
	val_cls_loss = 2.367500066757202
	val_iou = 0.0
	val_accuracy = 0.0625

Iteration #:  15
	train_loss = 14.3817996978759