# Testing Models from Segmentation Models PyTorch

**Objectif :** le but de ce notebook est de tester le modèle U-Net simple et les différents modèles disponibles dans la bibliothèque segmentation_models.pytorch.

### Root Variables 

In [1]:
import os 

In [2]:
# Root folder of the aerial image dataset. The original absolute path stays the
# default; set the INRIA_DATASET_ROOT environment variable to run the notebook
# on another machine without editing this cell.
root = os.environ.get('INRIA_DATASET_ROOT', '/home/ign.fr/ttea/Code_IGN/AerialImageDataset')
train_dir = os.path.join(root,'train/images')  # training images
gt_dir = os.path.join(root,'train/gt')         # training labels ("gt" presumably means ground truth)
test_dir = os.path.join(root,'test/images')    # test images

In [3]:
import sys 

In [4]:
# Put the project code folder at the front of the import path (presumably where
# the dataloader / model / train modules imported in the next cell live).
CODE_DIR = '/home/ign.fr/ttea/stage_segmentation_2021/Code'
# Drop any earlier copy first so re-running this cell does not stack duplicates.
sys.path[:] = [entry for entry in sys.path if entry != CODE_DIR]
sys.path.insert(0, CODE_DIR)

In [5]:
from dataloader.dataloader import InriaDataset
from model.model import UNet
from train import train_segmentation, eval_segmentation, train_full_segmentation, train, eval, train_full
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.encoders import get_preprocessing_fn

### Import Libraries 

In [6]:
import pandas as pd 

import torch
import torch.nn.functional as F
import torch.nn as nn
from prettytable import PrettyTable

In [7]:
# Load the notebook settings from variables.json (path relative to the current
# working directory). Later cells read var['variables']['root'] as the dataset root.
var= pd.read_json('variables.json')

## Inria Dataset

In [8]:
# Tile size handed to InriaDataset as its second argument.
tile_size = (512, 512)

# Build the train and validation datasets with identical settings; only the split
# name differs. The trailing positional arguments (None, False, 1) are passed
# through unchanged — their meaning is defined in dataloader.dataloader (TODO confirm).
train_dataset, val_dataset = [
    InriaDataset(var['variables']['root'], tile_size, split, None, False, 1)
    for split in ('train', 'validation')
]

## U-Net Model

![title](../img/Unet.png)

For Unet Model, the architecture will be based on this link : 
https://towardsdatascience.com/understanding-semantic-segmentation-with-unet-6be4f42d4b47

## Import Segmentation Model 

https://github.com/qubvel/segmentation_models.pytorch

### UNet

In [9]:
# U-Net built without encoder_name / encoder_weights, so the library defaults
# apply (the printed summary in the next cell shows a ResNetEncoder).
unet = smp.Unet(
    in_channels=3,                  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=2,                      # model output channels (number of classes in your dataset)
)

In [10]:
# Print the module tree to inspect the encoder layers of the default U-Net.
print(unet)

Unet(
  (encoder): ResNetEncoder(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track

### Resnet

In [11]:
# U-Net with a ResNet-18 encoder initialised from ImageNet pre-trained weights.
resnet = smp.Unet(
    encoder_name="resnet18",        # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",     # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,                  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=2,                      # model output channels (number of classes in your dataset)
)

In [12]:
# Print the module tree to inspect the layers of the ResNet-18 U-Net.
print(resnet)

Unet(
  (encoder): ResNetEncoder(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track

### EfficientNet

In [13]:
# U-Net with an EfficientNet-b0 encoder initialised from ImageNet pre-trained weights.
efficientnet = smp.Unet(
    encoder_name="efficientnet-b0",        # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",     # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,                  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=2,                      # model output channels (number of classes in your dataset)
)

In [14]:
# Print the module tree to inspect the layers of the EfficientNet-b0 U-Net.
print(efficientnet)

Unet(
  (encoder): EfficientNetEncoder(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          32, 8, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          8, 32, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_

### Arguments & Hyperparamètres 

In [15]:
# Training hyperparameters. The (commented-out) training call further down reads
# lr, n_epoch, n_epoch_test, batch_size, n_class and n_channel from this dict.
hparam = dict(
    lr=1e-4,
    n_epoch=40,
    n_epoch_test=5,        # presumably the evaluation interval in epochs — TODO confirm in train.py
    n_class=2,
    batch_size=8,
    n_channel=3,
    # Not referenced elsewhere in this notebook; presumably layer widths for the
    # hand-written UNet imported from model.model — TODO confirm.
    conv_width=[16, 32, 64, 128, 256, 128, 64, 32, 16],
)

In [16]:
# Per-class loss weights. They are only referenced by the commented-out weighted
# CrossEntropyLoss option below; presumably ordered like 'class_names' — TODO confirm.
weights = [0.5, 1.0]
class_weights = torch.tensor(weights, dtype=torch.float32)
if torch.cuda.is_available():
    # Move to the GPU only when one exists, so this cell no longer crashes on
    # CPU-only machines.
    class_weights = class_weights.cuda()

# Options for the training helpers imported from `train` (the commented-out
# training call passes this dict as its first argument). How each key is
# interpreted is defined in that module, which is not shown in this notebook.
args = {
    #'nn_loss':nn.BCEWithLogitsLoss(reduction="mean"),
    #'nn_loss':nn.CrossEntropyLoss(weight = class_weights,reduction="mean"),
    #'nn_loss':BinaryDiceLoss,
    #'loss_name':'BinaryDiceLoss',
    # 'loss_name': 'BinaryCrossentropy',
    'loss_name': 'Crossentropy',
    'threshold':0.5,
    'cuda':1,  # left as-is; NOTE(review): presumably a "use GPU" flag — confirm in train.py
    'class_names':['None','Batiment'],
    'save_model':False,
    'save_model_name':"unet_test_8_1.pth",
    # Reuse the datasets (and their tile size) created in the "Inria Dataset"
    # section instead of building a second, identical pair here.
    'train_dataset':train_dataset,
    'val_dataset':val_dataset,
    'model_name': 'Resnet18',
}

### Training Segmentation Model 

In [18]:
# Lookup table from a display name to the corresponding smp.Unet instance;
# args['model_name'] uses the same naming ('Resnet18').
model = {
    'UNet': unet,
    'Resnet18': resnet,
    'EfficientNet': efficientnet,
}

In [20]:
# Training is disabled in this notebook: the call below is kept for reference only,
# so trained_model / metric_train / metric_test are not defined afterwards.
# Uncomment to train the ResNet-18 U-Net with the settings from `args` and `hparam`.
#trained_model, metric_train,metric_test = train_full_segmentation(args, model['Resnet18'],hparam['lr'],hparam['n_epoch'],
#                                    hparam['n_epoch_test'],hparam['batch_size'],hparam['n_class'],
#                                    hparam['n_channel'])

### Modèles - Nombre de paramètres 

https://towardsdatascience.com/understanding-and-calculating-the-number-of-parameters-in-convolution-neural-networks-cnns-fc88790d530d

https://openaccess.thecvf.com/content_CVPRW_2020/papers/w22/Beheshti_Squeeze_U-Net_A_Memory_and_Energy_Efficient_Image_Segmentation_Network_CVPRW_2020_paper.pdf

In [21]:
def count_parameters(model):
    """Print a table of trainable parameter counts and return their total.

    Args:
        model: object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter has a
            ``requires_grad`` flag and a ``numel()`` method
            (e.g. a ``torch.nn.Module``).

    Returns:
        int: the summed ``numel()`` of every parameter whose
        ``requires_grad`` is true. Parameters with ``requires_grad`` false
        are left out of both the table and the total.
    """
    table = PrettyTable(["Modules", "Parameters"])

    # Collect (name, element count) for the trainable parameters only.
    trainable = [
        (name, tensor.numel())
        for name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]

    for name, count in trainable:
        table.add_row([name, count])
    total_params = sum(count for _, count in trainable)

    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

Pour le calcul des paramètres sur chaque convolution, on définit la formule suivante : 

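Conv = (largeur * hauteur * nombre de filtres dans la couche précédente) * nombre de filtres dans la couche actuelle (poids uniquement ; on y ajoute un terme de biais par filtre de la couche actuelle, noté b ci-dessous)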

Dans notre cas, la largeur et la hauteur (width & height) sont données par `kernel_size`, et les nombres de filtres correspondent aux canaux d'entrée et de sortie de la `Conv2d`.

### ResNet Paramètres 

![title](../img/resnet_archi.png)

**Resnet18 Architecture :** https://medium.com/@14prakash/understanding-and-implementing-architectures-of-resnet-and-resnext-for-state-of-the-art-image-cf51669e1624

$C1 = (7*7*3)*64 = 9408$

$b1 = 64 $

$C2.1 = (3*3*64)*64 = 36 864$

$b2.1 = 64 $

$C2.2 = (3*3*64)*64 = 36 864$

$b2.2 = 64 $

$C3.1 = (3*3*64)*128 = 73 728$

$b3.1 = 128 $

$C3.2 = (3*3*128)*128 = 147 456$

$b3.2 = 128 $

$C4.1 = (3*3*128)*256 = 294 912$

$b4.1 = 256 $

$C4.2 = (3*3*256)*256 = 589 824$

$b4.2 = 256 $

$C5.1 = (3*3*256)*512 = 1 179 648$

$b5.1 = 512 $

$C5.2 = (3*3*512)*512 = 2 359 296 $

$b5.2 = 512 $

**Nombre de paramètres pour l'encodeur Resnet18**

In [22]:
# Conv1 row of the table below: C1 weights plus the b1 term from the formulas above.
9408+64

9472

In [23]:
# Hand-computed counts from the formulas above, one (C, b) pair per table row.
# NOTE(review): this is a simplified count — the printed model shows the encoder
# convolutions with bias=False and two BasicBlocks (two convs each) per layer,
# so it is not expected to match count_parameters() exactly.
_resnet_conv_and_b = [
    (9408, 64),        # C1,   b1
    (36864, 64),       # C2.1, b2.1
    (36864, 64),       # C2.2, b2.2
    (73728, 128),      # C3.1, b3.1
    (147456, 128),     # C3.2, b3.2
    (294912, 256),     # C4.1, b4.1
    (589824, 256),     # C4.2, b4.2
    (1179648, 512),    # C5.1, b5.1
    (2359296, 512),    # C5.2, b5.2
]
# Flatten to the alternating [C, b, C, b, ...] list used by the sum below.
resnet_param = [count for pair in _resnet_conv_and_b for count in pair]

In [24]:
# Conv5.2 row of the table below: C5.2 weights plus the b5.2 term from the formulas above.
2359296+512

2359808

In [25]:
# Total of the hand-computed encoder counts listed in resnet_param.
resnet_encoder_param = sum(resnet_param)
resnet_encoder_param

4729984

<p style="text-align: center;"> <b>Nombre de paramètres pour Resnet18 </b></p>

| Number  | Modules     | In/Out Channel | # Parameters  |
| --------|-----|---------------|-------|
| 1 |Conv1 (f=7) |(3,64) | 9472 |
| 2 |Conv2.1 (f=3) |(64,64) | 36928 |
| 3 |Conv2.2 (f=3) |(64,64) | 36928 |
| 4 |Conv3.1 (f=3) |(64,128) | 73856 |
| 5 |Conv3.2 (f=3) |(128,128)| 147584 |
| 6 |Conv4.1 (f=3) |(128,256)| 295168 |
| 7 |Conv4.2 (f=3) |(256,256)| 590080 |
| 8 |Conv5.1 (f=3) |(256,512)| 1180160|
| 9 |Conv5.2 (f=3) |(512,512)| 2359808|

f : taille du filtre (largeur = hauteur du noyau de convolution)

Pour chaque opération de pooling, on a 0 paramètre, car elle ne calcule qu'une valeur fixe à partir de son entrée et ne possède aucun poids à apprendre par rétropropagation du gradient.

**Nombre total de paramètres du U-Net avec encodeur Resnet18**

In [26]:
# `resnet` is the whole smp.Unet built above, so this total covers every trainable
# parameter the model exposes, not only the ResNet-18 encoder counted by hand.
count_parameters(resnet)

+--------------------------------------+------------+
|               Modules                | Parameters |
+--------------------------------------+------------+
|         encoder.conv1.weight         |    9408    |
|          encoder.bn1.weight          |     64     |
|           encoder.bn1.bias           |     64     |
|    encoder.layer1.0.conv1.weight     |   36864    |
|     encoder.layer1.0.bn1.weight      |     64     |
|      encoder.layer1.0.bn1.bias       |     64     |
|    encoder.layer1.0.conv2.weight     |   36864    |
|     encoder.layer1.0.bn2.weight      |     64     |
|      encoder.layer1.0.bn2.bias       |     64     |
|    encoder.layer1.1.conv1.weight     |   36864    |
|     encoder.layer1.1.bn1.weight      |     64     |
|      encoder.layer1.1.bn1.bias       |     64     |
|    encoder.layer1.1.conv2.weight     |   36864    |
|     encoder.layer1.1.bn2.weight      |     64     |
|      encoder.layer1.1.bn2.bias       |     64     |
|    encoder.layer2.0.conv1.

14328354

### EfficientNet Paramètres 

https://datamonje.medium.com/image-classification-with-efficientnet-better-performance-with-computational-efficiency-f480fdb00ac6

argument_block = [ 
BlockArgs(kernel_size=3, num_repeat=1, input_filters=32, output_filters=16, expand_ratio=1, id_skip=True, strides=[1, 1], se_ratio=0.25),

BlockArgs(kernel_size=3, num_repeat=2, input_filters=16, output_filters=24, expand_ratio=6, id_skip=True, strides=[2, 2], se_ratio=0.25),

BlockArgs(kernel_size=5, num_repeat=2, input_filters=24, output_filters=40, expand_ratio=6, id_skip=True, strides=[2, 2], se_ratio=0.25),

BlockArgs(kernel_size=3, num_repeat=3, input_filters=40, output_filters=80, expand_ratio=6, id_skip=True, strides=[2, 2], se_ratio=0.25),

BlockArgs(kernel_size=5, num_repeat=3, input_filters=80, output_filters=112, expand_ratio=6, id_skip=True, strides=[1, 1], se_ratio=0.25),

BlockArgs(kernel_size=5, num_repeat=4, input_filters=112, output_filters=192, expand_ratio=6, id_skip=True, strides=[2, 2], se_ratio=0.25),

BlockArgs(kernel_size=3, num_repeat=1, input_filters=192, output_filters=320, expand_ratio=6, id_skip=True, strides=[1, 1], se_ratio=0.25)
]

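### MBConv param 

*Remarque : les formules ci-dessous sont une approximation qui traite chaque bloc MBConv comme une convolution standard 3×3 avec biais. D'après `argument_block`, les étages 3, 5 et 6 utilisent `kernel_size=5`, l'étage 6 est répété 4 fois et l'étage 7 une seule fois ; la liste des blocs ci-dessous est donc à vérifier.*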

$MB1 = (3*3*32+1)*16 = 4624$

$MB2.1 = (3*3*16+1)*24 = 3480$

$MB2.2 = (3*3*24+1)*24 = 5208$

$MB3.1 = (3*3*24+1)*40 = 8680$

$MB3.2 = (3*3*40+1)*40 = 14440$

$MB4.1 = (3*3*40+1)*80 = 28880$

$MB4.2 = (3*3*80+1)*80 = 57680$

$MB4.3 = (3*3*80+1)*80 = 57680$

$MB5.1 = (3*3*80+1)*112 = 80752$

$MB5.2 = (3*3*112+1)*112 = 113008$

$MB5.3 = (3*3*112+1)*112 = 113008$

$MB6.1 = (3*3*112+1)*192 = 193728$

$MB6.2 = (3*3*192+1)*192 = 331968$

$MB6.3 = (3*3*192+1)*192 = 331968$

$MB7.1 = (3*3*192+1)*320 = 553280$

$MB7.2 = (3*3*320+1)*320 = 921920$

$MB7.3 = (3*3*320+1)*320 = 921920$

<p style="text-align: center;"> <b>Nombre de paramètres pour EfficientNet </b></p>

| Number  | Modules     | In/Out Channel | # Parameters  |
| --------|-----|---------------|-------|
| 1 |MB1 (f=3) |(32,16) | 4624 |
| 2 |MB2.1 (f=3) |(16,24) | 3480 |
| 3 |MB2.2 (f=3) |(24,24) | 5208 |
| 4 |MB3.1 (f=3) |(24,40) | 8680 |
| 5 |MB3.2 (f=3) |(40,40)| 14440 |
| 6 |MB4.1 (f=3) |(40,80)| 28880 |
| 7 |MB4.2 (f=3) |(80,80)| 57680 |
| 8 |MB4.3 (f=3) |(80,80)| 57680|
| 9 |MB5.1 (f=3) |(80,112)| 80752|
| 10 |MB5.2 (f=3) |(112,112)| 113008|
| 11 |MB5.3 (f=3) |(112,112)| 113008|
| 12 |MB6.1 (f=3) |(112,192)| 193728|
| 13 |MB6.2 (f=3) |(192,192)| 331968|
| 14 |MB6.3 (f=3) |(192,192)| 331968|
| 15 |MB7.1 (f=3) |(192,320)| 553280|
| 16 |MB7.2 (f=3) |(320,320)| 921920|
| 17 |MB7.3 (f=3) |(320,320)| 921920|

**Nombre de paramètres pour l'encodeur efficientnet**

In [27]:
# Approximate per-block counts taken from the MBConv formulas above ((3*3*in + 1) * out).
# Each `*2` entry stands for two blocks with the same count
# (MB4.2/MB4.3, MB5.2/MB5.3, MB6.2/MB6.3).
# NOTE(review): the table lists both MB7.2 and MB7.3 at 921920, but only one 921920
# is included here, and argument_block gives num_repeat=1 for the last stage.
# Confirm which set of blocks is intended before relying on the total.
mbconv_param = [4624,3480,5208,8680,14440,28880,57680*2,80752,113008*2,193728,331968*2,553280,921920]

In [28]:
# Total of the hand-computed MBConv counts listed in mbconv_param.
efficientnet_encode_param = sum(mbconv_param)
efficientnet_encode_param

2820304

**Nombre total de paramètres pour le U-Net avec encodeur EfficientNet-b0**

In [29]:
# `efficientnet` is the whole smp.Unet built above, so this total covers every
# trainable parameter the model exposes, not only the EfficientNet-b0 encoder.
count_parameters(efficientnet)

+-------------------------------------------+------------+
|                  Modules                  | Parameters |
+-------------------------------------------+------------+
|         encoder._conv_stem.weight         |    864     |
|            encoder._bn0.weight            |     32     |
|             encoder._bn0.bias             |     32     |
|  encoder._blocks.0._depthwise_conv.weight |    288     |
|       encoder._blocks.0._bn1.weight       |     32     |
|        encoder._blocks.0._bn1.bias        |     32     |
|    encoder._blocks.0._se_reduce.weight    |    256     |
|     encoder._blocks.0._se_reduce.bias     |     8      |
|    encoder._blocks.0._se_expand.weight    |    256     |
|     encoder._blocks.0._se_expand.bias     |     32     |
|   encoder._blocks.0._project_conv.weight  |    512     |
|       encoder._blocks.0._bn2.weight       |     16     |
|        encoder._blocks.0._bn2.bias        |     16     |
|   encoder._blocks.1._expand_conv.weight   |    1536   

6251614