# 1 CNNs

## 1.1 With MobileNetV2 as the encoder in a U-Net

### 1.1.1 Fixed pre-trained parameters

In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
import numpy as np
import pandas as pd

In [7]:
class UNetMobileNetV2fixed(nn.Module):
    def __init__(self, num_classes):
        super(UNetMobileNetV2fixed, self).__init__()

    
        self.encoder = models.mobilenet_v2(pretrained=True).features
        
        ## Steps where we will extract the outputs for skip connections, can be changed
        self.encoder_layers = [
            self.encoder[0:2],
            self.encoder[2:4],
            self.encoder[4:7],
            self.encoder[7:14],
            self.encoder[14:19],
            self.encoder[19:24],
            self.encoder[24:],
        ]
        
        ## The classifier part can be changed, it probably needs to be more complex when the the parameters of the pretrained model are fixed
        self.classifier = nn.Sequential(
            nn.Conv2d(320, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, num_classes, kernel_size=1)
        )

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        # Encoder
        skips = []
        for layer in self.encoder_layers:
            x = layer(x)
            skips.append(x)

        # Decoder
        x = skips[-1]
        for skip in reversed(skips[:-1]):
            x = self.upsample(x)
            x = torch.cat((x, skip), dim=1)
        
        ## Classifier
        x = self.classifier(x)

        return x
    
num_classes = 3
model = UNetMobileNetV2fixed(num_classes)



### 1.1.2 Non-fixed pre-trained parameters

In [8]:
class UNetMobileNetV2unfixed(nn.Module):
    def __init__(self, num_classes):
        super(UNetMobileNetV2unfixed, self).__init__()

    
        self.encoder = models.mobilenet_v2(pretrained=True).features
        
        ## The MobileNetV2 parameters are not fixed anymore
        for param in model.encoder.parameters():
            param.requires_grad = True
        
        ## Steps where we will extract the outputs for skip connections, can be changed
        self.encoder_layers = [
            self.encoder[0:2],
            self.encoder[2:4],
            self.encoder[4:7],
            self.encoder[7:14],
            self.encoder[14:19],
            self.encoder[19:24],
            self.encoder[24:],
        ]
        
        ## The classifier part can be changed, it probably needs to be more complex when the the parameters of the pretrained model are fixed
        self.classifier = nn.Sequential(
            nn.Conv2d(320, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, num_classes, kernel_size=1)
        )

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        # Encoder
        skips = []
        for layer in self.encoder_layers:
            x = layer(x)
            skips.append(x)

        # Decoder
        x = skips[-1]
        for skip in reversed(skips[:-1]):
            x = self.upsample(x)
            x = torch.cat((x, skip), dim=1)
        
        ## Classifier
        x = self.classifier(x)

        return x
    
num_classes = 3
model = UNetMobileNetV2unfixed(num_classes)

### 1.1.3 No pretrained weights

In [9]:
class UNetMobileNetV2untrained(nn.Module):
    def __init__(self, num_classes):
        super(UNetMobileNetV2untrained, self).__init__()

    
        self.encoder = models.mobilenet_v2(pretrained=False).features
        
        ## The MobileNetV2 parameters are not fixed anymore
        for param in model.encoder.parameters():
            param.requires_grad = True
        
        ## Steps where we will extract the outputs for skip connections, can be changed
        self.encoder_layers = [
            self.encoder[0:2],
            self.encoder[2:4],
            self.encoder[4:7],
            self.encoder[7:14],
            self.encoder[14:19],
            self.encoder[19:24],
            self.encoder[24:],
        ]
        
        ## The classifier part can be changed, it probably needs to be more complex when the the parameters of the pretrained model are fixed
        self.classifier = nn.Sequential(
            nn.Conv2d(320, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, num_classes, kernel_size=1)
        )

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        # Encoder
        skips = []
        for layer in self.encoder_layers:
            x = layer(x)
            skips.append(x)

        # Decoder
        x = skips[-1]
        for skip in reversed(skips[:-1]):
            x = self.upsample(x)
            x = torch.cat((x, skip), dim=1)
        
        ## Classifier
        x = self.classifier(x)

        return x
    
num_classes = 3
model = UNetMobileNetV2untrained(num_classes)



## 1.2 Custom UNet

In [10]:
class CustomUnet_utils(nn.Module):
    def __init__(self):
        super(CustomUnet_utils, self).__init__()
        
    def down(self, in_channels, out_channels):
        
        return nn.Sequential(
            nn.MaxPool2d(2),
            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
    
    def up(self, in_channels, mid_channels, out_channels):
        
        return nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        
        

In [11]:
class CustomUnet(nn.Module):
    def __init__(self, num_channels, num_classes):
        super(CustomUnet, self).__init__()
        
        self.num_channels = num_channels
        self.num_classes = num_classes

        self.input_layer = nn.Sequential(
            nn.
            (num_channels, num_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(3),
            nn.ReLU(inplace=True),
            nn.Conv2d(num_channels, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        self.down1 = CustomUnet_utils().down(64, 128)
        self.down2 = CustomUnet_utils().down(128, 256)
        self.down3 = CustomUnet_utils().down(256, 512)
        self.down4 = CustomUnet_utils().down(512, 512)
        
        
        self.up1 = CustomUnet_utils().up(1024, 512, 256)
        self.up2 = CustomUnet_utils().up(512, 256, 128)
        self.up3 = CustomUnet_utils().up(256, 128, 64)
        self.up3 = CustomUnet_utils().up(128, 64, 64)
        
        ## the kernel size is weird, we'll probably change it
        self.output_layer = nn.Conv2d(64, num_classes, kernel_size=1)

    def forward(self, x):
        
        x1 = self.input_layer(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.output_layer(x)
        
        return logits
    
num_channels = 3
num_classes = 3
model = CustomUnet(num_channels, num_classes)

In [None]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
def train_one_epoch(epoch_index, training_loader, loss_fn = torch.nn.CrossEntropyLoss(), optimizer = torch.optim.Adam(model.parameters(), lr=0.001)):
    running_loss = 0.
    last_loss = 0.

    for i, data in enumerate(training_loader):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = loss_fn(outputs, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()
        if i % 1000 == 999:
            last_loss = running_loss / 1000 
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            running_loss = 0.

    return last_loss

In [None]:
def train_all(model, epochs = 5, training_loader, validation_loader):
    
    for epoch in range(epochs):
        print('EPOCH {}:'.format(epoch + 1))

        model.train(True)
        avg_loss = train_one_epoch(epoch, training_loader)

        running_vloss = 0.0
        model.eval()

        with torch.no_grad():
            for i, vdata in enumerate(validation_loader):
                vinputs, vlabels = vdata
                voutputs = model(vinputs)
                vloss = loss_fn(voutputs, vlabels)
                running_vloss += vloss

        avg_vloss = running_vloss / (i + 1)
        print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

In [23]:
a = np.array([[5,1],[2,3]])
np.unravel_index(np.argmax(a), a.shape)

(0, 0)

In [7]:
a = torch.tensor([[0,1],[0,1]])
b = torch.tensor([[1,1],[0,1]])
torch.sum((a == 1) and (b == 1))

RuntimeError: Boolean value of Tensor with more than one value is ambiguous

In [None]:
torch.sum()

# 2 Transformers

## 2.1 With HuggingFace's AutoModelForSemanticSegmentation