# Semantic Segmentation

À rendre pour le 15/03/2024

## Dependencies

In [2]:
%pip install torch torchvision matplotlib
TRAIN = True


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam

## Dataset

In [4]:
with open('dataset/train.csv') as f:
    ids = np.loadtxt(f, delimiter=',', skiprows=1, usecols=(0,), dtype=str)
    print(ids.size, "elements:", ids)

4000 elements: ['2c45b152f1' '3cb59a4fdc' 'e185ab5dc1' ... '26527458de' '25fb3a895a'
 'f30c36bf6b']


## Model

In [12]:
class UNet(nn.Module):
    def __init__(self, padding):
        self.padding = padding
        super(UNet, self).__init__()
        
        """Encoder path"""
        self.conv11 = nn.Conv2d(3, 16, 3, padding=padding)
        self.conv12 = nn.Conv2d(16, 16, 3, padding=padding)
        # Here we have c1 from the paper
        self.max1 = nn.MaxPool2d(2, 2)
        # Here is p1
        self.conv21 = nn.Conv2d(16, 32, 3, padding=padding)
        self.conv22 = nn.Conv2d(32, 32, 3, padding=padding)
        # Here we have c2 from the paper
        self.max2 = nn.MaxPool2d(2, 2)
        # Here is p2
        self.conv31 = nn.Conv2d(32, 64, 3, padding=padding)
        self.conv32 = nn.Conv2d(64, 64, 3, padding=padding)
        # Here we have c3 from the paper
        self.max3 = nn.MaxPool2d(2, 2)
        # Here is p3
        self.conv41 = nn.Conv2d(64, 128, 3, padding=padding)
        self.conv42 = nn.Conv2d(128, 128, 3, padding=padding)
        # Here we have c4 from the paper
        self.max4 = nn.MaxPool2d(2, 2)
        # Here is p4
        self.conv51 = nn.Conv2d(128, 256, 3, padding=padding)
        self.conv52 = nn.Conv2d(256, 256, 3, padding=padding)
        # Here we have c5 from the paper
        self.max5 = nn.MaxPool2d(2, 2)
        # Here is p5
        
        """Decoder path"""
        self.tconv6 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        # Here we have u6 from the paper
        self.conv61 = nn.Conv2d(256, 128, 3, padding=padding)
        self.conv62 = nn.Conv2d(128, 128, 3, padding=padding)
        # Here we have c6 from the paper
        self.tconv7 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        # Here we have u7 from the paper
        self.conv71 = nn.Conv2d(128, 64, 3, padding=padding)
        self.conv72 = nn.Conv2d(64, 64, 3, padding=padding)
        # Here we have c7 from the paper
        self.tconv8 = nn.ConvTranspose2d(64, 32, 2, stride=2)
        # Here we have u8 from the paper
        self.conv81 = nn.Conv2d(64, 32, 3, padding=padding)
        self.conv82 = nn.Conv2d(32, 32, 3, padding=padding)
        # Here we have c8 from the paper
        self.tconv9 = nn.ConvTranspose2d(32, 16, 2, stride=2)
        # Here we have u9 from the paper
        self.conv91 = nn.Conv2d(32, 16, 3, padding=padding)
        self.conv92 = nn.Conv2d(16, 16, 3, padding=padding)
        # Here we have c9 from the paper
        
        """Output layer"""
        self.conv10 = nn.Conv2d(16, 1, 1)

    def forward(self, x):
        """Encoder path"""
        x = self.conv11(x)
        x = self.conv12(x)
        c1 = x
        x = self.max1(x)
        # x = p1
        x = self.conv21(x)
        x = self.conv22(x)
        c2 = x
        x = self.max2(x)
        # x = p2
        x = self.conv31(x)
        x = self.conv32(x)
        c3 = x
        x = self.max3(x)
        # x = p3
        x = self.conv41(x)
        x = self.conv42(x)
        c4 = x
        x = self.max4(x)
        # x = p4
        x = self.conv51(x)
        x = self.conv52(x)
        # C5 is not used in the decoder path
        # there is no p5 (maxpooling)
        
        """Decoder path"""
        x = self.tconv6(x, output_size=c4.size())
        # x = u6
        x = torch.cat([x, c4], dim=1)
        # x = "u6 + c4"
        x = self.conv61(x)
        x = self.conv62(x)
        # x = c6
        x = self.tconv7(x, output_size=c3.size())
        # x = u7
        x = torch.cat([x, c3], dim=1)
        # x = "u7 + c3"
        x = self.conv71(x)
        x = self.conv72(x)
        # x = c7
        x = self.tconv8(x, output_size=c2.size())
        # x = u8
        x = torch.cat([x, c2], dim=1)
        # x = "u8 + c2"
        x = self.conv81(x)
        x = self.conv82(x)
        # x = c8
        x = self.tconv9(x, output_size=c1.size())
        # x = u9
        x = torch.cat([x, c1], dim=1)
        # x = "u9 + c1"
        x = self.conv91(x)
        x = self.conv92(x)
        # x = c9
        
        """Output layer"""
        x = self.conv10(x)
        # Some x might be out of [0, 1] range
        return torch.clamp(x, 0, 1)

## Training

In [23]:
from torch.nn import BCELoss
import matplotlib.pyplot as plt

model = UNet(padding=1).to('cuda')
criterion = BCELoss()
optimizer = Adam(model.parameters(), lr=0.0001)
batch_size = 32
epochs = 30
epoch = 0
min_loss = float('inf')
stagnation = 0

if TRAIN:
    while epoch < epochs:
        epoch_loss = 0
        for i in range(0, ids.size, batch_size):
            batch_ids = ids[i:i+batch_size]
            images = []
            masks = []
            for id in batch_ids:
                img = plt.imread('dataset/train/images/' + id + '.png')
                mask = plt.imread('dataset/train/masks/' + id + '.png')

                # Conv2D wants batches, channels, height, width while plt.imread returns height, width, channels
                # (moreover we need to get rid of the alpha channel)
                img = img[:,:,:3]
                img = np.transpose(img, (2, 0, 1))
                
                # We create a virtual channel for the mask
                mask = mask[:,:,None]
                mask = np.transpose(mask, (2, 0, 1))
                
                images.append(img)
                masks.append(mask)
            inputs = torch.tensor(np.array(images), dtype=torch.float32).to('cuda')
            labels = torch.tensor(np.array(masks), dtype=torch.float32).to('cuda')
            outputs = model(inputs)

            model.train()
            optimizer.zero_grad()
            
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.cpu().item() / (ids.size / batch_size)

            print('Epoch', epoch, 'Batch', str(i) + "/" + str(ids.size), 'Loss:', epoch_loss * (ids.size / (i + batch_size)), end='\r', sep='\t')
        print('Epoch', epoch, 'Loss:', epoch_loss, "      ", sep='\t')
        lr = optimizer.param_groups[0]['lr']
        if epoch_loss < min_loss * (1 + lr * 1000):
            min_loss = epoch_loss
            torch.save(model.state_dict(), f'model-epoch{epoch}.pth')
            epoch += 1
        elif epoch_loss < min_loss * (1 + lr * 100000):
            print('Stagnating, restoring previous model and nudging a bit the weights')
            model.load_state_dict(torch.load(f'model-epoch{epoch-1}.pth'))
            stagnation += 1
            for param in model.parameters():
                param.data *= (torch.randn(param.size()).to('cuda') - 0.5) * 100 * lr + 1
            if stagnation > 3:
                print('Stagnating for too long, reducing learning rate from', lr, 'to', lr * 0.8)
                for param_group in optimizer.param_groups:
                    param_group['lr'] *= 0.8
                stagnation = 0
        else:
            print('Diverging, nudging a lot parameters')
            model.load_state_dict(torch.load(f'model-epoch{epoch-1}.pth'))
            for param in model.parameters():
                param.data *= (torch.randn(param.size()).to('cuda') - 0.5) * 1000000 * lr + 1


Epoch	0	Loss:	0.5605555183887482	      183887482
Epoch	1	Loss:	0.5664425008296966	      008296966
Epoch	2	Batch	1088/4000	Loss:	0.5487694799900052

KeyboardInterrupt: 