# Imports

#### PyTorch imports

In [14]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
print('PyTorch version', torch.__version__)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)

PyTorch version 2.5.1
Device: cuda


#### General imports

In [3]:
import os
import json
import math
import time
import random
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

from tqdm import tqdm

# Parameters

#### Directories for the images

In [16]:
parent = Path().cwd().parent
data_root = parent / 'try_1'

train_images_dir = data_root / 'train' / 'images'
train_masks_dir = data_root / 'train' / 'masks'

val_images_dir = data_root / 'val' / 'images'
val_masks_dir = data_root / 'val' / 'masks'

#### General parameters

In [None]:
IMG_HEIGHT = 768
IMG_WIDTH = 768

BATCH_SIZE = 8

EPOCHS = int(input('Enter number of epochs: '))
print('Number of epochs:', EPOCHS)

LR = float(input('Enter learning rate: '))
print('Learning rate:', LR)

"""
0 : background (black)
1 : arteries (white)
2 : veins (gray)
"""
NUM_CLASSES = 3

COLOR_TO_LABEL = {
    (0,0,0): 0,
    (255,255,255): 1,
    (128,128,128): 2
}

Number of epochs: 100
Learning rate: 0.001


#### Seed

In [9]:
# s = 42

In [10]:
def set_seed(s) :
    random.seed(s)
    np.random.seed(s)
    torch.manual_seed(s)
    torch.cuda.manual_seed_all(s)

# DataLoaders

#### Create the class for the dataset images

In [15]:
class RAVIRDataset(Dataset):
    def __init__(self, img_dir, mask_dir, img_size=(IMG_HEIGHT, IMG_WIDTH), transforms=None):
        self.img_paths = sorted(list(Path(img_dir).glob('*.png')))
        self.mask_dir = Path(mask_dir)
        self.img_size = img_size
        self.transforms = transforms

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_p = self.img_paths[idx]
        mask_p = self.mask_dir / img_p.name
        img = Image.open(img_p).convert('RGB').resize(self.img_size, Image.BILINEAR)
        img = np.array(img, dtype=np.float32) / 255.0
        img = np.transpose(img, (2,0,1))  # C,H,W
        img_t = torch.from_numpy(img).float()
        # load mask and map colors to labels
        m = Image.open(mask_p).convert('RGB').resize(self.img_size, Image.NEAREST)
        m_arr = np.array(m, dtype=np.uint8)
        label = np.zeros((self.img_size[1], self.img_size[0]), dtype=np.uint8)
        for color, lab in COLOR_TO_LABEL.items():
            mask = np.all(m_arr == np.array(color, dtype=np.uint8), axis=-1)
            label[mask] = lab
        label_t = torch.from_numpy(label).long()  # H,W
        return img_t, label_t

#### Actually create the datasets

In [None]:
train_ds = RAVIRDataset(train_images_dir, train_masks_dir, (IMG_WIDTH, IMG_HEIGHT))
val_ds = RAVIRDataset(val_images_dir, val_masks_dir, (IMG_WIDTH, IMG_HEIGHT))

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# UNet Model

![Image de l'architecture UNet](unet.png)

#### Conv 3x3, ReLU

In [18]:
def DoubleConv(in_ch, out_ch) :
    """
    Creates a bloc :
        Conv 3x3 -> ReLU -> Conv 3x3 -> ReLU
    """
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
        nn.ReLU(inplace=True)
    )

#### Max pool 2x2

In [19]:
def down_block(in_ch, out_ch) :
    """
    Encoder bloc : 
        DoubleConv -> MaxPool 2x2
    """
    return nn.Sequential(
        DoubleConv(in_ch, out_ch),
        nn.MaxPool2d(kernel_size=2)
    )

#### Exemple

In [21]:
x = torch.randn(1, 1, 768, 768)

encode1 = down_block(1, 64)
y = encode1(x)

print(x.shape)
print(y.shape)

torch.Size([1, 1, 768, 768])
torch.Size([1, 64, 384, 384])


#### Bottleneck

In [22]:
def bottleneck(in_ch, out_ch) :
    """
    Central part of UNet
    """
    return DoubleConv(in_ch, out_ch)

#### UpSampling

In [23]:
def up_block(in_ch, out_ch) :
    """
    Decoder bloc :
        UpConv 2x2 -> Concatenate -> DoubleConv
    """
    return nn.Sequential(
        nn.ConvTranspose2d(in_ch, out_ch, kernel_size=2, stride=2),
        DoubleConv(in_ch, out_ch)
    )

In [25]:
def forward_up(block, x, skip_x):
    """
    Forward step for the upsampling block in UNet.
    - block: contains 'up' (ConvTranspose2d) and 'conv' (double_conv module)
    - x: feature map coming from the previous decoder step (or bottleneck)
    - skip_x: feature map from the corresponding encoder layer (skip connection)
    """
    # 1) Upsample the decoder feature map (x) to double its spatial size
    x = block["up"](x)

    # 2) Sometimes due to pooling/odd dimensions shapes might not perfectly match.
    #    If so, we pad x to match skip_x's height and width.
    if x.shape[-2:] != skip_x.shape[-2:]:
        x = nn.functional.pad(
            x,
            [0, skip_x.shape[-1] - x.shape[-1],  # pad width
             0, skip_x.shape[-2] - x.shape[-2]]  # pad height
        )

    # 3) Concatenate along the channel dimension: (batch, C_decoder + C_encoder, H, W)
    x = torch.cat([skip_x, x], dim=1)

    # 4) Apply the double convolution to fuse encoder and decoder features
    x = block["conv"](x)

    return x

#### Assembling of those blocs

In [30]:
class UNet(nn.Module) :
    def __init__(self, in_channels=1, out_classes=1) :
        """
        U-Net full architecture assembly.
        - in_channels  : number of channels in input image (1=grayscale, 3=RGB)
        - out_classes  : number of output channels (1=binary mask, N=multi-class)
        """
        super().__init__()

        # Encoder path
        self.down1 = down_block(in_channels, 64)
        self.down2 = down_block(64, 128)
        self.down3 = down_block(128, 256)
        self.down4 = down_block(256, 512)

        # Bottleneck
        self.bottleneck = bottleneck(512, 1024)

        # Decoder path
        self.up4 = up_block(1024, 512)
        self.up3 = up_block(512, 256)
        self.up2 = up_block(256, 128)
        self.up1 = up_block(128, 64)

        # Final 1x1 convolution to map to output classes
        self.final_conv = nn.Conv2d(64, out_classes, kernel_size=1)
    
    def forward(self, x) :
        # Encoder
        x1 = self.down1(x)
        x2 = self.down2(x1)
        x3 = self.down3(x2)
        x4 = self.down4(x3)

        # Bottleneck
        b = self.bottleneck(x4)

        # Decoder with skip connections
        d4 = forward_up(self.up4, b, x4)
        d3 = forward_up(self.up3, d4, x3)
        d2 = forward_up(self.up2, d3, x2)
        d1 = forward_up(self.up1, d2, x1)

        # Final output layer
        out = self.final_conv(d1)
        return out

#### Test

In [31]:
model = UNet(in_channels=1, out_classes=3)  # exemple 3 classes : fond / veine / artère
x = torch.randn(1, 1, 768, 768)  # batch=1, grayscale
y = model(x)

print(y.shape)  # ✅ doit donner [1, 3, 768, 768]


TypeError: 'str' object cannot be interpreted as an integer