# **Dataset preparation**

In [2]:
!nvidia-smi

Fri Nov 22 09:47:17 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4050 ...    Off | 00000000:01:00.0 Off |                  N/A |
| N/A   42C    P0              N/A /  80W |      8MiB /  6141MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Install necessary librairies

In [1]:
!pip install numpy==1.24.3 opencv-python tensorflow pandas scikit-learn wandb

Defaulting to user installation because normal site-packages is not writeable
Collecting wandb
  Downloading wandb-0.18.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.1/16.1 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting sentry-sdk>=2.0.0
  Downloading sentry_sdk-2.19.0-py2.py3-none-any.whl (322 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 KB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
Collecting gitpython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 KB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting setproctitle
  Downloading setproctitle-1.3.4-cp3

## Load images and masks

In [20]:
import os
import cv2
import numpy as np

def load_training_data(image_dir, mask_dir):
    """
    Loads training images and their corresponding masks.
    Args:
        image_dir (str): Path to the directory containing training images.
        mask_dir (str): Path to the directory containing ground truth masks.
    Returns:
        list of tuples: [(image, mask), ...]
    """
    data = []
    image_filenames = sorted(os.listdir(image_dir))
    mask_filenames = sorted(os.listdir(mask_dir))

    for image_file, mask_file in zip(image_filenames, mask_filenames):
        # Load the image and mask
        image_path = os.path.join(image_dir, image_file)
        mask_path = os.path.join(mask_dir, mask_file)

        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)  # Load mask as grayscale

        if image is None or mask is None:
            raise FileNotFoundError(f"Missing image or mask: {image_path}, {mask_path}")

        data.append((image, mask))

    return data

## Visualize data

In [21]:
data = load_training_data("data/train", "data/train_gt")

# Display the first image and mask
image, mask = data[0]
cv2.imshow("Image", image)
cv2.imshow("Mask", mask)  # Scale mask for display if needed
cv2.waitKey(0)
cv2.destroyAllWindows()

## Reprocess images and masks

Resize images and normalize their pixel values for input to the model. Masks will also be resized, with their values remaining binary (0 and 1 for segmentation).

In [22]:
def preprocess(image, mask, size=(256, 256)):
    """
    Resizes and normalizes the image and mask.
    Args:
        image (np.ndarray): Original image.
        mask (np.ndarray): Original mask.
        size (tuple): Target size (height, width).
    Returns:
        tuple: Preprocessed image and mask.
    """
    # Resize image and mask
    image = cv2.resize(image, size) / 255.0  # Normalize image to [0, 1]
    mask = cv2.resize(mask, size, interpolation=cv2.INTER_NEAREST)  # Resize mask
    mask = (mask > 0).astype(np.uint8)  # Ensure binary mask
    return image, mask

Example usage :

In [23]:
preprocessed_image, preprocessed_mask = preprocess(data[0][0], data[0][1])
print("Image shape:", preprocessed_image.shape)  # Example: (256, 256, 3)
print("Mask shape:", preprocessed_mask.shape)    # Example: (256, 256)

Image shape: (256, 256, 3)
Mask shape: (256, 256)


## Prepare training and validation splits

In [24]:
from sklearn.model_selection import train_test_split

def split_dataset(data, test_size=0.2):
    """
    Splits data into training and validation sets.
    Args:
        data (list): List of (image, mask) pairs.
        test_size (float): Fraction of the dataset for validation.
    Returns:
        tuple: (train_data, val_data)
    """
    return train_test_split(data, test_size=test_size, random_state=42)

Example usage :

In [25]:
train_data, val_data = split_dataset(data)
print("Training samples:", len(train_data))
print("Validation samples:", len(val_data))

Training samples: 800
Validation samples: 200


## Data generator

In [26]:
def data_generator(data, batch_size=16, size=(256, 256)):
    """
    Generator for batching data.
    Args:
        data (list): List of (image, mask) pairs.
        batch_size (int): Number of samples per batch.
        size (tuple): Target size (H, W).
    Yields:
        tuple: (batch_images, batch_masks)
    """
    while True:
        batch_images, batch_masks = [], []
        for _ in range(batch_size):
            image, mask = data[np.random.randint(len(data))]
            image, mask = preprocess(image, mask, size)
            batch_images.append(image)
            batch_masks.append(mask)
        yield np.array(batch_images), np.array(batch_masks)

Example usage :

In [27]:
train_gen = data_generator(train_data)
val_gen = data_generator(val_data)

# Fetch one batch
batch_images, batch_masks = next(train_gen)
print("Batch image shape:", batch_images.shape)  # (16, 256, 256, 3)
print("Batch mask shape:", batch_masks.shape)    # (16, 256, 256)

Batch image shape: (16, 256, 256, 3)
Batch mask shape: (16, 256, 256)


## Load test images

In [28]:
def load_test_images(image_dir):
    """
    Loads test images for inference.
    Args:
        image_dir (str): Path to the test image directory.
    Returns:
        list: List of test images.
    """
    images = []
    for image_file in sorted(os.listdir(image_dir)):
        image_path = os.path.join(image_dir, image_file)
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Test image {image_path} not found!")
        images.append(image)
    return images

Example usage :

In [29]:
test_images = load_test_images("data/test")
print("Number of test images:", len(test_images))

Number of test images: 200


# **Model implementation**

## Define the model architecture (UNet)

In [30]:
import tensorflow as tf
from tensorflow.keras import layers, models

def unet_model(input_size=(256, 256, 3)):
    inputs = layers.Input(input_size)
    
    # Encoder path (Downsampling)
    conv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = layers.MaxPooling2D((2, 2))(conv1)

    conv2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = layers.MaxPooling2D((2, 2))(conv2)

    conv3 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = layers.MaxPooling2D((2, 2))(conv3)

    # Bottleneck
    conv4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(conv4)

    # Decoder path (Upsampling)
    up5 = layers.Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv4)
    concat5 = layers.concatenate([up5, conv3], axis=3)
    conv5 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(concat5)
    conv5 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv5)

    up6 = layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv5)
    concat6 = layers.concatenate([up6, conv2], axis=3)
    conv6 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(concat6)
    conv6 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv6)

    up7 = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv6)
    concat7 = layers.concatenate([up7, conv1], axis=3)
    conv7 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(concat7)
    conv7 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv7)

    # Output layer
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(conv7)

    # Create model
    model = models.Model(inputs, outputs)
    
    return model


2024-11-21 15:32:57.393427: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-21 15:32:57.512970: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-21 15:32:57.558246: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-21 15:32:57.575576: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-21 15:32:57.676102: I tensorflow/core/platform/cpu_feature_guar

## Compile the model

In [31]:
model = unet_model(input_size=(256, 256, 3))

# Compile the model
model.compile(optimizer='adam', 
              loss='dice_loss', 
              metrics=['accuracy'])

I0000 00:00:1732199640.655443   16754 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-11-21 15:34:00.757377: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


# **Training**

## Loss function : Dice loss

In [32]:
import tensorflow as tf

def dice_loss(y_true, y_pred, smooth=1e-6):
    
    # Calculate the intersection of the predicted and true masks
    intersection = tf.reduce_sum(y_true * y_pred)
    # Calculate the Dice coefficient loss
    return 1 - (2. * intersection + smooth) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + smooth)


## optimizer

In [33]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

## Training loop

In [35]:
import tensorflow as tf

# Train the model using the data generators
history = model.fit(
    train_gen,  # Training data generator
    epochs=50,  # Number of epochs, adjust as needed
    validation_data=val_gen,  # Validation data generator # Optional callbacks
)


Epoch 1/50
      5/Unknown [1m69s[0m 13s/step - accuracy: 0.6300 - loss: 0.6035

KeyboardInterrupt: 