# SERLI - AI4Industry - 2025

---
Notebook du Groupe rouge foncé (on aurait préféré le bleu)
---

## Import des librairies

In [12]:
import cv2
import os
import json
from tqdm import tqdm
import numpy as np
import threading
from scipy.ndimage import label
import time
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras import layers as L
from tensorflow.keras.models import Model
from tensorflow.data import Dataset
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB0


from codecarbon import EmissionsTracker


## Variables globales

In [13]:
# --- Data locations ---------------------------------------------------------
USE_LIGHT_DATASET = True  # selects DATASET_LIGHT_FOLDER instead of DATASET_FOLDER
DATASET_FOLDER = "dataset/"
DATASET_LIGHT_FOLDER = "dataset_light/dataset/"
MASK_FOLDER = "mask/"     # folder holding the serialized mask arrays
MASK_FILE = "mask1.png"   # static mask multiplied with every input image

# --- Loading strategy -------------------------------------------------------
USE_OPENCV = False         # True: decode images with OpenCV, False: with TensorFlow
USE_MEMORY_SAVING = True   # If False, can crash the kernel

# --- Image geometry ---------------------------------------------------------
# Used as the first two dimensions of the network input tensor.
IMAGE_SIZE = (848, 480)

# --- Training hyper-parameters ----------------------------------------------
SEED = int(time.time())    # changes at every run
EPOCHS = 100
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2
TEST_SPLIT = 0.1

# --- Geographic bounding box used to normalise positions --------------------
LIMIT_LAT_MIN, LIMIT_LAT_MAX = 47.3947574, 47.3968351
LIMIT_LON_MIN, LIMIT_LON_MAX = -1.1866685, -1.1841471

## Conversion des données

In [14]:
def convertArrayToImage(array, index):
    """Write one mask array as ``mask_<id>.png`` in the selected dataset folder.

    The file id is ``index + 1`` for the full dataset and ``(index + 1) * 10``
    for the light dataset; the image goes into the ``partNNN`` sub-folder
    where ``NNN`` is ``file_id // 1000``.

    Args:
        array: Single-channel array whose values are scaled by 255 and cast
            to uint8 (a boolean / 0-1 mask is expected).
        index: Zero-based position of the mask in the whole mask sequence.
    """
    dataset_folder = DATASET_LIGHT_FOLDER if USE_LIGHT_DATASET else DATASET_FOLDER
    file_id = (index + 1) * 10 if USE_LIGHT_DATASET else (index + 1)
    part_folder = os.path.join(dataset_folder, "part" + "{:03d}".format(file_id // 1000))
    target_path = os.path.join(part_folder, "mask_" + str(file_id) + ".png")

    # cv2.imwrite does not create missing folders; it just reports failure.
    os.makedirs(part_folder, exist_ok=True)

    image = (array * 255).astype(np.uint8)  # Convert boolean array to uint8
    written = cv2.imwrite(target_path, cv2.cvtColor(image, cv2.COLOR_GRAY2BGR))
    if not written:
        # imwrite signals errors through its return value, not an exception.
        print(f"Could not write mask image: {target_path}")

import re


def _natural_key(name):
    """Sort key that orders embedded numbers numerically (``m2`` before ``m10``)."""
    return [int(part) if part.isdigit() else part for part in re.split(r"(\d+)", name)]


# os.listdir returns entries in arbitrary order, but the position of a file
# determines the frame ids of its masks, so the order must be deterministic.
# Assumes every mask file holds 50 masks (hence the * 50) — TODO confirm.
for file_id, file in enumerate(sorted(os.listdir(MASK_FOLDER), key=_natural_key)):
    array = np.load(os.path.join(MASK_FOLDER, file))
    for c, arr in enumerate(array):
        convertArrayToImage(arr, file_id * 50 + c)
    

## Import des données

In [None]:
data_lock = threading.Lock()

def read_subfolder(subfolder_path, image_size, image_list, label_list):
    """Register every complete sample found in one dataset sub-folder.

    A sample is a ``frame_<id>.png`` file accompanied by ``mask_<id>.png`` and
    ``frame_<id>.json`` in the same folder. For each complete sample the mask
    path and the parsed JSON content are stored at position ``index - 1`` of
    ``image_list`` / ``label_list``, where ``index`` is ``<id>`` (full
    dataset) or ``<id> // 10`` (light dataset).

    Args:
        subfolder_path: Folder to scan.
        image_size: Unused; kept so existing callers keep working.
        image_list: Indexable container receiving the mask paths.
        label_list: Indexable container receiving the parsed JSON data.
    """
    print(f"Reading subfolder: {subfolder_path}")
    for file in os.listdir(subfolder_path):
        if not (file.endswith('.png') and file.startswith("frame_")):
            continue

        file_name = file.split(".")[0]
        frame_id = int(file_name.split("_")[1])
        image_path = os.path.join(subfolder_path, "mask_" + str(frame_id) + ".png")
        json_path = os.path.join(subfolder_path, file_name + ".json")

        # Report exactly what is missing, then skip the incomplete sample.
        image_exists = os.path.exists(image_path)
        json_exists = os.path.exists(json_path)
        if not image_exists:
            print(f"Image does not exist for: {file}")
        if not json_exists:
            print(f"json file does not exist for: {file}")
        if not (image_exists and json_exists):
            continue

        index = frame_id // 10 if USE_LIGHT_DATASET else frame_id
        if not 1 <= index <= len(image_list):
            # Index 0 would silently wrap to the last slot and a too-large
            # index would raise inside the thread, aborting the whole folder.
            print(f"Index out of range for: {file}")
            continue

        # Parse the JSON outside the lock so threads do not wait on file I/O.
        with open(json_path) as json_file:
            data = json.load(json_file)

        with data_lock:
            image_list[index - 1] = str(image_path)
            label_list[index - 1] = data
    print(f"Finished reading subfolder: {subfolder_path}")

folder = DATASET_LIGHT_FOLDER if USE_LIGHT_DATASET else DATASET_FOLDER

# Only the folder that is actually selected has to be present.
if not os.path.exists(folder):
    raise FileNotFoundError("Dataset folder does not exist")

print("Dataset folder exists")

# Stray files next to the part folders are ignored in both passes below.
subfolder_paths = [
    path
    for path in (os.path.join(folder, entry) for entry in os.listdir(folder))
    if os.path.isdir(path)
]

# One slot per frame image found on disk.
dataset_len = sum(
    1
    for subfolder_path in subfolder_paths
    for file in os.listdir(subfolder_path)
    if file.endswith('.png') and file.startswith("frame_")
)

# Slots start as None so samples that are never filled can be detected.
image_list = np.full(dataset_len, None, dtype=object)
label_list = np.full(dataset_len, None, dtype=object)

threads = []

# Start one thread per sub-folder
for subfolder_path in subfolder_paths:
    thread = threading.Thread(target=read_subfolder, args=(subfolder_path, IMAGE_SIZE, image_list, label_list))
    threads.append(thread)
    thread.start()

# Wait until every thread has finished
for thread in threads:
    thread.join()


In [None]:
# Quick sanity check of what the import step collected: sizes of both
# containers and their first five entries.
summary_lines = [
    f"Dataset length: {len(image_list)}",
    f"Label length: {len(label_list)}",
    f"Image list: {image_list[:5]}",
    f"Label list: {label_list[:5]}",
]
print("\n".join(summary_lines))

In [None]:

# Load the static mask once, in the representation expected by load_image:
# an 8-bit OpenCV image, or a float32 tensor scaled to [0, 1].
if USE_OPENCV:
    mask = cv2.imread(MASK_FILE, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # imread returns None instead of raising when the file is unreadable.
        raise FileNotFoundError(f"Mask file could not be read: {MASK_FILE}")
    # cv2.resize takes its target size as (width, height).
    mask = cv2.resize(mask, (IMAGE_SIZE[1], IMAGE_SIZE[0]))
else:
    mask = tf.io.read_file(MASK_FILE)
    mask = tf.image.decode_png(mask, channels=1)
    mask = tf.image.resize(mask, IMAGE_SIZE)
    mask = tf.cast(mask, tf.float32) / 255.0

def load_image(image_path):
    """Load one grayscale image, resize it, scale it to [0, 1] and apply the mask.

    Args:
        image_path: Path of the PNG file to read.

    Returns:
        A float32 image of shape ``(IMAGE_SIZE[0], IMAGE_SIZE[1], 1)``: a
        NumPy array when ``USE_OPENCV`` is set, a TensorFlow tensor otherwise.

    Raises:
        FileNotFoundError: In the OpenCV branch, when the file cannot be read.
    """
    if USE_OPENCV:
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # imread returns None instead of raising when the file is unreadable.
            raise FileNotFoundError(f"Image could not be read: {image_path}")
        # cv2.resize takes its target size as (width, height).
        image = cv2.resize(image, (IMAGE_SIZE[1], IMAGE_SIZE[0]))
        # Stay in float32 so both branches return the same dtype.
        image = image.astype(np.float32) / 255.0
        image = cv2.bitwise_and(image, image, mask=mask)
        image = np.expand_dims(image, axis=-1)
    else:
        image = tf.io.read_file(image_path)
        image = tf.image.decode_png(image, channels=1)
        image = tf.image.resize(image, IMAGE_SIZE)
        image = tf.cast(image, tf.float32) / 255.0
        image = image * mask

    return image

def compute_position(position):
    """Map a ``{"lat": ..., "lon": ...}`` record to normalised ``(x, y)``.

    ``x`` is the longitude and ``y`` the latitude, each expressed as a
    fraction of the LIMIT_* bounding box (0 at the MIN bound, 1 at the MAX
    bound). Values outside the box are not clipped.
    """
    latitude, longitude = position["lat"], position["lon"]
    lon_span = LIMIT_LON_MAX - LIMIT_LON_MIN
    lat_span = LIMIT_LAT_MAX - LIMIT_LAT_MIN
    return ((longitude - LIMIT_LON_MIN) / lon_span,
            (latitude - LIMIT_LAT_MIN) / lat_span)

def image_generator(frames, positions):
    """Yield ``(image, (x, y))`` pairs for every frame whose file exists.

    ``frames`` and ``positions`` are walked in lockstep. A frame whose path is
    missing on disk is reported on stdout and skipped.
    """
    for frame_path, raw_position in zip(frames, positions):
        if not os.path.exists(frame_path):
            print(f"Image manquante : {frame_path}")
            continue
        yield load_image(frame_path), compute_position(raw_position)

In [None]:
# Keep only the samples for which the import step stored a mask path; slots
# that were never filled hold a placeholder (0 or None) instead of a string.
valid_indices = [i for i, path in enumerate(image_list) if isinstance(path, str)]
valid_image_list = [image_list[i] for i in valid_indices]
valid_label_list = [label_list[i] for i in valid_indices]

# All sizes are derived from the samples that will really be produced.
dataset_length = len(valid_image_list)
validation_size = int(dataset_length * VALIDATION_SPLIT)
test_size = int(dataset_length * TEST_SPLIT)

if USE_MEMORY_SAVING:
    # Images are decoded lazily, one at a time, by the generator.
    generator = lambda: image_generator(valid_image_list, valid_label_list)

    dataset = Dataset.from_generator(
        generator,
        output_signature=(
            tf.TensorSpec(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 1), dtype=tf.float32),
            tf.TensorSpec(shape=(2,), dtype=tf.float32)
        )
    )

    # Shuffle once with a fixed order: the take/skip split below is only
    # stable if the order is identical every time the dataset is iterated.
    dataset = dataset.shuffle(buffer_size=1000, seed=SEED, reshuffle_each_iteration=False)
    dataset = dataset.apply(tf.data.experimental.assert_cardinality(dataset_length))
    dataset = dataset.cache()

else:
    # Decode everything up front into two dense arrays.
    images = np.zeros((dataset_length, IMAGE_SIZE[0], IMAGE_SIZE[1], 1), dtype=np.float32)
    positions = np.zeros((dataset_length, 2), dtype=np.float32)

    samples = zip(valid_image_list, valid_label_list)
    for i, (image_path, position) in enumerate(tqdm(samples, total=dataset_length)):
        images[i] = load_image(image_path)
        positions[i] = compute_position(position)

    dataset_images = Dataset.from_tensor_slices(images)
    dataset_positions = Dataset.from_tensor_slices(positions)

    # The first positional argument of shuffle() is the buffer size, so the
    # seed has to be passed by keyword. A buffer as large as the dataset gives
    # a full shuffle; the order is frozen so the split below stays disjoint.
    dataset = Dataset.zip((dataset_images, dataset_positions)).shuffle(
        buffer_size=max(dataset_length, 1),
        seed=SEED,
        reshuffle_each_iteration=False,
    )

    # Drop the NumPy references now that the dataset holds the data.
    images = None
    positions = None

# Split, batch and prefetch (identical for both loading strategies).
dataset_validation = dataset.take(validation_size).batch(BATCH_SIZE)
dataset_test = dataset.skip(validation_size).take(test_size).batch(BATCH_SIZE)
dataset_train = dataset.skip(validation_size + test_size).batch(BATCH_SIZE)

dataset_train = dataset_train.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
dataset_validation = dataset_validation.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
dataset_test = dataset_test.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

print("Datasets shapes:")
print(dataset_train.element_spec)

## Création du modèle

In [None]:
def create_custom_model(input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 1), output_shape=2, model_name="cnn_model"):
    """Build and compile a small CNN that regresses a normalised position.

    The network stacks five 3x3 convolution + 2x2 max-pooling stages
    (3, 16, 48, 24, 12 filters) with dropout after the second and the last
    stage, followed by a 16-unit dense layer and the output layer.

    Args:
        input_shape: Shape of one input image (rows, cols, channels).
        output_shape: Number of regressed values.
        model_name: Prefix used for every layer name.

    Returns:
        A Keras model compiled with Adam and mean absolute error.
    """
    inputs = L.Input(shape=input_shape, name=f'{model_name}_input')
    x = L.Conv2D(3, kernel_size=3, activation='relu', padding='same', name=f'{model_name}_conv2D_3_7')(inputs)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_3_7')(x)
    x = L.Conv2D(16, kernel_size=3, activation='relu', padding='same', name=f'{model_name}_conv2D_16_5')(x)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_16_5')(x)
    x = L.Dropout(0.5, name=f'{model_name}_dropout_mid')(x)
    x = L.Conv2D(48, kernel_size=3, activation='relu', padding='same', name=f'{model_name}_conv2D_48_3')(x)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_48_3')(x)
    x = L.Conv2D(24, kernel_size=3, activation='relu', padding='same', name=f'{model_name}_conv2D_24_3')(x)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_24_3')(x)
    x = L.Conv2D(12, kernel_size=3, activation='relu', padding='same', name=f'{model_name}_conv2D_12_3')(x)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_12_3')(x)
    x = L.Dropout(0.5, name=f'{model_name}_dropout')(x)
    # Layer names are kept as they were even where they no longer describe
    # the layer (this is a Flatten, the dense layer below has 16 units).
    x = L.Flatten(name=f'{model_name}_global_avg')(x)
    x = L.Dense(16, activation='relu', name=f'{model_name}_dense_128')(x)
    # Sigmoid keeps each coordinate independently in [0, 1]. Softmax would
    # force the outputs to sum to 1, which a pair of coordinates never obeys.
    outputs = L.Dense(output_shape, activation='sigmoid', name=f'{model_name}_output')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='mae', metrics=['mae'])
    return model

model = create_custom_model()
model.summary()

In [None]:
def create_custom_model_v2(input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 1), output_shape=2, model_name="cnn_transformer"):
    """Build and compile a CNN with channel gating that regresses a position.

    Four convolution + batch-norm + max-pooling stages (16, 32, 128, 64
    filters) are followed by a softmax gate over the 64 channels, dropout,
    two dense layers (128 and 64 units) and the output layer.

    Args:
        input_shape: Shape of one input image (rows, cols, channels).
        output_shape: Number of regressed values.
        model_name: Prefix used for the named layers.

    Returns:
        A Keras model compiled with Adam, MSE loss and an MAE metric.
    """
    inputs = L.Input(shape=input_shape, name=f'{model_name}_input')

    x = L.Conv2D(16, kernel_size=3, activation='relu', strides=2, kernel_regularizer=l2(1e-4), name=f'{model_name}_conv2D_16')(inputs)
    x = L.BatchNormalization()(x)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_16')(x)

    x = L.Conv2D(32, kernel_size=3, activation='relu', dilation_rate=2, name=f'{model_name}_conv2D_32')(x)
    x = L.BatchNormalization()(x)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_32')(x)

    x = L.Conv2D(128, kernel_size=3, activation='relu', name=f'{model_name}_conv2D_128')(x)
    x = L.BatchNormalization()(x)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_128')(x)

    x = L.Conv2D(64, kernel_size=3, activation='relu', name=f'{model_name}_conv2D_64')(x)
    x = L.BatchNormalization()(x)
    x = L.MaxPooling2D(pool_size=2, name=f'{model_name}_maxpool_64')(x)

    # Channel gate: one softmax weight per feature map, broadcast over the
    # spatial dimensions by the (1, 1, 64) reshape.
    attention = L.Flatten()(x)
    attention = L.Dense(64, activation='softmax', name=f'{model_name}_attention_dense')(attention)
    attention = L.Reshape((1, 1, 64))(attention)
    x = L.Multiply()([x, attention])
    x = L.Dropout(0.3)(x)

    x = L.Flatten(name=f'{model_name}_global_avg_pool')(x)

    x = L.Dense(128, activation='relu', name=f'{model_name}_dense_128')(x)
    x = L.Dense(64, activation='relu', kernel_regularizer=l2(1e-4), name=f'{model_name}_dense_64')(x)
    # Sigmoid keeps each coordinate independently in [0, 1]. Softmax would
    # force the outputs to sum to 1, which a pair of coordinates never obeys.
    outputs = L.Dense(output_shape, activation='sigmoid', name=f'{model_name}_output')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="adam", loss='mse', metrics=['mae'])
    return model

model = create_custom_model_v2()
model.summary()

In [None]:
# Halve the learning rate when the validation loss stops improving.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',  # Watch the validation loss
    factor=0.5,          # Multiply the learning rate by 0.5 on a plateau
    patience=10,         # Epochs without improvement before reducing
    min_lr=1e-10         # Lower bound of the learning rate
)

tracker = EmissionsTracker()
tracker.start()
try:
    history = model.fit(dataset_train, validation_data=dataset_validation, epochs=EPOCHS, callbacks=[reduce_lr])
finally:
    # Stop the tracker even when training is interrupted or fails.
    emissions = tracker.stop()

# Plot both curves with labels that match the plotted series.
plt.plot(history.history['mae'], label='train')
plt.plot(history.history['val_mae'], label='validation')
plt.xlabel('epoch')
plt.ylabel('MAE')
plt.legend()
print(f"Total emissions: {emissions} kg")

In [None]:
def create_efficientNet_model(input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 1), output_shape=2, model_name="efficientNet"):
    """Build and compile an EfficientNetB0-based position regressor.

    An untrained EfficientNetB0 backbone (``weights=None``, no top) is
    followed by global average pooling, two dense layers (128 and 64 units)
    and the output layer.

    Args:
        input_shape: Shape of one input image (rows, cols, channels).
        output_shape: Number of regressed values.
        model_name: Prefix used for the named layers.

    Returns:
        A Keras model compiled with Adam and mean absolute error.
    """
    inputs = L.Input(shape=input_shape, name=f'{model_name}_input')
    # NOTE(review): Keras documents EfficientNet inputs as pixels in [0, 255]
    # while this notebook feeds images scaled to [0, 1] — confirm the scaling.
    base_model = EfficientNetB0(include_top=False, input_tensor=inputs, weights=None)
    x = base_model.output
    x = L.GlobalAveragePooling2D()(x)
    x = L.Dense(128, activation='relu', name=f'{model_name}_dense_128')(x)
    x = L.Dense(64, activation='relu', name=f'{model_name}_dense_64')(x)
    # Sigmoid keeps each coordinate independently in [0, 1]. Softmax would
    # force the outputs to sum to 1, which a pair of coordinates never obeys.
    outputs = L.Dense(output_shape, activation='sigmoid', name=f'{model_name}_output')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='mae', metrics=['mae'])
    return model

model = create_efficientNet_model()
model.summary()

In [None]:
# Halve the learning rate when the validation loss stops improving.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',  # Watch the validation loss
    factor=0.5,          # Multiply the learning rate by 0.5 on a plateau
    patience=10,         # Epochs without improvement before reducing
    min_lr=1e-10         # Lower bound of the learning rate
)

tracker = EmissionsTracker()
tracker.start()
try:
    history = model.fit(dataset_train, validation_data=dataset_validation, epochs=EPOCHS, callbacks=[reduce_lr])
finally:
    # Stop the tracker even when training is interrupted or fails.
    emissions = tracker.stop()

# Plot both curves with labels that match the plotted series.
plt.plot(history.history['mae'], label='train')
plt.plot(history.history['val_mae'], label='validation')
plt.xlabel('epoch')
plt.ylabel('MAE')
plt.legend()
print(f"Total emissions: {emissions} kg")