### Partie 1 : Classification

#### Bibliothèques et importation des données

In [25]:
# Importation des bibliothèques
import os
import numpy as np
import pandas as pd
import pydicom
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 1. Load the data
# The default locations are the original hard-coded ones; they can be
# overridden through environment variables so the notebook also runs on
# machines where the dataset lives elsewhere.
PathDicom = os.environ.get("PNEUMO_DICOM_DIR", "C:\\Users\\mamad\\Downloads\\dicom-images-train")
CSVPath = os.environ.get("PNEUMO_RLE_CSV", "C:\\Users\\mamad\\Downloads\\trainSet-rle.csv")
df = pd.read_csv(CSVPath)



##### Nettoyage des données

In [26]:
# Strip surrounding whitespace from the run-length strings
df["EncodedPixels"] = df["EncodedPixels"].str.strip()

# Binary target: 0 when the row carries the "-1" sentinel (no pneumothorax),
# 1 for every other value
df["Pneumothorax"] = df["EncodedPixels"].ne("-1").astype("int64")

#### Prétraitement des données

In [27]:
# 1. Prétraitement des données
def load_image(image_path):
    """Read a DICOM file and return its pixels min-max scaled to [0, 1].

    Args:
        image_path: Path of the DICOM file, passed to ``pydicom.dcmread``.

    Returns:
        A float64 numpy array with the same shape as the DICOM pixel array.
        A constant image (max == min) yields an all-zero array instead of
        the NaN array a plain ``0 / 0`` division would produce.
    """
    dicom = pydicom.dcmread(image_path)
    # Work on a float copy so the decoded pixel array itself is not modified.
    img = dicom.pixel_array.astype(np.float64)
    img -= img.min()
    peak = img.max()
    # Only divide when there is a non-zero dynamic range.
    if peak > 0:
        img /= peak
    return img

def preprocess_images(image_paths, target_size=(256, 256)):
    """Load, normalise and resize a collection of DICOM images.

    Args:
        image_paths: Iterable of DICOM file paths.
        target_size: ``(height, width)`` passed to ``tf.image.resize``.

    Returns:
        A float32 numpy array of shape ``(n, height, width, 1)``; an empty
        input gives an array with ``n == 0`` and the same trailing shape.

    Note:
        Assumes ``load_image`` returns 2-D (single-channel) arrays, since a
        single channel axis is appended before resizing.
    """
    image_paths = list(image_paths)
    # Preallocate the output instead of calling np.array() on a list of
    # EagerTensors, which is slow and duplicates the data at the end.
    images = np.empty((len(image_paths), *target_size, 1), dtype=np.float32)
    for index, path in enumerate(image_paths):
        img = load_image(path)
        resized = tf.image.resize(img[..., np.newaxis], target_size)
        images[index] = resized.numpy()
    return images

# Attach the DICOM file path of every row to the DataFrame
df["Path"] = df["ImageId"].apply(lambda x: os.path.join(PathDicom, f"{x}.dcm"))

# Classification needs exactly one sample per image. Dropping repeated
# ImageIds prevents the same image from landing in both the training and the
# validation split (this is a no-op when every ImageId is already unique).
# NOTE(review): assumes all rows of a given ImageId share the same
# "Pneumothorax" label - confirm against the CSV.
classification_df = df.drop_duplicates(subset="ImageId")

# Stratified train/validation split on the binary target
train_df, val_df = train_test_split(
    classification_df,
    test_size=0.2,
    random_state=42,
    stratify=classification_df["Pneumothorax"],
)

# Load the images
train_images = preprocess_images(train_df["Path"].values)
val_images = preprocess_images(val_df["Path"].values)

# Extract the labels
train_labels = train_df["Pneumothorax"].values
val_labels = val_df["Pneumothorax"].values


#### Construction et entraînement du modèle

In [28]:
# 2. Construire le modèle de classification
def build_classification_model(input_shape=(256, 256, 1)):
    """Build and compile the CNN used for binary pneumothorax classification.

    The network is three Conv2D + MaxPooling2D stages (32, 64, 128 filters)
    followed by a 128-unit dense layer, 50% dropout and a single sigmoid
    output unit.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer: passing
        # `input_shape` to the first Conv2D makes Keras emit a UserWarning.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model = build_classification_model()

# 3. Callbacks: keep the checkpoint with the lowest validation loss and stop
#    once the validation loss has not improved for 3 epochs
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath="best_model.keras",
        monitor="val_loss",
        save_best_only=True,
    ),
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3),
]

# 4. Train the model
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=16,
    epochs=10,
    callbacks=callbacks,
    validation_data=(val_images, val_labels),
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m526/526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 383ms/step - accuracy: 0.7056 - loss: 0.6478 - val_accuracy: 0.7154 - val_loss: 0.5768
Epoch 2/10
[1m526/526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 381ms/step - accuracy: 0.7153 - loss: 0.5827 - val_accuracy: 0.7149 - val_loss: 0.5862
Epoch 3/10
[1m526/526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 394ms/step - accuracy: 0.7222 - loss: 0.5635 - val_accuracy: 0.7163 - val_loss: 0.5825
Epoch 4/10
[1m526/526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 393ms/step - accuracy: 0.7265 - loss: 0.5519 - val_accuracy: 0.7249 - val_loss: 0.5691
Epoch 5/10
[1m526/526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 383ms/step - accuracy: 0.7295 - loss: 0.5247 - val_accuracy: 0.7235 - val_loss: 0.5653
Epoch 6/10
[1m526/526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 374ms/step - accuracy: 0.7467 - loss: 0.4918 - val_accuracy: 0.7425 - val_loss: 0.5409
Epoc

#### Exportation

In [29]:
# Reload the best checkpoint written during training
best_model = tf.keras.models.load_model("best_model.keras")

# Predict on the validation set, then threshold the sigmoid output at 0.5
val_predictions = best_model.predict(val_images)
val_predictions = (val_predictions > 0.5).astype(int).ravel()

# Save one row per validation image: its id and the predicted class
val_results = (
    val_df[["ImageId"]]
    .reset_index(drop=True)
    .assign(Pneumothorax=val_predictions)
)
val_results.to_csv("classification_results.csv", index=False)


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 137ms/step


### Partie 2 : Les masques

#### Chargement et normalisation d'une image DICOM

In [30]:
def load_image(image_path):
    """Read a DICOM file and return its pixels min-max scaled to [0, 1].

    This redefines the function of the same name declared earlier in the
    notebook.

    Args:
        image_path: Path of the DICOM file, passed to ``pydicom.dcmread``.

    Returns:
        A float64 numpy array with the same shape as the DICOM pixel array.
        A constant image (max == min) yields an all-zero array instead of
        the NaN array a plain ``0 / 0`` division would produce.
    """
    dicom = pydicom.dcmread(image_path)
    # Work on a float copy so the decoded pixel array itself is not modified.
    img = dicom.pixel_array.astype(np.float64)
    img -= img.min()
    peak = img.max()
    # Only divide when there is a non-zero dynamic range.
    if peak > 0:
        img /= peak
    return img


#### Décodage du masque RLE

In [31]:
def rle_decode(mask_rle, shape=(1024, 1024)):
    """Decode a run-length encoded string into a binary mask.

    The string is read as whitespace-separated ``start length`` pairs, where
    ``start`` is a 1-based index into the flattened (row-major) mask.

    NOTE(review): some datasets store RLE with offsets relative to the end of
    the previous run and/or in column-major order - confirm which convention
    the CSV behind ``EncodedPixels`` uses before trusting the decoded masks.

    Args:
        mask_rle: Run-length string. An empty string or the ``"-1"`` sentinel
            decodes to an all-zero mask.
        shape: ``(height, width)`` of the returned mask.

    Returns:
        A ``uint8`` numpy array of the given shape, 1 inside the runs and 0
        elsewhere.

    Raises:
        ValueError: If the string holds an odd number of tokens, i.e. a start
            without a matching length.
    """
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    tokens = mask_rle.split()

    # No annotation: nothing to paint.
    if not tokens or tokens == ["-1"]:
        return flat.reshape(shape)

    # An unpaired token would otherwise be broadcast against the lengths and
    # silently produce a wrong mask.
    if len(tokens) % 2 != 0:
        raise ValueError(
            f"RLE string must contain start/length pairs, got {len(tokens)} tokens"
        )

    values = np.asarray(tokens, dtype=int)
    starts = values[0::2] - 1  # convert 1-based starts to 0-based indices
    ends = starts + values[1::2]
    for lo, hi in zip(starts, ends):
        flat[lo:hi] = 1
    return flat.reshape(shape)


#### Prétraitement des images et masques

In [32]:
def preprocess_images_and_masks(df, target_size=(256, 256)):
    """Build the image and mask arrays used for segmentation.

    For every row of ``df`` the DICOM at ``row["Path"]`` is loaded and
    resized, and ``row["EncodedPixels"]`` is turned into a mask of the same
    target size (all zeros when the value is ``"-1"``).

    Masks are resized with nearest-neighbour interpolation so they stay
    strictly binary: bilinear interpolation produces fractional labels, which
    the thresholded ``accuracy`` metric cannot match.

    Args:
        df: DataFrame with ``"Path"`` and ``"EncodedPixels"`` columns.
        target_size: ``(height, width)`` passed to ``tf.image.resize``.

    Returns:
        Tuple ``(images, masks)`` of float32 numpy arrays, each of shape
        ``(len(df), height, width, 1)``.

    Note:
        Assumes ``load_image`` returns 2-D (single-channel) arrays and that
        masks are encoded on ``rle_decode``'s default grid.
    """
    n_rows = len(df)
    # Preallocate instead of calling np.array() on lists of EagerTensors.
    images = np.empty((n_rows, *target_size, 1), dtype=np.float32)
    masks = np.zeros((n_rows, *target_size, 1), dtype=np.float32)

    for index, (_, row) in enumerate(df.iterrows()):
        # Load and resize the image
        img = load_image(row["Path"])
        images[index] = tf.image.resize(img[..., np.newaxis], target_size).numpy()

        # Rows without annotation keep the preallocated all-zero mask.
        if row["EncodedPixels"] == "-1":
            continue

        # Decode and resize the mask
        mask = rle_decode(row["EncodedPixels"]).astype(np.float32)
        resized_mask = tf.image.resize(
            mask[..., np.newaxis], target_size, method="nearest"
        )
        masks[index] = resized_mask.numpy()

    return images, masks

# Attach the DICOM file path of every row
df["Path"] = df["ImageId"].map(lambda image_id: os.path.join(PathDicom, f"{image_id}.dcm"))

# Train/validation split, stratified on whether the row carries a mask.
# NOTE(review): rows are split individually; if an ImageId appears on several
# rows the same image may end up in both splits - confirm against the CSV.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["EncodedPixels"].ne("-1"),
)

# Build the image and mask arrays for both splits
train_images, train_masks = preprocess_images_and_masks(train_df)
val_images, val_masks = preprocess_images_and_masks(val_df)


#### Construction du Modèle
#### Définition d'un modèle U-Net pour la segmentation

In [33]:
def build_unet_model(input_shape=(256, 256, 1)):
    """Build and compile a small U-Net style segmentation network.

    Two encoder stages (64 and 128 filters), a 256-filter bottleneck and two
    decoder stages whose upsampled features are concatenated with the
    matching encoder output. A 1x1 sigmoid convolution produces the mask.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.

    Returns:
        A Keras ``Model`` compiled with Adam, binary cross-entropy and the
        ``accuracy`` and ``dice_coefficient`` metrics.
    """

    def double_conv(tensor, filters):
        # Two stacked 3x3 ReLU convolutions with 'same' padding.
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    def up_block(tensor, skip, filters):
        # Upsample x2, apply a 2x2 convolution, then concatenate the skip connection.
        tensor = layers.UpSampling2D((2, 2))(tensor)
        tensor = layers.Conv2D(filters, (2, 2), activation='relu', padding='same')(tensor)
        return layers.Concatenate()([tensor, skip])

    inputs = layers.Input(shape=input_shape)

    # Encoder
    enc1 = double_conv(inputs, 64)
    enc2 = double_conv(layers.MaxPooling2D((2, 2))(enc1), 128)

    # Bottleneck
    bottleneck = double_conv(layers.MaxPooling2D((2, 2))(enc2), 256)

    # Decoder
    dec1 = up_block(bottleneck, enc2, 128)
    dec2 = up_block(dec1, enc1, 64)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(dec2)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy', dice_coefficient],
    )
    return model

def dice_coefficient(y_true, y_pred, smooth=1e-6):
    """Dice overlap between a ground-truth mask and a predicted mask.

    Computed over all elements of the batch as
    ``(2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)``.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor with the same shape as ``y_true``.
        smooth: Small constant added to numerator and denominator so that two
            empty masks score 1 instead of producing a 0/0 NaN.

    Returns:
        A scalar float32 tensor.
    """
    # Cast both inputs so integer masks can be multiplied with float predictions.
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    intersection = tf.reduce_sum(y_true * y_pred)
    total = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred)
    return (2. * intersection + smooth) / (total + smooth)

# Instantiate the segmentation model
model = build_unet_model()

# Keep the checkpoint with the lowest validation loss and stop once the
# validation loss has not improved for 3 epochs
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath="best_segmentation_model.keras",
        monitor="val_loss",
        save_best_only=True,
    ),
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3),
]


#### Entraînement du Modèle

In [None]:
# Train the segmentation model on the image/mask pairs
history = model.fit(
    x=train_images,
    y=train_masks,
    batch_size=16,
    epochs=10,
    callbacks=callbacks,
    validation_data=(val_images, val_masks),
)


#### Prédictions et Encodage des Masques

In [None]:
# Reload the best checkpoint; the custom metric has to be supplied by name
best_model = tf.keras.models.load_model(
    "best_segmentation_model.keras",
    custom_objects={"dice_coefficient": dice_coefficient},
)

# Predict the validation masks and binarise the sigmoid output at 0.5
val_predictions = (best_model.predict(val_images) > 0.5).astype(np.uint8)


#### Encodage RLE des masques prédits

In [None]:
def rle_encode(mask):
    """Run-length encode a binary mask.

    The mask is flattened and every run of non-zero pixels is written as a
    ``start length`` pair, with ``start`` counted from 1.

    Args:
        mask: Numpy array holding a binary mask (any shape).

    Returns:
        The space-separated pairs as a string; an all-zero mask gives ``''``.
    """
    # Pad with a zero on both sides so runs touching the borders are closed.
    padded = np.concatenate(([0], mask.ravel(), [0]))
    # 1-based positions where the value changes: run starts and run ends alternate.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every run end into a run length.
    boundaries[1::2] = boundaries[1::2] - boundaries[0::2]
    return ' '.join(map(str, boundaries))

# Rebind val_df to a new frame instead of assigning a column in place:
# val_df comes out of train_test_split, and setting a column on it can raise
# pandas' SettingWithCopyWarning.
# NOTE(review): the predicted masks are encoded at the model's output
# resolution - confirm this matches the resolution behind "EncodedPixels"
# before comparing the two columns.
val_df = val_df.assign(PredictedMask=[rle_encode(mask) for mask in val_predictions])


#### Sauvegarde des résultats

In [None]:
# Export the image id, the reference encoding and the predicted encoding
val_results = val_df.loc[:, ["ImageId", "EncodedPixels", "PredictedMask"]]
val_results.to_csv("segmentation_results.csv", index=False)