In [None]:
# PROJET : D√©tection de la Pneumonie √† partir de Radiographies
# Auteur : KONE Zana , OUEDRAOGO Freddy , PITROIPA Soraya

# -------------------------------------------------------------
# 1) Installation des d√©pendances
# -------------------------------------------------------------
!pip install -q tensorflow keras scikit-learn seaborn matplotlib opencv-python

In [2]:
# -------------------------------------------------------------
# 2) Imports
# -------------------------------------------------------------
import os
import shutil
import random
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import time
from datetime import datetime

from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    roc_auc_score, precision_recall_fscore_support, roc_curve, auc
)
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU disponible: {tf.config.list_physical_devices('GPU')}")

code
#VSC-67158eec
python
# -------------------------------------------------------------
# 3) Montage de Google Drive et Configuration (safe pour local)
# -------------------------------------------------------------
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DATA_DIR = '/content/drive/MyDrive/chest_xray/chest_xray'
    WORK_DIR = '/content/chest_xray_reduced'
    print('Google Colab drive mounted')
except Exception:
    # Not running in Colab - provide local defaults (adjust if needed)
    print('Not running in Google Colab - using local paths if available')
    DATA_DIR = './data/chest_xray/chest_xray'
    WORK_DIR = './chest_xray_reduced'

# Hyperparam√®tres (AM√âLIOR√âS)
IMG_SIZE = (224, 224)  # Taille standard pour MobileNetV2
BATCH_SIZE = 32
SEED = 42
EPOCHS_INITIAL = 15  # Augment√©
EPOCHS_FINETUNE = 10  # Augment√©

print(f"Dataset source : {DATA_DIR}")
print(f"Dataset r√©duit : {WORK_DIR}")
code
#VSC-39998e89
python
# -------------------------------------------------------------
# 4) Cr√©ation d'un dataset √©quilibr√© (TAILLE AUGMENT√âE)
# -------------------------------------------------------------
def create_balanced_subset(src_root, dst_root, max_per_class=2000):
    """
    Cr√©e un sous-ensemble √©quilibr√© du dataset original
    max_per_class augment√© √† 2000 pour avoir plus de donn√©es
    """
    if os.path.exists(dst_root):
        shutil.rmtree(dst_root)
    os.makedirs(dst_root, exist_ok=True)

    splits = ['train', 'val', 'test']
    classes = ['PNEUMONIA', 'NORMAL']

    for split in splits:
        for cls in classes:
            src_path = os.path.join(src_root, split, cls)
            dst_path = os.path.join(dst_root, split, cls)
            os.makedirs(dst_path, exist_ok=True)

            all_imgs = [f for f in os.listdir(src_path) if f.endswith(('.jpeg', '.jpg', '.png'))]
            random.shuffle(all_imgs)
            selected_imgs = all_imgs[:max_per_class]

            for img in selected_imgs:
                shutil.copy2(os.path.join(src_path, img), os.path.join(dst_path, img))

            print(f"{split}/{cls}: {len(selected_imgs)} images copi√©es.")

    print(f"\n‚úÖ Dataset √©quilibr√© cr√©√© dans {dst_root}")

# Cr√©ation du dataset
create_balanced_subset(DATA_DIR, WORK_DIR, max_per_class=2000)

code
#VSC-cfd9c078
python
# -------------------------------------------------------------
# 5) Pr√©paration des g√©n√©rateurs d'images (AVEC AUGMENTATION)
# -------------------------------------------------------------

# IMPORTANT : Augmentation de donn√©es pour le training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    brightness_range=[0.8, 1.2],
    validation_split=0.2
)

# Pas d'augmentation pour validation et test
val_test_datagen = ImageDataGenerator(rescale=1./255)

train_gen = train_datagen.flow_from_directory(
    WORK_DIR + '/train',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    seed=SEED
)

val_gen = train_datagen.flow_from_directory(
    WORK_DIR + '/train',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    seed=SEED
)

test_gen = val_test_datagen.flow_from_directory(
    WORK_DIR + '/test',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

print(f"Classes d√©tect√©es : {train_gen.class_indices}")
print(f"Training samples: {train_gen.samples}")
print(f"Validation samples: {val_gen.samples}")
print(f"Test samples: {test_gen.samples}")

code
#VSC-27f225e2
python

# -------------------------------------------------------------
# 6) Construction du mod√®le CNN (MobileNetV2)
# -------------------------------------------------------------
def build_cnn_model():
    """Construction du mod√®le CNN avec MobileNetV2"""
    base_model = MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)
    )
    base_model.trainable = False  # Freeze base model initialement

    inputs = keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = keras.Model(inputs, outputs)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy',
                 tf.keras.metrics.AUC(name='auc'),
                 tf.keras.metrics.Precision(name='precision'),
                 tf.keras.metrics.Recall(name='recall')]
    )

    return model, base_model

cnn_model, base_model = build_cnn_model()

# Afficher le r√©sum√©
print("\nüìä Architecture du mod√®le:")
cnn_model.summary()
print(f"\n‚úÖ Nombre total de param√®tres: {cnn_model.count_params():,}")
code
#VSC-98b4c8c4
python
# -------------------------------------------------------------
# 7) Callbacks pour l'entra√Ænement
# -------------------------------------------------------------
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-7,
        verbose=1
    ),
    ModelCheckpoint(
        'best_cnn_model.h5',
        monitor='val_auc',
        save_best_only=True,
        mode='max',
        verbose=1
    )
]

code
#VSC-a63e9254
python
# -------------------------------------------------------------
# 8) Entra√Ænement Initial (Base Model Frozen)
# -------------------------------------------------------------
print("\nüöÄ Phase 1: Entra√Ænement initial (base model frozen)")
print("="*60)

start_time = time.time()

history = cnn_model.fit(
    train_gen,
    epochs=EPOCHS_INITIAL,
    validation_data=val_gen,
    callbacks=callbacks,
    verbose=1
)

initial_training_time = time.time() - start_time
print(f"\n‚è±Ô∏è  Temps d'entra√Ænement Phase 1: {initial_training_time/60:.2f} minutes")

code
#VSC-0537718d
python

code
#VSC-1d78e80c
python

# -------------------------------------------------------------
# 9) Fine-tuning (Unfreeze les derni√®res couches)
# -------------------------------------------------------------
print("\nüöÄ Phase 2: Fine-tuning (unfreezing last layers)")
print("="*60)

# Unfreeze les derni√®res couches de MobileNetV2
base_model.trainable = True

# Freeze toutes les couches sauf les 30 derni√®res
for layer in base_model.layers[:-30]:
    layer.trainable = False

# Recompiler avec un learning rate plus faible
cnn_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy',
             tf.keras.metrics.AUC(name='auc'),
             tf.keras.metrics.Precision(name='precision'),
             tf.keras.metrics.Recall(name='recall')]
)

print(f"Nombre de couches entra√Ænables: {len([l for l in cnn_model.layers if l.trainable])}")

start_time = time.time()

history_ft = cnn_model.fit(
    train_gen,
    epochs=EPOCHS_FINETUNE,
    validation_data=val_gen,
    callbacks=callbacks,
    verbose=1
)

finetune_training_time = time.time() - start_time
total_training_time = initial_training_time + finetune_training_time

print(f"\n‚è±Ô∏è  Temps d'entra√Ænement Phase 2: {finetune_training_time/60:.2f} minutes")
print(f"‚è±Ô∏è  TEMPS TOTAL D'ENTRA√éNEMENT: {total_training_time/60:.2f} minutes")

code
#VSC-3989f691
python
# -------------------------------------------------------------
# 10) √âvaluation Compl√®te du CNN
# -------------------------------------------------------------
print("\nüìä √âVALUATION COMPL√àTE DU MOD√àLE CNN")
print("="*60)

# Charger le meilleur mod√®le
cnn_model = keras.models.load_model('best_cnn_model.h5')

# Pr√©dictions sur test set
test_gen.reset()
y_pred_proba = cnn_model.predict(test_gen, verbose=1)
y_pred = (y_pred_proba > 0.5).astype(int).flatten()
y_true = test_gen.classes

# M√©triques globales
test_loss, test_acc, test_auc, test_precision, test_recall = cnn_model.evaluate(test_gen, verbose=0)

# Calcul de m√©triques suppl√©mentaires
from sklearn.metrics import f1_score

f1 = f1_score(y_true, y_pred)

# Matrice de confusion
cm = confusion_matrix(y_true, y_pred)
tn, fp, fn, tp = cm.ravel()

# Sp√©cificit√© (important en m√©dical)
specificity = tn / (tn + fp)

# Affichage des r√©sultats
print(f"\nüìà R√âSULTATS SUR L'ENSEMBLE DE TEST:")
print(f"  ‚Ä¢ Accuracy:    {test_acc:.4f} ({test_acc*100:.2f}%)")
print(f"  ‚Ä¢ AUC:         {test_auc:.4f}")
print(f"  ‚Ä¢ Precision:   {test_precision:.4f}")
print(f"  ‚Ä¢ Recall:      {test_recall:.4f}")
print(f"  ‚Ä¢ F1-Score:    {f1:.4f}")
print(f"  ‚Ä¢ Specificity: {specificity:.4f}")

print(f"\nüìä MATRICE DE CONFUSION:")
print(f"  TN={tn}, FP={fp}")
print(f"  FN={fn}, TP={tp}")

# Rapport de classification d√©taill√©
print("\nüìã RAPPORT DE CLASSIFICATION:")
print(classification_report(y_true, y_pred, target_names=['NORMAL', 'PNEUMONIA']))

code
#VSC-dab0682c
python
# -------------------------------------------------------------
# 11) Visualisations CNN
# -------------------------------------------------------------
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# 1. Accuracy
axes[0, 0].plot(history.history['accuracy'] + history_ft.history['accuracy'], label='Train')
axes[0, 0].plot(history.history['val_accuracy'] + history_ft.history['val_accuracy'], label='Validation')
axes[0, 0].set_title('Accuracy du mod√®le CNN')
axes[0, 0].set_xlabel('√âpoques')
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].legend()
axes[0, 0].axvline(x=EPOCHS_INITIAL, color='r', linestyle='--', label='Fine-tuning start')

# 2. Loss
axes[0, 1].plot(history.history['loss'] + history_ft.history['loss'], label='Train')
axes[0, 1].plot(history.history['val_loss'] + history_ft.history['val_loss'], label='Validation')
axes[0, 1].set_title('Loss du mod√®le CNN')
axes[0, 1].set_xlabel('√âpoques')
axes[0, 1].set_ylabel('Loss')
axes[0, 1].legend()
axes[0, 1].axvline(x=EPOCHS_INITIAL, color='r', linestyle='--')

# 3. AUC
axes[0, 2].plot(history.history['auc'] + history_ft.history['auc'], label='Train')
axes[0, 2].plot(history.history['val_auc'] + history_ft.history['val_auc'], label='Validation')
axes[0, 2].set_title('AUC du mod√®le CNN')
axes[0, 2].set_xlabel('√âpoques')
axes[0, 2].set_ylabel('AUC')
axes[0, 2].legend()
axes[0, 2].axvline(x=EPOCHS_INITIAL, color='r', linestyle='--')

# 4. Matrice de confusion
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[1, 0],
            xticklabels=['NORMAL', 'PNEUMONIA'],
            yticklabels=['NORMAL', 'PNEUMONIA'])
axes[1, 0].set_title('Matrice de Confusion')
axes[1, 0].set_ylabel('Vrai Label')
axes[1, 0].set_xlabel('Pr√©diction')

# 5. Courbe ROC
fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba)
roc_auc_score_val = auc(fpr, tpr)
axes[1, 1].plot(fpr, tpr, label=f'ROC (AUC = {roc_auc_score_val:.4f})')
axes[1, 1].plot([0, 1], [0, 1], 'k--', label='Hasard')
axes[1, 1].set_title('Courbe ROC')
axes[1, 1].set_xlabel('Taux de Faux Positifs')
axes[1, 1].set_ylabel('Taux de Vrais Positifs')
axes[1, 1].legend()
axes[1, 1].grid(True)

# 6. Distribution des pr√©dictions
axes[1, 2].hist(y_pred_proba[y_true==0], bins=50, alpha=0.5, label='NORMAL', color='green')
axes[1, 2].hist(y_pred_proba[y_true==1], bins=50, alpha=0.5, label='PNEUMONIA', color='red')
axes[1, 2].axvline(x=0.5, color='black', linestyle='--', label='Seuil')
axes[1, 2].set_title('Distribution des Probabilit√©s Pr√©dites')
axes[1, 2].set_xlabel('Probabilit√©')
axes[1, 2].set_ylabel('Fr√©quence')
axes[1, 2].legend()

plt.tight_layout()
plt.savefig('cnn_evaluation_complete.png', dpi=300, bbox_inches='tight')
plt.show()

code
#VSC-efc63b77
python
# -------------------------------------------------------------
# 12) Extraction de features pour SVM & Random Forest
# -------------------------------------------------------------
print("\nüîÑ EXTRACTION DE FEATURES POUR SVM ET RANDOM FOREST")
print("="*60)

def extract_features(generator):
    """Extrait les features du mod√®le CNN pour les mod√®les ML classiques"""
    features, labels = [], []
    steps = generator.samples // generator.batch_size + 1

    for i, (imgs, lbls) in enumerate(generator):
        if i >= steps:
            break
        feats = base_model.predict(imgs, verbose=0)
        features.append(np.mean(feats, axis=(1,2)))
        labels.extend(lbls)

    return np.vstack(features), np.array(labels)

print("Extraction des features d'entra√Ænement...")
X_train, y_train = extract_features(train_gen)

print("Extraction des features de validation...")
val_gen_for_features = train_datagen.flow_from_directory(
    WORK_DIR + '/train',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    shuffle=False,
    seed=SEED
)
X_val, y_val = extract_features(val_gen_for_features)

print("Extraction des features de test...")
test_gen.reset()
X_test, y_test = extract_features(test_gen)

print(f"\n‚úÖ Features extraites:")
print(f"  Train: {X_train.shape}")
print(f"  Val:   {X_val.shape}")
print(f"  Test:  {X_test.shape}")

code
#VSC-3437c76f
python
# -------------------------------------------------------------
# 13) SVM avec GridSearchCV (OPTIMIS√â)
# -------------------------------------------------------------
print("\nüîç ENTRA√éNEMENT SVM AVEC GRIDSEARCHCV")
print("="*60)

param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['rbf', 'linear'],
    'gamma': ['scale', 'auto', 0.001, 0.01]
}

svm = SVC(probability=True, random_state=SEED)
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)

start_time = time.time()

grid_svm = GridSearchCV(
    svm,
    param_grid_svm,
    cv=cv,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=2
)
grid_svm.fit(X_train, y_train)

svm_training_time = time.time() - start_time

best_svm = grid_svm.best_estimator_

print(f"\n‚úÖ Meilleurs hyperparam√®tres SVM:")
print(f"  {grid_svm.best_params_}")
print(f"‚è±Ô∏è  Temps d'entra√Ænement SVM: {svm_training_time/60:.2f} minutes")

# √âvaluation SVM sur test
y_pred_svm = best_svm.predict(X_test)
y_pred_svm_proba = best_svm.predict_proba(X_test)[:, 1]

svm_acc = accuracy_score(y_test, y_pred_svm)
svm_auc = roc_auc_score(y_test, y_pred_svm_proba)
svm_precision, svm_recall, svm_f1, _ = precision_recall_fscore_support(
    y_test, y_pred_svm, average='binary'
)

print(f"\nüìà R√âSULTATS SVM SUR TEST:")
print(f"  ‚Ä¢ Accuracy:  {svm_acc:.4f}")
print(f"  ‚Ä¢ AUC:       {svm_auc:.4f}")
print(f"  ‚Ä¢ Precision: {svm_precision:.4f}")
print(f"  ‚Ä¢ Recall:    {svm_recall:.4f}")
print(f"  ‚Ä¢ F1-Score:  {svm_f1:.4f}")

code
#VSC-85f6e060
python
# -------------------------------------------------------------
# 14) Random Forest avec GridSearchCV (OPTIMIS√â)
# -------------------------------------------------------------
print("\nüå≤ ENTRA√éNEMENT RANDOM FOREST AVEC GRIDSEARCHCV")
print("="*60)

param_grid_rf = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestClassifier(random_state=SEED, n_jobs=-1)

start_time = time.time()

grid_rf = GridSearchCV(
    rf,
    param_grid_rf,
    cv=cv,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=2
)
grid_rf.fit(X_train, y_train)

rf_training_time = time.time() - start_time

best_rf = grid_rf.best_estimator_

print(f"\n‚úÖ Meilleurs hyperparam√®tres Random Forest:")
print(f"  {grid_rf.best_params_}")
print(f"‚è±Ô∏è  Temps d'entra√Ænement RF: {rf_training_time/60:.2f} minutes")

# √âvaluation RF sur test
y_pred_rf = best_rf.predict(X_test)
y_pred_rf_proba = best_rf.predict_proba(X_test)[:, 1]

rf_acc = accuracy_score(y_test, y_pred_rf)
rf_auc = roc_auc_score(y_test, y_pred_rf_proba)
rf_precision, rf_recall, rf_f1, _ = precision_recall_fscore_support(
    y_test, y_pred_rf, average='binary'
)

print(f"\nüìà R√âSULTATS RANDOM FOREST SUR TEST:")
print(f"  ‚Ä¢ Accuracy:  {rf_acc:.4f}")
print(f"  ‚Ä¢ AUC:       {rf_auc:.4f}")
print(f"  ‚Ä¢ Precision: {rf_precision:.4f}")
print(f"  ‚Ä¢ Recall:    {rf_recall:.4f}")
print(f"  ‚Ä¢ F1-Score:  {rf_f1:.4f}")

code
#VSC-49d4f94f
python

# -------------------------------------------------------------
# 15) COMPARAISON FINALE DES MOD√àLES (CORRIG√âE)
# -------------------------------------------------------------
print("\nüèÜ COMPARAISON FINALE DES MOD√àLES")
print("="*60)

# Cr√©er DataFrame de comparaison
results_df = pd.DataFrame({
    'Mod√®le': ['CNN (MobileNetV2)', 'SVM', 'Random Forest'],
    'Accuracy': [test_acc, svm_acc, rf_acc],
    'AUC': [test_auc, svm_auc, rf_auc],
    'Precision': [test_precision, svm_precision, rf_precision],
    'Recall': [test_recall, svm_recall, rf_recall],
    'F1-Score': [f1, svm_f1, rf_f1],
    'Temps (min)': [
        total_training_time/60,
        svm_training_time/60,
        rf_training_time/60
    ]
})

print(results_df.to_string(index=False))

# Identifier le meilleur mod√®le
best_model_idx = results_df['AUC'].idxmax()
best_model_name = results_df.loc[best_model_idx, 'Mod√®le']

print(f"\nü•á MEILLEUR MOD√àLE: {best_model_name}")
print(f"   AUC: {results_df.loc[best_model_idx, 'AUC']:.4f}")

code
#VSC-c641b9f1
python
# -------------------------------------------------------------
# 16) Visualisation Comparative Finale
# -------------------------------------------------------------
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Graphique 1: Comparaison des Accuracy
axes[0, 0].bar(results_df['Mod√®le'], results_df['Accuracy'], color=['#3498db', '#e74c3c', '#2ecc71'])
axes[0, 0].set_title('Comparaison des Accuracy', fontsize=14, fontweight='bold')
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].set_ylim([0.5, 1.0])
for i, v in enumerate(results_df['Accuracy']):
    axes[0, 0].text(i, v + 0.01, f'{v:.3f}', ha='center', fontweight='bold')

# Graphique 2: Comparaison des AUC
axes[0, 1].bar(results_df['Mod√®le'], results_df['AUC'], color=['#3498db', '#e74c3c', '#2ecc71'])
axes[0, 1].set_title('Comparaison des AUC', fontsize=14, fontweight='bold')
axes[0, 1].set_ylabel('AUC')
axes[0, 1].set_ylim([0.5, 1.0])
for i, v in enumerate(results_df['AUC']):
    axes[0, 1].text(i, v + 0.01, f'{v:.3f}', ha='center', fontweight='bold')

# Graphique 3: Temps d'entra√Ænement
axes[1, 0].bar(results_df['Mod√®le'], results_df['Temps (min)'], color=['#3498db', '#e74c3c', '#2ecc71'])
axes[1, 0].set_title('Temps d\'entra√Ænement', fontsize=14, fontweight='bold')
axes[1, 0].set_ylabel('Minutes')
for i, v in enumerate(results_df['Temps (min)']):
    axes[1, 0].text(i, v + 0.5, f'{v:.1f}', ha='center', fontweight='bold')

# Graphique 4: Comparaison Precision/Recall/F1
x = np.arange(len(results_df))
width = 0.25

axes[1, 1].bar(x - width, results_df['Precision'], width, label='Precision', color='#3498db')
axes[1, 1].bar(x, results_df['Recall'], width, label='Recall', color='#e74c3c')
axes[1, 1].bar(x + width, results_df['F1-Score'], width, label='F1-Score', color='#2ecc71')
axes[1, 1].set_title('Comparaison Precision/Recall/F1', fontsize=14, fontweight='bold')
axes[1, 1].set_ylabel('Score')
axes[1, 1].set_xticks(x)
axes[1, 1].set_xticklabels(results_df['Mod√®le'], rotation=15, ha='right')
axes[1, 1].legend()
axes[1, 1].set_ylim([0.5, 1.0])

plt.tight_layout()
plt.savefig('comparaison_finale_modeles.png', dpi=300, bbox_inches='tight')
plt.show()

code
#VSC-39cfbe35
python
# -------------------------------------------------------------
# 17) Comparaison des Matrices de Confusion
# -------------------------------------------------------------
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# CNN
cm_cnn = confusion_matrix(y_true, y_pred)
sns.heatmap(cm_cnn, annot=True, fmt='d', cmap='Blues', ax=axes[0],
            xticklabels=['NORMAL', 'PNEUMONIA'],
            yticklabels=['NORMAL', 'PNEUMONIA'])
axes[0].set_title(f'CNN (Acc: {test_acc:.3f})', fontweight='bold')
axes[0].set_ylabel('Vrai Label')
axes[0].set_xlabel('Pr√©diction')

# SVM
cm_svm = confusion_matrix(y_test, y_pred_svm)
sns.heatmap(cm_svm, annot=True, fmt='d', cmap='Reds', ax=axes[1],
            xticklabels=['NORMAL', 'PNEUMONIA'],
            yticklabels=['NORMAL', 'PNEUMONIA'])
axes[1].set_title(f'SVM (Acc: {svm_acc:.3f})', fontweight='bold')
axes[1].set_ylabel('Vrai Label')
axes[1].set_xlabel('Pr√©diction')

# Random Forest
cm_rf = confusion_matrix(y_test, y_pred_rf)
sns.heatmap(cm_rf, annot=True, fmt='d', cmap='Greens', ax=axes[2],
            xticklabels=['NORMAL', 'PNEUMONIA'],
            yticklabels=['NORMAL', 'PNEUMONIA'])
axes[2].set_title(f'Random Forest (Acc: {rf_acc:.3f})', fontweight='bold')
axes[2].set_ylabel('Vrai Label')
axes[2].set_xlabel('Pr√©diction')

plt.tight_layout()
plt.savefig('matrices_confusion_comparaison.png', dpi=300, bbox_inches='tight')
plt.show()
code
#VSC-0cb772f4
python
# -------------------------------------------------------------
# 18) Courbes ROC Comparatives
# -------------------------------------------------------------
plt.figure(figsize=(10, 8))

# CNN
fpr_cnn, tpr_cnn, _ = roc_curve(y_true, y_pred_proba)
plt.plot(fpr_cnn, tpr_cnn, label=f'CNN (AUC = {test_auc:.4f})', linewidth=2)

# SVM
fpr_svm, tpr_svm, _ = roc_curve(y_test, y_pred_svm_proba)
plt.plot(fpr_svm, tpr_svm, label=f'SVM (AUC = {svm_auc:.4f})', linewidth=2)

# Random Forest
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf_proba)
plt.plot(fpr_rf, tpr_rf, label=f'Random Forest (AUC = {rf_auc:.4f})', linewidth=2)

# Ligne de hasard
plt.plot([0, 1], [0, 1], 'k--', label='Hasard (AUC = 0.5)', linewidth=1)

plt.xlabel('Taux de Faux Positifs', fontsize=12)
plt.ylabel('Taux de Vrais Positifs', fontsize=12)
plt.title('Courbes ROC Comparatives des Trois Mod√®les', fontsize=14, fontweight='bold')
plt.legend(loc='lower right', fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('courbes_roc_comparaison.png', dpi=300, bbox_inches='tight')
plt.show()
code
#VSC-d989735b
python
# -------------------------------------------------------------
# 19) Sauvegarde des Mod√®les
# -------------------------------------------------------------
print("\nüíæ SAUVEGARDE DES MOD√àLES")
print("="*60)

# Sauvegarder le meilleur mod√®le CNN
cnn_model.save('best_cnn_model_final.h5')
print("‚úÖ Mod√®le CNN sauvegard√©: best_cnn_model_final.h5")

# Sauvegarder SVM et RF avec joblib
import joblib
joblib.dump(best_svm, 'best_svm_model.pkl')
joblib.dump(best_rf, 'best_rf_model.pkl')
print("‚úÖ Mod√®le SVM sauvegard√©: best_svm_model.pkl")
print("‚úÖ Mod√®le RF sauvegard√©: best_rf_model.pkl")

# Sauvegarder les r√©sultats
results_df.to_csv('resultats_comparaison_modeles.csv', index=False)
print("‚úÖ R√©sultats sauvegard√©s: resultats_comparaison_modeles.csv")

code
#VSC-35f0bb4c
python
from google.colab import files

print("T√©l√©chargement des fichiers...")

try:
    files.download('best_cnn_model_final.h5')
    files.download('best_svm_model.pkl')
    files.download('best_rf_model.pkl')
    files.download('resultats_comparaison_modeles.csv')
    print("\n‚úÖ Fichiers pr√™ts √† √™tre t√©l√©charg√©s.")
except Exception as e:
    print(f"Une erreur est survenue lors du t√©l√©chargement : {e}")

print("\nUne fois les fichiers t√©l√©charg√©s, vous pourrez les utiliser dans votre environnement local pour votre application Streamlit.")
code
#VSC-68cca564
python
# -------------------------------------------------------------
# 20) Analyse des Erreurs du Meilleur Mod√®le
# -------------------------------------------------------------
print("\nüîç ANALYSE DES ERREURS DU MEILLEUR MOD√àLE")
print("="*60)

# Utiliser le mod√®le CNN (g√©n√©ralement le meilleur)
test_gen.reset()

# R√©cup√©rer les noms de fichiers
filenames = test_gen.filenames
y_true_labels = ['PNEUMONIA' if y == 1 else 'NORMAL' for y in y_true]
y_pred_labels = ['PNEUMONIA' if y == 1 else 'NORMAL' for y in y_pred]

# Identifier les erreurs
errors_idx = np.where(y_true != y_pred)[0]
print(f"Nombre total d'erreurs: {len(errors_idx)} / {len(y_true)} ({len(errors_idx)/len(y_true)*100:.2f}%)")

# Faux Positifs (pr√©dit PNEUMONIA alors que NORMAL)
fp_idx = np.where((y_true == 0) & (y_pred == 1))[0]
print(f"Faux Positifs: {len(fp_idx)}")

# Faux N√©gatifs (pr√©dit NORMAL alors que PNEUMONIA)
fn_idx = np.where((y_true == 1) & (y_pred == 0))[0]
print(f"Faux N√©gatifs: {len(fn_idx)}")

# Cr√©er un DataFrame des erreurs
errors_df = pd.DataFrame({
    'Fichier': [filenames[i] for i in errors_idx],
    'Vrai_Label': [y_true_labels[i] for i in errors_idx],
    'Pred_Label': [y_pred_labels[i] for i in errors_idx],
    'Confiance': [y_pred_proba[i][0] for i in errors_idx]
})

print("\nüìã Exemples d'erreurs:")
print(errors_df.head(10))

# Visualiser quelques erreurs
fig, axes = plt.subplots(2, 5, figsize=(20, 8))
axes = axes.ravel()

for i, idx in enumerate(errors_idx[:10]):
    img_path = os.path.join(WORK_DIR, 'test', filenames[idx])
    img = plt.imread(img_path)

    axes[i].imshow(img, cmap='gray')
    axes[i].axis('off')
    axes[i].set_title(
        f"Vrai: {y_true_labels[idx]}\n"
        f"Pred: {y_pred_labels[idx]}\n"
        f"Conf: {y_pred_proba[idx][0]:.2f}",
        fontsize=10,
        color='red'
    )

plt.suptitle('Exemples d\'Erreurs de Classification', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('analyse_erreurs.png', dpi=300, bbox_inches='tight')
plt.show()

code
#VSC-aa166b85
python

# -------------------------------------------------------------
# 21) R√©sum√© Final pour le Rapport
# -------------------------------------------------------------
print("\n" + "="*70)
print(" " * 20 + "üìä R√âSUM√â FINAL DU PROJET")
print("="*70)

print(f"\nüéØ OBJECTIF: D√©tection de Pneumonie par Radiographie Thoracique")
print(f"\nüìÅ DONN√âES:")
print(f"  ‚Ä¢ Dataset: Chest X-Ray Images (Pneumonia)")
print(f"  ‚Ä¢ Training samples: {train_gen.samples}")
print(f"  ‚Ä¢ Validation samples: {val_gen.samples}")
print(f"  ‚Ä¢ Test samples: {test_gen.samples}")
print(f"  ‚Ä¢ Classes: NORMAL (0), PNEUMONIA (1)")

print(f"\nü§ñ MOD√àLES COMPAR√âS:")
print(f"  1. CNN avec Transfer Learning (MobileNetV2)")
print(f"     - Param√®tres totaux: {cnn_model.count_params():,}")
print(f"     - Temps d'entra√Ænement: {total_training_time/60:.2f} min")
print(f"  2. SVM avec features CNN")
print(f"     - Temps d'entra√Ænement: {svm_training_time/60:.2f} min")
print(f"  3. Random Forest avec features CNN")
print(f"     - Temps d'entra√Ænement: {rf_training_time/60:.2f} min")

print(f"\nüèÜ MEILLEUR MOD√àLE: {best_model_name}")
print(f"\nüìà PERFORMANCES DU MEILLEUR MOD√àLE (Test Set):")
best_idx = results_df['AUC'].idxmax()
print(f"  ‚Ä¢ Accuracy:    {results_df.loc[best_idx, 'Accuracy']:.4f} ({results_df.loc[best_idx, 'Accuracy']*100:.2f}%)")
print(f"  ‚Ä¢ AUC:         {results_df.loc[best_idx, 'AUC']:.4f}")
print(f"  ‚Ä¢ Precision:   {results_df.loc[best_idx, 'Precision']:.4f}")
print(f"  ‚Ä¢ Recall:      {results_df.loc[best_idx, 'Recall']:.4f}")
print(f"  ‚Ä¢ F1-Score:    {results_df.loc[best_idx, 'F1-Score']:.4f}")

print(f"\n‚ö†Ô∏è  ANALYSE CLINIQUE:")
print(f"  ‚Ä¢ Faux Positifs: {len(fp_idx)} (patients sains diagnostiqu√©s malades)")
print(f"  ‚Ä¢ Faux N√©gatifs: {len(fn_idx)} (patients malades non d√©tect√©s)")
print(f"  ‚Ä¢ Note: En contexte m√©dical, minimiser les Faux N√©gatifs est CRITIQUE")

print(f"\nüí° RECOMMANDATIONS:")
print(f"  ‚Ä¢ Le mod√®le peut servir d'outil d'aide au diagnostic")
print(f"  ‚Ä¢ Validation m√©dicale obligatoire avant d√©cision clinique")
print(f"  ‚Ä¢ Surveillance continue des performances en production")

print("\n" + "="*70)
print(f"Projet compl√©t√© le {datetime.now().strftime('%Y-%m-%d √† %H:%M:%S')}")
print("="*70)

code
#VSC-d90555e5
python

code
#VSC-3985f08e
python
# -------------------------------------------------------------
# Export explicite des DataFrames importants vers CSV
# -------------------------------------------------------------
import os
import pandas as pd
out_dir = 'notebook_outputs/tables'
os.makedirs(out_dir, exist_ok=True)
# Noms explicites si pr√©sents
to_save = {}
if 'results_df' in globals():
    to_save['results_df'] = results_df
if 'errors_df' in globals():
    to_save['errors_df'] = errors_df
# Chercher toutes les DataFrame pandas dans l'espace global
for name, val in list(globals().items()):
    try:
        if isinstance(val, pd.DataFrame) and name not in to_save:
            to_save[name] = val
    except Exception:
        pass
# Sauvegarder
for n, df in to_save.items():
    path = os.path.join(out_dir, f'{n}.csv')
    try:
        df.to_csv(path, index=False)
        print(f'Saved {path}')
    except Exception as e:
        print(f'Could not save {n}: {e}')
print('Export termin√©')


TensorFlow version: 2.20.0
GPU disponible: []


NameError: name 'code' is not defined