In [None]:
# Réinstallation propre des bonnes versions
!pip uninstall -y scikit-learn imbalanced-learn
!pip install scikit-learn==1.3.2 imbalanced-learn==0.11.0

In [None]:
import os
 import cv2
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 import tensorflow as tf
 import random
 from tensorflow.keras.applications import Xception
 from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
 from sklearn.preprocessing import LabelEncoder
 from sklearn.model_selection import train_test_split
 from tensorflow.keras.utils import to_categorical
 from imblearn.over_sampling import RandomOverSampler
 from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
 from tensorflow.keras.models import Model

In [None]:
# Apply seaborn's "whitegrid" style to the figures drawn in this notebook.
sns.set(style="whitegrid")

In [None]:
# Root folder of the HAM10000 dataset as mounted on Kaggle; change it here only
# when running the notebook somewhere else.
DATA_ROOT = "/kaggle/input/skin-cancer-mnist-ham10000/"

metadata = pd.read_csv(DATA_ROOT + "HAM10000_metadata.csv")
# The images are looked up in these two folders (trailing slash kept on purpose).
image_dir_1 = DATA_ROOT + "HAM10000_images_part_1/"
image_dir_2 = DATA_ROOT + "HAM10000_images_part_2/"

metadata.head()

In [None]:
# Number of metadata rows for each value of the "sex" column (plotted in the next cell).
sex_counts = metadata["sex"].value_counts()

In [None]:
# Bar chart of `sex_counts`, with the count written just above each bar.
fig, ax = plt.subplots(figsize=(6, 6))
bars = ax.bar(sex_counts.index, sex_counts.values, color=['lightblue', 'pink'])
for rect in bars:
    count = rect.get_height()
    x_mid = rect.get_x() + rect.get_width()/2
    # +10 lifts the label slightly above the top of the bar.
    ax.text(x_mid, count + 10, f"{int(count)}", ha='center', fontsize=12, fontweight='bold')
ax.set_title("Répartition des patients par sexe", fontsize=14)
ax.set_ylabel("Nombre de patients")
ax.grid(axis='y', linestyle='--', alpha=0.5)
fig.tight_layout()
plt.show()

In [None]:
# Count plot of the diagnosis column "dx", bars ordered by descending frequency
# and annotated with their integer height.
fig, ax = plt.subplots(figsize=(10, 6))
dx_order = metadata["dx"].value_counts().index
sns.countplot(x=metadata["dx"], order=dx_order, palette="Set2", ax=ax)
for patch in ax.patches:
    count = int(patch.get_height())  # bar heights are floats; show them as integers
    label_xy = (patch.get_x() + patch.get_width() / 2., count)
    ax.annotate(f'{count}', label_xy, ha='center', va='bottom', fontsize=12)
ax.set_title(" Distribution des classes (types de maladies)", fontsize=14)
ax.set_xlabel("Type de lésion")
ax.set_ylabel("Nombre d'images")
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Horizontal count plot of "dx" split by "sex"; the legend sits outside the axes on the right.
fig, ax = plt.subplots(figsize=(10, 6))
dx_by_frequency = metadata['dx'].value_counts().index
sns.countplot(data=metadata, y='dx', hue='sex', order=dx_by_frequency, palette='Set2', ax=ax)

# One container per hue level: label every bar with its count.
for bar_group in ax.containers:
    ax.bar_label(bar_group, fmt='%d', label_type='edge', padding=3, fontsize=10)

legend_options = dict(
    title="Sexe",
    loc='center left',
    bbox_to_anchor=(1.02, 0.5),
    borderaxespad=0,
    fontsize=11,
)
ax.legend(**legend_options)
ax.set_title("Répartition des maladies selon le sexe", fontsize=14)
ax.set_xlabel("Nombre de cas")
ax.set_ylabel("Type de lésion")
fig.tight_layout()
plt.show()

In [None]:
# Horizontal count plot of "localization" split by "sex" (larger canvas: many categories).
fig, ax = plt.subplots(figsize=(15, 9))
localization_order = metadata['localization'].value_counts().index
sns.countplot(
    data=metadata, y='localization', hue='sex',
    order=localization_order, palette="pastel", ax=ax,
)

# One container per hue level: label every bar with its count.
for bar_group in ax.containers:
    ax.bar_label(bar_group, fmt='%d', label_type='edge', padding=3, fontsize=11)

ax.legend(title="Sexe", loc='center left', bbox_to_anchor=(1.02, 0.5),
          borderaxespad=0, fontsize=12)
ax.set_title("Localisation des lésions selon le sexe", fontsize=16)
ax.set_xlabel("Nombre de cas", fontsize=13)
ax.set_ylabel("Localisation", fontsize=13)
ax.grid(axis='x', linestyle='--', alpha=0.6)
# Keep the right 15 % of the figure free for the legend placed outside the axes.
fig.tight_layout(rect=[0, 0, 0.85, 1])
plt.show()

In [None]:
def load_image(image_id, size=(72, 72)):
    """Load one image as a normalised RGB float array.

    The file ``<image_id>.jpg`` is looked up in ``image_dir_1`` first, then in
    ``image_dir_2``.

    Args:
        image_id: File name without the ``.jpg`` extension.
        size: Target ``(width, height)`` passed to ``cv2.resize``; defaults to
            the 72x72 resolution used by the rest of the notebook.

    Returns:
        A ``float32`` array scaled to ``[0, 1]``, or ``None`` when the file is in
        neither folder or cannot be decoded by OpenCV.
    """
    filename = image_id + ".jpg"
    for folder in (image_dir_1, image_dir_2):
        img_path = os.path.join(folder, filename)
        if os.path.exists(img_path):
            break
    else:
        # Not present in either folder: report "missing" without asking OpenCV
        # to open a path that is known not to exist.
        return None

    img = cv2.imread(img_path)
    if img is None:
        return None
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR
    img = cv2.resize(img, size)
    return img.astype("float32") / 255.0

# Load every image listed in the metadata together with its diagnosis label.
# Only two columns are needed, so iterate over them directly instead of
# materialising a Series per row with `iterrows`.
images, labels = [], []
skipped = 0
for image_id, dx in zip(metadata["image_id"], metadata["dx"]):
    img = load_image(image_id)
    if img is None:
        skipped += 1
        continue
    images.append(img)
    labels.append(dx)
if skipped:
    # Make dropped rows visible instead of silently shrinking the dataset.
    print(f"Images ignorées (introuvables ou illisibles) : {skipped}")

X = np.array(images)

# Integer-encode the diagnosis strings; `class_names[k]` is the label of class index k.
le = LabelEncoder()
y = le.fit_transform(labels)
class_names = le.classes_
y_cat = to_categorical(y)

print("Dimensions des images :", X.shape)
print("Noms des classes :", class_names)

In [None]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from tensorflow.keras.utils import to_categorical
from collections import Counter
import numpy as np

In [None]:
# SMOTE and the under-sampler work on 2-D feature matrices: one flat row per image.
X_flat = X.reshape(len(X), -1)

# Class distribution before balancing.
counter = Counter(y)
print("Distribution initiale :", counter)

# Target: the same number of samples for every class.
desired_count = 5000

# Step 1 - SMOTE raises every class that is below the target up to the target.
smote_strategy = {}
for cls, count in counter.items():
    if count < desired_count:
        smote_strategy[cls] = desired_count
smote = SMOTE(sampling_strategy=smote_strategy, random_state=42)
X_smote, y_smote = smote.fit_resample(X_flat, y)

# Step 2 - random under-sampling brings the classes above the target down to it.
final_strategy = dict.fromkeys(np.unique(y_smote), desired_count)
rus = RandomUnderSampler(sampling_strategy=final_strategy, random_state=42)
X_bal, y_bal = rus.fit_resample(X_smote, y_smote)

# Back to image tensors and one-hot labels.
X_bal = X_bal.reshape(-1, 72, 72, 3)
y_bal_cat = to_categorical(y_bal)

# Sanity checks.
print("✅ Distribution finale :", Counter(y_bal))
print("✅ Dimensions équilibrées :", X_bal.shape)
print("✅ Labels one-hot :", y_bal_cat.shape)

In [None]:
# Per-class sample counts after balancing, ordered by class index,
# and the matching class names for the x axis of the next plot.
balanced_labels = pd.Series(y_bal)
counts_after = balanced_labels.value_counts().sort_index()
labels_after = pd.Index(class_names[counts_after.index.to_numpy()])

In [None]:
# Bar plot of the balanced class distribution, each bar annotated with its count.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=labels_after, y=counts_after.values, palette="Set3", ax=ax)
ax.set_title("Distribution des classes après équilibrage (Oversampling)", fontsize=14)
ax.set_xlabel("Classe")
ax.set_ylabel("Nombre d'images")
for position, count in zip(range(len(counts_after)), counts_after.values):
    # +5 lifts the label slightly above the top of the bar.
    ax.text(position, count + 5, str(int(count)), ha='center', va='bottom', fontsize=12)
fig.tight_layout()
plt.show()

In [None]:
# Hold out the test set from the ORIGINAL images *before* any resampling.
# Splitting the already-balanced `X_bal` would put SMOTE samples (interpolated
# between real images) on both sides of the split and inflate every test metric.
# `stratify=y` keeps the original class proportions in the test set.
X_train_raw, X_test, y_train_idx, y_test_idx = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Balance the training portion only, with the same recipe and target
# (`desired_count`) as the balancing cell above: SMOTE up, then under-sample down.
train_flat = X_train_raw.reshape(len(X_train_raw), -1)
oversample_to = {
    cls: desired_count
    for cls, n in Counter(y_train_idx).items()
    if n < desired_count
}
if oversample_to:
    train_flat, y_train_idx = SMOTE(
        sampling_strategy=oversample_to, random_state=42
    ).fit_resample(train_flat, y_train_idx)
undersample_to = {cls: desired_count for cls in np.unique(y_train_idx)}
train_flat, y_train_idx = RandomUnderSampler(
    sampling_strategy=undersample_to, random_state=42
).fit_resample(train_flat, y_train_idx)

# The resampled arrays are not guaranteed to be shuffled; shuffle once here so
# that any order-dependent consumer sees a class-mixed training set.
order = np.random.default_rng(42).permutation(len(train_flat))
X_train = train_flat[order].reshape((-1,) + X.shape[1:])

# Fix the one-hot width so that it never depends on which classes are present.
n_classes = len(class_names)
y_train = to_categorical(y_train_idx[order], num_classes=n_classes)
y_test = to_categorical(y_test_idx, num_classes=n_classes)

print(" Taille de l'ensemble d'entraînement :", X_train.shape)
print(" Taille de l'ensemble de test :", X_test.shape)

print("y_train shape :", y_train.shape)

In [None]:
# One randomly chosen training image per class, shown side by side.
fig = plt.figure(figsize=(16, 4))
train_class_ids = np.argmax(y_train, axis=1)  # one-hot -> class index
for i in range(7):
    indices = np.where(train_class_ids == i)[0]
    idx = random.choice(indices)

    panel = fig.add_subplot(1, 7, i + 1)
    panel.imshow(X_train[idx])
    panel.axis("off")
    panel.set_title(class_names[i].upper(), fontweight='bold', fontsize=14)
fig.suptitle("Exemple par classe", fontsize=18, fontweight='bold')
fig.tight_layout()
plt.show()

In [None]:
import tensorflow as tf
import numpy as np

def light_augment_smote(image):
    """Apply a mild random augmentation to one image tensor.

    Steps, in order: random horizontal flip, brightness jitter (max delta 0.08),
    contrast jitter (factor in [0.95, 1.05]), pad/crop to 76x76 followed by a
    random 72x72x3 crop (a small random translation), then clipping to [0, 1].

    Args:
        image: Image tensor; the final crop requires 3 channels.

    Returns:
        The augmented 72x72x3 tensor with values clipped to [0, 1].
    """
    flipped = tf.image.random_flip_left_right(image)
    brightened = tf.image.random_brightness(flipped, 0.08)
    contrasted = tf.image.random_contrast(brightened, 0.95, 1.05)
    # Enlarge the canvas to 76x76 so the random 72x72 crop can shift the content.
    padded = tf.image.resize_with_crop_or_pad(contrasted, 76, 76)
    cropped = tf.image.random_crop(padded, size=(72, 72, 3))
    return tf.clip_by_value(cropped, 0.0, 1.0)
def apply_smote_augmentation(X_input, batch_size=64):
    """Run `light_augment_smote` over every image and return the result as one array.

    Args:
        X_input: Array-like of images, converted to a float32 tensor.
        batch_size: Number of images per batch in the tf.data pipeline.

    Returns:
        A NumPy array holding the augmented images, in input order.
    """
    X_tensor = tf.convert_to_tensor(X_input, dtype=tf.float32)
    pipeline = (
        tf.data.Dataset.from_tensor_slices(X_tensor)
        .map(light_augment_smote, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )
    augmented_batches = []
    for batch in pipeline:
        augmented_batches.append(batch.numpy())
    return np.concatenate(augmented_batches, axis=0)


# Augmented copy of the balanced image set.
X_bal_aug = apply_smote_augmentation(X_bal)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import random
import tensorflow as tf

In [None]:
# Augmentation function to preview in the grid below.
augment_function = light_augment_smote

# Top row: one random training image per class; bottom row: the same image augmented.
fig = plt.figure(figsize=(14, 4))
train_class_ids = np.argmax(y_train, axis=1)  # one-hot -> class index
for i in range(7):
    indices = np.where(train_class_ids == i)[0]
    idx = random.choice(indices)
    sample = X_train[idx]

    # Original image
    top_panel = fig.add_subplot(2, 7, i + 1)
    top_panel.imshow(sample)
    top_panel.axis("off")
    top_panel.set_title(class_names[i].upper() + "\n(Originale)", fontweight='bold', fontsize=11)

    # Augmented image (row 2 starts at subplot index 8)
    augmented = augment_function(tf.convert_to_tensor(sample)).numpy()
    bottom_panel = fig.add_subplot(2, 7, i + 8)
    bottom_panel.imshow(augmented)
    bottom_panel.axis("off")
    bottom_panel.set_title("(Augmentée)", fontweight='bold', fontsize=11)
fig.suptitle("Exemple par classe — Originale vs Augmentée", fontsize=16, fontweight='bold')
fig.tight_layout()
plt.show()

In [None]:
@tf.keras.utils.register_keras_serializable(package="vit_xception")
class _AddPositionEmbedding(tf.keras.layers.Layer):
    """Add a learned position embedding to a (batch, num_patches, dim) token tensor.

    Wrapping the embedding in a layer keeps its weights inside the model graph,
    so they are trained and saved with the rest of the network.
    """

    def __init__(self, num_patches, dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.dim = dim
        self.embedding = tf.keras.layers.Embedding(input_dim=num_patches, output_dim=dim)

    def call(self, tokens):
        positions = tf.range(start=0, limit=self.num_patches)
        # (num_patches, dim) broadcasts over the batch dimension.
        return tokens + self.embedding(positions)

    def get_config(self):
        config = super().get_config()
        config.update({"num_patches": self.num_patches, "dim": self.dim})
        return config


def create_vit_xception(input_shape=(72, 72, 3), num_classes=7):
    """Build the two-branch classifier: a small ViT-style encoder plus Xception.

    Both branches read the same input image; their features are concatenated
    and fed to a dense classification head.

    Args:
        input_shape: Shape of one input image, pixel values expected in [0, 1].
        num_classes: Number of softmax outputs.

    Returns:
        An uncompiled ``tf.keras.Model`` named ``"ViT_Xception_HAM10000"``.
    """
    inputs = tf.keras.Input(shape=input_shape, name="input_image")

    # --- ViT branch ---
    # Non-overlapping 6x6 patches projected to 64 channels, flattened to tokens.
    patches = tf.keras.layers.Conv2D(64, kernel_size=6, strides=6, padding='valid')(inputs)
    tokens = tf.keras.layers.Reshape((-1, 64))(patches)
    # Calling Embedding directly on `tf.range(...)` here would run it eagerly and
    # add its output as a frozen random constant; the wrapper layer makes the
    # position embedding a trainable part of the model.
    encoded = _AddPositionEmbedding(num_patches=tokens.shape[1], dim=64)(tokens)
    for _ in range(4):
        # Pre-norm block: self-attention and a one-layer MLP, each with a residual.
        x1 = tf.keras.layers.LayerNormalization()(encoded)
        attn = tf.keras.layers.MultiHeadAttention(num_heads=4, key_dim=64)(x1, x1)
        x2 = tf.keras.layers.Add()([encoded, attn])
        x3 = tf.keras.layers.LayerNormalization()(x2)
        mlp = tf.keras.layers.Dense(64, activation='gelu')(x3)
        encoded = tf.keras.layers.Add()([x2, mlp])
    vit_output = tf.keras.layers.Flatten()(encoded)

    # --- Xception branch ---
    # The ImageNet weights were trained on inputs scaled to [-1, 1]
    # (`xception.preprocess_input`); map the [0, 1] images onto that range.
    xcp_inputs = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0,
                                           name="xception_rescale")(inputs)
    base_xcp = Xception(include_top=False, weights='imagenet', input_tensor=xcp_inputs)
    base_xcp.trainable = True  # fine-tune the whole backbone
    xcp_output = tf.keras.layers.GlobalAveragePooling2D()(base_xcp.output)

    # --- Fusion head ---
    fusion = tf.keras.layers.Concatenate()([vit_output, xcp_output])
    fusion = tf.keras.layers.BatchNormalization()(fusion)
    fusion = tf.keras.layers.Dense(256, activation='gelu',
                                   kernel_regularizer=tf.keras.regularizers.l2(1e-4))(fusion)
    fusion = tf.keras.layers.Dropout(0.5)(fusion)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(fusion)
    model = tf.keras.Model(inputs=inputs, outputs=outputs, name="ViT_Xception_HAM10000")
    return model


model = create_vit_xception()

In [None]:
# Compile the model: Adam at 1e-4, categorical cross-entropy on the one-hot labels,
# tracking accuracy plus precision and recall.
optimizer = tf.keras.optimizers.Adam(1e-4)
tracked_metrics = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=tracked_metrics)
model.summary()

In [None]:
# Training callbacks: keep the best weights on disk, stop when validation accuracy
# stalls, and halve the learning rate when validation loss plateaus.
callbacks = [
    ModelCheckpoint("best_vit_xception.keras",
                    save_best_only=True,
                    monitor="val_accuracy",
                    mode="max",
                    verbose=1),

    EarlyStopping(monitor="val_accuracy",
                  patience=4,
                  restore_best_weights=True,
                  verbose=1),

    ReduceLROnPlateau(monitor='val_loss',
                      factor=0.5,
                      patience=2,
                      verbose=1,
                      min_lr=1e-6)
]

# Model selection (checkpointing, early stopping, LR schedule) must not look at
# the test set, otherwise the test metrics reported afterwards are biased upwards.
# Hold out 10 % of the training data for validation instead; Keras takes the
# LAST samples of the arrays, before shuffling, so X_train must be class-mixed.
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    batch_size=64,
    epochs=50,
    callbacks=callbacks
)

In [None]:
import matplotlib.pyplot as plt
# Per-epoch metric lists recorded by `fit`, and the 1-based epoch axis for the curves.
history_dict = history.history
n_epochs_run = len(history_dict['loss'])
epochs = range(1, n_epochs_run + 1)

In [None]:
# Training vs validation curves: loss on the left, accuracy on the right.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))
curve_panels = [
    # (axes, train key, val key, val colour, train label, val label, y label, title)
    (ax_loss, 'loss', 'val_loss', 'orange',
     'Perte entraînement', 'Perte validation', 'Perte', ' Courbe de Perte'),
    (ax_acc, 'accuracy', 'val_accuracy', 'green',
     'Précision entraînement', 'Précision validation', 'Précision', ' Courbe de Précision'),
]
for panel, train_key, val_key, val_color, train_label, val_label, y_label, title in curve_panels:
    panel.plot(epochs, history_dict[train_key], 'b--o', label=train_label)
    panel.plot(epochs, history_dict[val_key], val_color, marker='s', label=val_label)
    panel.set_xlabel('Époques')
    panel.set_ylabel(y_label)
    panel.set_title(title)
    panel.legend()
    panel.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Evaluate on the held-out set; the returned values follow the compile order:
# loss, accuracy, precision, recall.
scores = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_acc, test_precision, test_recall = scores
print(f"\n Accuracy test : {test_acc:.4f}")
print(f" Précision test : {test_precision:.4f}")
print(f" Rappel test : {test_recall:.4f}")

In [None]:
# Predicted probabilities -> predicted class index; one-hot targets -> true class index.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Display names MUST follow the LabelEncoder order (`class_names[k]` is the label
# of class index k). A hand-written list in any other order silently attaches
# the wrong lesion name to every per-class score, matrix row and ROC curve.
class_labels = [str(name).upper() for name in class_names]
print("\n Rapport de classification :")
print(classification_report(
    y_true, y_pred_classes,
    labels=np.arange(len(class_labels)),  # keep all classes even if one is absent
    target_names=class_labels,
))

In [None]:
# Confusion matrix (rows: true class, columns: predicted class) drawn as a heat-map.
cm = confusion_matrix(y_true, y_pred_classes)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm, annot=True, fmt="d", cmap="Blues",
    xticklabels=class_labels, yticklabels=class_labels, ax=ax,
)
ax.set_xlabel("Prédit")
ax.set_ylabel("Réel")
ax.set_title(" Matrice de Confusion")
fig.tight_layout()
plt.show()

In [None]:
# Per-class counts derived from the confusion matrix (rows: true, columns: predicted).
TP = np.diag(cm)
FP = np.sum(cm, axis=0) - TP
FN = np.sum(cm, axis=1) - TP
TN = np.sum(cm) - (TP + FP + FN)

_EPS = 1e-10  # avoids 0/0 for a class that is never predicted or never present

# Overall accuracy = correctly classified samples / all samples.
# (Averaging the one-vs-rest (TP + TN) / N over classes counts every true
# negative as a success and reports a much higher number than the accuracy.)
exactitude = TP.sum() / (np.sum(cm) + _EPS)

# Macro averages: compute each metric per class first, then take the mean.
precision_per_class = TP / (TP + FP + _EPS)
recall_per_class = TP / (TP + FN + _EPS)
f1_per_class = (2 * precision_per_class * recall_per_class
                / (precision_per_class + recall_per_class + _EPS))

precision = np.mean(precision_per_class)
rappel = np.mean(recall_per_class)
# Macro F1 is the mean of the per-class F1 scores, not the F1 of the mean
# precision and mean recall.
f1_score = np.mean(f1_per_class)

metrics_names = ["Exactitude", "Précision", "Rappel", "F1-Score"]
metrics_values = [exactitude, precision, rappel, f1_score]
colors = ['limegreen', 'forestgreen', 'seagreen', 'mediumseagreen']

In [None]:
# Bar chart of the four global metrics, each bar annotated with its value.
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(metrics_names, metrics_values, color=colors, alpha=0.8)
for rect in bars:
    value = rect.get_height()
    x_mid = rect.get_x() + rect.get_width()/2
    ax.text(x_mid, value + 0.03, f"{value:.4f}",
            ha='center', fontsize=14, fontweight='bold')
ax.set_ylim(0, 1.1)  # head-room above 1.0 for the value labels
ax.set_title("Performances Globales", fontsize=16, fontweight='bold')
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Per-class precision / recall / F1 taken from the sklearn report and drawn as
# three side-by-side bar charts.
report = classification_report(y_true, y_pred_classes, output_dict=True)
# The last three report entries are the aggregate rows, not classes.
classes = list(report.keys())[:-3]
precision, recall, f1 = (
    [report[c][metric] for c in classes]
    for metric in ('precision', 'recall', 'f1-score')
)

x = np.arange(len(classes))
fig, ax = plt.subplots(1, 3, figsize=(18, 5))
metric_panels = (
    (precision, 'royalblue', "Précision par classe"),
    (recall, 'orange', "Rappel par classe"),
    (f1, 'green', "F1-Score par classe"),
)
drawn_bars = []
for panel, (values, bar_color, panel_title) in zip(ax, metric_panels):
    rects = panel.bar(x, values, color=bar_color, alpha=0.8)
    panel.set_title(panel_title, fontweight='bold', fontsize=14)
    panel.set_xticks(x)
    panel.set_xticklabels(class_labels, rotation=45, fontweight='bold')
    for rect in rects:
        # Value label just above the bar.
        panel.text(rect.get_x() + rect.get_width()/2, rect.get_height() + 0.01,
                   f"{rect.get_height():.2f}", ha='center', color='red', fontweight='bold', fontsize=12)
    drawn_bars.append(rects)
bars1, bars2, bars3 = drawn_bars
plt.tight_layout()
plt.show()

In [None]:
# Number of correctly / incorrectly classified test samples for each TRUE class.
n_classes = len(class_labels)
is_hit = (y_true == y_pred_classes)
correct = np.bincount(y_true[is_hit], minlength=n_classes).astype(float)
incorrect = np.bincount(y_true[~is_hit], minlength=n_classes).astype(float)

plt.figure(figsize=(12, 7))
bar_width = 0.4
x_indexes = np.arange(n_classes)

# Two bars per class, centred on the class tick.
bars_correct = plt.bar(x_indexes - bar_width/2, correct, width=bar_width, label='Bien classé', color='green')
bars_incorrect = plt.bar(x_indexes + bar_width/2, incorrect, width=bar_width, label='Mal classé', color='red')

for i in range(n_classes):
    # The last keyword argument of both calls was cut off in the original cell
    # (a syntax error); restored as a bold font weight.
    plt.text(x_indexes[i] - bar_width/2, correct[i] + 2, f"{int(correct[i])}",
             ha='center', va='bottom', fontsize=11, color='green', fontweight='bold')

    # A zero-height bar gets no label.
    if incorrect[i] > 0:
        plt.text(x_indexes[i] + bar_width/2, incorrect[i] + 2, f"{int(incorrect[i])}",
                 ha='center', va='bottom', fontsize=11, color='red', fontweight='bold')

plt.xticks(ticks=x_indexes, labels=class_labels, rotation=45, fontsize=12)
plt.xlabel("Classe", fontsize=14)
plt.ylabel("Nombre d'images", fontsize=14)
plt.title("Prédictions Correctes vs Incorrectes par Classe", fontsize=16)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.6)
# Keep the right 15 % of the figure free for the legend placed outside the axes.
plt.tight_layout(rect=[0, 0, 0.85, 1])
plt.show()

In [None]:
# One-vs-rest ROC curve and AUC for every class, from the predicted probabilities.
y_pred_proba = model.predict(X_test)
n_outputs = y_test.shape[1]
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_outputs):
    false_pos, true_pos, _ = roc_curve(y_test[:, i], y_pred_proba[:, i])
    fpr[i], tpr[i] = false_pos, true_pos
    roc_auc[i] = auc(false_pos, true_pos)

fig, ax = plt.subplots(figsize=(10, 8))
colors = ['blue', 'green', 'red', 'orange', 'purple', 'brown', 'pink']
for i in range(n_outputs):
    curve_label = f"{class_labels[i]} (AUC = {roc_auc[i]:.4f})"
    ax.plot(fpr[i], tpr[i], lw=2, color=colors[i], label=curve_label)

# Diagonal = chance level.
ax.plot([0, 1], [0, 1], color='gray', linestyle='--')
ax.set_xlabel("Taux de Faux Positifs (FPR)")
ax.set_ylabel("Taux de Vrais Positifs (TPR)")
ax.set_title("Courbes ROC par classe")
ax.legend(loc='lower right')
ax.grid(True, linestyle="--", alpha=0.6)
plt.show()

In [None]:
 # Affichage des AUC
# Text summary of the per-class AUC values computed for the ROC plot.
print("\n🔹Valeurs AUC par classe :")
for class_idx in range(len(class_labels)):
    print(f"{class_labels[class_idx]} : AUC = {roc_auc[class_idx]:.4f}")

In [None]:
def extract_gradcam_insights(img_array, class_index, model, class_name, threshold=0.3):
    """Compute a Grad-CAM style heat-map and summarise its activated region.

    The heat-map is taken at the layer ``"block14_sepconv2_act"`` for the score
    of ``class_index``. Channel weights are the spatial mean of the positive
    gradients; the weighted activation map is ReLU-ed, normalised by its maximum
    and resized to the input image size.

    Args:
        img_array: Batched input of shape (1, height, width, channels); only the
            first element of the batch is analysed.
        class_index: Index of the model output to explain.
        model: Keras model containing a layer named ``"block14_sepconv2_act"``.
        class_name: Unused by the computation; kept for caller compatibility.
        threshold: Heat-map value above which a pixel counts as activated.

    Returns:
        A 4-tuple ``(center, bbox, area, heatmap)``:
        ``center`` is the ``(x, y)`` position of the heat-map maximum (``None``
        when no gradient is available); ``bbox`` is ``(x_min, y_min, x_max,
        y_max)`` of the pixels above ``threshold`` (``None`` when there are
        none); ``area`` is the fraction of the image above ``threshold`` in
        [0, 1]; ``heatmap`` is the (height, width) heat-map.
        NOTE(review): the original function ended without a return on the
        non-empty path, so the bbox/area conventions above are a reconstruction
        - confirm them against the caller.
    """
    last_conv_layer = model.get_layer("block14_sepconv2_act")
    grad_model = Model(inputs=model.input, outputs=[last_conv_layer.output, model.output])

    # Size the heat-map from the input instead of assuming 72x72.
    img_h, img_w = int(img_array.shape[1]), int(img_array.shape[2])

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        loss = predictions[:, class_index]

    grads = tape.gradient(loss, conv_outputs)
    if grads is None:
        return None, None, 0.0, np.zeros((img_h, img_w))

    conv_outputs = conv_outputs[0]
    grads = grads[0]
    weights = tf.reduce_mean(tf.nn.relu(grads), axis=(0, 1))
    cam = tf.reduce_sum(weights * conv_outputs, axis=-1)
    heatmap = tf.nn.relu(cam)
    heatmap = heatmap / (tf.reduce_max(heatmap) + 1e-10)
    heatmap_np = heatmap.numpy()

    # cv2.resize takes (width, height).
    heatmap_resized = cv2.resize(heatmap_np, (img_w, img_h))

    # Location of the strongest activation, reported as (x, y).
    y_idx, x_idx = np.unravel_index(np.argmax(heatmap_resized), heatmap_resized.shape)
    heatmap_center = (x_idx, y_idx)

    # Activated region: bounding box and relative area.
    binary_mask = heatmap_resized > threshold
    coords = np.argwhere(binary_mask)  # rows of (y, x)
    if coords.shape[0] == 0:
        return heatmap_center, None, 0.0, heatmap_resized

    y_min, x_min = coords.min(axis=0)
    y_max, x_max = coords.max(axis=0)
    bbox = (int(x_min), int(y_min), int(x_max), int(y_max))
    area = float(coords.shape[0]) / float(binary_mask.size)
    return heatmap_center, bbox, area, heatmap_resized

plt.figure(figsize=(20, 6))
y_true = np.argmax(y_test, axis=1)
images_shown = 0
for i in range(7):
    found = False
    attempts = 0
    indices = np.where(y_true == i)[0]
    while not found and attempts < 50:
        idx = np random choice(indices)