In [1]:
import pandas as pd
import io
import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import VBox

# Automatic load of the default CSV file. A failure is reported rather than
# raised, leaving df_global as None.
encodage = "UTF-8"
sep = ","
df_global = None
try:
    df_global = pd.read_csv("cleanedData.csv", sep=sep, encoding=encodage)
except Exception as load_error:
    print("⚠️ Erreur chargement initial :", load_error)
else:
    print("📂 Fichier 'cleanedData.csv' chargé automatiquement.")

# --- Input widgets -----------------------------------------------------------

# CSV upload (single file).
upload_widget = widgets.FileUpload(
    accept='.csv',
    multiple=False,
)

# Minimum absolute correlation with the target for a feature to be kept.
threshold_widget = widgets.FloatSlider(
    description='Seuil corr:',
    value=0.3,
    min=0.0,
    max=1.0,
    step=0.05,
)

# Fraction of rows held out for validation.
test_size_widget = widgets.FloatSlider(
    description='Test size:',
    value=0.2,
    min=0.05,
    max=0.5,
    step=0.05,
)

# Target column selector; its options are filled once a frame is loaded.
target_dropdown = widgets.Dropdown(
    description='Col cible :',
    options=[],
    disabled=False,
)

# Models that can be trained; the first two are selected by default.
model_options = [
    'LogisticRegression',
    'RandomForest',
    'XGBoost',
    'LightGBM',
    'GradientBoosting',
]
models_checkbox = widgets.SelectMultiple(
    description='Modèles :',
    options=model_options,
    value=['LogisticRegression', 'RandomForest'],
    layout=widgets.Layout(height='120px'),
)

# Outputs that can be rendered for each run.
figures_options = [
    'Heatmap corrélations',
    'Rapport classification',
    'Matrice confusion (texte)',
    'Matrice confusion (image)',
    'Courbes seuil',
]
figures_checkbox = widgets.SelectMultiple(
    description='Afficher :',
    options=figures_options,
    value=[
        'Heatmap corrélations',
        'Rapport classification',
        'Matrice confusion (image)',
        'Courbes seuil',
    ],
    layout=widgets.Layout(height='140px'),
)

# Run trigger and the container that accumulates one section per run.
run_button = widgets.Button(description='Lancer', button_style='success')
output_area = widgets.VBox()

# Fonctions
def update_target_options():
    """Fill the target dropdown with the columns of the loaded frame.

    Does nothing while no frame is loaded (``df_global`` is None). Otherwise
    the dropdown lists every column and the last column is preselected.
    """
    if df_global is None:
        return
    column_names = df_global.columns.tolist()
    target_dropdown.options = column_names
    # Preselect the last column of the frame as the target.
    target_dropdown.value = column_names[-1]


# Populate the dropdown from the frame loaded at start-up, if any.
update_target_options()

def _first_uploaded_content(upload_value):
    """Return the raw bytes of the first uploaded file, or None if there is none.

    ``FileUpload.value`` has two shapes depending on the ipywidgets version:
    a mapping ``{filename: {'content': ...}}`` (7.x) or a tuple of per-file
    records with a ``'content'`` entry (8.x). Both are supported here.
    """
    if not upload_value:
        return None
    if isinstance(upload_value, dict):
        first_file = next(iter(upload_value.values()))
    else:
        first_file = upload_value[0]
    # The content may be a memoryview; normalise it to bytes for BytesIO.
    return bytes(first_file['content'])


def _show_upload_message(content_function):
    """Render ``content_function``'s output as a new section atop the results."""
    section = create_section_with_delete(content_function)
    output_area.children = (section,) + output_area.children


def handle_upload(change):
    """Observer for the upload widget: load the uploaded CSV into ``df_global``.

    On success the global frame is replaced, the target dropdown is refreshed
    and a preview of the frame is shown. On failure the error is shown and the
    previously loaded frame is left untouched.

    Args:
        change: the traitlets change notification (unused; the widget's
            current value is read directly).
    """
    global df_global
    content = _first_uploaded_content(upload_widget.value)
    if content is None:
        return
    try:
        df_global = pd.read_csv(io.BytesIO(content), sep=sep, encoding=encodage)
        update_target_options()
    except Exception as e:
        # Bind the exception to a local: the name `e` is cleared when the
        # except block ends, so the lambda must not rely on it.
        error = e
        _show_upload_message(lambda: print("❌ Erreur :", error))
        return
    loaded = df_global
    _show_upload_message(lambda: print("✅ Fichier chargé avec succès.\n", loaded.head()))


upload_widget.observe(handle_upload, names='value')

def select_features_by_corr(df, target_col, threshold=0.3):
    """Select the numeric columns whose correlation with the target is strong enough.

    Args:
        df: input DataFrame; only numeric columns take part in the correlation.
        target_col: name of the target column (must be among the numeric columns).
        threshold: minimum absolute correlation for a column to be kept.

    Returns:
        A ``(names, correlations)`` pair: the list of kept column names and the
        Series of their correlations with the target, in the same order.
    """
    target_corr = df.corr(numeric_only=True)[target_col].drop(target_col)
    strong_enough = target_corr.abs() >= threshold
    kept_corr = target_corr[strong_enough]
    return kept_corr.index.tolist(), kept_corr

def plot_filtered_corr_heatmap(df, target_col, threshold=0.3):
    """Draw a correlation heatmap restricted to variables related to the target.

    The heatmap covers the numeric columns whose absolute correlation with
    ``target_col`` is at least ``threshold``; the target itself is always
    included.

    Args:
        df: input DataFrame; only numeric columns are correlated.
        target_col: name of the target column.
        threshold: minimum absolute correlation with the target.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    corr_matrix = df.corr(numeric_only=True)
    target_corr = corr_matrix[target_col]
    kept = target_corr.index[target_corr.abs() >= threshold].tolist()
    # The target can be filtered out (e.g. undefined self-correlation); keep it.
    if target_col not in kept:
        kept.append(target_col)

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(corr_matrix.loc[kept, kept], annot=True, cmap='coolwarm', center=0, ax=ax)
    ax.set_title(f'Heatmap des variables corrélées ≥ {threshold} avec "{target_col}"')
    plt.show()

def _build_classifier(name):
    """Instantiate the classifier registered under ``name``.

    Each backing library is imported only when its model is requested, so a
    missing optional package (xgboost, lightgbm) cannot break runs that do
    not use it.

    Raises:
        KeyError: if ``name`` is not a supported model name.
        ImportError: if the library backing ``name`` is not installed.
    """
    if name == 'LogisticRegression':
        from sklearn.linear_model import LogisticRegression
        return LogisticRegression(max_iter=1000, random_state=42)
    if name == 'RandomForest':
        from sklearn.ensemble import RandomForestClassifier
        return RandomForestClassifier(n_estimators=100, random_state=42)
    if name == 'XGBoost':
        from xgboost import XGBClassifier
        return XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
    if name == 'LightGBM':
        from lightgbm import LGBMClassifier
        return LGBMClassifier(random_state=42)
    if name == 'GradientBoosting':
        from sklearn.ensemble import GradientBoostingClassifier
        return GradientBoostingClassifier(random_state=42)
    raise KeyError(name)


def pipeline_corr_model(df, target_col, threshold=0.3, test_size=0.2, display_items=None, selected_models=None):
    """Select features by correlation, then train and evaluate binary classifiers.

    Steps: keep the numeric columns whose absolute correlation with the target
    is at least ``threshold``; split the data (stratified, fixed seed); for
    each selected model, fit it, pick the probability cut-off that maximises
    F1 on the validation split, and print/plot the requested reports.

    Invalid inputs (no model selected, missing or non-numeric target, no
    correlated feature, non-binary target) are reported with a printed message
    and the function returns early.

    Args:
        df: input DataFrame.
        target_col: name of the binary target column.
        threshold: minimum absolute correlation for a feature to be used.
        test_size: fraction of rows held out for validation.
        display_items: names of the outputs to render; defaults to the
            classification report, the confusion-matrix image and the
            threshold curves.
        selected_models: names of the models to train.

    Returns:
        None. All results are printed or plotted.

    Raises:
        KeyError: if a name in ``selected_models`` is not a supported model.
    """
    if display_items is None:
        display_items = ['Rapport classification', 'Matrice confusion (image)', 'Courbes seuil']
    if selected_models is None or len(selected_models) == 0:
        print("⚠️ Aucun modèle sélectionné.")
        return

    # The correlation matrix only contains numeric columns: reject any other
    # target up front instead of failing later with a bare KeyError.
    if target_col not in df.columns or not pd.api.types.is_numeric_dtype(df[target_col]):
        print(f"⚠️ La cible '{target_col}' est absente ou non numérique : corrélation impossible.")
        return

    # Heavier dependencies are imported only once the inputs are known valid.
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import precision_recall_curve, classification_report, confusion_matrix, ConfusionMatrixDisplay

    selected_features, corrs = select_features_by_corr(df, target_col, threshold)
    if not selected_features:
        print(f"⛔ Aucune variable corrélée ≥ {threshold} avec la cible '{target_col}'.")
        return

    if 'Heatmap corrélations' in display_items:
        plot_filtered_corr_heatmap(df, target_col, threshold)

    print(f"Variables sélectionnées ({len(selected_features)}) avec corrélation ≥ {threshold} :")
    for feat in selected_features:
        print(f" - {feat}: corr={corrs[feat]:.3f}")

    X = df[selected_features]
    y = df[target_col]
    if y.nunique() != 2:
        print(f"⚠️ La cible '{target_col}' n'est pas binaire (classes={y.nunique()}).")
        return

    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=test_size, stratify=y, random_state=42)

    for name in selected_models:
        print(f"\n--- Modèle : {name} ---")
        try:
            model = _build_classifier(name)
        except ImportError as exc:
            # An optional library is missing: skip this model, keep the others.
            print(f"⚠️ Modèle '{name}' ignoré : dépendance manquante ({exc}).")
            continue
        model.fit(X_train, y_train)

        # Column 1 of predict_proba is the probability of classes_[1]; use the
        # real labels so targets other than 0/1 are scored correctly.
        negative_label, positive_label = model.classes_
        y_probs = model.predict_proba(X_valid)[:, 1]

        precision, recall, thresholds = precision_recall_curve(y_valid, y_probs, pos_label=positive_label)
        # Small epsilon avoids 0/0 where precision and recall are both zero.
        f1_scores = 2 * (precision * recall) / (precision + recall + 1e-8)
        # The last precision/recall point has no associated threshold.
        best_idx = np.argmax(f1_scores[:-1])
        best_threshold = thresholds[best_idx]
        best_f1 = f1_scores[best_idx]

        print(f"Meilleur seuil F1={best_f1:.3f} à {best_threshold:.3f}")
        y_pred = np.where(y_probs >= best_threshold, positive_label, negative_label)

        if 'Rapport classification' in display_items:
            print(classification_report(y_valid, y_pred))

        if 'Matrice confusion (texte)' in display_items:
            cm = confusion_matrix(y_valid, y_pred)
            print("Matrice de confusion (texte) :")
            print(f"[[TN FP]\n [FN TP]]\n{cm}")

        if 'Matrice confusion (image)' in display_items:
            disp = ConfusionMatrixDisplay(
                confusion_matrix=confusion_matrix(y_valid, y_pred),
                display_labels=model.classes_,
            )
            disp.plot(cmap='Blues')
            plt.title(f"Matrice de confusion - {name}")
            plt.show()

        if 'Courbes seuil' in display_items:
            plt.figure(figsize=(8, 4))
            plt.plot(thresholds, precision[:-1], label='Precision')
            plt.plot(thresholds, recall[:-1], label='Recall')
            plt.plot(thresholds, f1_scores[:-1], label='F1-score')
            plt.axvline(best_threshold, color='red', linestyle='--', label=f'Seuil optimal = {best_threshold:.2f}')
            plt.title(f"Scores vs seuil - {name}")
            plt.xlabel('Seuil')
            plt.ylabel('Score')
            plt.legend()
            plt.grid(True)
            plt.show()

# Build an independent result section with its own delete button.
def create_section_with_delete(content_function):
    """Run ``content_function`` inside an Output widget and wrap it in a section.

    Args:
        content_function: zero-argument callable, called inside the Output
            widget's context.

    Returns:
        A VBox holding a delete button above the Output widget. Clicking the
        button hides the whole section (its layout display is set to 'none').
    """
    captured = widgets.Output()
    with captured:
        content_function()

    remove_button = widgets.Button(description="🗑️ Supprimer cette section", button_style="danger")
    section = VBox([remove_button, captured])

    def hide_section(_button):
        section.layout.display = 'none'

    remove_button.on_click(hide_section)
    return section

# Handler for the "Lancer" button.
def on_run_clicked(b):
    """Run the pipeline with the current widget values and show it as a new section.

    Args:
        b: the clicked button (unused).
    """
    separator = "=" * 80

    def display_run():
        # Banner marking the start of this run's output.
        print(separator)
        print("🟢 Nouvelle exécution")
        print(separator)
        if df_global is not None:
            pipeline_corr_model(
                df_global,
                target_col=target_dropdown.value,
                threshold=threshold_widget.value,
                test_size=test_size_widget.value,
                display_items=figures_checkbox.value,
                selected_models=models_checkbox.value,
            )
        else:
            print("❌ Aucune donnée disponible.")

    new_section = create_section_with_delete(display_run)
    # Prepend so that the most recent run appears first.
    output_area.children = (new_section, *output_area.children)


run_button.on_click(on_run_clicked)

# User interface: the controls first, then the area collecting run results.
display(
    VBox(
        children=[
            widgets.Label("⬆️ Charger un fichier CSV (optionnel) :"),
            upload_widget,
            target_dropdown,
            threshold_widget,
            test_size_widget,
            models_checkbox,
            figures_checkbox,
            run_button,
            widgets.Label("📊 Résultats des exécutions successives :"),
            output_area,
        ]
    )
)


📂 Fichier 'cleanedData.csv' chargé automatiquement.


VBox(children=(Label(value='⬆️ Charger un fichier CSV (optionnel) :'), FileUpload(value=(), accept='.csv', des…