<a href="https://colab.research.google.com/github/LIONPANJSHIR/Machie-learning-avec-r/blob/main/discretization_tools_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Save the contents of this cell as discretization_tools.py in your Google Drive or local project.
# It depends on the third-party package mapclassify; in a notebook, install it first with:
# !pip install mapclassify

from sklearn.preprocessing import KBinsDiscretizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import mapclassify
import pandas as pd

def _holdout_auc(X, y):
    """Return the hold-out ROC AUC of a logistic regression fitted on ``X``.

    A stratified 70/30 split with a fixed seed is used, so repeated calls with
    the same ``y`` are scored on the same rows.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.3, random_state=42
    )
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    # Column 1 is the predicted probability of the second class.
    y_pred = model.predict_proba(X_test)[:, 1]
    return roc_auc_score(y_test, y_pred)


def _discretise_variable(df, var, target, q):
    """Return ``{method: Series}`` with the four binnings of ``df[var]`` into ``q`` bins.

    The bin codes are returned as standalone Series aligned on ``df.index``;
    ``df`` itself is never modified.
    """
    values = df[[var]]

    qcut_bins = pd.qcut(df[var], q, duplicates='drop')

    kbd = KBinsDiscretizer(n_bins=q, encode='ordinal', strategy='kmeans')
    kmeans_bins = pd.Series(kbd.fit_transform(values).ravel(), index=df.index)

    # Supervised binning: each leaf of a small tree fitted on the target is a bin.
    tree = DecisionTreeClassifier(max_leaf_nodes=q, random_state=42)
    tree.fit(values, df[target])
    tree_bins = pd.Series(tree.apply(values), index=df.index)

    jenks = mapclassify.NaturalBreaks(df[var], k=q)
    jenks_bins = pd.Series(jenks.yb, index=df.index)

    return {
        "qcut": qcut_bins,
        "kmeans": kmeans_bins,
        "tree": tree_bins,
        "jenks": jenks_bins,
    }


def _one_hot(codes, name):
    """One-hot encode bin ``codes`` (first level dropped) with ``name`` as column prefix.

    ``columns=[name]`` is required: without it ``pd.get_dummies`` leaves numeric
    columns untouched, and k-means / tree / Jenks codes would be fed to the
    model as a single continuous feature instead of as categories.
    """
    return pd.get_dummies(codes.to_frame(name), columns=[name], drop_first=True)


def compare_discretisation_auc_all_vars(df, target, numeric_vars=None, bins_range=range(2, 6)):
    """Compare discretisation strategies for each numeric variable by hold-out AUC.

    For every variable and every bin count ``q`` in ``bins_range``, the variable
    is binned with four methods (quantiles ``qcut``, ``kmeans``, decision
    ``tree`` leaves, ``jenks`` natural breaks). A univariate logistic regression
    is then fitted on the raw variable (``"brute"``) and on the one-hot encoded
    bins of each method, and scored by ROC AUC on a stratified 30% hold-out.

    Args:
        df: Input DataFrame. It is not modified.
        target: Name of the target column. The AUC computation uses the
            probability of the second class, so a binary target is expected.
        numeric_vars: Columns to analyse. Defaults to every numeric column of
            ``df`` except ``target``.
        bins_range: Iterable of bin counts to try.

    Returns:
        A pair ``(results, best_configs)`` of DataFrames. ``results`` has one
        row per (var, q, method) with columns ``var``, ``q``, ``method``,
        ``AUC``. ``best_configs`` has one row per variable with columns
        ``var``, ``best_method``, ``best_q`` (``"aucun"`` when the raw variable
        wins) and ``best_auc``. Both are empty when nothing could be evaluated.

    Raises:
        KeyError: If ``target`` is not a column of ``df``.

    Notes:
        * Any failure while processing a given ``(var, q)`` is reported on
          stdout and that ``q`` is skipped (best-effort behaviour).
        * The discretisers, including the supervised tree, are fitted on all
          rows before the train/test split, so AUCs of binned features are
          optimistic.
        * The raw variable is re-scored for every ``q`` and therefore appears
          once per ``q`` in ``results``.
    """
    if target not in df.columns:
        raise KeyError(f"Colonne cible introuvable : {target!r}")

    results = []
    best_configs = []

    if numeric_vars is None:
        # errors='ignore': a bool/string/categorical target is not among the
        # numeric columns, and dropping it must not raise.
        numeric_vars = (
            df.select_dtypes(include='number')
            .drop(columns=[target], errors='ignore')
            .columns.tolist()
        )

    for var in numeric_vars:
        print(f"\n=== Variable : {var} ===")
        all_auc = []

        for q in bins_range:
            # Deliberately broad: the binning methods fail in different ways
            # (too few distinct values, NaNs, ...); a failing q is reported
            # and skipped instead of aborting the whole comparison.
            try:
                binned = _discretise_variable(df, var, target, q)

                features = {"brute": df[[var]]}
                for method, codes in binned.items():
                    features[method] = _one_hot(codes, f"{var}_{method}")

                y = df[target]
                for method, X in features.items():
                    auc = _holdout_auc(X, y)
                    results.append({"var": var, "q": q, "method": method, "AUC": auc})
                    all_auc.append((method, q, auc))
                    print(f"[{var}] q={q if method != 'brute' else 'aucun'} | {method:<6} → AUC: {auc:.4f}")
            except Exception as e:
                print(f"⚠️ Erreur pour {var} avec q={q}: {e}")
                continue

        if all_auc:
            # max() keeps the first maximum, so ties favour the earliest entry.
            best_method, best_q, best_auc = max(all_auc, key=lambda x: x[2])
            best_configs.append({
                "var": var,
                "best_method": best_method,
                "best_q": "aucun" if best_method == "brute" else best_q,
                "best_auc": best_auc
            })

    return pd.DataFrame(results), pd.DataFrame(best_configs)


Collecting mapclassify
  Downloading mapclassify-2.9.0-py3-none-any.whl.metadata (3.1 kB)
Downloading mapclassify-2.9.0-py3-none-any.whl (286 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 286.7/286.7 kB 4.3 MB/s eta 0:00:00
Installing collected packages: mapclassify
Successfully installed mapclassify-2.9.0
