In [None]:
# update_readme.py
# -----------------------------------------------------------------------------
# Génère/actualise README.md à partir de reports/metrics.json
# - Gère 2 schémas :
#   1) {"train": {...}}  -> résumé train
#   2) {"ModA": {...}, "ModB": {...}} -> tableau comparatif des modèles (TEST)
# -----------------------------------------------------------------------------

import json
import os
from datetime import datetime
from pathlib import Path

METRICS_PATH = Path("reports/metrics.json")
FIG_DIR = Path("reports/figures")
README_PATH = Path("README.md")

def _fmt(x, nd=3):
    """Format *x* as a fixed-point decimal string with *nd* fractional digits.

    Any value that cannot be converted with ``float()`` or formatted (``None``,
    free text, ...) is returned as ``str(x)`` instead of raising.
    """
    try:
        as_float = float(x)
        rendered = format(as_float, f".{nd}f")
    except Exception:
        # Deliberately broad: an odd metric value must never abort rendering.
        rendered = str(x)
    return rendered

def _load_metrics():
    """Read and decode the JSON document stored at ``METRICS_PATH``.

    Returns:
        The object produced by ``json.load`` for the file content.

    Raises:
        FileNotFoundError: if ``METRICS_PATH`` does not exist.
    """
    if METRICS_PATH.exists():
        with METRICS_PATH.open("r", encoding="utf-8") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"Fichier introuvable: {METRICS_PATH}")

def _detect_schema(data: dict):
    """Classify the layout of the loaded metrics document.

    Returns:
        ``"train"`` when *data* is a dict whose ``"train"`` entry is a dict;
        ``"models"`` when *data* is a non-empty dict whose values are all
        dicts (the metric names inside them are not inspected);
        ``"unknown"`` otherwise.
    """
    if not isinstance(data, dict):
        return "unknown"
    # The "train" layout wins even when other top-level keys are present.
    if isinstance(data.get("train"), dict):
        return "train"
    if data and all(isinstance(entry, dict) for entry in data.values()):
        return "models"
    return "unknown"

def _render_train_section(train: dict) -> str:
    """Build the Markdown "train results" section from the ``train`` metrics dict.

    The table always lists PR-AUC, ROC-AUC, precision, recall and F1 (each
    value rendered through ``_fmt``); a threshold row is appended only when
    ``train["threshold"]`` is present and not ``None``.

    Args:
        train: mapping of metric name to value.

    Returns:
        The Markdown text: heading, source note, table and closing remark.
    """
    # (label shown in the table, key read from the metrics dict)
    labelled_keys = (
        ("PR-AUC", "pr_auc"),
        ("ROC-AUC", "roc_auc"),
        ("Precision (classe 1)", "precision"),
        ("Recall (classe 1)", "recall"),
        ("F1 (classe 1)", "f1"),
    )
    lines = ["| Métrique | Valeur |", "|---|---|"]
    lines.extend(
        f"| {label} | {_fmt(train.get(key))} |" for label, key in labelled_keys
    )

    threshold = train.get("threshold")
    if threshold is not None:
        lines.append(f"| Seuil optimal (Fβ=2) | {_fmt(threshold)} |")

    heading = (
        "## 📊 Résultats (train)\n"
        "\n"
        "*(données extraites de `reports/metrics.json`)*\n"
        "\n"
    )
    remark = "\n\n> Le seuil est calibré par **Fβ=2** (priorise le rappel).\n"
    return heading + "\n".join(lines) + remark

def _render_models_table(models: dict) -> str:
    """Render the Markdown comparison section for a ``{model name: metrics}`` mapping.

    Models are listed best first: PR-AUC descending, ties broken by recall
    descending. The first row is also summarised in a "best model" blockquote
    placed under the table.

    Args:
        models: mapping of model name to a dict of metrics. The keys read are
            ``pr_auc``, ``roc_auc``, ``precision``, ``recall``, ``f1`` and
            ``threshold``; missing keys are passed to ``_fmt`` as ``None``.

    Returns:
        The Markdown text: heading, source note, table and best-model summary
        (the summary is empty when *models* is empty).
    """
    import math  # local import: only needed for the NaN check in the sort key

    metric_keys = ("pr_auc", "roc_auc", "precision", "recall", "f1", "threshold")

    def _rank(value) -> float:
        """Coerce a raw metric to a float usable as a sort key.

        Raw JSON values may be numbers, numeric strings, ``None`` or NaN.
        Comparing them directly raises ``TypeError`` (str vs float) or gives an
        undefined order (NaN), so anything unusable ranks as 0.0 -- the same
        rank a missing metric has always had.
        """
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0
        return 0.0 if math.isnan(number) else number

    items = [
        {"model": name, **{key: m.get(key) for key in metric_keys}}
        for name, m in models.items()
    ]
    # Stable sort: models with identical scores keep their input order.
    items.sort(key=lambda d: (_rank(d["pr_auc"]), _rank(d["recall"])), reverse=True)

    # The best model is the head of the sorted list (highest PR-AUC).
    best = items[0] if items else None
    best_line = ""
    if best:
        best_line = (
            f"> 🏅 **Meilleur modèle (PR-AUC)** : **{best['model']}**  \n"
            f"> Scores — PR-AUC: {_fmt(best['pr_auc'])} | ROC-AUC: {_fmt(best['roc_auc'])} | "
            f"Precision: {_fmt(best['precision'])} | Recall: {_fmt(best['recall'])} | F1: {_fmt(best['f1'])}"
        )

    # Markdown table: one row per model, numeric columns right-aligned.
    rows = [
        "| Modèle | PR-AUC | ROC-AUC | Precision | Recall | F1 | Seuil |",
        "|---|---:|---:|---:|---:|---:|---:|",
    ]
    rows.extend(
        f"| {d['model']} | {_fmt(d['pr_auc'])} | {_fmt(d['roc_auc'])} | "
        f"{_fmt(d['precision'])} | {_fmt(d['recall'])} | {_fmt(d['f1'])} | {_fmt(d['threshold'])} |"
        for d in items
    )
    table = "\n".join(rows)

    return f"""## 🧠 Comparaison des modèles (test)

*(données extraites de `reports/metrics.json`)*

{table}

{best_line}
"""

def _figures_section() -> str:
    """Build the Markdown "visualisations" section.

    Lists every ``*.png`` file found directly in ``FIG_DIR`` (sorted, POSIX
    style paths, one bullet each). When the directory is missing or holds no
    PNG, a placeholder line is emitted instead.
    """
    bullets = (
        [f"- `{png.as_posix()}`" for png in sorted(FIG_DIR.glob("*.png"))]
        if FIG_DIR.exists()
        else []
    )
    # An empty bullet list joins to "", which falls through to the placeholder.
    listing = "\n".join(bullets) or "_(Aucune figure trouvée dans `reports/figures/`)_"
    return (
        "## 📈 Visualisations\n"
        "\n"
        "Les figures exportées (si présentes) :\n"
        "\n"
        f"{listing}\n"
    )

def _project_header() -> str:
    """Return the README title block, stamped with the current local time.

    The timestamp comes from ``datetime.now()`` and is formatted as
    ``YYYY-MM-DD HH:MM:SS``.
    """
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    parts = (
        "# 💳 Détection de Fraude — Projet",
        "",
        f"> **Dernière mise à jour automatique :** {stamp}",
        "",
        "Ce projet détecte des transactions bancaires frauduleuses à partir du dataset public",
        "**Credit Card Fraud Detection (Kaggle)**.",
    )
    return "\n".join(parts) + "\n"

def _project_structure() -> str:
    return """## 📂 Structure

In [None]:
def _tech_section() -> str:
    """Return the static Markdown section listing the stack, metrics and methods."""
    bullets = (
        "- Python (pandas, numpy, scikit-learn, imbalanced-learn, xgboost)",
        "- Métriques : **PR-AUC**, **ROC-AUC**, **Recall**, **Precision**, **F1**",
        "- Équilibrage : **SMOTE**",
        "- Optimisation : **GridSearchCV** (CV stratifiée, scoring = *average_precision*)",
    )
    return "## 🧩 Stack / Outils\n\n" + "\n".join(bullets) + "\n"

def _footer() -> str:
    """Return the closing Markdown: a horizontal rule and the auto-generation notice."""
    rule = "---"
    notice = "*Ce README est généré automatiquement à partir de `reports/metrics.json`.*"
    return f"{rule}\n\n{notice}\n"

def main():
    """Regenerate the README from the metrics file.

    Loads the metrics, picks the results section matching the detected schema
    (train summary, model comparison, or a warning when the schema is not
    recognised), then writes all sections to ``README_PATH`` separated by
    blank lines and prints the resolved output path.
    """
    metrics = _load_metrics()
    layout = _detect_schema(metrics)

    sections = [_project_header(), _project_structure(), _tech_section()]

    if layout == "train":
        results = _render_train_section(metrics["train"])
    elif layout == "models":
        results = _render_models_table(metrics)
    else:
        results = "⚠️ `reports/metrics.json` détecté mais schéma non reconnu. Attendu: `{'train': {...}}` ou `{model: metrics}`."

    sections += [results, _figures_section(), _footer()]

    README_PATH.write_text("\n\n".join(sections), encoding="utf-8")
    print(f"[OK] README mis à jour -> {README_PATH.resolve()}")

if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Top-level boundary: keep the short "[ERREUR] ..." report, but send it
        # to stderr and exit with a non-zero status so shells, Makefiles and CI
        # jobs can tell the README was not regenerated. A plain print() left
        # the exit status at 0. SystemExit with a string argument prints that
        # string to stderr and exits with status 1.
        raise SystemExit(f"[ERREUR] {e}") from e