In [6]:
from pathlib import Path
import shutil, json, re

# Project root: the working directory when it holds a data/ folder, otherwise its parent.
_here = Path.cwd()
ROOT = _here if (_here / "data").exists() else _here.parent

# Project sub-folders used by the packaging steps.
MODELS = ROOT / "models"
REPORTS = ROOT / "reports"
ARTIFACT = ROOT / "artifacts"

def _read_json(p: Path):
    """Parse the UTF-8 encoded JSON file at ``p`` and return the decoded object."""
    with p.open(encoding="utf-8") as handle:
        return json.load(handle)

def freeze_artifact(tag_alias=("lgbm","lightgbm"), dst_dir: str | None = None) -> Path:
    """
    1) Cherche le dernier model_meta*.json dont meta['model_tag'] ∈ tag_alias (case-insensitive).
       Sinon fallback: dernier meta trouvé.
    2) Récupère 'stamp' depuis meta['version'] ou depuis le nom du méta (AAAAmmjj-HHMMSS).
    3) Cherche le .pkl qui contient ce stamp ; fallback: dernier .pkl contenant un alias.
    4) Copie modèle, méta, rapport & images dans artifacts/prod_{stamp}/
    """
    tag_set = {t.lower() for t in tag_alias}

    meta_files = sorted(MODELS.glob("model_meta*.json"))
    if not meta_files:
        raise FileNotFoundError(f"Aucun meta dans {MODELS}")

    # 1) pick meta
    meta_path = None
    meta = None
    for p in reversed(meta_files):
        try:
            m = _read_json(p)
            mt = str(m.get("model_tag","")).lower()
            if mt in tag_set:
                meta_path, meta = p, m
                break
        except Exception:
            continue
    if meta_path is None:
        # fallback: tout dernier meta
        meta_path = meta_files[-1]
        meta = _read_json(meta_path)

    # 2) stamp
    stamp = meta.get("version")
    if not stamp:
        m = re.search(r"(\d{8}-\d{6})", meta_path.stem)
        if not m:
            raise ValueError(f"Impossible d'inférer le stamp depuis {meta_path}")
        stamp = m.group(1)

    # 3) model pkl
    pkl_candidates = [q for q in MODELS.glob("*.pkl") if stamp in q.stem]
    if not pkl_candidates:
        # fallback: dernier .pkl qui contient un alias dans le nom
        pkl_candidates = [q for q in MODELS.glob("*.pkl")
                          if any(a in q.stem.lower() for a in tag_set)]
    if not pkl_candidates:
        raise FileNotFoundError(f"Aucun .pkl correspondant (stamp={stamp}, alias={tag_alias})")
    model_path = sorted(pkl_candidates)[-1]

    # 4) copies
    report_md = REPORTS / f"model_report_{stamp}.md"
    fi_png    = REPORTS / "feature_importance.png"
    cm_png    = REPORTS / "confusion_matrix.png"

    out = ARTIFACT / (dst_dir or f"prod_{stamp}")
    out.mkdir(parents=True, exist_ok=True)

    shutil.copy2(model_path, out / "model.pkl")
    shutil.copy2(meta_path,  out / "model_meta.json")
    if report_md.exists(): shutil.copy2(report_md, out / "model_report.md")
    if fi_png.exists():    shutil.copy2(fi_png, out / "feature_importance.png")
    if cm_png.exists():    shutil.copy2(cm_png, out / "confusion_matrix.png")

    # requirements mini
    (out/"requirements.txt").write_text(
        "lightgbm\nscikit-learn\npandas\nnumpy\njoblib\nfastapi\nuvicorn\ntabulate\n",
        encoding="utf-8"
    )

    # README simple
    readme = f"""# Artefact modèle (figé)
- modèle : {meta.get('model_tag','?')} ({stamp})
- seuil (t*) : {meta.get('threshold',0):.3f}
- coûts : FP={meta.get('cost_fp','?')}×  |  FN={meta.get('cost_fn','?')}×
- features ({len(meta.get('features',[]))}) : {', '.join(meta.get('features',[]))}

## Lancer l'API
pip install -r requirements.txt
uvicorn api:app --host 0.0.0.0 --port 8000
"""
    (out/"README.md").write_text(readme, encoding="utf-8")

    print("✔ meta :", meta_path.name, "| model_tag:", meta.get("model_tag"))
    print("✔ model:", model_path.name)
    print(f"✅ artefact figé dans: {out}")
    return out


In [7]:
# Freeze the LightGBM model (either tag spelling) and display the artifact folder.
ART_DIR = freeze_artifact(tag_alias=("lgbm", "lightgbm"))
ART_DIR

✔ meta : model_meta_20250905-153538.json | model_tag: None
✔ model: lgbm_isotonic_20250905-153538.pkl
✅ artefact figé dans: c:\Users\FFix\Desktop\Formation Data Science\Projects\OpenClassroom\Project 07\projet7_scoring\artifacts\prod_20250905-153538


WindowsPath('c:/Users/FFix/Desktop/Formation Data Science/Projects/OpenClassroom/Project 07/projet7_scoring/artifacts/prod_20250905-153538')

In [9]:
# 04_packaging — Cell 2: write serve.py into the artifact folder.
# The generated module exposes:
#   - load_artifact(art_dir)           -> (model, meta) read from model.pkl / model_meta.json
#   - predict_df(df, model, meta)      -> input frame plus "proba" and "risk" columns
#   - predict_records(records, ...)    -> list of {"proba", "risk"} dicts
# NOTE: model.pkl is loaded with joblib (pickle-based) — only serve artifacts you produced yourself.
code = r'''
import json, joblib
import pandas as pd
from pathlib import Path

def load_artifact(art_dir: str | Path):
    art_dir = Path(art_dir)
    model = joblib.load(art_dir/"model.pkl")
    meta  = json.loads((art_dir/"model_meta.json").read_text(encoding="utf-8"))
    return model, meta

def _ensure_features(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
    # garde l'ordre exact, crée les colonnes manquantes à NaN (imputer du pipeline gère)
    X = df.reindex(columns=features)
    return X

def predict_df(df: pd.DataFrame, model, meta, return_proba=True) -> pd.DataFrame:
    cols = meta["features"]
    t = float(meta["threshold"])
    X = _ensure_features(df, cols)
    proba = model.predict_proba(X)[:, 1]
    yhat = (proba >= t).astype(int)
    out = df.copy()
    out["proba"] = proba
    out["risk"]  = yhat
    return out if return_proba else yhat

def predict_records(records: list[dict], model, meta):
    # empty batch: nothing to score, so return early instead of calling
    # predict_proba on a 0-row frame
    if not records:
        return []
    df = pd.DataFrame.from_records(records)
    out = predict_df(df, model, meta)
    return out[["proba","risk"]].to_dict(orient="records")
'''
(ART_DIR/"serve.py").write_text(code, encoding="utf-8")
print("✅ serve.py écrit →", ART_DIR/"serve.py")


✅ serve.py écrit → c:\Users\FFix\Desktop\Formation Data Science\Projects\OpenClassroom\Project 07\projet7_scoring\artifacts\prod_20250905-153538\serve.py


In [11]:
import sys, importlib
from pathlib import Path

# Smoke test: import the generated serve.py from the artifact folder and score a few rows.
import pandas as pd

# ART_DIR must point to the artifacts/prod_... folder
print("ART_DIR =", ART_DIR)
assert (ART_DIR / "serve.py").exists(), "serve.py n'a pas été généré dans ART_DIR"

# PROC is otherwise only assigned further down in the notebook; define it here so
# this cell also works after Restart Kernel -> Run All.
PROC = ROOT / "data" / "processed"

# make the artifact folder importable
p = str(ART_DIR)
if p not in sys.path:
    sys.path.insert(0, p)

# clean (re)load when the module was already imported, so a rewritten serve.py is picked up
if "serve" in sys.modules:
    importlib.reload(sys.modules["serve"])

from serve import load_artifact, predict_df

model, meta = load_artifact(ART_DIR)
df = pd.read_csv(PROC / "train_features_v1.csv", nrows=5)
res = predict_df(df, model, meta)
res.head()


ART_DIR = c:\Users\FFix\Desktop\Formation Data Science\Projects\OpenClassroom\Project 07\projet7_scoring\artifacts\prod_20250905-153538




Unnamed: 0,SK_ID_CURR,TARGET,n_bureau,n_bureau_months,n_prev,n_inst_pay,n_pos_cash,n_ccb,credit_income_perc,annuity_income_perc,...,employed_years,ext1,ext2,ext3,cnt_children,cnt_family_members,n_prev_apps,is_new_to_credit,proba,risk
0,100002,1,8.0,110.0,1.0,19.0,19.0,0.0,2.007889,0.121978,...,1.744011,0.083037,0.262949,0.139376,0,1.0,0,0,0.407106,1
1,100003,0,4.0,0.0,3.0,25.0,28.0,0.0,4.79075,0.132217,...,3.252567,0.311267,0.622246,,0,2.0,0,0,0.036509,0
2,100004,0,2.0,0.0,1.0,3.0,4.0,0.0,2.0,0.1,...,0.616016,,0.555912,0.729567,0,1.0,0,0,0.040064,0
3,100006,0,0.0,0.0,9.0,16.0,21.0,6.0,2.316167,0.2199,...,8.320329,,0.650442,,0,2.0,0,1,0.061732,0
4,100007,0,1.0,0.0,6.0,66.0,66.0,0.0,4.222222,0.179963,...,8.317591,,0.322738,,0,1.0,0,0,0.05786,0


In [12]:
# 04_packaging — Cell 4: write api.py (FastAPI app) into the artifact folder.
# The generated app loads the artifact sitting next to api.py and exposes
# GET /health, GET /meta and POST /predict.
# "model_tag" and "version" are read with .get(): the freeze step treats both as
# optional meta keys, so indexing them directly could raise KeyError.
api_code = r'''
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
from pathlib import Path

from serve import load_artifact, predict_records

ART_DIR = Path(__file__).resolve().parent
model, meta = load_artifact(ART_DIR)

app = FastAPI(title="Scoring API", version=str(meta.get("version") or "unknown"))

class Payload(BaseModel):
    records: List[dict]

@app.get("/health")
def health():
    return {"ok": True, "model": meta.get("model_tag"), "version": meta.get("version")}

@app.get("/meta")
def get_meta():
    m = meta.copy()
    m["n_features"] = len(m["features"])
    return m

@app.post("/predict")
def predict(payload: Payload):
    try:
        out = predict_records(payload.records, model, meta)
        return {"ok": True, "n": len(out), "predictions": out}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
'''
(ART_DIR/"api.py").write_text(api_code, encoding="utf-8")
print("✅ api.py écrit →", ART_DIR/"api.py")


✅ api.py écrit → c:\Users\FFix\Desktop\Formation Data Science\Projects\OpenClassroom\Project 07\projet7_scoring\artifacts\prod_20250905-153538\api.py


In [20]:
import json, numpy as np, pandas as pd, requests
from pathlib import Path

# Project folders: the working directory when it holds a data/ folder, otherwise its parent.
_cwd = Path.cwd()
ROOT = _cwd if (_cwd / "data").exists() else _cwd.parent
MODELS = ROOT / "models"
REPORTS = ROOT / "reports"
DATA = ROOT / "data"
PROC = DATA / "processed"
ARTIFACT = ROOT / "artifacts"
ART_DIR = ARTIFACT / "prod_20250905-153538"


# 1) columns expected by the model
with (ART_DIR / "model_meta.json").open(encoding="utf-8") as _meta_file:
    meta = json.load(_meta_file)
cols = meta["features"]

# 2) two demo rows, with NaN turned into None so the JSON payload carries nulls
_demo_rows = pd.read_csv(PROC / "train_features_v1.csv", nrows=2)
df = _demo_rows[cols]
_records_json = df.replace({np.nan: None}).to_json(orient="records")
payload = {"records": json.loads(_records_json)}

# 3) API call
r = requests.post("http://127.0.0.1:8000/predict", json=payload, timeout=10)
print(r.status_code, r.reason)
print(r.json())


200 OK
{'ok': True, 'n': 2, 'predictions': [{'proba': 0.40710644243942645, 'risk': 1}, {'proba': 0.03650872058411355, 'risk': 0}]}
