In [1]:
import joblib

MODEL_PATH = "../backend/models/lightgbm_production_artifact_20250415_081218.pkl"

# Deserialize the artifact file with joblib.
artifacts = joblib.load(MODEL_PATH)

# List the top-level keys of the loaded object.
print("🔍 Clés disponibles dans artifacts :", artifacts.keys())

# Fall back to an empty dict so the key listing still works when "metadata" is absent.
metadata = artifacts.get("metadata", {})
print("🔍 Clés disponibles dans metadata :", metadata.keys())

# Report on the feature list stored under metadata["features"], if there is one.
features = metadata.get("features")
if features is not None:
    print(f"✅ Nombre de features : {len(features)}")
    print("Extrait des features :", features[:5])  # first five entries only
else:
    print("❌ 'features' est manquant dans metadata.")



🔍 Clés disponibles dans artifacts : dict_keys(['scaler', 'model', 'metadata'])
🔍 Clés disponibles dans metadata : dict_keys(['optimal_threshold', 'training_date', 'features', 'dtype_example', 'training_stats', 'performance_metrics', 'training_strategy'])
✅ Nombre de features : 85
Extrait des features : ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'AMT_CREDIT', 'ANNUITY_INCOME_PERC']


In [6]:
import pickle

# Path of the pickled file to inspect.
# NOTE(review): pickle.load executes arbitrary code on load; only use it on trusted files.
artifact_path = "../backend/models/lightgbm_production_artifact_20250415_081218.pkl"

# Deserialize the file content.
with open(artifact_path, 'rb') as artifact_file:
    artifacts = pickle.load(artifact_file)

# Show the type of the loaded object to understand its structure.
print("🔍 Type des artifacts :", type(artifacts))

# Preview the first five elements. The slice assumes a sequence-like object
# (the recorded output of this cell shows a NumPy array).
preview = artifacts[:5]
print("🔍 Extrait des artifacts :", preview)
element_count = len(artifacts)
print("longueur du tableau", element_count)


🔍 Type des artifacts : <class 'numpy.ndarray'>
🔍 Extrait des artifacts : ['EXT_SOURCE_1' 'EXT_SOURCE_2' 'EXT_SOURCE_3' 'AMT_CREDIT'
 'ANNUITY_INCOME_PERC']
longueur du tableau 85


In [9]:
import pandas as pd

# Build the input frame for ONE client and check it against the expected features.
TEST_SAMPLE_FOR_API = "../backend/data/test_1000_sample_for_api.csv"
TEST_SAMPLE_WITH_TARGET = "../backend/data/test_1000_sample_with_target.csv"

df_test = pd.read_csv(TEST_SAMPLE_WITH_TARGET)
df_api = pd.read_csv(TEST_SAMPLE_FOR_API)

# Take a single scalar ID (first row). Comparing against the whole SK_ID_CURR
# Series would do a row-by-row comparison between the two files instead of
# selecting one client, and raises when the two frames are not identically indexed.
client_id = df_test["SK_ID_CURR"].iloc[0]
client_data = df_api[df_api["SK_ID_CURR"] == client_id].drop(columns=["SK_ID_CURR"])

# `artifacts` comes from a previously executed cell. Depending on which cell ran
# last it is either the dict bundle (features under metadata["features"]) or the
# bare sequence of feature names; accept both.
if isinstance(artifacts, dict):
    expected_features = artifacts["metadata"]["features"]
else:
    expected_features = artifacts

# Expected columns that are absent from the input data.
missing_cols = set(expected_features) - set(client_data.columns)

# Report the result.
if missing_cols:
    print("⚠️ Colonnes manquantes dans les données d'entrée :", missing_cols)
else:
    print("✅ Toutes les colonnes nécessaires sont présentes dans les données.")


✅ Toutes les colonnes nécessaires sont présentes dans les données.


In [19]:
import joblib

# Load the saved artifact bundle.
model_path = "../backend/models/lightgbm_production_artifact_20250415_081218.pkl"
artifacts = joblib.load(model_path)

# Pull out the model and the scaler.
model = artifacts['model']
scaler = artifacts['scaler']

# Metadata: decision threshold and feature list.
metadata = artifacts['metadata']
optimal_threshold = metadata['optimal_threshold']
features = metadata['features']

# Keep only the columns listed in the metadata, in that order.
# `client_data` must already exist (it is built in an earlier cell).
client_data = client_data[features]

# Scale once and rewrap the result so column names and index are preserved
# (the previous version ran scaler.transform twice and discarded the first result).
client_data_scaled = pd.DataFrame(
    scaler.transform(client_data),
    columns=client_data.columns,
    index=client_data.index
)

# Column 1 of predict_proba is what this cell reports as the default probability.
pred_proba = model.predict_proba(client_data_scaled)[:, 1]

# Apply the stored threshold to get the final 0/1 decision.
pred = (pred_proba >= optimal_threshold).astype(int)

# Display the result for the first row.
print(f"Probabilité de défaut : {pred_proba[0]:.2%}")
print(f"Décision : {'🛑 Refusé' if pred[0] == 1 else '✅ Accepté'}")


Probabilité de défaut : 3.91%
Décision : ✅ Accepté


In [16]:
import pickle
import lightgbm as lgb

# Path of the pickled file to inspect.
# NOTE(review): pickle.load executes arbitrary code on load; only use it on trusted files.
artifact_path = "../backend/models/lightgbm_production_artifact_20250415_081218.pkl"

# Deserialize the file content.
with open(artifact_path, 'rb') as artifact_file:
    artifacts = pickle.load(artifact_file)

# Report type and shape. `.shape` assumes an array-like object
# (the recorded output of this cell shows a NumPy array).
print("🔍 Type des artifacts :", type(artifacts))
artifact_shape = artifacts.shape
print("🔍 Forme des artifacts :", artifact_shape)

# Preview the first five elements.
preview = artifacts[:5]
print("🔍 Extrait des artifacts :", preview)


🔍 Type des artifacts : <class 'numpy.ndarray'>
🔍 Forme des artifacts : (85,)
🔍 Extrait des artifacts : ['EXT_SOURCE_1' 'EXT_SOURCE_2' 'EXT_SOURCE_3' 'AMT_CREDIT'
 'ANNUITY_INCOME_PERC']


In [20]:
import requests
import pandas as pd
import random
import json

# URL of the prediction API (adjust for your environment).
API_URL = "http://localhost:8000/predict"

# Load the test sample.
df_test = pd.read_csv("../backend/data/test_1000_sample_for_api.csv")

# Draw one client at random and turn its row (minus the ID) into a plain dict.
random_client = df_test.sample(n=1)
client_data = random_client.drop(columns=["SK_ID_CURR"]).to_dict(orient="records")[0]

# Show the selected client's data.
print("Données du client sélectionné :")
print(client_data)

# Request body: the API expects the features under the "data" key.
data = {
    "data": client_data
}

# Send the request. Without a timeout, requests waits indefinitely when the
# server is unreachable or stalls, which would hang the notebook cell.
response = requests.post(API_URL, json=data, timeout=10)

# Show the API response.
if response.status_code == 200:
    result = response.json()
    print("Réponse de l'API :")
    # ensure_ascii=False keeps accents/emoji in the response readable
    # instead of printing \uXXXX escapes.
    print(json.dumps(result, indent=4, ensure_ascii=False))
else:
    print(f"Erreur {response.status_code} : {response.text}")



Données du client sélectionné :
{'EXT_SOURCE_1': 0.5074440332442849, 'EXT_SOURCE_2': 0.7109608784019691, 'EXT_SOURCE_3': 0.7267112092725122, 'AMT_CREDIT': 1113840.0, 'ANNUITY_INCOME_PERC': 0.4222333333333333, 'BURO_DAYS_CREDIT_ENDDATE_MEAN': -65.0, 'BURO_DAYS_CREDIT_UPDATE_MEAN': -72.0, 'DAYS_BIRTH': -19274.0, 'DAYS_ID_PUBLISH': -2805.0, 'FLAG_EMP_PHONE': 1.0, 'NAME_INCOME_TYPE_PENSIONER': 0.0, 'BURO_AMT_CREDIT_SUM_MAX': 485640.0, 'BURO_CREDIT_ACTIVE_CLOSED_MEAN': 1.0, 'BURO_CREDIT_TYPE_CAR_LOAN_MEAN': 0.0, 'BURO_CREDIT_TYPE_CREDIT_CARD_MEAN': 0.0, 'BURO_CREDIT_TYPE_MICROLOAN_MEAN': 0.0, 'BURO_CREDIT_TYPE_MORTGAGE_MEAN': 0.0, 'BURO_DAYS_CREDIT_ENDDATE_MAX': -65.0, 'BURO_DAYS_CREDIT_MEAN': -1161.0, 'CODE_GENDER': 1.0, 'DAYS_EMPLOYED_PERC': 0.2647089343156584, 'DAYS_LAST_PHONE_CHANGE': -802.0, 'DAYS_REGISTRATION': -9316.0, 'DEF_60_CNT_SOCIAL_CIRCLE': 0.0, 'FLAG_DOCUMENT_3': 1.0, 'FLAG_DOCUMENT_6': 0.0, 'NAME_HOUSING_TYPE_RENTED_APARTMENT': 0.0, 'OCCUPATION_TYPE_ACCOUNTANTS': 0.0, 'ORGANI

In [21]:

import requests
import json

API_URL = "http://localhost:8000/predict"

# Hard-coded request body for one client. It must be bound to `data`: as a bare
# expression the literal was discarded and the request below silently reused
# whatever `data` an earlier cell had left in the kernel.
data = {
  "data": {
    "EXT_SOURCE_1": 0.5074440332442849,
    "EXT_SOURCE_2": 0.7109608784019691,
    "EXT_SOURCE_3": 0.7267112092725122,
    "AMT_CREDIT": 1113840.0,
    "ANNUITY_INCOME_PERC": 0.4222333333333333,
    "BURO_DAYS_CREDIT_ENDDATE_MEAN": -65.0,
    "BURO_DAYS_CREDIT_UPDATE_MEAN": -72.0,
    "DAYS_BIRTH": -19274.0,
    "DAYS_ID_PUBLISH": -2805.0,
    "FLAG_EMP_PHONE": 1.0,
    "NAME_INCOME_TYPE_PENSIONER": 0.0,
    "BURO_AMT_CREDIT_SUM_MAX": 485640.0,
    "BURO_CREDIT_ACTIVE_CLOSED_MEAN": 1.0,
    "BURO_CREDIT_TYPE_CAR_LOAN_MEAN": 0.0,
    "BURO_CREDIT_TYPE_CREDIT_CARD_MEAN": 0.0,
    "BURO_CREDIT_TYPE_MICROLOAN_MEAN": 0.0,
    "BURO_CREDIT_TYPE_MORTGAGE_MEAN": 0.0,
    "BURO_DAYS_CREDIT_ENDDATE_MAX": -65.0,
    "BURO_DAYS_CREDIT_MEAN": -1161.0,
    "CODE_GENDER": 1.0,
    "DAYS_EMPLOYED_PERC": 0.2647089343156584,
    "DAYS_LAST_PHONE_CHANGE": -802.0,
    "DAYS_REGISTRATION": -9316.0,
    "DEF_60_CNT_SOCIAL_CIRCLE": 0.0,
    "FLAG_DOCUMENT_3": 1.0,
    "FLAG_DOCUMENT_6": 0.0,
    "NAME_HOUSING_TYPE_RENTED_APARTMENT": 0.0,
    "OCCUPATION_TYPE_ACCOUNTANTS": 0.0,
    "ORGANIZATION_TYPE_BUSINESS_ENTITY_TYPE_3": 1.0,
    "ORGANIZATION_TYPE_MILITARY": 0.0,
    "ORGANIZATION_TYPE_SELF_EMPLOYED": 0.0,
    "PAYMENT_RATE": 0.0511756625727213,
    "REGION_POPULATION_RELATIVE": 0.04622,
    "REGION_RATING_CLIENT_W_CITY": 1.0,
    "REG_CITY_NOT_LIVE_CITY": 0.0,
    "ACTIVE_DAYS_CREDIT_ENDDATE_MEAN": 716.0,
    "AMT_REQ_CREDIT_BUREAU_QRT": 0.0,
    "APPROVED_AMT_ANNUITY_MEAN": 12188.835,
    "APPROVED_DAYS_DECISION_MIN": -802.0,
    "APPROVED_HOUR_APPR_PROCESS_START_MEAN": 14.666666666666666,
    "BURO_AMT_CREDIT_MAX_OVERDUE_MEAN": 0.0,
    "BURO_AMT_CREDIT_SUM_DEBT_SUM": 0.0,
    "BURO_CREDIT_TYPE_ANOTHER_TYPE_OF_LOAN_MEAN": 0.0,
    "BURO_CREDIT_TYPE_LOAN_FOR_BUSINESS_DEVELOPMENT_MEAN": 0.0,
    "BURO_CREDIT_TYPE_LOAN_FOR_THE_PURCHASE_OF_EQUIPMENT_MEAN": 0.0,
    "BURO_DAYS_CREDIT_MAX": -1161.0,
    "BURO_DAYS_CREDIT_VAR": 386627.0666666667,
    "CLOSED_AMT_CREDIT_SUM_SUM": 485640.0,
    "ELEVATORS_AVG": 0.0,
    "FLAG_DOCUMENT_13": 0.0,
    "FLAG_DOCUMENT_15": 0.0,
    "FLAG_DOCUMENT_16": 0.0,
    "FLAG_DOCUMENT_17": 0.0,
    "FLAG_DOCUMENT_20": 0.0,
    "FLAG_DOCUMENT_21": 0.0,
    "FLAG_OWN_CAR": 0.0,
    "FLAG_WORK_PHONE": 0.0,
    "FLOORSMAX_MODE": 0.1667,
    "INCOME_CREDIT_PERC": 0.12120232708468,
    "INCOME_PER_PERSON": 67500.0,
    "INSTAL_AMT_PAYMENT_MAX": 16267.815,
    "INSTAL_AMT_PAYMENT_MEAN": 13673.899285714286,
    "INSTAL_AMT_PAYMENT_MIN": 547.2,
    "INSTAL_AMT_PAYMENT_SUM": 382869.18,
    "INSTAL_DAYS_ENTRY_PAYMENT_MAX": -36.0,
    "INSTAL_DAYS_ENTRY_PAYMENT_SUM": -10352.0,
    "INSTAL_DBD_MAX": 29.0,
    "INSTAL_DBD_MEAN": 4.928571428571429,
    "INSTAL_DBD_SUM": 138.0,
    "INSTAL_DPD_MEAN": 0.4285714285714285,
    "INSTAL_PAYMENT_DIFF_MEAN": 514.0446428571429,
    "INSTAL_PAYMENT_DIFF_SUM": 14393.25,
    "LIVE_CITY_NOT_WORK_CITY": 0.0,
    "LIVINGAREA_MODE": 0.0731,
    "NAME_EDUCATION_TYPE_ACADEMIC_DEGREE": 0.0,
    "NAME_EDUCATION_TYPE_SECONDARY__SECONDARY_SPECIAL": 1.0,
    "NAME_FAMILY_STATUS_MARRIED": 1.0,
    "NAME_HOUSING_TYPE_CO_OP_APARTMENT": 0.0,
    "NAME_HOUSING_TYPE_MUNICIPAL_APARTMENT": 0.0,
    "NAME_INCOME_TYPE_STUDENT": 0.0,
    "NAME_INCOME_TYPE_WORKING": 0.0,
    "NAME_TYPE_SUITE_OTHER_B": 0.0,
    "OCCUPATION_TYPE_DRIVERS": 0.0,
    "OCCUPATION_TYPE_LABORERS": 1.0,
    "OCCUPATION_TYPE_LOW_SKILL_LABORERS": 0.0
  }
}

# A timeout keeps the cell from hanging forever if the API does not answer.
response = requests.post(API_URL, json=data, timeout=10)
print(response.status_code)
print(response.json())


200
{'probability': 2.69, 'decision': '✅ Accepté'}


In [None]:
import streamlit as st
import pandas as pd
import requests
import shap
import matplotlib.pyplot as plt
import warnings
import joblib
import config
from risk_gauge import show_risk_gauge, display_risk_message, animate_risk_gauge
import numpy as np
from st_aggrid import AgGrid

# Silence UserWarning and FutureWarning messages in the dashboard.
for _ignored_category in (UserWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

# Backend base URL, serialized model bundle and risk threshold.
API_URL = "http://localhost:8000"
ARTIFACT_PATH = "../backend/models/lightgbm_production_artifact_20250415_081218.pkl"
THRESHOLD = 0.0931515  # risk threshold (hard-coded here)

# Page-level Streamlit setup: wide layout and dashboard title.
st.set_page_config(layout="wide")
st.title("🏦 Dashboard Crédit - Prédictions & Explicabilité")

# ===== Chargement des données =====
@st.cache_data
def load_test_data():
    """Read the API test-sample CSV into a DataFrame (wrapped in ``st.cache_data``)."""
    test_sample_path = "../backend/data/test_2000_sample_for_api.csv"
    test_frame = pd.read_csv(test_sample_path)
    return test_frame

@st.cache_resource
def load_model_artifacts():
    """Load the model bundle and precompute global SHAP values on a test sample.

    Returns:
        tuple: ``(model, scaler, features, explainer, global_shap_values,
        df_test_sample)`` where ``df_test_sample`` is the (unscaled) sample of
        test rows the global SHAP values were computed on.
    """
    artifacts = joblib.load(ARTIFACT_PATH)
    model = artifacts['model']
    scaler = artifacts['scaler']
    features = artifacts['metadata']['features']
    explainer = shap.TreeExplainer(model)

    # Get the test data through the cached loader instead of reading the module
    # global `df_test`, which is only assigned after this function is defined.
    test_data = load_test_data()

    # Precompute global SHAP values on at most 1000 rows (fixed seed) for speed.
    df_test_sample = test_data[features].sample(min(1000, len(test_data)), random_state=42)
    df_test_sample_scaled = scaler.transform(df_test_sample)
    global_shap_values = explainer.shap_values(df_test_sample_scaled)

    return model, scaler, features, explainer, global_shap_values, df_test_sample

# Load the test sample, then the model bundle with its precomputed SHAP values.
df_test = load_test_data()
(
    model,
    scaler,
    features,
    explainer,
    global_shap_values,
    df_test_sample,
) = load_model_artifacts()

# Distinct client IDs, cast to int, for the sidebar selector.
unique_client_ids = df_test["SK_ID_CURR"].unique()
client_ids = unique_client_ids.astype(int)

# ===== Sidebar =====
st.sidebar.markdown("## 🔍 Analyse d'un client")
selected_id = st.sidebar.selectbox("Sélectionner un client", client_ids)
submitted = st.sidebar.button("Soumettre la prédiction")

# SHAP checkbox: its initial value is read from session state (False when the
# key is not set yet) and the widget's result is written back to the same key.
shap_checkbox_default = st.session_state.get("show_shap", False)
st.session_state.show_shap = st.sidebar.checkbox(
    "Afficher l'explication SHAP",
    value=shap_checkbox_default,
)

# === Session-state initialisation ===
# Default value for every key the dashboard reads from st.session_state.
required_states = {
    "predicted": False,
    "client_data": None,
    "score_float": None,
    "previous_id": None,
    "client_row": None,
    "show_shap": False,
}

# Only fill in keys that are not set yet; existing values are left alone.
for state_key, default_value in required_states.items():
    if state_key in st.session_state:
        continue
    st.session_state[state_key] = default_value

# ===== Reset when the selected client changes =====
client_changed = st.session_state.previous_id != selected_id
if client_changed:
    # Put the prediction-related keys back to their defaults.
    for state_key in ("predicted", "client_data", "score_float", "show_shap"):
        st.session_state[state_key] = required_states[state_key]
    # Forget which client the gauge animation was last played for.
    if 'current_animated_id' in st.session_state:
        del st.session_state['current_animated_id']
    st.session_state.previous_id = selected_id


# ===== Prediction submission =====
if submitted:
    client_row = df_test[df_test["SK_ID_CURR"] == selected_id]

    if client_row.empty:
        st.error("Client introuvable dans les données")
        st.session_state.predicted = False
    else:
        try:
            # Send the client's features (without the ID) to the /predict endpoint.
            client_data = client_row.drop(columns=["SK_ID_CURR"]).to_dict(orient="records")[0]
            # The timeout keeps the dashboard from freezing if the API does not answer.
            response = requests.post(
                f"{API_URL}/predict",
                json={"data": client_data},
                timeout=10,
            )
            response.raise_for_status()
            result = response.json()
            # The response's 'probability' is divided by 100 before being stored as the score.
            score_float = float(result['probability']) / 100
        except requests.exceptions.RequestException as e:
            st.error(f"Erreur lors de la prédiction : {e}")
            st.session_state.predicted = False
        except (KeyError, TypeError, ValueError) as e:
            # The API answered, but without a usable 'probability' field.
            st.error(f"Réponse inattendue de l'API : {e}")
            st.session_state.predicted = False
        else:
            # Store the result only once the whole exchange succeeded.
            st.session_state.update({
                "score_float": score_float,
                "client_data": client_data,
                "client_row": client_row,
                "predicted": True
            })

# ===== Results display =====
# Split the page into two equal-width columns.
col_left, col_right = st.columns([1, 1])

# Left column - always visible
with col_left:
    st.subheader("📋 Infos Client")
    
    # Formatting helpers
    def safe_get(row, col, default="N/A"):
        """Return ``row[col]``, or ``default`` when ``col`` is absent or its value is NA."""
        if col not in row:
            return default
        cell = row[col]
        return default if pd.isna(cell) else cell

    def format_currency(value):
        """Format ``value`` as a whole-number amount with thousands separators and a euro sign.

        Returns "N/A" when ``value`` cannot be converted to a float.
        """
        try:
            return f"{float(value):,.0f} €"
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures fall back to "N/A"; a bare `except:` would
            # also swallow KeyboardInterrupt and SystemExit.
            return "N/A"

    def format_percentage(value):
        """Format a ratio as a percentage with one decimal (``0.5`` -> ``"50.0 %"``).

        Returns "N/A" when ``value`` cannot be converted to a float.
        """
        try:
            return f"{float(value)*100:.1f} %"
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures fall back to "N/A"; a bare `except:` would
            # also swallow KeyboardInterrupt and SystemExit.
            return "N/A"

    def format_gender(value):
        """Map the gender code to its label: 1 -> "Homme", 0 -> "Femme", anything else -> "Inconnu"."""
        gender_labels = {1: "Homme", 0: "Femme"}
        return gender_labels.get(value, "Inconnu")

    def format_years(value):
        """Convert a day count to whole years, formatted as ``"<n> ans"``.

        The sign is flipped before dividing by 365, so a negative day count
        yields a positive number of years (``-19274`` -> ``"52 ans"``).
        Returns "N/A" when ``value`` cannot be converted to an int.
        """
        try:
            # Parentheses make the existing precedence explicit: negate first, then floor-divide.
            return f"{(-int(value)) // 365} ans"
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures fall back to "N/A"; a bare `except:` would
            # also swallow KeyboardInterrupt and SystemExit.
            return "N/A"

    # Look up the selected client's record (first matching row).
    selected_mask = df_test["SK_ID_CURR"] == selected_id
    row = df_test[selected_mask].iloc[0]

    # Label -> formatted value for the fields shown in the info table.
    infos = {
        "ID Client": int(row["SK_ID_CURR"]),
        "Âge": format_years(safe_get(row, "DAYS_BIRTH")),
        "Genre": format_gender(safe_get(row, "CODE_GENDER")),
        "Charge crédit": format_percentage(safe_get(row, "INCOME_CREDIT_PERC")),
        "Historique crédit": format_years(safe_get(row, "BURO_DAYS_CREDIT_MEAN")),
    }

    # Two-column frame for display; values are cast to str so the column holds
    # a single type.
    info_rows = list(infos.items())
    df_infos = pd.DataFrame(info_rows, columns=["Libellé", "Valeur"])
    df_infos["Valeur"] = df_infos["Valeur"].astype(str)
    AgGrid(df_infos, height=200, fit_columns_on_grid_load=True)

    # --- Global SHAP analysis ---
    # Shown only after a prediction has been made and the SHAP checkbox is ticked.
    if st.session_state.predicted and st.session_state.show_shap:
        st.markdown("---")
        st.subheader("Analyse Globale")
        with st.spinner("Calcul des tendances globales..."):
            fig_global = None
            try:
                # Bar summary plot of the precomputed global SHAP values (top 10).
                fig_global = plt.figure(figsize=(10, 6))
                shap.summary_plot(
                    global_shap_values,
                    df_test_sample[features],
                    plot_type="bar",
                    max_display=10,
                    show=False
                )
                plt.title("Top 10 - Impact Global des Variables", pad=20)
                st.pyplot(fig_global)

            except Exception as e:
                st.error(f"Erreur analyse globale : {str(e)}")
            finally:
                # Close this specific figure even when plotting failed, so
                # figures do not pile up across Streamlit reruns.
                if fig_global is not None:
                    plt.close(fig_global)
# Right column - prediction results
with col_right:
    if st.session_state.predicted:
        try:
            # --- 1. Gauge animation handling ---
            # Make sure the marker exists before comparing against it.
            if 'current_animated_id' not in st.session_state:
                st.session_state.current_animated_id = None

            if st.session_state.current_animated_id != selected_id:
                # The animation has not been played for this client yet: play it.
                animate_risk_gauge(
                    score=st.session_state.score_float,
                    client_id=selected_id
                )
                st.session_state.current_animated_id = selected_id
            else:
                # Same client as last time: static gauge.
                show_risk_gauge(
                    score=st.session_state.score_float,
                    client_id=selected_id
                )

            # --- 2. Alert message ---
            display_risk_message(
                score=st.session_state.score_float,
                threshold=THRESHOLD
            )

            # --- 3. Per-client SHAP explanation ---
            if st.session_state.show_shap:
                st.markdown("---")
                with st.spinner("Génération des explications SHAP..."):
                    # Rebuild a one-row frame from the stored client data and
                    # scale it, keeping the feature names as column labels.
                    X = pd.DataFrame([st.session_state.client_data])[features]
                    X_scaled = pd.DataFrame(
                        scaler.transform(X),
                        columns=features,
                        index=X.index
                    )

                    shap_values = explainer(X_scaled)
                    fig, _ax = plt.subplots(figsize=(10, 6))
                    try:
                        shap.plots.waterfall(shap_values[0], max_display=10, show=False)
                        st.pyplot(fig)
                    finally:
                        # Close the figure after rendering; otherwise a new
                        # figure stays open on every rerun.
                        plt.close(fig)

        except Exception as e:
            st.error(f"Erreur d'affichage : {str(e)}")
    else:
        # No prediction yet: draw the gauge with no score.
        show_risk_gauge(None, client_id=selected_id)