# Credit Risk Modeling (Mise à jour)
Ce notebook explore les données de risque de crédit et entraîne un modèle de scoring conforme à la dernière version du projet.

In [None]:
# Imports and configuration
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import sys, os
# Add the '../src' directory to the import search path. The relative path is
# resolved against the current working directory by os.path.abspath.
# Presumably this is where the project's model_training module lives — confirm.
sys.path.append(os.path.abspath('../src'))

In [None]:
from model_training import load_data, build_full_pipeline, train_model, evaluate_model, save_pipeline

## Chargement des données

In [None]:
# Load the dataset through the project helper load_data (imported from
# model_training).
# NOTE(review): this path is relative to the working directory, while the
# import path is set to '../src' — confirm that 'data/' resolves from the
# directory the notebook is actually run in.
data = load_data('data/generated_credit_data.csv')
print('Shape:', data.shape)
# Last expression of the cell: the notebook renders the first rows.
data.head()

## Analyse exploratoire rapide

In [None]:
# Column dtypes and non-null counts. DataFrame.info() writes its report to
# stdout itself and returns None, so it is called directly; wrapping it in
# print() would emit a stray "None" line after the report.
data.info()
# Summary statistics as computed by DataFrame.describe().
print(data.describe())
# Heatmap of the boolean missing-value mask (data.isnull()); row labels are
# hidden via yticklabels=False.
sns.heatmap(data.isnull(), yticklabels=False, cbar=True, cmap='viridis')
plt.title('Valeurs manquantes')
plt.show()

## Découpage train/test et pipeline

In [None]:
# Separate the target column 'default' from the remaining feature columns.
y = data['default']
X = data.drop(columns='default')

# Hold out 20% of the rows as a test set; the split is stratified on the
# target and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Imputation strategy: 'median' or 'knn'
full_pipeline = build_full_pipeline(imputer_type='knn')

## Entraînement du modèle (RandomizedSearchCV inclus dans train_model)

In [None]:
# Train on the training split only. Per the section heading, train_model runs
# the RandomizedSearchCV hyperparameter search internally. The returned object
# is the one passed to evaluate_model in the evaluation cell.
trained_pipeline = train_model(full_pipeline, X_train, y_train)

## Évaluation sur l'ensemble de test

In [None]:
# Evaluate the trained pipeline on the held-out test split. The structure of
# the returned `metrics` value is defined by evaluate_model (not visible here).
metrics = evaluate_model(trained_pipeline, X_test, y_test)

## Sauvegarde du pipeline

In [None]:
# Save the trained pipeline. The object to persist is `trained_pipeline`, the
# value returned by train_model; the name `pipeline` is never defined in this
# notebook, so passing it raised a NameError.
save_pipeline(trained_pipeline, 'models/credit_risk_pipeline.pkl')
print("Pipeline saved successfully!")