<a href="https://colab.research.google.com/github/Sela80/s/blob/main/Donn%C3%A9es_de_pr%C3%A9vision_d'approbation_de_pr%C3%AAt_financier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# À propos de l'ensemble de données
La société de financement propose tous types de prêts. Le client fait d'abord une demande de prêt immobilier, puis la société vérifie son éligibilité au prêt.

L'entreprise souhaite automatiser le processus d'admissibilité au prêt (en temps réel) en fonction des informations fournies par le client lors du remplissage du formulaire de demande en ligne. Ces informations sont : sexe, état civil, niveau d'études, nombre de personnes à charge, revenus, montant du prêt, historique de crédit, etc. Pour automatiser ce processus, elle a défini un problème permettant d'identifier les segments de clientèle éligibles aux prêts afin de cibler spécifiquement ces clients. Elle fournit ici un ensemble de données partiel.

- Notre mission est de construire un modèle intelligent capable de prédire si un client est éligible à un prêt auprès de l'entreprise financière.

1. Importation des librairies nécessaires

In [None]:
# Install the third-party packages used by this notebook.
# NOTE(review): gradio is imported further down in this file but is not
# installed here, while nicegui is installed but not imported in the visible
# code — confirm which UI package is actually needed.
!pip install opendatasets --upgrade --quiet
!pip install category_encoders --quiet
!pip install nicegui --quiet
!pip install scikit-learn --upgrade

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from category_encoders import CatBoostEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix, RocCurveDisplay, PrecisionRecallDisplay, RocCurveDisplay, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import ipywidgets as widgets
from xgboost import XGBClassifier
from IPython.display import display

In [None]:
# Download the loan-approval dataset from Kaggle
import opendatasets as od
od.download('https://www.kaggle.com/datasets/krishnaraj30/finance-loan-approval-prediction-data')

In [None]:
# Load the training CSV of the downloaded dataset into df1
df1=pd.read_csv('/content/finance-loan-approval-prediction-data/train.csv')

In [None]:
# Preview the first rows of the raw data
df1.head()

In [None]:
# Print the column summary of df1
df1.info()

In [None]:
# Count missing values per column
df1.isna().sum()

In [None]:
# Impute missing values column by column: the median for numeric columns,
# the most frequent value for every other column.
for col in df1.columns:
    if not df1[col].isnull().any():
        continue
    # Assign the filled Series back to the frame. Calling
    # fillna(inplace=True) on df1[col] mutates an intermediate object, which
    # pandas warns about and which leaves df1 unchanged under Copy-on-Write.
    if pd.api.types.is_numeric_dtype(df1[col]):
        df1[col] = df1[col].fillna(df1[col].median())
    else:
        # Covers object columns as well as string/categorical dtypes, for
        # which a median is not defined.
        df1[col] = df1[col].fillna(df1[col].mode()[0])

print("Missing values after handling:")
print(df1.isnull().sum())

In [None]:
# Count fully duplicated rows
df1.duplicated().sum()

In [None]:
# Summary statistics of df1
df1.describe()

2. Visualisation

In [None]:
# Column names split by dtype: int64/float64 columns vs object columns
num_col = df1.select_dtypes(include=['int64', 'float64']).columns
cat_col = df1.select_dtypes(include=['object']).columns

In [None]:
# Heatmap of the pairwise correlations between all columns listed in num_col
corr = df1[num_col].corr()
plt.figure(figsize=(8,6))
sns.heatmap(corr, annot=False, cmap='coolwarm', center=0)
plt.title('Corrélations entre variables numériques')
plt.tight_layout()
plt.show()

In [None]:
# Count how many rows fall under each Loan_Status value
distribution_Loan_Status = df1['Loan_Status'].value_counts()
print(distribution_Loan_Status)

In [None]:
# Bar chart of the Loan_Status counts computed above
distribution_Loan_Status.plot(kind='bar')
plt.title('Distribution of Loan Status')
plt.xlabel('Loan Status')
plt.ylabel('Count')
plt.show()

In [None]:
# Work on a copy so the following edits do not modify df1
df = df1.copy()

In [None]:
# Remove the Loan_ID column in place, then display the resulting frame
df.drop(columns=['Loan_ID'], inplace=True)
df

3. Prétraitement

In [None]:
# Feature groups used by the preprocessing step:
# num_cols - every int64/float64 column of df
# cat_cols - explicit list of the categorical feature columns
num_cols = df.select_dtypes(include=['int64', 'float64']).columns
cat_cols = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
       'Property_Area']


In [None]:
# Features: every column of df except the target
x = df.drop(columns=['Loan_Status'])
# Target: Loan_Status encoded as integers ('Y' -> 1, 'N' -> 0)
y = df['Loan_Status'].map({'Y': 1, 'N': 0}).astype(int)

In [None]:
# Display the list of categorical feature columns
cat_cols

In [None]:
# Column-wise preprocessing: StandardScaler on num_cols, CatBoostEncoder on
# cat_cols
preprocessing = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', CatBoostEncoder(), cat_cols),
])

In [None]:
# Hold out 20% of the rows as a test set (fixed seed for reproducibility).
# NOTE(review): the split is not stratified on y, whereas the cross-validation
# in this file uses StratifiedKFold — consider stratify=y for consistency.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
# Logistic-regression pipeline: shared `preprocessing` step, then the classifier
log_reg = Pipeline([
    ('preprocessor', preprocessing),
    ('classifier', LogisticRegression()),
])

In [None]:
# Random-forest pipeline: shared `preprocessing` step, then the classifier
R_F_Classifier = Pipeline([
    ('preprocessor', preprocessing),
    ('classifier', RandomForestClassifier()),
])

In [None]:
# XGBoost pipeline: shared `preprocessing` step, then the classifier
xgb = Pipeline([
    ('preprocessor', preprocessing),
    ('classifier', XGBClassifier()),
])

In [None]:
# Splitter shared by all cross-validation runs: 5 stratified, shuffled folds
# with a fixed seed
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


In [None]:
# Cross-validate the logistic-regression pipeline on the full data with the
# shared `cv` splitter, once scored by accuracy and once by ROC AUC
log_reg_cv_scores = cross_val_score(estimator=log_reg, X=x, y=y, scoring='accuracy', cv=cv)
log_reg_roc_auc_scores = cross_val_score(estimator=log_reg, X=x, y=y, scoring='roc_auc', cv=cv)

# Report the per-fold scores followed by their mean, for each metric
print("Logistic Regression Cross-Validation Accuracy Scores:", log_reg_cv_scores)
print("Logistic Regression Mean Cross-Validation Accuracy:", log_reg_cv_scores.mean())
print("Logistic Regression Cross-Validation ROC AUC Scores:", log_reg_roc_auc_scores)
print("Logistic Regression Mean Cross-Validation ROC AUC:", log_reg_roc_auc_scores.mean())

In [None]:
# Cross-validate the random-forest pipeline on the full data with the shared
# `cv` splitter, once scored by accuracy and once by ROC AUC
rf_cv_scores = cross_val_score(estimator=R_F_Classifier, X=x, y=y, scoring='accuracy', cv=cv)
rf_roc_auc_scores = cross_val_score(estimator=R_F_Classifier, X=x, y=y, scoring='roc_auc', cv=cv)

# Report the per-fold scores followed by their mean, for each metric
print("\nRandom Forest Classifier Cross-Validation Accuracy Scores:", rf_cv_scores)
print("Random Forest Classifier Mean Cross-Validation Accuracy:", rf_cv_scores.mean())
print("Random Forest Classifier Cross-Validation ROC AUC Scores:", rf_roc_auc_scores)
print("Random Forest Classifier Mean Cross-Validation ROC AUC:", rf_roc_auc_scores.mean())

In [None]:
# Cross-validate the XGBoost pipeline on the full data with the shared `cv`
# splitter, once scored by accuracy and once by ROC AUC
xgb_cv_scores = cross_val_score(estimator=xgb, X=x, y=y, scoring='accuracy', cv=cv)
xgb_roc_auc_scores = cross_val_score(estimator=xgb, X=x, y=y, scoring='roc_auc', cv=cv)

# Report the per-fold scores followed by their mean, for each metric
print("\nXGBClassifier Cross-Validation Accuracy Scores:", xgb_cv_scores)
print("XGBClassifier Mean Cross-Validation Accuracy:", xgb_cv_scores.mean())
print("XGBClassifier Cross-Validation ROC AUC Scores:", xgb_roc_auc_scores)
print("XGBClassifier Mean Cross-Validation ROC AUC:", xgb_roc_auc_scores.mean())

In [None]:
# Fit the three pipelines on the training split
log_reg.fit(x_train, y_train)
R_F_Classifier.fit(x_train, y_train)
xgb.fit(x_train, y_train)

# ROC curves on the held-out test split. All three curves are drawn on one
# shared Axes: without an explicit `ax`, each from_estimator call opens its
# own figure, so the legend and title would only apply to the last one and
# the models could not be compared on a single plot.
roc_fig, roc_ax = plt.subplots()
RocCurveDisplay.from_estimator(log_reg, x_test, y_test, name='LogReg', ax=roc_ax)
RocCurveDisplay.from_estimator(R_F_Classifier, x_test, y_test, name='RandomForestClassifier', ax=roc_ax)
RocCurveDisplay.from_estimator(xgb, x_test, y_test, name='XGB', ax=roc_ax)
roc_ax.legend(loc='lower right')
roc_ax.set_title('ROC curves on Test')
plt.show()

In [None]:
# Persist the fitted random-forest pipeline (preprocessing + classifier) to disk

joblib.dump(R_F_Classifier, 'R_F_Classifier.pkl')
print('modèle Sauvegarde')

In [None]:
# Offer the saved model file for download through the Colab files helper
from google.colab import files

files.download('R_F_Classifier.pkl')

In [None]:
import gradio as gr
import pandas as pd
import joblib
import warnings

# Hide all warnings for the rest of the session
warnings.filterwarnings("ignore")

# --------------------------
# 1. Charger le modèle SILENCIEUSEMENT
# --------------------------
# Best-effort load of the persisted pipeline. `model` stays None when loading
# fails; predict_loan_eligibility checks for that and reports it to the user.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load a 'R_F_Classifier.pkl' that comes from a trusted source.
model = None
try:
    model = joblib.load('R_F_Classifier.pkl')
except Exception:
    # Deliberately silent: nothing is printed to the console on failure.
    # `Exception` instead of a bare `except` so that KeyboardInterrupt and
    # SystemExit still propagate.
    model = None

# --------------------------
# 2. Fonction de prédiction
# --------------------------
def predict_loan_eligibility(
    Gender, Married, Dependents, Education, Self_Employed,
    ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term, Credit_History, Property_Area
):
    """Return a user-facing eligibility message for one loan application.

    The arguments are assembled into a one-row DataFrame whose column names
    match the parameter names, and that row is passed to the module-level
    ``model``.

    Args:
        Gender, Married, Dependents, Education, Self_Employed, Property_Area:
            Categorical form values, passed to the model unchanged.
        ApplicantIncome, CoapplicantIncome, Loan_Amount_Term, Credit_History:
            Numeric form values, converted with ``float``.
        LoanAmount: Numeric form value; a falsy value (empty field) is
            replaced by the default 130.0.

    Returns:
        str: The eligibility verdict with the probability of the predicted
        class, or a message describing why no prediction could be made
        (model not loaded, invalid input, prediction error).
    """
    if model is None:
        return "⚠️ Modèle non chargé. Vérifie le fichier 'R_F_Classifier.pkl'."

    try:
        # Build the single-row input inside the try block so that an invalid
        # numeric field (e.g. an emptied number box) yields an error message
        # instead of an unhandled exception.
        input_df = pd.DataFrame({
            'Gender': [Gender],
            'Married': [Married],
            'Dependents': [Dependents],
            'Education': [Education],
            'Self_Employed': [Self_Employed],
            'ApplicantIncome': [float(ApplicantIncome)],
            'CoapplicantIncome': [float(CoapplicantIncome)],
            'LoanAmount': [float(LoanAmount) if LoanAmount else 130.0],  # Default value
            'Loan_Amount_Term': [float(Loan_Amount_Term)],
            'Credit_History': [float(Credit_History)],
            'Property_Area': [Property_Area]
        })

        prediction = model.predict(input_df)[0]
        proba = model.predict_proba(input_df)[0]

        # The pipeline in this file is trained on Loan_Status mapped to 1/0,
        # so predictions are integers; 'Y'/'N' is still accepted in case the
        # pickle was trained on the raw labels. Normalise to a plain
        # Python int or str before comparing.
        label = str(prediction) if isinstance(prediction, str) else int(prediction)

        # proba[1] is read as the probability of approval and proba[0] as the
        # probability of rejection, i.e. classes ordered (0, 1) or ('N', 'Y').
        if label in (1, 'Y'):
            resultat = f"🎉 CLIENT ÉLIGIBLE AU PRÊT ! ✅\n(Confiance : {proba[1]:.2%})"
        elif label in (0, 'N'):
            resultat = f"⛔ CLIENT NON ÉLIGIBLE. ❌\n(Confiance : {proba[0]:.2%})"
        else:
            resultat = f"⚠️ Valeur de prédiction inattendue : {prediction}"

        return resultat

    except Exception as e:
        return f"❌ Erreur lors de la prédiction : {str(e)}"

# --------------------------
# 3. Interface utilisateur (Gradio Blocks)
# --------------------------
# Build the Gradio form: a header, two columns of inputs (personal and
# financial information), a submit button and a text box for the result.
with gr.Blocks(title="Prédicteur de Prêt - Random Forest", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏦 Prédicteur d'Éligibilité de Prêt")
    gr.Markdown("### Modèle : Random Forest (`R_F_Classifier.pkl`)")
    gr.Markdown("Remplissez le formulaire ci-dessous pour obtenir une prédiction instantanée.")

    with gr.Row():
        # Left column: categorical inputs
        with gr.Column():
            gr.Markdown("### 👤 Informations Personnelles")
            gender = gr.Dropdown(["Male", "Female"], label="Genre", value="Male")
            married = gr.Dropdown(["Yes", "No"], label="Marié(e)", value="No")
            dependents = gr.Dropdown(["0", "1", "2", "3+"], label="Personnes à charge", value="0")
            education = gr.Dropdown(["Graduate", "Not Graduate"], label="Niveau d'études", value="Graduate")
            self_employed = gr.Dropdown(["Yes", "No"], label="Travailleur indépendant", value="No")
            property_area = gr.Dropdown(["Urban", "Semiurban", "Rural"], label="Zone de propriété", value="Urban")

        # Right column: numeric inputs
        with gr.Column():
            gr.Markdown("### 💰 Informations Financières")
            applicant_income = gr.Number(label="Revenu du demandeur (₹)", value=5000, minimum=0)
            coapplicant_income = gr.Number(label="Revenu du co-demandeur (₹)", value=0, minimum=0)
            loan_amount = gr.Number(label="Montant du prêt demandé (₹ en milliers)", value=130, minimum=1)
            loan_term = gr.Slider(12, 480, step=12, value=360, label="Durée du prêt (mois)")
            credit_history = gr.Dropdown([1.0, 0.0], label="Historique de crédit (1=bon, 0=mauvais)", value=1.0)

    predict_btn = gr.Button("🚀 Lancer la prédiction", variant="primary")
    output = gr.Textbox(label="📊 Résultat", placeholder="Le résultat de la prédiction s'affichera ici...", lines=3)

    # Wire the button to the prediction function. The inputs are listed in
    # the same order as the parameters of predict_loan_eligibility.
    predict_btn.click(
        fn=predict_loan_eligibility,
        inputs=[
            gender, married, dependents, education, self_employed,
            applicant_income, coapplicant_income, loan_amount, loan_term, credit_history, property_area
        ],
        outputs=output
    )

# --------------------------
# Start the Gradio app when this code runs as the main module
if __name__ == "__main__":
    demo.launch(
        inbrowser=True,
        share=False,
        debug=False,
        quiet=True
    )