In [88]:
# ======================================================
# 📥 Import des librairies
# ======================================================

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [89]:
# ======================================================
# 📥 Importation du fichier CSV
# ======================================================

import pandas as pd

# Enter the path of the CSV file to load here.
# NOTE(review): the original comment asked for an absolute path, but the value
# below is a bare filename, so it is resolved relative to the working directory.
# ------------------------------------------------------------------

path = r"loan_data.csv"

def import_csv(path, n_rows=5):
    """
    Load a CSV file with pandas, print a short report, and return the data.

    On success the function prints the source path, the dimensions
    (rows x columns) and the list of column names.

    Parameters
    ----------
    path : str, path-like or file-like
        Passed unchanged to ``pd.read_csv``.
    n_rows : int or None, default 5
        Number of leading rows to return, forwarded to ``DataFrame.head``.
        The default keeps the historical behaviour of returning only a
        5-row preview. Pass ``None`` to get the complete DataFrame, which
        is what any downstream analysis needs.

    Returns
    -------
    pandas.DataFrame or None
        The preview (or the full table when ``n_rows is None``).
        ``None`` when loading fails: the error is reported with ``print``
        instead of being raised, so callers must check for ``None``.
    """
    try:
        df = pd.read_csv(path)
        print(f" ✅ Fichier chargé avec succès : {path}\n")
        print(f" 📏 Dimensions : {df.shape[0]} lignes × {df.shape[1]} colonnes\n")
        print(f" 📄 Intitulés des colonnes : {list(df.columns)}\n")
        if n_rows is None:
            # Caller asked for the whole dataset rather than a visual preview.
            return df
        # Preview only: rendered as a rich table when it is the last
        # expression of a Jupyter cell.
        return df.head(n_rows)
    except FileNotFoundError:
        print(f"❌ Erreur : le fichier '{path}' est introuvable.")
        return None
    except pd.errors.EmptyDataError:
        print("⚠️ Le fichier est vide.")
        return None
    except Exception as e:
        # Deliberate best-effort: keep the notebook running and report the
        # problem instead of propagating it.
        print(f"🚨 Erreur inattendue : {e}")
        return None

In [85]:
# NOTE(review): the return value is only displayed, not bound to a name; later
# cells reference `df`, which no cell visible here assigns — confirm where it
# is defined.
import_csv(path)

 ✅ Fichier chargé avec succès : loan_data.csv

 📏 Dimensions : 9578 lignes × 14 colonnes

 📄 Intitulés des colonnes : ['credit.policy', 'purpose', 'int.rate', 'installment', 'log.annual.inc', 'dti', 'fico', 'days.with.cr.line', 'revol.bal', 'revol.util', 'inq.last.6mths', 'delinq.2yrs', 'pub.rec', 'not.fully.paid']



Unnamed: 0,credit.policy,purpose,int.rate,installment,log.annual.inc,dti,fico,days.with.cr.line,revol.bal,revol.util,inq.last.6mths,delinq.2yrs,pub.rec,not.fully.paid
0,1,debt_consolidation,0.1189,829.1,11.350407,19.48,737,5639.958333,28854,52.1,0,0,0,0
1,1,credit_card,0.1071,228.22,11.082143,14.29,707,2760.0,33623,76.7,0,0,0,0
2,1,debt_consolidation,0.1357,366.86,10.373491,11.63,682,4710.0,3511,25.6,1,0,0,0
3,1,debt_consolidation,0.1008,162.34,11.350407,8.1,712,2699.958333,33667,73.2,1,0,0,0
4,1,credit_card,0.1426,102.92,11.299732,14.97,667,4066.0,4740,39.5,0,1,0,0


In [90]:
# ======================================================
# 📊 Petit résumé des colonnes du dataset
# ======================================================

def resume_colonnes(df):
    """
    Print and display a one-row-per-column overview of ``df``.

    The overview table holds, for every column of ``df``:
    - its dtype,
    - the number of unique values,
    - the count and percentage (rounded to 2 decimals) of missing values,
    - min, max, mean and standard deviation, computed with
      ``numeric_only=True`` (columns left out of those results get NaN).

    The table is shown with ``display`` and nothing is returned.
    """
    manquantes = df.isna().sum()

    tableau = pd.DataFrame({
        "Type": df.dtypes,
        "Valeurs_uniques": df.nunique(),
        "Valeurs_manquantes": manquantes,
    })
    tableau["%_manquantes"] = manquantes.div(len(df)).mul(100).round(2)

    # Basic statistics, added in the same column order as before.
    agregats = {
        "Min": df.min,
        "Max": df.max,
        "Moyenne": df.mean,
        "Écart-type": df.std,
    }
    for libelle, calcul in agregats.items():
        tableau[libelle] = calcul(numeric_only=True)

    print("📋 Résumé du dataset :\n")
    display(tableau)

# ======================================================
# 🧩 Usage
# ======================================================

# NOTE(review): `df` is not assigned in any cell visible here — confirm it is
# defined before this call runs on a fresh kernel.
resume_colonnes(df)


📋 Résumé du dataset :



Unnamed: 0,Type,Valeurs_uniques,Valeurs_manquantes,%_manquantes,Min,Max,Moyenne,Écart-type
credit.policy,int64,2,0,0.0,0.0,1.0,0.80497,0.396245
purpose,object,7,0,0.0,,,,
int.rate,float64,249,0,0.0,0.06,0.2164,0.12264,0.026847
installment,float64,4788,0,0.0,15.67,940.14,319.089413,207.071301
log.annual.inc,float64,1987,0,0.0,7.547502,14.52835,10.932117,0.614813
dti,float64,2529,0,0.0,0.0,29.96,12.606679,6.88397
fico,int64,44,0,0.0,612.0,827.0,710.846314,37.970537
days.with.cr.line,float64,2687,0,0.0,178.958333,17639.96,4560.767197,2496.930377
revol.bal,int64,7869,0,0.0,0.0,1207359.0,16913.963876,33756.189557
revol.util,float64,1035,0,0.0,0.0,119.0,46.799236,29.014417


In [91]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of `df`.
df.describe()

Unnamed: 0,credit.policy,int.rate,installment,log.annual.inc,dti,fico,days.with.cr.line,revol.bal,revol.util,inq.last.6mths,delinq.2yrs,pub.rec,not.fully.paid
count,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0
mean,0.80497,0.12264,319.089413,10.932117,12.606679,710.846314,4560.767197,16913.96,46.799236,1.577469,0.163708,0.062122,0.160054
std,0.396245,0.026847,207.071301,0.614813,6.88397,37.970537,2496.930377,33756.19,29.014417,2.200245,0.546215,0.262126,0.366676
min,0.0,0.06,15.67,7.547502,0.0,612.0,178.958333,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.1039,163.77,10.558414,7.2125,682.0,2820.0,3187.0,22.6,0.0,0.0,0.0,0.0
50%,1.0,0.1221,268.95,10.928884,12.665,707.0,4139.958333,8596.0,46.3,1.0,0.0,0.0,0.0
75%,1.0,0.1407,432.7625,11.291293,17.95,737.0,5730.0,18249.5,70.9,2.0,0.0,0.0,0.0
max,1.0,0.2164,940.14,14.528354,29.96,827.0,17639.95833,1207359.0,119.0,33.0,13.0,5.0,1.0
