In [12]:
import pandas as pd
import numpy as np

In [24]:
df = pd.read_csv("../data/df_segmentation.csv", index_col=[0])
df["date_mensuelle"] = pd.to_datetime(df["date_mensuelle"])

# MOC C

In [25]:
resultats = df.groupby("Classes").agg(moyenne_TARGET=("TARGET", "mean")).to_dict()["moyenne_TARGET"]

In [26]:
resultats

{1: 0.17363251155624038,
 2: 0.11102387595160318,
 3: 0.08076662977990902,
 4: 0.06629496923052204,
 5: 0.05008281539546401,
 6: 0.036547911547911545,
 7: 0.03700090063044131}

In [27]:
def Boostrapping_Classes(classe):
    df_classe = df[df['Classes'] == classe]
    tx_defaut_liste = []

    for _ in range(1000):
        echantillon = df_classe["TARGET"].sample(n=len(df_classe), replace=True)
        tx_defaut_liste.append(echantillon.mean())
        
    tx_defaut_liste = np.array(tx_defaut_liste)

    decile9 = np.percentile(tx_defaut_liste, 90)

    MOC_C = decile9 - resultats[classe]
    
    return(classe, resultats[classe], decile9, MOC_C)

In [28]:
MOC_C = pd.DataFrame(columns = ["Classe", "LRA", "Décile9", "Moc_C"])

In [29]:
for i in range(1,8):
    classe, LRA, decile9, MOC_C_classe = Boostrapping_Classes(i)
    MOC_C.loc[len(MOC_C)] = [classe, LRA, decile9, MOC_C_classe]

In [30]:
MOC_C

Unnamed: 0,Classe,LRA,Décile9,Moc_C
0,1.0,0.173633,0.178448,0.004815
1,2.0,0.111024,0.112538,0.001514
2,3.0,0.080767,0.082182,0.001416
3,4.0,0.066295,0.067645,0.00135
4,5.0,0.050083,0.051313,0.00123
5,6.0,0.036548,0.038127,0.00158
6,7.0,0.037001,0.039102,0.002101


# MOC A
### Valeurs manquantes dans les variables explicatives

In [31]:
var_explicatives = ['REGION_RATING_CLIENT_W_CITY', 'DAYS_CREDIT_ENDDATE', 'RATE_DOWN_PAYMENT', 'AMT_PAYMENT', 'NAME_INCOME_TYPE', 'OCCUPATION_TYPE']

In [32]:
train = pd.read_csv("../data/application_train_vf.csv")
df_bur = pd.read_csv('../data/bureau.csv')
df_bur_group = df_bur[['DAYS_CREDIT_ENDDATE', 'SK_ID_CURR']].groupby('SK_ID_CURR').sum()
df_bur_group.reset_index(inplace=True)

df_prev = pd.read_csv('../data/previous_application.csv')
df_prev_group = df_prev[['SK_ID_CURR', 'DAYS_FIRST_DRAWING', 'RATE_DOWN_PAYMENT']].groupby(
    'SK_ID_CURR').sum()
df_prev_group.reset_index(inplace=True)

df_ins = pd.read_csv('../data/installments_payments.csv')
df_ins_group = df_ins[['SK_ID_CURR', 'AMT_PAYMENT']].groupby('SK_ID_CURR').sum()
df_ins_group.reset_index(inplace=True)

train = train.merge(df_bur_group, on='SK_ID_CURR', how='left')
train = train.merge(df_prev_group, on='SK_ID_CURR', how='left')
train = train.merge(df_ins_group, on='SK_ID_CURR', how='left')

In [33]:
train[var_explicatives].isna().sum()

REGION_RATING_CLIENT_W_CITY        0
DAYS_CREDIT_ENDDATE            43764
RATE_DOWN_PAYMENT              16395
AMT_PAYMENT                    15810
NAME_INCOME_TYPE                   0
OCCUPATION_TYPE                96049
dtype: int64

### Période Manquante

In [34]:
max_year = df["date_mensuelle"].dt.year.max()
min_year = df["date_mensuelle"].dt.year.min()

In [35]:
periodes_attendues = [f"{year}_{quarter}" for year in range(min_year, max_year) for quarter in range(1, 5)]
periodes_reelles = df['date_trimestrielle'].unique()

periodes_manquantes = set(periodes_attendues) - set(periodes_reelles)
print("Périodes manquantes dans les données :", periodes_manquantes)

Périodes manquantes dans les données : set()


### Crise Covid

In [36]:
df['periode_crise_covid'] = ((df['date_trimestrielle'] == '2020_1') | (df['date_trimestrielle'] == '2020_2'))

sans_covid = df[df['periode_crise_covid'] == False].copy()

In [37]:
Covid_défaut = pd.DataFrame(columns = ["Classe", "Taux avec Covid", "Taux sans Covid", "Marge Conservatisme (%)"])

In [38]:
for i in range(1,8):
    tx_ss_covid = sans_covid[sans_covid["Classes"]==i]["TARGET"].mean()
    marge = ((tx_ss_covid/resultats[i])-1)*100
    Covid_défaut.loc[len(Covid_défaut)] = [i, resultats[i], tx_ss_covid, marge]

In [39]:
Covid_défaut

Unnamed: 0,Classe,Taux avec Covid,Taux sans Covid,Marge Conservatisme (%)
0,1.0,0.173633,0.173409,-0.128943
1,2.0,0.111024,0.110641,-0.345114
2,3.0,0.080767,0.08097,0.252129
3,4.0,0.066295,0.066394,0.148771
4,5.0,0.050083,0.050024,-0.118193
5,6.0,0.036548,0.036559,0.030722
6,7.0,0.037001,0.036939,-0.166446
