In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("../../data/df_segmentation_P.csv", index_col=[0])
df["date_mensuelle"] = pd.to_datetime(df["date_mensuelle"])

# MOC C

In [3]:
resultats = df.groupby("Classes").agg(moyenne_TARGET=("TARGET", "mean")).to_dict()["moyenne_TARGET"]

In [4]:
resultats

{1: 0.25674217907227614,
 2: 0.17383829218205263,
 3: 0.12256764090239554,
 4: 0.07622553278882763,
 5: 0.050153307052124396,
 6: 0.03261193020424726,
 7: 0.02263618402483771}

In [5]:
def Boostrapping_Classes(classe):
    df_classe = df[df['Classes'] == classe]
    tx_defaut_liste = []

    for _ in range(1000):
        echantillon = df_classe["TARGET"].sample(n=len(df_classe), replace=True)
        tx_defaut_liste.append(echantillon.mean())
        
    tx_defaut_liste = np.array(tx_defaut_liste)

    decile9 = np.percentile(tx_defaut_liste, 90)

    MOC_C = decile9 - resultats[classe]
    
    return(classe, resultats[classe], decile9, MOC_C)

In [6]:
MOC_C = pd.DataFrame(columns = ["Classe", "LRA", "Décile9", "Moc_C"])

In [7]:
for i in range(1,8):
    classe, LRA, decile9, MOC_C_classe = Boostrapping_Classes(i)
    MOC_C.loc[len(MOC_C)] = [classe, LRA, decile9, MOC_C_classe]

In [8]:
MOC_C

Unnamed: 0,Classe,LRA,Décile9,Moc_C
0,1.0,0.256742,0.261201,0.004459
1,2.0,0.173838,0.17699,0.003152
2,3.0,0.122568,0.124661,0.002093
3,4.0,0.076226,0.077521,0.001295
4,5.0,0.050153,0.051134,0.00098
5,6.0,0.032612,0.033384,0.000772
6,7.0,0.022636,0.02416,0.001524


# MOC A
### Valeurs manquantes dans les variables explicatives

In [9]:
var_explicatives = ['REGION_RATING_CLIENT_W_CITY', 'DAYS_CREDIT_ENDDATE', 'RATE_DOWN_PAYMENT', 'AMT_PAYMENT', 'NAME_INCOME_TYPE', 'OCCUPATION_TYPE']

In [10]:
train = pd.read_csv("../../data/application_train_vf.csv")
df_bur = pd.read_csv('../../data/bureau.csv')
df_bur_group = df_bur[['DAYS_CREDIT_ENDDATE', 'SK_ID_CURR']].groupby('SK_ID_CURR').sum()
df_bur_group.reset_index(inplace=True)

df_prev = pd.read_csv('../../data/previous_application.csv')
df_prev_group = df_prev[['SK_ID_CURR', 'DAYS_FIRST_DRAWING', 'RATE_DOWN_PAYMENT']].groupby(
    'SK_ID_CURR').sum()
df_prev_group.reset_index(inplace=True)

df_ins = pd.read_csv('../../data/installments_payments.csv')
df_ins_group = df_ins[['SK_ID_CURR', 'AMT_PAYMENT']].groupby('SK_ID_CURR').sum()
df_ins_group.reset_index(inplace=True)

train = train.merge(df_bur_group, on='SK_ID_CURR', how='left')
train = train.merge(df_prev_group, on='SK_ID_CURR', how='left')
train = train.merge(df_ins_group, on='SK_ID_CURR', how='left')

In [11]:
train[var_explicatives].isna().sum()

REGION_RATING_CLIENT_W_CITY        0
DAYS_CREDIT_ENDDATE            43764
RATE_DOWN_PAYMENT              16395
AMT_PAYMENT                    15810
NAME_INCOME_TYPE                   0
OCCUPATION_TYPE                96049
dtype: int64

### Période Manquante

In [12]:
max_year = df["date_mensuelle"].dt.year.max()
min_year = df["date_mensuelle"].dt.year.min()

In [13]:
periodes_attendues = [f"{year}_{quarter}" for year in range(min_year, max_year) for quarter in range(1, 5)]
periodes_reelles = df['date_trimestrielle'].unique()

periodes_manquantes = set(periodes_attendues) - set(periodes_reelles)
print("Périodes manquantes dans les données :", periodes_manquantes)

Périodes manquantes dans les données : set()


### Crise Covid

In [14]:
df['periode_crise_covid'] = ((df['date_trimestrielle'] == '2020_1') | (df['date_trimestrielle'] == '2020_2'))

sans_covid = df[df['periode_crise_covid'] == False].copy()

In [15]:
Covid_défaut = pd.DataFrame(columns = ["Classe", "Taux avec Covid", "Taux sans Covid", "Marge Conservatisme (%)"])

In [16]:
for i in range(1,8):
    tx_ss_covid = sans_covid[sans_covid["Classes"]==i]["TARGET"].mean()
    marge = ((tx_ss_covid/resultats[i])-1)*100
    Covid_défaut.loc[len(Covid_défaut)] = [i, resultats[i], tx_ss_covid, marge]

In [17]:
Covid_défaut

Unnamed: 0,Classe,Taux avec Covid,Taux sans Covid,Marge Conservatisme (%)
0,1.0,0.256742,0.257824,0.421554
1,2.0,0.173838,0.173345,-0.283846
2,3.0,0.122568,0.122432,-0.110606
3,4.0,0.076226,0.076292,0.087472
4,5.0,0.050153,0.050057,-0.192103
5,6.0,0.032612,0.03247,-0.434446
6,7.0,0.022636,0.022651,0.063696
