In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("../../data/df_segmentation_P.csv", index_col=[0])
df["date_mensuelle"] = pd.to_datetime(df["date_mensuelle"])
df['date_trimestrielle'] = df['date_mensuelle'].dt.year.astype(str) + '_' + df['date_mensuelle'].dt.quarter.astype(str)

# MOC C

In [3]:
resultats = df.groupby("Classes").agg(moyenne_TARGET=("TARGET", "mean")).to_dict()["moyenne_TARGET"]

In [4]:
resultats

{1: 0.25674217907227614,
 2: 0.17383829218205263,
 3: 0.12256764090239554,
 4: 0.07622553278882763,
 5: 0.050153307052124396,
 6: 0.03261193020424726,
 7: 0.02263618402483771}

In [5]:
def Boostrapping_Classes(classe):
    df_classe = df[df['Classes'] == classe]
    tx_defaut_liste = []

    for _ in range(1000):
        echantillon = df_classe["TARGET"].sample(n=len(df_classe), replace=True)
        tx_defaut_liste.append(echantillon.mean())
        
    tx_defaut_liste = np.array(tx_defaut_liste)

    decile9 = np.percentile(tx_defaut_liste, 90)

    MOC_C = decile9 - resultats[classe]
    
    return(classe, resultats[classe], decile9, MOC_C)

In [6]:
MOC_C = pd.DataFrame(columns = ["Classe", "LRA", "Décile9", "Moc_C"])

In [7]:
for i in range(1,8):
    classe, LRA, decile9, MOC_C_classe = Boostrapping_Classes(i)
    MOC_C.loc[len(MOC_C)] = [classe, LRA, decile9, MOC_C_classe]

In [8]:
MOC_C

Unnamed: 0,Classe,LRA,Décile9,Moc_C
0,1.0,0.256742,0.261489,0.004746
1,2.0,0.173838,0.177086,0.003248
2,3.0,0.122568,0.124687,0.002119
3,4.0,0.076226,0.077568,0.001343
4,5.0,0.050153,0.051158,0.001005
5,6.0,0.032612,0.033464,0.000852
6,7.0,0.022636,0.024104,0.001468


# MOC A
### Crise Covid

In [9]:
avant_covid = df[df['date_trimestrielle'] < '2020_2']

taux_defaut_avant_covid = avant_covid.groupby('Classes')['TARGET'].mean()
taux_defaut_pendant_covid = df.groupby('Classes')['TARGET'].mean()

impact_covid_par_classe = (taux_defaut_avant_covid/taux_defaut_pendant_covid)-1

In [10]:
impact_covid_par_classe

Classes
1    0.000614
2   -0.002937
3   -0.007154
4   -0.003364
5   -0.009690
6   -0.002816
7   -0.013034
Name: TARGET, dtype: float64

In [11]:
variance_taux_defaut_pendant_covid = df.groupby('Classes')['TARGET'].var()
variance_taux_defaut_pendant_covid

Classes
1    0.190839
2    0.143625
3    0.107548
4    0.070416
5    0.047639
6    0.031549
7    0.022125
Name: TARGET, dtype: float64

In [12]:
simulations = pd.DataFrame()

for classe in impact_covid_par_classe.index:
    moyenne_impact = impact_covid_par_classe[classe]
    variance_impact = variance_taux_defaut_pendant_covid[classe]

    variance_impact = variance_impact if variance_impact > 0 else 0.001

    tirages = np.random.normal(moyenne_impact, variance_impact, 1000)

    simulations[classe] = tirages
    
simulations

Unnamed: 0,1,2,3,4,5,6,7
0,-0.029866,-0.079585,0.071060,0.038974,-0.017930,0.000765,-0.044065
1,0.190391,-0.066032,0.006576,0.002126,-0.028625,0.012336,0.012273
2,-0.118212,0.223370,-0.172215,0.027188,-0.013745,-0.005388,-0.012243
3,-0.109836,-0.000530,0.071185,0.028454,-0.026984,0.031598,-0.016289
4,-0.164794,0.033931,-0.103553,0.035774,0.064859,-0.049031,-0.033077
...,...,...,...,...,...,...,...
995,-0.186169,0.220387,-0.195824,-0.105867,-0.005717,-0.003255,-0.007378
996,-0.092668,-0.041153,-0.160650,0.063698,0.026208,0.002531,0.003010
997,0.054906,-0.140993,-0.160194,-0.040641,0.012093,0.005730,-0.002500
998,0.124506,-0.038782,-0.035862,-0.027345,-0.022432,-0.043598,0.002127


In [13]:
moyenne_empirique = simulations.mean()
percentile_90 = simulations.quantile(0.90)
MOC_A = percentile_90 - taux_defaut_pendant_covid
MOC_A

1   -0.010823
2    0.007615
3    0.012518
4    0.010275
5   -0.000617
6    0.005205
7   -0.006966
dtype: float64

In [15]:
MOC_A = MOC_A.reset_index()

In [17]:
MOC_A.columns = ["CHR", "MOC_A"]

In [18]:
MOC_A

Unnamed: 0,CHR,MOC_A
0,1,-0.010823
1,2,0.007615
2,3,0.012518
3,4,0.010275
4,5,-0.000617
5,6,0.005205
6,7,-0.006966


# Conclusion

In [16]:
MOC = pd.DataFrame(columns = ["Classe", "LRA", "MOC_A", "MOC_C"])
MOC["LRA"] = taux_defaut_avant_covid.reset_index(drop=True)
MOC["MOC_A"] = MOC_A.reset_index(drop=True).apply(lambda x : 0 if x < 0 else x)
MOC["MOC_C"] = MOC_C["Moc_C"].reset_index(drop=True)
MOC["Classe"] = range(1,8)
MOC["LRA + MOC"] = MOC["LRA"] + MOC["MOC_A"] + MOC["MOC_C"]

In [17]:
MOC

Unnamed: 0,Classe,LRA,MOC_A,MOC_C,LRA + MOC
0,1,0.2569,0.0,0.005041,0.261941
1,2,0.173328,0.002676,0.003439,0.179442
2,3,0.121691,0.001133,0.002042,0.124865
3,4,0.075969,0.012848,0.001265,0.090083
4,5,0.049667,0.000612,0.000993,0.051273
5,6,0.03252,0.006496,0.000852,0.039869
6,7,0.022341,0.0,0.001411,0.023752
