In [None]:
import argparse
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import sklearn as sk
import xmltodict
from joblib import dump, load
from lineartree import LinearTreeRegressor
from pandas.tseries.offsets import DateOffset
from sklearn import tree
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
# warnings.filterwarnings('ignore')

# Loading functions

In [None]:
def bdf_request(dataset, id_list, timeout=60):
    """Fetch one or more series from the Banque de France Webstat API.

    Parameters
    ----------
    dataset : str
        Dataset code inserted in the URL path (e.g. 'CFT').
    id_list : iterable of str
        Series keys; they are joined with '+' to form the last URL segment.
    timeout : float, optional
        Seconds to wait for the server, forwarded to ``requests.get``.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    # NOTE(review): the literal below is a credential committed in the notebook.
    # It is kept only as a fallback so existing runs keep working; set
    # BDF_API_TOKEN in the environment, then rotate and remove the literal.
    token = os.environ.get("BDF_API_TOKEN", "8c405577-925f-4f96-9a89-df95aceb3e61")

    # Join the keys with '+' (no dangling separator at the end of the path).
    url = f"https://api.webstat.banque-france.fr/webstat-fr/v1/data/{dataset}/" + "+".join(id_list)

    headers = {
        "X-IBM-Client-Id": token,
        "accept": "application/json"
    }

    params = {
        # "startPeriod": "2020-Q1",
        # "endPeriod": "2020-Q2"
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()

def parse_bdf_request(data):
    """Turn a Webstat JSON payload into one wide DataFrame keyed by 'Date'.

    Parameters
    ----------
    data : dict
        Payload as returned by ``bdf_request``; ``data['seriesObs']`` is a list
        whose items hold an 'ObservationsSerie' dict with 'title', 'frequency'
        and 'observations'.

    Returns
    -------
    pandas.DataFrame
        A 'Date' column plus one column per series (named after its title),
        outer-merged on 'Date'. When the frequency of the last series is
        'A', 'Q' or 'M', 'Date' is converted to a timestamp shifted to the
        end of the period; otherwise it is left as the raw string. An empty
        ``seriesObs`` yields an empty frame with only the 'Date' column.
    """
    date_format = "%d-%m-%Y %H:%M:%S"
    # Number of month-ends to add to the first day of a period to reach its last day.
    month_end_steps = {'A': 12, 'Q': 3, 'M': 1}

    df = pd.DataFrame(columns=['Date'])
    last_content = None
    for serie in data['seriesObs']:
        content = serie['ObservationsSerie']
        periods = [obs['ObservationPeriod'] for obs in content['observations']]
        sub_df = pd.DataFrame()
        sub_df['Date'] = [period['periodFirstDate'] for period in periods]
        sub_df[content['title']] = [pd.to_numeric(period['value']) for period in periods]
        df = df.merge(sub_df, on='Date', how='outer')
        last_content = content

    # Nothing to convert when the payload carried no series.
    if last_content is None:
        return df

    # As before, the frequency of the last series drives the date conversion for
    # the whole frame (all series of one request are expected to share it).
    steps = month_end_steps.get(last_content['frequency'])
    if steps is not None:
        df['Date'] = pd.to_datetime(df['Date'], format=date_format) + pd.offsets.MonthEnd(steps)

    return df

In [None]:
# Series keys requested below from the 'CFT' dataset.
quarterly_bdf_ids_1 = [
    'CFT.Q.N.FR.W0.S1M.S1.N.A.LE.F62A._Z._Z.XDC._T.S.V.N._T',
    'CFT.Q.N.FR.W0.S1M.S1.N.A.LE.F62B._Z._Z.XDC._T.S.V.N._T'
]

# Series keys requested below from the 'CNF' dataset.
quarterly_bdf_ids_2 = [
    'CNF.Q.N.FR.W0.S1M.S1.N.A.LE.F2.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S1M.S1.N.L.F.F8.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S1M.S1.N.A.F.F8.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S1M.S1.N.L.F.F4.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S1M.S1.N.A.F.F4.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S1M.S1.N.A.F.F3.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S1M.S1.N.A.F.F5._Z._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S1M.S1.N.L.F.F5._Z._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.L.F.F3.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.A.F.F3.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.L.F.F5._Z._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.A.F.F51._Z._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.A.LE.F8.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.L.LE.F8.T._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.A.F.F6._Z._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.L.F.F6._Z._Z.XDC._T.S.V.N._T',
    'CNF.Q.N.FR.W0.S11.S1.N.A.LE.F521._Z._Z.XDC._T.S.V.N._T'
]

# Series keys requested below from the 'BSI1' dataset.
monthly_bdf_ids_1 = [
    'BSI1.M.FR.N.A.L21.A.1.U6.2250.Z01.E',
    'BSI1.M.FR.N.A.L22.A.1.U6.2250.Z01.E',
    'BSI1.M.FR.N.A.L22FRPL.A.1.U6.2251.Z01.E',
    'BSI1.M.FR.N.A.L23.A.1.U6.2250.Z01.E',
    'BSI1.M.FR.N.A.L23FRLA.A.1.U6.2250.Z01.E',
    'BSI1.M.FR.N.A.L23FRLD.A.1.U6.2251.Z01.E',
    'BSI1.M.FR.N.R.A220Z.A.1.U6.2250.Z01.E',
    'BSI1.M.FR.N.R.A210Z.A.1.U6.2250.Z01.E',
    'BSI1.M.FR.N.R.A2N1Z.A.1.U6.2240.Z01.E',
    'BSI1.M.FR.N.R.A2N2Z.A.1.U6.2240.Z01.E',
    'BSI1.M.FR.N.F.L30.A.8.Z5.0000.Z01.T',
    'BSI1.M.FR.N.A.L22.A.1.U6.2240.Z01.E',
    'BSI1.M.FR.N.A.L21.A.1.U6.2240.Z01.E',
    'BSI1.M.FR.Y.R.A220Z.A.4.U6.2254FR.Z01.E',
    'BSI1.M.FR.N.R.A220Z.A.4.U6.2254FR.Z01.E'
]

# Series keys requested below from the 'MIR1' dataset.
monthly_bdf_ids_2 = [
    'MIR1.M.FR.B.A22.A.5.A.2254U6.EUR.N',
    'MIR1.M.FR.B.A2B.A.5.A.2250U6.EUR.N',
    'MIR1.M.FR.B.A22HR.A.5.A.2254U6.EUR.N'
    
]

# Series key requested below from the 'MIR' dataset.
monthly_bdf_ids_3 = [
    'MIR.M.FR.B.L22.H.R.A.2240.EUR.N'
]

# --- Quarterly Banque de France series: CFT and CNF datasets merged on Date ---
data_bdf_quarterly_1 = bdf_request('CFT', quarterly_bdf_ids_1)
df_bdf_quarterly_1 = parse_bdf_request(data_bdf_quarterly_1)

data_bdf_quarterly_2 = bdf_request('CNF', quarterly_bdf_ids_2)
df_bdf_quarterly_2 = parse_bdf_request(data_bdf_quarterly_2)

df_bdf_quarterly = (
    df_bdf_quarterly_1
    .merge(df_bdf_quarterly_2, on='Date', how='outer')
    .sort_values('Date', ascending=False)
)
# Every value column is multiplied by 1e3 (presumably a unit conversion - TODO confirm).
quarterly_value_cols = [name for name in df_bdf_quarterly.columns if name != 'Date']
for col in quarterly_value_cols:
    df_bdf_quarterly[col] = df_bdf_quarterly[col] * 1e3
display(df_bdf_quarterly.head())

# --- Monthly Banque de France series: BSI1, MIR1 and MIR datasets merged on Date ---
data_bdf_monthly_1 = bdf_request('BSI1', monthly_bdf_ids_1)
df_bdf_monthly_1 = parse_bdf_request(data_bdf_monthly_1)

data_bdf_monthly_2 = bdf_request('MIR1', monthly_bdf_ids_2)
df_bdf_monthly_2 = parse_bdf_request(data_bdf_monthly_2)
# Only the MIR1 columns receive the 1e3 factor among the monthly sources.
mir1_value_cols = [name for name in df_bdf_monthly_2.columns if name != 'Date']
for col in mir1_value_cols:
    df_bdf_monthly_2[col] = df_bdf_monthly_2[col] * 1e3

data_bdf_monthly_3 = bdf_request('MIR', monthly_bdf_ids_3)
df_bdf_monthly_3 = parse_bdf_request(data_bdf_monthly_3)

df_bdf_monthly = df_bdf_monthly_1.merge(df_bdf_monthly_2, on='Date', how='outer')
df_bdf_monthly = df_bdf_monthly.merge(df_bdf_monthly_3, on='Date', how='outer')
df_bdf_monthly = df_bdf_monthly.sort_values('Date', ascending=False)

# These two rate columns are divided by 100 (percent -> fraction).
rate_cols = [
    "Titres d'OPC monétaires, taux de valorisation de l'encours",
    "Taux d'intérêt sur contrats nouveaux, DAT, SNF, durée supérieure à 2 ans",
]
df_bdf_monthly[rate_cols] = df_bdf_monthly[rate_cols] / 100
display(df_bdf_monthly.head())


In [None]:
def insee_request(id_list, timeout=60):
    """Fetch one or more series from the INSEE BDM SDMX endpoint.

    Parameters
    ----------
    id_list : iterable of str
        Series identifiers; they are joined with '+' to form the last URL segment.
    timeout : float, optional
        Seconds to wait for the server, forwarded to ``requests.get``.

    Returns
    -------
    dict
        The XML response body converted to nested dicts by ``xmltodict.parse``.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    """
    # NOTE(review): the literal below is a credential committed in the notebook.
    # It is kept only as a fallback so existing runs keep working; set
    # INSEE_API_TOKEN in the environment, then rotate and remove the literal.
    token = os.environ.get("INSEE_API_TOKEN", "fd13b343-b9d7-385d-9ae3-a3e53b5a0440")

    # Join the ids with '+' (no dangling separator at the end of the path).
    url = "https://bdm.insee.fr/series/sdmx/data/SERIES_BDM/" + "+".join(id_list)

    headers = {
        "Authorization": f"Bearer {token}"
    }

    params = {
        # "startPeriod": "2020-Q1",
        # "endPeriod": "2020-Q2"
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page as SDMX.
    response.raise_for_status()
    return xmltodict.parse(response.content)

    

def parse_insee_request(data):
    """Turn a parsed INSEE SDMX message into one wide DataFrame keyed by 'Date'.

    Parameters
    ----------
    data : dict
        Output of ``insee_request``: the series live under
        ``data['message:StructureSpecificData']['message:DataSet']['Series']``.

    Returns
    -------
    pandas.DataFrame
        A 'Date' column plus one column per series (named after '@TITLE_FR'),
        outer-merged on 'Date'. Each value is '@OBS_VALUE' multiplied by
        10 ** '@UNIT_MULT'. When the '@FREQ' of the last series is 'A', 'T' or
        'M', 'Date' is converted to a timestamp shifted to the end of the
        period; otherwise it is left as the raw string.
    """
    # Number of month-ends to add to the start of a period to reach its last day.
    month_end_steps = {'A': 12, 'T': 3, 'M': 1}

    # xmltodict yields a dict (not a list) when an element occurs only once,
    # so both 'Series' and 'Obs' have to be normalised to lists.
    series = data['message:StructureSpecificData']['message:DataSet']['Series']
    if not isinstance(series, list):
        series = [series]

    df = pd.DataFrame(columns=['Date'])
    last_serie = None
    for serie in series:
        observations = serie['Obs']
        if not isinstance(observations, list):
            observations = [observations]

        unit_mult = 10**float(serie['@UNIT_MULT'])
        sub_df = pd.DataFrame()
        sub_df['Date'] = [obs['@TIME_PERIOD'] for obs in observations]
        sub_df[serie['@TITLE_FR']] = [float(obs['@OBS_VALUE']) * unit_mult for obs in observations]
        df = df.merge(sub_df, on='Date', how='outer')
        last_serie = serie

    # Nothing to convert when no series was present.
    if last_serie is None:
        return df

    # As before, the frequency of the last series drives the date conversion for
    # the whole frame (all series of one request are expected to share it).
    steps = month_end_steps.get(last_serie['@FREQ'])
    if steps is not None:
        df['Date'] = pd.to_datetime(df['Date']) + pd.offsets.MonthEnd(steps)

    return df
    

In [None]:
# INSEE series identifiers requested below at annual frequency.
annual_insee_ids = [
    '010564058',
    '010563884'
    # '010563893',
]

# INSEE series identifiers requested below at quarterly frequency.
# Inline labels are the author's short descriptions; ids without a label are undocumented here.
quarterly_insee_ids = [
    '010564925', # gross disposable income (RDB), current prices
    '010565711', # household consumption, current prices
    '010565712', # household consumption, constant prices
    '010564934', # household savings, current prices
    '010565738', # household gross fixed capital formation (FBCF), current prices
    '010565734', # household gross fixed capital formation (FBCF), constant prices
    '010565724', # exports, current prices
    '010565725', # exports, constant prices
    '010565726', # imports, current prices
    '010565727', # imports, constant prices
    '010565717', # total consumption, current prices
    '010565718', # total consumption, constant prices
    '010564879', # households, investment grants
    '010564890',
    '010564889',
    '010564874',
    '010564983',
    '010564909',
    '010564694',
    '010564707',
    '010564734',
    '010564744',
    '010564755',
    '010564767',
    '010564766',
    '010564789',
    '010564848',
    '010564866',
    '010564865',
    '010564833',
    '010564840',
    '010564880',
    '010564892',
    '010564891',
    '010564975',
    '010564898',
    '010565707',
    '010565708',
    '010565730',
    '010565731',
    '010564901',
    '010564931',
    '010564875',
    '010564883',
    '010564884',
    '010564871',
    '010565536',
    '010565746',
    '010565745'
]

# INSEE series identifier requested below at monthly frequency.
monthly_insee_ids = [
   '001769682' # consumer price index (CPI)
]

# Download, parse and preview the INSEE series, one request per frequency.
# Each request needs network access; the resulting df_insee_* frames are
# notebook-level state read by later cells.

# Annual series.
data_insee_annual = insee_request(annual_insee_ids)
df_insee_annual = parse_insee_request(data_insee_annual)
display(df_insee_annual.head())

# Quarterly series.
data_insee_quarterly = insee_request(quarterly_insee_ids)
df_insee_quarterly = parse_insee_request(data_insee_quarterly)
display(df_insee_quarterly.head())

# Monthly series.
data_insee_monthly = insee_request(monthly_insee_ids)
df_insee_monthly = parse_insee_request(data_insee_monthly)
display(df_insee_monthly.head())

# I. SNF

In [None]:
# (start, end) bounds of the training window used by the SNF arbitrage fit below;
# both bounds are included by the date filter.
# NOTE(review): `train_range` is reassigned by the credit-model cells further down,
# so re-running the arbitrage fit after them silently uses a different window.
train_range = ('2003-03-01', '2022-08-01')

## 1. Arbitrage

In [None]:
# Scenario workbook: read it once and make sure 'Date' is a datetime column.
scenario_path = '20230915 - RUN/Envoi_Data_Scenario_France_Central_APR_20230918.xlsx'
df_scenario = pd.read_excel(scenario_path).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)
df_scenario.head()

In [None]:
""" Unique processing """
# NOTE: this cell rewrites df_scenario in place, so it must run exactly once per
# load of the workbook (a second run would divide the rates by 100 again).

# Short aliases for the monthly-transformed disposable income columns.
df_scenario['RDB_constant'] = df_scenario['RDB_constant (monthly transformed)']
df_scenario['RDB_courant'] = df_scenario['RDB_courant (monthly transformed)']

# Quarterly columns expressed in percent: zeros are treated as missing,
# back-filled from the next available value, then converted to fractions.
quarterly_percent_columns = [
    "GDP / current prices, % ch yoy / quarterly",
    "GDP / constant prices, % ch yoy / quarterly",
    "Households disposable income / constant prices (deflated by household consumption deflator), % ch yoy / quarterly",
    "Household consumption / constant prices, % ch yoy / quarterly",
    "Public consumption / constant prices, % ch yoy / quarterly",
    "Investment, private, households / constant prices, % ch yoy / quarterly",
    "Imports of goods and services / constant prices, % ch yoy / quarterly",
    "Exports of goods and services / constant prices, % ch yoy / quarterly",
    "CPI index / % ch yoy / quarterly",
    "Investment, private, NFC / constant prices, % ch yoy / quarterly",
    "Non financial companies' saving ratio / constant prices, % of value added / quarterly",
    "Non financial companies' profit ratio / constant prices, % of value added / quarterly",
]
for pct_col in quarterly_percent_columns:
    df_scenario[pct_col] = df_scenario[pct_col].replace(0, np.nan).bfill() / 100

pd.options.display.max_columns = None
df_scenario.head()

In [None]:
# Monthly SNF sight and term deposits, plus the quarterly money-market fund (OPC)
# holdings attached by date, back-filled over the non-matching rows and divided by 1e3.
opc_col = "Actif Toutes dénominations monétaires des SNF vis-à-vis de toutes contreparties - encours d'OPC monétaires"
snf_deposit_cols = ['Date', 'Dépôts à vue des SNF résidentes, encours', 'Dépôts à terme des SNF résidentes, encours']

df_target = df_bdf_monthly[snf_deposit_cols].merge(
    df_bdf_quarterly[["Date", opc_col]], on='Date', how='left'
)
df_target[opc_col] = df_target[opc_col].bfill() / 1e3

In [None]:
### SNF
# Rebuild the "stable part" of SNF sight deposits from a pre-trained model that
# predicts its monthly variation, then attach it to df_target.
model_dav_part_stable_SNF = load(f'models/dav_part_stable_SNF.joblib')
# Model inputs: three rate/inflation differences, the credit flow, and month dummies 3..12.
drivers = ['OAT_10Y_diff', 'EUSW10V3_diff', 'Inflation_diff', 'SNF_flux_credit'] + [f"month_{i}" for i in range(3, 13)]

df_dav_SNF_histo = df_bdf_monthly[['Date', "Crédits à l'investissement accordés aux sociétés non financières résidentes, encours", "Crédits de trésorerie accordés aux sociétés non financières résidentes, encours", "Dépôts à vue des SNF résidentes, encours"]]
df_dav_SNF_histo = df_dav_SNF_histo.merge(df_scenario, on='Date', how='left')
# NOTE(review): df_bdf_monthly was sorted by Date descending when it was built and the
# ascending sort only happens a few lines below, so the .diff() calls here appear to run
# on descending dates (row minus the *following* month). Confirm this matches the
# convention used when the model was trained before changing anything.
# Credit flow = row-to-row change of the sum of investment and treasury credit outstanding.
df_dav_SNF_histo['SNF_flux_credit'] = df_dav_SNF_histo[["Crédits à l'investissement accordés aux sociétés non financières résidentes, encours", "Crédits de trésorerie accordés aux sociétés non financières résidentes, encours"]].sum(axis=1).diff()
df_dav_SNF_histo['OAT_10Y_diff'] = df_dav_SNF_histo['OAT_10Y'].diff()
df_dav_SNF_histo['EUSW10V3_diff'] = df_dav_SNF_histo['EUSW10V3'].diff()
df_dav_SNF_histo['Inflation_diff'] = df_dav_SNF_histo['Inflation'].diff()
# One 0/1 dummy column per calendar month.
for i in range(1,13):
    df_dav_SNF_histo[f"month_{i}"] = (df_dav_SNF_histo['Date'].dt.month == i).astype(int)
# Keep December 2013 onwards, in chronological order (required by the cumulative sum below).
df_dav_SNF_histo = df_dav_SNF_histo[df_dav_SNF_histo['Date'] >= "2013-12"]
df_dav_SNF_histo = df_dav_SNF_histo.sort_values('Date', ascending=True)

# Missing driver values are replaced by 0 before prediction.
df_dav_SNF_histo['dav_part_stable_SNF_diff'] = model_dav_part_stable_SNF.predict(df_dav_SNF_histo[drivers].fillna(0))
# Starting level: observed SNF sight deposits in December 2013 (IndexError if that month is absent).
dav_part_stable_init =  df_dav_SNF_histo[(df_dav_SNF_histo['Date'].dt.year==2013) & (df_dav_SNF_histo['Date'].dt.month==12)]['Dépôts à vue des SNF résidentes, encours'].values[0] 
# Level = starting level + cumulated predicted variations; rows of 2013 contribute 0.
df_dav_SNF_histo['dav_part_stable_SNF_histo'] = dav_part_stable_init + (df_dav_SNF_histo['dav_part_stable_SNF_diff'] * (df_dav_SNF_histo['Date'].dt.year > 2013)).cumsum()

df_target = df_target.merge(df_dav_SNF_histo[['Date', 'dav_part_stable_SNF_histo']], on='Date', how='left')


In [None]:
# Volatile part of SNF sight deposits = observed outstanding minus the stable part;
# missing differences count as 0 and negative ones are floored at 0.
dav_minus_stable = (
    df_target['Dépôts à vue des SNF résidentes, encours']
    - df_target['dav_part_stable_SNF_histo']
)
df_target['SNF_DAV_Vol'] = dav_minus_stable.fillna(0.).clip(lower=0)

# Short aliases for the two other arbitrage targets.
df_target['SNF_DAT'] = df_target['Dépôts à terme des SNF résidentes, encours']
df_target['SNF_OPC'] = df_target["Actif Toutes dénominations monétaires des SNF vis-à-vis de toutes contreparties - encours d'OPC monétaires"]

# Drivers start as an independent copy of the scenario table.
df_drivers = df_scenario.copy()

In [None]:
""" Arbitration model fit """

'''LOADING DATA SOURCES'''

# df_drivers (copy of the scenario table) holds the historical and projected drivers;
# df_target holds the historical SNF deposit targets. Both are built in earlier cells.
df_drivers['Date'] = pd.to_datetime(df_drivers['Date'], format='%YM%m')

# Single frame with drivers and targets side by side; 'n.a.' markers become NaN.
df = df_drivers.merge(df_target, on='Date', how='left').replace('n.a.', np.nan)


'''Feature engineering'''

oat_10y = df['OAT_10Y']
# Spread between LA_rate and the 5-row rolling mean of the 10Y OAT.
df['LA_minus_OAT'] = df['LA_rate'] - oat_10y.rolling(window=5, min_periods=1).mean()
# 3-year and 7-year rolling means of the 10Y OAT (12 rows per year).
for n_years in (3, 7):
    df[f'OAT_10Y_{n_years}YROLLING'] = oat_10y.rolling(window=12*n_years, min_periods=1).mean()


'''Selecting training range and prediction range'''

# The training window `train_range` (bounds included) is set in an earlier cell.


'''Selecting drivers used to make predictions as well as model types'''

# Drivers per target. Suffixes such as '_6MROLLING' are derived from the base
# column at fit time.
target_to_drivers = {
    "SNF_DAV_Vol": ["Euribor_3M"],
    "SNF_DAT": ["OAT_10Y_3YROLLING", "Euribor_3M_6MROLLING"],
    "SNF_OPC": ["OAT_10Y_7YROLLING", "CAC40"]
}
# Denominator used for drivers carrying the '_referenced' suffix.
reference = 'Euribor_3M'

# One shallow regression tree per target.
models = {
    "SNF_DAV_Vol": DecisionTreeRegressor(max_depth=3, min_samples_leaf=5),
    "SNF_DAT": DecisionTreeRegressor(max_depth=3),
    "SNF_OPC": DecisionTreeRegressor(max_depth=3)
}


'''CALIBRAGE ET PREDICTION'''

# Rows inside the training window (bounds included). The explicit copy makes
# df_train an independent frame, so the column assignments below no longer write
# to a slice of df (SettingWithCopyWarning).
df_train = df[(df['Date']<=train_range[1]) & (df['Date']>=train_range[0])].copy()

targets = list(target_to_drivers)
global_shift = 0
# Each target is modelled as its share of the three targets' total.
df_train['total'] = df_train[targets].sum(axis=1)
for (k,v) in target_to_drivers.items():
    df_train[k+'_ratio'] = df_train[k]/df_train['total']
    max_shift = 0
    # Build the derived drivers requested through a name suffix.
    for driver in v:
        if '_shifted_' in driver:
            # '<base>_shifted_<n>': base column lagged by n rows, gaps filled with 0.
            shift_val = int(driver.split('_')[-1])
            df_train[driver] = df_train['_'.join(driver.split('_')[:-2])].shift(shift_val).fillna(0)
            max_shift = max(max_shift, shift_val)
            global_shift = max(global_shift, shift_val)
        elif '_variation' in driver:
            # '<base>_variation': relative change from the previous row.
            base = df_train['_'.join(driver.split('_')[:-1])]
            df_train[driver] = ((base - base.shift(1)) / base.shift(1)).fillna(0.)
        elif '_referenced' in driver:
            # '<base>_referenced': base column divided by the reference rate.
            df_train[driver] = (df_train['_'.join(driver.split('_')[:-1])] / df_train[reference]).fillna(0.)
        elif "_6MROLLING" in driver:
            # '<base>_6MROLLING': 6-row rolling mean of the base column.
            df_train[driver] = df_train['_'.join(driver.split('_')[:-1])].rolling(6, min_periods=1).mean()
    # Skip the first rows, whose lagged drivers are only zero-filled placeholders.
    df_train_shift = df_train[df_train['Date']>=df_train['Date'].min()+ DateOffset(months=max_shift)]
    models[k] = models[k].fit(df_train_shift[v], df_train_shift[k+'_ratio'])
    model = models[k]
    df_train[k+'_ratio_predicted'] = model.predict(df_train[v])
    # A share cannot be negative.
    df_train[k+'_ratio_predicted_adjusted'] = df_train[k+'_ratio_predicted'].clip(lower=0)

# Rescale the predicted shares so they sum to 1, then convert them back to amounts.
standardization_factor = df_train[[x+'_ratio_predicted_adjusted' for x in targets]].sum(axis=1)
for (k,v) in target_to_drivers.items():
    df_train[k+'_ratio_predicted_adjusted'] = df_train[k+'_ratio_predicted_adjusted'] / standardization_factor
    df_train[k+'_predicted'] = df_train[k+'_ratio_predicted_adjusted']*df_train['total']

def _plot_lines(frame, columns, title):
    """Plot `columns` of a Date-indexed frame as one line chart with June/December x ticks."""
    fig, ax = plt.subplots(1, 1)
    fig.set_figheight(4.5)
    fig.set_figwidth(15.2)
    figure = sns.lineplot(data=frame[columns], ax=ax)
    figure.set_title(title)
    ticks = pd.Series(frame.index).dt.strftime('%Y-%m')
    ticks = [x for x in ticks if x[-2:] in ['06', '12']]
    figure.set_xticks(ticks)
    plt.xticks(rotation=90)
    fig.show()


# Rows usable for plotting: drop the leading rows affected by lagged drivers.
df_plot = df_train.copy()
df_plot = df_plot[df_plot['Date']>=df_plot['Date'].min()+ DateOffset(months=global_shift)].set_index('Date')

# The former special case for "Menages_DAV_Vol" was unreachable here (the targets of
# this section are the SNF_* keys of target_to_drivers) and has been dropped.
for k in target_to_drivers.keys():
    # 1) observed vs fitted share
    _plot_lines(df_plot, [k+'_ratio', k+'_ratio_predicted'],
                f"{k} ratio fit : train on [{train_range[0]}, {train_range[1]}]")

    # 2) observed vs fitted amount
    _plot_lines(df_plot, [k, k+'_predicted'],
                f"{k} : train on [{train_range[0]}, {train_range[1]}]")

    # 3) drivers, min-max normalised so they share one axis
    for driver in target_to_drivers[k]:
        df_plot[driver+'_normalized'] = (df_plot[driver] - df_plot[driver].min()) / (df_plot[driver].max() - df_plot[driver].min())
    _plot_lines(df_plot, [d+'_normalized' for d in target_to_drivers[k]],
                f"{k} : Drivers in [{train_range[0]}, {train_range[1]}]")

    # 4) fitted tree; feature names are truncated to 15 characters to keep nodes readable
    fig, ax =plt.subplots(1,1)
    fig.set_figheight(5)
    fig.set_figwidth(16)
    tree.plot_tree(models[k], feature_names=[d[:15] for d in target_to_drivers[k]], ax=ax)
    fig.suptitle(f"{k} : tree representation, train on [{train_range[0]}, {train_range[1]}]", fontsize=20)
    fig.show()



# Persist one fitted tree per target. The directory is created when missing so
# that a fresh checkout does not fail after the whole fit has run.
arbitrage_model_dir = "models/arbitrage_SNF"
os.makedirs(arbitrage_model_dir, exist_ok=True)
for k, v in models.items():
    joblib.dump(v, f"{arbitrage_model_dir}/{k}.joblib")


## 2. Crédit investissement

In [None]:
# Data loading: target (investment credit outstanding) and explanatory variables.

invest_credit_col = "Crédits à l'investissement accordés aux sociétés non financières résidentes, encours"
df_y = df_bdf_monthly[["Date", invest_credit_col]]

# Scenario drivers enriched with the quarterly INSEE series. Each INSEE column is
# back-filled over the unmatched rows and divided by (1e6 * 3)
# (presumably quarterly amounts spread over three months, in millions - TODO confirm).
df_x = pd.read_excel("Data_Scenario.xlsx").merge(df_insee_quarterly, on='Date', how='left')
insee_value_cols = [name for name in df_insee_quarterly.columns if name != 'Date']
for col in insee_value_cols:
    df_x[col] = df_x[col].bfill() / (1e6 * 3)

# Target and drivers side by side, in chronological order.
df = df_y.merge(df_x, on="Date", how="left").sort_values('Date', ascending=True)

In [None]:
# Feature engineering

# Short name for the target column.
df = df.rename(columns={"Crédits à l'investissement accordés aux sociétés non financières résidentes, encours": "Crédit_invest"})

# 10Y OAT minus inflation, and month-over-month change of the outstanding amount.
df['OAT_10Y_minus_inflation'] = df['OAT_10Y'] - df['Inflation']
df['Crédit_invest_diff'] = df['Crédit_invest'].diff()

# Lags 1 to 6 of the outstanding amount.
for lag in range(1, 7):
    df[f'Crédit_invest_shift_{lag}'] = df['Crédit_invest'].shift(lag)


In [None]:
# (start, end) bounds, both included, of the fit and backtest windows.
# NOTE: this overrides the `train_range` used by the arbitrage section above.
train_range = ("2001-01-01", "2018-06-01")
backtest_range = ("2018-07-01", "2019-12-01")

# Explicit copies: prediction columns are added to both frames in later cells, and
# writing to a boolean-filtered slice of df raises SettingWithCopyWarning.
df_train = df[df['Date'].between(train_range[0], train_range[1])].copy()
display(df_train.head())

df_backtest = df[df['Date'].between(backtest_range[0], backtest_range[1])].copy()

In [None]:
# Linear amortisation horizons, in months: 4 years for the stock that exists
# before the training window, 7 years for each month's new production.
amort_stock = 12 * 4
amort = 12 * 7

drivers = ["Investissement des entreprises non financières - Total - Volume aux prix de l'année précédente chaînés - Série CVS-CJO", 'OAT_10Y_minus_inflation']

timesteps = df_train.shape[0]
# Last outstanding amount observed before the training window (df is sorted ascending).
encours_init = df[df["Date"]<train_range[0]]['Crédit_invest'].values[-1]

# Run-off of the initial stock over the training window, floored at 0.
stock_outstanding = [max(encours_init * (1 - i / amort_stock), 0) for i in range(1, timesteps+1)]
target_outstanding = df_train['Crédit_invest'].values

# Part of the outstanding amount that must be explained by new production.
target_np = target_outstanding - stock_outstanding

# Amortisation matrix: entry (i, j) is the share of month i's production still
# outstanding in month j (0 when j < i). It is built by indexing the amortisation
# profile with the age j - i instead of stacking rolled copies in a loop.
np_amort_matrix_init = np.array([max(1 - i / amort, 0) for i in range(timesteps)])
cohort_age = np.arange(timesteps)[None, :] - np.arange(timesteps)[:, None]
np_amort_matrix = np.triu(np_amort_matrix_init[np.clip(cohort_age, 0, None)])

# Monthly new production that reproduces the observed outstanding exactly.
new_prod = np.linalg.solve(np_amort_matrix.T, target_np)

lr = LinearRegression().fit(df_train[drivers], new_prod)

# `dump` is already imported at the top of the notebook; make sure the target
# directory exists before writing.
os.makedirs('models/_new', exist_ok=True)
dump(lr, 'models/_new/credit_investissement.joblib') 

In [None]:
# In-sample check: rebuild the outstanding amount from the fitted new production.
fitted_new_prods = lr.predict(df_train[drivers])
# Row i holds month i's production on the months j >= i, weighted by what is still
# outstanding in month j.
production_mask = np.triu(np.ones((timesteps, timesteps)))
predict_new_prods = production_mask * fitted_new_prods[:, None] * np_amort_matrix

# Outstanding = surviving production of all earlier months + run-off of the initial stock.
predict_outstanding = predict_new_prods.sum(axis=0) + stock_outstanding

df_train['Crédit_invest_predict'] = predict_outstanding

fig, ax = plt.subplots(1, 1, figsize=(15.2, 4.5))
plot_frame = df_train.set_index("Date")[["Crédit_invest", "Crédit_invest_predict"]]
figure = sns.lineplot(data=plot_frame, ax=ax)
figure.set_title(f"fit : train on [{train_range[0]}, {train_range[1]}]")
mape = sk.metrics.mean_absolute_percentage_error(df_train['Crédit_invest'], df_train['Crédit_invest_predict'])
print(f"fit : MAPE of {round(100*mape, 2)}%")
fig.show()

In [None]:
# Backtest: extend the horizon with the backtest months and project the outstanding amount.
# NOTE: this cell overwrites np_amort_matrix, stock_outstanding and predict_new_prods,
# so the in-sample cell above cannot be re-run after it without re-running the fit cell.
timesteps_total = timesteps + df_backtest.shape[0]

# New production: implied values on the training window, model predictions afterwards.
past_new_prods = new_prod
predict_new_prods = lr.predict(df_backtest[drivers])
total_new_prods = np.concatenate([past_new_prods, predict_new_prods])
total_new_prods = np.triu(np.ones((timesteps_total, timesteps_total))) * total_new_prods[:, None]

# Amortisation matrix over the extended horizon: entry (i, j) is the share of month i's
# production still outstanding in month j (0 when j < i). It is built by indexing the
# amortisation profile with the age j - i instead of stacking rolled copies in a loop.
np_amort_matrix_init = np.array([max(1 - i / amort, 0) for i in range(timesteps_total)])
cohort_age = np.arange(timesteps_total)[None, :] - np.arange(timesteps_total)[:, None]
np_amort_matrix = np.triu(np_amort_matrix_init[np.clip(cohort_age, 0, None)])

total_new_prods = total_new_prods * np_amort_matrix
stock_outstanding = [max(encours_init * (1 - i / amort_stock), 0) for i in range(1, timesteps_total+1)]

total_outstanding = np.sum(total_new_prods, axis=0) + stock_outstanding

# Only the months after the training window belong to the backtest.
df_backtest['Crédit_invest_predict'] = total_outstanding[timesteps:]

fig, ax = plt.subplots(1,1)
fig.set_figheight(4.5)
fig.set_figwidth(15.2)
figure = sns.lineplot(data=df_backtest.set_index("Date")[["Crédit_invest", "Crédit_invest_predict"]], ax=ax)
figure.set_title(f"validation : backtest on [{backtest_range[0]}, {backtest_range[1]}]")
mape = sk.metrics.mean_absolute_percentage_error(df_backtest['Crédit_invest'], df_backtest['Crédit_invest_predict'])
# This MAPE is computed on the backtest window, so it is labelled as such (it used to say "fit").
print(f"backtest : MAPE of {round(100*mape, 2)}%")
fig.show()

## 3. Crédit trésorerie

In [None]:
# Data loading: target (treasury credit outstanding) and explanatory variables.

treso_credit_col = "Crédits de trésorerie accordés aux sociétés non financières résidentes, encours"
df_y = df_bdf_monthly[["Date", treso_credit_col]]

# Scenario drivers enriched with the quarterly INSEE series. Each INSEE column is
# back-filled over the unmatched rows and divided by (1e6 * 3)
# (presumably quarterly amounts spread over three months, in millions - TODO confirm).
df_x = pd.read_excel("Data_Scenario.xlsx").merge(df_insee_quarterly, on='Date', how='left')
insee_value_cols = [name for name in df_insee_quarterly.columns if name != 'Date']
for col in insee_value_cols:
    df_x[col] = df_x[col].bfill() / (1e6 * 3)

# Target and drivers side by side, in chronological order.
df = df_y.merge(df_x, on="Date", how="left").sort_values('Date', ascending=True)

In [None]:
# Feature engineering

# Short name for the target column.
treso_rename = {"Crédits de trésorerie accordés aux sociétés non financières résidentes, encours": "Crédit_treso"}
df = df.rename(columns=treso_rename)

In [None]:
# (start, end) bounds, both included, of the fit and backtest windows.
train_range = ("2001-01-01", "2018-06-01")
backtest_range = ("2018-07-01", "2019-12-01")

# Explicit copies: prediction columns are added to both frames in later cells, and
# writing to a boolean-filtered slice of df raises SettingWithCopyWarning.
df_train = df[df['Date'].between(train_range[0], train_range[1])].copy()
display(df_train.head())

df_backtest = df[df['Date'].between(backtest_range[0], backtest_range[1])].copy()

In [None]:
# NOTE(review): amort_stock and amort are not used by the treasury-credit model in
# this cell; they are kept because cells outside this view may still read them.
amort_stock = 12 * 4
amort = 12 * 7

drivers = ["Investissement des entreprises non financières - Total - Valeur aux prix courants - Série CVS-CJO", 'OAT_10Y']

# Plain linear regression of the outstanding amount on the two drivers.
lr = LinearRegression().fit(df_train[drivers], df_train["Crédit_treso"])

# `dump` is already imported at the top of the notebook; make sure the target
# directory exists before writing.
os.makedirs('models/_new', exist_ok=True)
dump(lr, 'models/_new/credit_tresorerie.joblib') 

In [None]:
# In-sample predictions. assign() returns a new frame, so the column is never
# written into a slice of `df` (which raises SettingWithCopyWarning).
df_train = df_train.assign(**{'Crédit_treso_predict': lr.predict(df_train[drivers])})

# Observed vs. fitted outstanding over the training window.
fig, ax = plt.subplots(1, 1, figsize=(15.2, 4.5))
figure = sns.lineplot(data=df_train.set_index("Date")[["Crédit_treso", "Crédit_treso_predict"]], ax=ax)
figure.set_title(f"fit : train on [{train_range[0]}, {train_range[1]}]")
mape = sk.metrics.mean_absolute_percentage_error(df_train['Crédit_treso'], df_train['Crédit_treso_predict'])
print(f"fit : MAPE of {round(100*mape, 2)}%")
# plt.show() renders through the notebook backend; Figure.show() needs a GUI backend.
plt.show()

In [None]:
# Out-of-sample predictions. assign() returns a new frame, so the column is never
# written into a slice of `df` (which raises SettingWithCopyWarning).
df_backtest = df_backtest.assign(**{'Crédit_treso_predict': lr.predict(df_backtest[drivers])})

# Observed vs. predicted outstanding over the backtest window.
fig, ax = plt.subplots(1, 1, figsize=(15.2, 4.5))
figure = sns.lineplot(data=df_backtest.set_index("Date")[["Crédit_treso", "Crédit_treso_predict"]], ax=ax)
figure.set_title(f"validation : backtest on [{backtest_range[0]}, {backtest_range[1]}]")
mape = sk.metrics.mean_absolute_percentage_error(df_backtest['Crédit_treso'], df_backtest['Crédit_treso_predict'])
# This score is computed on the backtest window, so it is labelled as validation
# (the previous "fit" label was inherited from the training cell).
print(f"validation : MAPE of {round(100*mape, 2)}%")
# plt.show() renders through the notebook backend; Figure.show() needs a GUI backend.
plt.show()

# II. Ménages

## 1. Arbitrage

In [None]:
# Monthly BdF series, enriched with the quarterly ones (left join on Date),
# in chronological order.
df_target = (
    df_bdf_monthly.copy()
    .merge(df_bdf_quarterly, on='Date', how='left')
    .sort_values('Date', ascending=True)
)

In [None]:
""" Fitting DAV PART STABLE model """

# Driver: 12-observation rolling mean of RDB_courant; the leading NaNs are back-filled.
df_scenario['RDB_courant_1Yrolling'] = df_scenario['RDB_courant'].rolling(12).mean().bfill()

# Calibration window (bounds included).
start = pd.to_datetime('2003-12-01')
end = pd.to_datetime('2015-05-01')

# Driver and target live in two different frames. Join them on Date before
# extracting the arrays, so that every (x, y) pair refers to the same month even
# if one frame has a missing or extra date inside the window.
dav_col = 'Dépôts à vue des ménages et ISBLSM résidents, encours'
scenario_window = df_scenario.loc[df_scenario['Date'].between(start, end), ['Date', 'RDB_courant_1Yrolling']]
target_window = df_bdf_monthly.loc[df_bdf_monthly['Date'].between(start, end), ['Date', dav_col]]
train_pairs = scenario_window.merge(target_window, on='Date', how='inner').sort_values('Date', ascending=True)
x_train = train_pairs['RDB_courant_1Yrolling'].values
y_train = train_pairs[dav_col].values

# Single-feature linear regression: sklearn expects a 2-D feature array, hence the reshape.
model_dav_part_stable = LinearRegression()
model_dav_part_stable.fit(x_train.reshape(-1, 1), y_train)

print(model_dav_part_stable.intercept_, model_dav_part_stable.coef_)

In [None]:
### Ménages
# Load the persisted model; this replaces any `model_dav_part_stable` already in memory.
# NOTE: joblib.load unpickles the file — only load model files from a trusted source.
model_dav_part_stable = load('models/dav_part_stable.joblib')

print(model_dav_part_stable.intercept_, model_dav_part_stable.coef_) #Please note coefficient might slightly change due to BdF data inconsistency over time

# Driver (12-observation rolling mean, leading NaNs back-filled) and model output
# for every scenario date.
df_scenario['RDB_courant_1Yrolling'] = df_scenario['RDB_courant'].rolling(12).mean().bfill()
df_scenario['dav_part_stable'] = model_dav_part_stable.predict(df_scenario[['RDB_courant_1Yrolling']])

# Drop any 'dav_part_stable' left by a previous run of this cell: merging it a
# second time would create 'dav_part_stable_x' / 'dav_part_stable_y' and the plain
# column name read further down would no longer exist.
df_target = (
    df_target
    .drop(columns=['dav_part_stable'], errors='ignore')
    .merge(df_scenario[['Date', 'dav_part_stable']], on='Date', how='left')
)

In [None]:
# Back-cast the missing Livret A outstanding from the LDD series.
livret_a_col = "Livrets A des ménages et ISBLSM résidents, encours"
ldd_col = "Livrets de développement durable et solidaire des ménages résidents, encours"

# Rows where Livret A is not reported (evaluated before the column is overwritten below).
livret_a_missing = df_target[livret_a_col].isnull()

# Starting level: each missing row takes the next reported Livret A value.
df_target["__" + livret_a_col] = df_target[livret_a_col].bfill()

# Period-over-period LDD ratio (previous / current).
# NOTE(review): with shift(1) the back-cast follows LDD with a one-row lag — confirm this is intended.
ldd_ratio = df_target[ldd_col].shift(1) / df_target[ldd_col]

# Coefficient is exactly 1 where Livret A is observed and the LDD ratio where it is
# missing. Using where() instead of boolean arithmetic keeps observed rows at 1 even
# when the LDD ratio is NaN (False * NaN is NaN and would erase the observed value).
df_target["__coeff"] = ldd_ratio.where(livret_a_missing, 1.0)

# Reverse cumulative product: each row is scaled by all the ratios from that row onward.
df_target["__coeff"] = df_target['__coeff'][::-1].cumprod()[::-1]
df_target[livret_a_col] = df_target["__" + livret_a_col] * df_target['__coeff']

In [None]:

# Source column names of the Banque de France series combined below.
col_dav = "Dépôts à vue des ménages et ISBLSM résidents, encours"
col_dat = 'Dépôts à terme des ménages et ISBLSM résidents, encours'
col_pel = 'PEL des ménages résidents, encours'
col_livrets = "Livrets d'épargne des ménages et ISBLSM résidents, encours"
col_la = "Livrets A des ménages et ISBLSM résidents, encours"
col_ldd = "Livrets de développement durable et solidaire des ménages résidents, encours"
col_monnaie = 'Actif Toutes dénominations monétaires des ménages vis-à-vis de toutes contreparties - encours de monnaie et dépôts'
col_av_euro = 'Assurance-vie support euro, actif des ménages, encours trimestriel'
col_av_uc = 'Assurance-vie en unités de compte, actif des ménages, encours trimestriel'

# Livret A + LDD is used twice: as its own target and as the part removed from the savings books.
la_plus_ldd = df_target[col_la] + df_target[col_ldd]

# Sight deposits, split into the modelled stable part and the remainder.
df_target['Menages_DAV'] = df_target[col_dav]
df_target['DAV_STABLE'] = df_target['dav_part_stable']
df_target['Menages_DAV_Vol'] = df_target[col_dav] - df_target['DAV_STABLE']
# Term deposits excluding PEL, and PEL on its own.
df_target['Menages_DAT'] = df_target[col_dat] - df_target[col_pel]
df_target['Menages_PEL'] = df_target[col_pel]
# Savings books excluding Livret A and LDD, and Livret A + LDD on their own.
df_target['Menages_Livrets'] = df_target[col_livrets] - la_plus_ldd
df_target['Menages_LA_LDD'] = la_plus_ldd
# Rescaled money-and-deposits asset series minus the three deposit aggregates.
df_target['Autres'] = df_target[col_monnaie] / (3 * 1e3) - (df_target[col_dav] + df_target[col_dat] + df_target[col_livrets])
# Quarterly life-insurance series, back-filled onto the monthly rows.
df_target['Menages_AV_Euro_Corrige'] = df_target[col_av_euro].bfill()
df_target['Menages_AV_UC'] = df_target[col_av_uc].bfill()

# Drivers frame: independent copy of the scenario frame.
df_drivers = df_scenario.copy()

In [None]:
# Ad-hoc display of the Date and LDD columns of `df_train`.
# NOTE(review): when the notebook runs top to bottom, `df_train` here is still the
# frame built in an earlier section; presumably this cell was run after the
# arbitration cell further down, which rebuilds `df_train` — confirm or move it.
df_train[['Date', "Livrets de développement durable et solidaire des ménages résidents, encours"]]

In [None]:
# Ad-hoc display of the raw (not back-filled) unit-linked life-insurance series.
df_target['Assurance-vie en unités de compte, actif des ménages, encours trimestriel']

In [None]:
# Ad-hoc display of the drivers frame (a copy of `df_scenario`).
df_drivers

In [None]:
# Share of missing values in each column of `df_train`.
# NOTE(review): presumably meant for the `df_train` rebuilt by the arbitration cell
# further down — confirm which frame is expected here.
df_train.isnull().mean()

In [None]:
# Ad-hoc display of the euro life-insurance ratio.
# NOTE(review): the '<target>_ratio' columns are only created by the arbitration
# cell further down, so this cell depends on that cell having been run first.
df_train[['Date', 'Menages_AV_Euro_Corrige_ratio']]

In [None]:
""" Arbitration model fit """

'''LOADING DATA SOURCES'''

scenario = 'Central' # Stagflation # Adverse

# Building a common dataframe containing drivers and targets
df = df_drivers.merge(df_target, on='Date', how='left').replace('n.a.', np.nan)


'''Feature engineering'''

# Livret A rate minus the 5-observation rolling mean of the 10Y OAT.
df['LA_minus_OAT'] = df['LA_rate'] - df['OAT_10Y'].rolling(window=5, min_periods=1).mean()
# 36-observation rolling mean of the 10Y OAT.
df['OAT_10Y_3YROLLING'] = df['OAT_10Y'].rolling(window=12*3, min_periods=1).mean()
df['Menages_DAV_Stable'] = df['Menages_DAV'] - df['Menages_DAV_Vol']
# Household consumption: filled across the rows the join left empty, then divided by (1e6 * 3).
df = df.merge(df_insee_quarterly[['Date', 'Dépenses de consommation des ménages - Total - Valeur aux prix courants - Série CVS-CJO']], on='Date', how='left')
df['Consommation_menages_courant'] = df['Dépenses de consommation des ménages - Total - Valeur aux prix courants - Série CVS-CJO'].bfill().ffill() / (1e6 * 3)
# Savings = RDB_courant minus consumption, in level and as a share of RDB_courant.
df['Epargne_menages_courant'] = df['RDB_courant'] - df['Consommation_menages_courant']
df['Epargne_menages_ratio'] = df['Epargne_menages_courant'] / df['RDB_courant']


'''Selecting training range and prediction range'''

# Picking historical training range in years/date (bounds included)
# NOTE(review): backtest_range lies inside train_range, so a backtest on it is
# in-sample — confirm this overlap is intended.
train_range = ('2003-03-01', '2022-09-01')
backtest_range = ('2022-04-01', '2022-09-01')
prediction_range = ('2022-06-01', '2027-12-01')


'''Selecting drivers used to make predictions as well as model types'''

# Driver names may carry a suffix (_shifted_<n>, _variation, _referenced,
# _6MROLLING); the calibration step derives those columns from the base series.
target_to_drivers = {
    "Menages_AV_Euro_Corrige": ["Euribor_3M_6MROLLING", "OAT_10Y_3YROLLING"],
    "Menages_AV_UC": ["Epargne_menages_ratio_6MROLLING", "CAC40_6MROLLING"],
    "Menages_DAT": ["Epargne_menages_ratio_6MROLLING"],
    "Menages_DAV_Vol": ['Euribor_3M_6MROLLING'], 
    "Menages_LA_LDD": ["LA_rate", 'Inflation'],
    "Menages_Livrets": ["Inflation", "LA_rate"],
    "Menages_PEL": ["Epargne_menages_ratio_6MROLLING", "LA_rate_6MROLLING"]
}
# Denominator used for '_referenced' drivers.
reference = 'Euribor_3M'

# Fixed seed: scikit-learn permutes the features at every split, so without it two
# equally good splits can be resolved differently from one run to the next.
TREE_RANDOM_STATE = 0

models = {
    "Menages_AV_Euro_Corrige": DecisionTreeRegressor(max_depth=3, random_state=TREE_RANDOM_STATE),
    "Menages_AV_UC": DecisionTreeRegressor(max_depth=3, random_state=TREE_RANDOM_STATE),
    "Menages_DAT": DecisionTreeRegressor(max_depth=3, random_state=TREE_RANDOM_STATE),
    "Menages_DAV_Vol": DecisionTreeRegressor(max_depth=3, min_samples_leaf=5, random_state=TREE_RANDOM_STATE),
    "Menages_LA_LDD": DecisionTreeRegressor(max_depth=3, random_state=TREE_RANDOM_STATE),
    "Menages_Livrets": DecisionTreeRegressor(max_depth=3, random_state=TREE_RANDOM_STATE),
    "Menages_PEL": DecisionTreeRegressor(max_depth=3, random_state=TREE_RANDOM_STATE)
}


'''CALIBRAGE ET PREDICTION'''

# Independent copy of the training window: many columns are added below, and
# writing them into a slice of `df` raises SettingWithCopyWarning.
df_train = df[(df['Date']<=train_range[1]) & (df['Date']>=train_range[0])].copy()

targets = list(target_to_drivers)

global_shift = 0
# Each target is modelled as a share of the sum of all targets.
df_train['total'] = df_train[targets].sum(axis=1)
for (k,v) in target_to_drivers.items():
    df_train[k+'_ratio'] = df_train[k]/df_train['total']
    max_shift = 0
    # Derive suffixed drivers from their base series; unsuffixed drivers are used as they are.
    for driver in v:
        name_parts = driver.split('_')
        if '_shifted_' in driver:
            # '<base>_shifted_<n>': base series lagged by n rows, gaps filled with 0.
            shift_val = int(name_parts[-1])
            df_train[driver] = df_train['_'.join(name_parts[:-2])].shift(shift_val).fillna(0)
            max_shift = max(max_shift, shift_val)
            global_shift = max(global_shift, shift_val)
        elif '_variation' in driver:
            # '<base>_variation': relative change versus the previous row.
            base_series = df_train['_'.join(name_parts[:-1])]
            df_train[driver] = ((base_series - base_series.shift(1)) / base_series.shift(1)).fillna(0.)
        elif '_referenced' in driver:
            # '<base>_referenced': base series divided by the reference series.
            df_train[driver] = (df_train['_'.join(name_parts[:-1])] / df_train[reference]).fillna(0.)
        elif "_6MROLLING" in driver:
            # '<base>_6MROLLING': 6-row rolling mean (partial windows allowed).
            df_train[driver] = df_train['_'.join(name_parts[:-1])].rolling(6, min_periods=1).mean()
    # Skip the first rows, where lagged drivers were filled with 0.
    df_train_shift = df_train[df_train['Date']>=df_train['Date'].min()+ DateOffset(months=max_shift)]
    if k=="Menages_DAV_Vol":
        # This target is only fitted from 2015-04-01 onward.
        df_train_shift = df_train_shift[df_train_shift['Date']>="2015-04-01"]
    models[k] = models[k].fit(df_train_shift[v], df_train_shift[k+'_ratio'])
    model = models[k]
    df_train[k+'_ratio_predicted'] = model.predict(df_train[v])
    # Negative shares are floored at 0.
    df_train[k+'_ratio_predicted_adjusted'] = df_train[k+'_ratio_predicted'].clip(lower=0)

# Rescale the floored shares so they sum to 1 on every row, then convert back to levels.
standardization_factor = df_train[[name+'_ratio_predicted_adjusted' for name in targets]].sum(axis=1)
for (k,v) in target_to_drivers.items():
    df_train[k+'_ratio_predicted_adjusted'] = df_train[k+'_ratio_predicted_adjusted'] / standardization_factor
    df_train[k+'_predicted'] = df_train[k+'_ratio_predicted_adjusted']*df_train['total']

def _plot_lines(frame, columns, title):
    """Draw `columns` of the Date-indexed `frame` as one line chart.

    Ticks are placed on the June and December dates present in the index and
    their labels are rotated vertically. The figure is rendered and released
    with plt.show(), so figures do not pile up across loop iterations.
    """
    fig, ax = plt.subplots(1, 1)
    fig.set_figheight(4.5)
    fig.set_figwidth(15.2)
    axis = sns.lineplot(data=frame[columns], ax=ax)
    axis.set_title(title)
    months = pd.Series(frame.index).dt.strftime('%Y-%m')
    axis.set_xticks([month for month in months if month[-2:] in ['06', '12']])
    axis.tick_params(axis='x', labelrotation=90)
    plt.show()


df_plot = df_train.copy()
# Skip the first rows, where lagged drivers were filled with 0.
df_plot = df_plot[df_plot['Date']>=df_plot['Date'].min()+ DateOffset(months=global_shift)].set_index('Date')
for k in target_to_drivers.keys():
    # Menages_DAV_Vol is fitted (and therefore shown) from 2015-04-01 only;
    # every other target uses the whole training range.
    is_dav_vol = k=="Menages_DAV_Vol"
    window_start = "2015-04-01" if is_dav_vol else train_range[0]

    # Min-max scale the drivers over the whole plotting frame so they share one axis.
    for driver in target_to_drivers[k]:
        df_plot[driver+'_normalized'] = (df_plot[driver] - df_plot[driver].min()) / (df_plot[driver].max() - df_plot[driver].min())

    window = df_plot[df_plot.index>="2015-04-01"] if is_dav_vol else df_plot

    # 1. Observed vs. predicted share.
    _plot_lines(window, [k+'_ratio', k+'_ratio_predicted'], f"{k} ratio fit : train on [{window_start}, {train_range[1]}]")
    # 2. Observed vs. predicted level.
    _plot_lines(window, [k, k+'_predicted'], f"{k} : train on [{window_start}, {train_range[1]}]")
    # 3. Normalized drivers.
    _plot_lines(window, [d+'_normalized' for d in target_to_drivers[k]], f"{k} : Drivers in [{window_start}, {train_range[1]}]")

    # 4. Fitted tree; feature names are truncated to keep the node boxes readable.
    fig, ax =plt.subplots(1,1)
    fig.set_figheight(5)
    fig.set_figwidth(16)
    name_width = 12 if is_dav_vol else 15
    tree.plot_tree(models[k], feature_names=[d[:name_width] for d in target_to_drivers[k]], ax=ax)
    fig.suptitle(f"{k} : tree representation, train on [{window_start}, {train_range[1]}]", fontsize=20)
    # plt.show() renders through the notebook backend; Figure.show() needs a GUI backend.
    plt.show()




