# Ressources

In [1]:
import pandas as pd
from sklearn.metrics import make_scorer, confusion_matrix
import pickle
import numpy as np
from imblearn import pipeline
import shap

# API avec Dash (en lien avec plotly express)

# https://plotly.com/dash/app-manager/
# https://medium.com/@olegkomarov_77860/how-to-embed-a-dash-app-into-an-existing-flask-app-ea05d7a2210b
# https://towardsdatascience.com/web-visualization-with-plotly-and-flask-3660abf9c946
# https://vikinganalytics.github.io/daeploy-docs/1.0.1/content/special_usecases/ui_plotly_dash.html#plotly-dash-application-as-part-of-a-sdk-based-application



from dash import Dash, dcc, html, Input, Output
import plotly.express as px

# Data

In [84]:
# Load the client table used by the API from a CSV file (relative path).
df_api = pd.read_csv(filepath_or_buffer='api/data_api.csv')

# On modifie la data qui servira à l'API

In [85]:
# Shape the table into the payload used by the API: drop the TARGET column,
# index the rows by SK_ID_CURR, keep the first 100 rows, and convert the result
# to a {SK_ID_CURR: {column: value}} dictionary.
df_api = (
    df_api
    .drop(columns=['TARGET'])
    .set_index('SK_ID_CURR')
    .iloc[:100]  # only a sample is kept for the example
    .to_dict('index')
)

df_api

{100002: {'PAYMENT_RATE': 0.0607492667810303,
  'EXT_SOURCE_1': 0.0830369673913225,
  'EXT_SOURCE_3': 0.1393757800997895,
  'EXT_SOURCE_2': 0.2629485927471776,
  'DAYS_BIRTH': 9461,
  'AMT_ANNUITY': 24700.5,
  'DAYS_EMPLOYED': -637.0,
  'APPROVED_CNT_PAYMENT_MEAN': 24.0,
  'DAYS_ID_PUBLISH': -2120,
  'INCOME_CREDIT_PERC': 0.1219777777777777,
  'ACTIVE_DAYS_CREDIT_MAX': -103.0,
  'INSTAL_DAYS_ENTRY_PAYMENT_MAX': -49.0,
  'INSTAL_DPD_MEAN': 0.0,
  'DAYS_REGISTRATION': -3648.0,
  'DAYS_EMPLOYED_PERC': 0.0673290349857309,
  'ACTIVE_DAYS_CREDIT_ENDDATE_MIN': 780.0,
  'AMT_CREDIT': 406597.5,
  'PREV_CNT_PAYMENT_MEAN': 24.0,
  'AMT_GOODS_PRICE': 351000.0,
  'INSTAL_AMT_PAYMENT_SUM': 219625.695,
  'REGION_POPULATION_RELATIVE': 0.018801,
  'INSTAL_DBD_SUM': 388.0,
  'DAYS_LAST_PHONE_CHANGE': -1134.0,
  'BURO_AMT_CREDIT_MAX_OVERDUE_MEAN': 1681.029,
  'CLOSED_DAYS_CREDIT_MAX': -476.0,
  'OWN_CAR_AGE': nan,
  'CLOSED_DAYS_CREDIT_ENDDATE_MAX': 85.0,
  'APPROVED_DAYS_DECISION_MAX': -606.0,
  'POS_MO

In [86]:
# Serialize the sampled client dictionary to the backend data folder.
with open('api/backend/data/data.pkl', mode='wb') as pickle_out:
    pickle.dump(df_api, pickle_out)

# Pipeline

In [87]:
# Reload the client dictionary from the pickle written by the previous cell.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('api/backend/data/data.pkl', 'rb') as f:
    fichier = pickle.load(f)

# Build the DataFrame from the object that was just loaded instead of reading a
# second pickle from a different location ('api/backend/data.pkl'), which no
# cell in this notebook writes. The dictionary is keyed by client id, so the
# frame is transposed to get one row per client.
df_data = pd.DataFrame(fichier).transpose()
# ---------------------- Pipeline

def cout_metier(y_test, pred_test_y):
    """Business-cost score computed from the binary confusion matrix.

    Each cell of the confusion matrix is weighted (TN: +1, FP: 0, FN: -10,
    TP: 0) and the weighted sum is divided by the number of samples.

    Parameters
    ----------
    y_test : array-like
        True labels. Assumed to be encoded as 0/1 -- TODO confirm.
    pred_test_y : array-like
        Predicted labels, using the same encoding.

    Returns
    -------
    float
        Weighted confusion-matrix sum divided by the total sample count.
    """
    poids_tn = 1  # reward clients correctly identified as able to repay
    poids_fp = 0
    poids_fn = -10  # heavily penalise non-repaying clients the model fails to detect
    poids_tp = 0
    # labels=[0, 1] forces a 2x2 matrix even when a fold or sample contains a
    # single class; without it ravel() yields one value and the unpacking fails.
    conf_mat = confusion_matrix(y_test, pred_test_y, labels=[0, 1])
    tn, fp, fn, tp = conf_mat.ravel()
    total = tn + fp + fn + tp

    return (tn * poids_tn + fp * poids_fp + fn * poids_fn + tp * poids_tp) / total


# Wrap the business-cost function as a scikit-learn scorer.
metric_custom = make_scorer(cout_metier)


# Load the pickled pipeline object. The context manager closes the file handle,
# which the bare pickle.load(open(...)) form left open.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
url_pipeline = 'pipeline/pipeline_perso_random_iter50.pkl'
with open(url_pipeline, 'rb') as f:
    pipe = pickle.load(f)

# Test Shap

In [88]:
import requests
import json

def collect_donnees(lien):
    """Fetch a JSON payload from ``lien`` and return it as a DataFrame.

    NOTE(review): a later cell of this notebook defines ``collect_donnees``
    again with extra parameters; that definition replaces this one once it runs.

    Parameters
    ----------
    lien : str
        URL queried with an HTTP GET (e.g. the Flask API at
        http://127.0.0.1:5000/).

    Returns
    -------
    pandas.DataFrame
        Frame built from the JSON body with ``orient="index"``, then transposed.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(lien)
    # Fail on HTTP errors instead of building a DataFrame from an error payload.
    response.raise_for_status()

    data_table = pd.DataFrame.from_dict(response.json(), orient="index")
    return data_table.transpose()

In [89]:
# response = requests.get(f'https://kevin-oc-api.herokuapp.com/client/all/').json()
# response = np.asarray(json.loads(response))
# response

In [90]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as f:
        return pickle.load(f)


# Pre-computed SHAP artefacts stored under api/frontend/shap_model.
explainer = _load_pickle('api/frontend/shap_model/explainer.pkl')
shap_values = _load_pickle('api/frontend/shap_model/values.pkl')
expected_values = _load_pickle('api/frontend/shap_model/expected_values.pkl')
shap_value_decision0 = _load_pickle('api/frontend/shap_model/decision_plot0.pkl')
shap_value_decision1 = _load_pickle('api/frontend/shap_model/decision_plot1.pkl')

In [91]:
# shap.initjs()
# df_shap = collect_donnees('https://kevin-oc-api.herokuapp.com/client/100002')
# df_shap.drop(['PROBABILITY'], axis=1, inplace=True)
# df_shap

In [92]:
# data = collect_donnees('https://kevin-oc-api.herokuapp.com/alldata/?remboursement=false').transpose()

In [93]:
def isNan(num):
    """Return the result of comparing ``num`` with itself using ``!=``.

    A float NaN is unequal to itself, so the result is True for NaN and False
    for values that compare equal to themselves (ordinary numbers, strings,
    None).
    """
    differs_from_itself = num != num
    return differs_from_itself

In [94]:
# Wrap the business-cost function as a scikit-learn scorer.
metric_custom = make_scorer(cout_metier)

# Load the pickled pipeline object. The context manager closes the file handle,
# which the bare pickle.load(open(...)) form left open.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
url_pipeline = './pipeline/pipeline_perso_random_iter50.pkl'
with open(url_pipeline, 'rb') as f:
    pipe = pickle.load(f)

# Original author's note: pipe.best_estimator_[0].transform makes use of the
# IterativeImputer (not verifiable from this cell alone).

def predict(dict):
    """Return the pipeline's class-1 probability for one client, rounded to 2 decimals.

    Parameters
    ----------
    dict : dict
        Mapping of feature name to value for a single client. The parameter
        name shadows the builtin and is kept only for caller compatibility.
        It may carry a 'PROBABILITY' entry left by a previous prediction and
        'Indisp' placeholders for missing values.

    Returns
    -------
    float
        ``pipe.predict_proba(...)[:, 1]`` for the client, rounded to 2 decimals.
    """
    # Drop any previously stored prediction without mutating the caller's
    # dictionary and without relying on a hard-coded feature count.
    features = {key: value for key, value in dict.items() if key != 'PROBABILITY'}

    df = pd.DataFrame.from_dict(features, orient="index").transpose()
    # Use an explicit NaN: before pandas 1.4, replace('Indisp', None) ignored the
    # None and forward-filled instead of inserting a missing value.
    df = df.replace('Indisp', np.nan)
    # Column 1 is the probability of class 1. The original comment calls it the
    # repayment probability -- TODO confirm what class 1 encodes.
    proba = pipe.predict_proba(df)[:, 1]
    return round(np.array(proba)[0], 2)

In [12]:
def positive_nombre(x):
    """Return ``x`` with its sign flipped when it is a negative number.

    Python ints/floats and NumPy integer/floating scalars are handled; any
    other value (for example the 'Indisp' placeholder string or None) is
    returned unchanged.
    """
    # isinstance (rather than an exact type() comparison) also accepts NumPy
    # scalars such as np.float64 and np.int64.
    is_number = isinstance(x, (int, float, np.integer, np.floating))
    if is_number and x < 0:
        return -x
    return x

def collect_donnees(lien, shap:bool=False, client_connu:bool=True):
    """Build a client DataFrame from the API or from an in-memory dictionary.

    Parameters
    ----------
    lien : str or dict
        URL queried with an HTTP GET when ``client_connu`` is True; otherwise
        a dictionary passed directly to ``DataFrame.from_dict``.
    shap : bool, default False
        When True, the 'PROBABILITY' column is removed from the result. The
        name hides the ``shap`` module inside this function and is kept only
        for caller compatibility.
    client_connu : bool, default True
        Selects between fetching ``lien`` over HTTP and using it as data.

    Returns
    -------
    pandas.DataFrame
        Frame with features as columns, where negative numbers in DAYS_BIRTH,
        DAYS_ID_PUBLISH and DAYS_EMPLOYED have been made positive.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the DAYS_* columns cannot be found in either orientation, or if
        ``shap`` is True and there is no 'PROBABILITY' column.
    """
    if client_connu:
        response = requests.get(lien)
        # Fail on HTTP errors instead of building a DataFrame from an error payload.
        response.raise_for_status()
        payload = response.json()
    else:
        payload = lien

    data_table = pd.DataFrame.from_dict(payload, orient="index").transpose()

    colonnes_jours = ['DAYS_BIRTH', 'DAYS_ID_PUBLISH', 'DAYS_EMPLOYED']

    # Depending on the payload, the features may end up on the index instead
    # of the columns. Fix the orientation once, before touching any column,
    # rather than flipping the frame from inside an exception handler.
    if not all(col in data_table.columns for col in colonnes_jours):
        data_table = data_table.transpose()

    # Negative numbers in these columns are turned positive so that the
    # charts are easier to read.
    for col in colonnes_jours:
        data_table[col] = data_table[col].apply(positive_nombre)

    if shap:
        data_table = data_table.drop(columns=['PROBABILITY'])

    return data_table

In [13]:
import requests
# data_avg_rembourse = collect_donnees('https://kevin-oc-api.herokuapp.com/client/client_avg/?remboursement=true')
# data_avg_no_rembourse = collect_donnees('https://kevin-oc-api.herokuapp.com/client/client_avg/?remboursement=false')
# Fetch the two full client tables from the deployed API: one request with
# remboursement=true and one with remboursement=false.
data_all_rembourse = collect_donnees(
    lien='https://kevin-oc-api.herokuapp.com/alldata/?remboursement=true'
)
data_all_no_rembourse = collect_donnees(
    lien='https://kevin-oc-api.herokuapp.com/alldata/?remboursement=false'
)

In [14]:
# Display the DataFrame fetched with remboursement=true as the cell output.
data_all_rembourse

Unnamed: 0,PAYMENT_RATE,EXT_SOURCE_1,EXT_SOURCE_3,EXT_SOURCE_2,DAYS_BIRTH,AMT_ANNUITY,DAYS_EMPLOYED,APPROVED_CNT_PAYMENT_MEAN,DAYS_ID_PUBLISH,INCOME_CREDIT_PERC,...,BURO_DAYS_CREDIT_MAX,BURO_DAYS_CREDIT_ENDDATE_MAX,INSTAL_AMT_PAYMENT_MIN,ACTIVE_DAYS_CREDIT_ENDDATE_MAX,ACTIVE_DAYS_CREDIT_MEAN,INSTAL_DBD_MAX,CLOSED_AMT_CREDIT_SUM_MEAN,BURO_AMT_CREDIT_SUM_DEBT_MEAN,ACTIVE_DAYS_CREDIT_ENDDATE_MEAN,PROBABILITY
100003,0.027598,0.311267,Indisp,0.622246,16765,35698.5,1188.0,10.0,291,0.132217,...,-606.0,1216.0,6662.97,1216.0,-606.0,14.0,69133.5,0.0,1216.0,0.31
100004,0.05,Indisp,0.729567,0.555912,19046,6750.0,225.0,4.0,2531,0.1,...,-408.0,-382.0,5357.25,Indisp,Indisp,11.0,94518.9,0.0,Indisp,0.23
100006,0.094941,Indisp,Indisp,0.650442,19005,29686.5,3039.0,18.0,2437,0.2199,...,Indisp,Indisp,2482.92,Indisp,Indisp,77.0,Indisp,Indisp,Indisp,0.31
100007,0.042623,Indisp,Indisp,0.322738,19932,21865.5,3038.0,20.666667,3458,0.179963,...,-1149.0,-783.0,0.18,Indisp,Indisp,31.0,146250.0,0.0,Indisp,0.41
100008,0.056101,Indisp,0.621226,0.354225,16941,27517.5,1588.0,14.0,477,0.277955,...,-78.0,471.0,227.16,471.0,-78.0,28.0,100419.75,80019.0,471.0,0.29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100110,0.067173,Indisp,0.328063,0.640549,10744,25065.0,741.0,10.5,2381,0.185667,...,-86.0,218.0,8232.3,218.0,-86.0,29.0,Indisp,52820.595,218.0,0.47
100111,0.032408,0.563323,0.228883,0.608604,10485,27954.0,1249.0,11.2,3110,0.24848,...,-270.0,9233.0,1897.65,98.0,-325.333333,85.0,242554.635,13009.03875,98.0,0.36
100113,0.0496,0.475762,0.78988,0.593019,15137,6696.0,7980.0,9.0,4576,0.087529,...,-753.0,1073.0,6.615,1073.0,-776.5,40.0,43722.0,133789.5,1049.5,0.22
100116,0.035575,0.671272,0.071055,0.700274,14751,32017.5,6737.0,11.2,4788,0.131759,...,-104.0,31066.0,9.81,31066.0,-662.875,95.0,352153.525,142965.518824,4146.75,0.46
