In [2]:
import pandas as pd
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go

from lime.lime_tabular import LimeTabularExplainer

from lime.lime_tabular import LimeTabularExplainer
from sklearn.neighbors import NearestNeighbors

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler 
from sklearn.impute import SimpleImputer

In [3]:
data = pd.read_csv('df_feature_importance.csv', index_col=0)
data.head()

Unnamed: 0_level_0,adaboost_proba,adaboost_result,DAYS_BIRTH,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE,DAYS_EMPLOYED,REGION_POPULATION_RELATIVE,FLAG_EMAIL,HOUR_APPR_PROCESS_START,FLAG_DOCUMENT_6,...,weekday_process_SATURDAY,type_suite_Family,flag_realty_Y,flag_car_Y,anciennete_pro/age,credit/revenu,credit/revenu/age,EXT_SOURCE_2/credit,EXT_SOURCE_2/age,nb_enf/age
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100001,0.491635,0,-19241,-812,-1740.0,-2329,0.01885,1,18,0,...,0,0,1,0,0.121044,4.213333,-0.000219,1.388281e-06,-4.1e-05,-0.0
100005,0.489395,0,-18064,-1623,0.0,-4469,0.035792,0,9,0,...,0,0,1,0,0.247398,2.250182,-0.000125,1.309234e-06,-1.6e-05,-0.0
100013,0.492283,0,-20038,-3503,-856.0,-4458,0.019101,0,14,0,...,0,0,1,1,0.222477,3.275378,-0.000163,1.055065e-06,-3.5e-05,-0.0
100028,0.492573,0,-13976,-4208,-1805.0,-1866,0.026392,0,11,0,...,0,0,1,0,0.133515,5.0,-0.000358,3.236045e-07,-3.6e-05,-0.000143
100038,0.494876,0,-13040,-4262,-821.0,-2191,0.010032,0,5,0,...,0,0,0,1,0.168021,3.475,-0.000266,6.805552e-07,-3.3e-05,-7.7e-05


In [4]:
data['Solvable'] = data.adaboost_proba
data['Non Solvable']= 1-data.adaboost_proba


In [5]:
# Interprétabilité du modèle
lime1 = LimeTabularExplainer(data,
                             feature_names=data.columns,
                             class_names=["Solvable", "Non Solvable"],
                             discretize_continuous=False)

In [6]:
data.loc[100001]

adaboost_proba                0.491635
adaboost_result               0.000000
DAYS_BIRTH               -19241.000000
DAYS_ID_PUBLISH            -812.000000
DAYS_LAST_PHONE_CHANGE    -1740.000000
                              ...     
EXT_SOURCE_2/credit           0.000001
EXT_SOURCE_2/age             -0.000041
nb_enf/age                   -0.000000
Solvable                      0.491635
Non Solvable                  0.508365
Name: 100001, Length: 68, dtype: float64

In [7]:
# Calcul des 20 plus proches voisins
nbrs = NearestNeighbors(n_neighbors=20, algorithm='ball_tree').fit(data)
nbrs.kneighbors(np.array(data.loc[100001]).reshape(1, -1))[1].flatten()

array([    0,  8412, 47754, 33619,  4648, 11162, 40989, 25074, 34105,
        9747,  3124, 45976, 26077, 11944,  4563, 37017, 10643, 40546,
       38838, 11206], dtype=int64)

In [8]:
num_columns = data.select_dtypes(include=["float64"]).columns

In [9]:
data.reset_index().columns

Index(['SK_ID_CURR', 'adaboost_proba', 'adaboost_result', 'DAYS_BIRTH',
       'DAYS_ID_PUBLISH', 'DAYS_LAST_PHONE_CHANGE', 'DAYS_EMPLOYED',
       'REGION_POPULATION_RELATIVE', 'FLAG_EMAIL', 'HOUR_APPR_PROCESS_START',
       'FLAG_DOCUMENT_6', 'FLAG_PHONE', 'AMT_CREDIT', 'DAYS_REGISTRATION',
       'FLAG_DOCUMENT_13', 'FLAG_DOCUMENT_16', 'AMT_INCOME_TOTAL',
       'EXT_SOURCE_2', 'REGION_RATING_CLIENT', 'REG_CITY_NOT_WORK_CITY',
       'REG_CITY_NOT_LIVE_CITY', 'FLAG_DOCUMENT_3', 'DEF_30_CNT_SOCIAL_CIRCLE',
       'LIVE_CITY_NOT_WORK_CITY', 'FLAG_WORK_PHONE', 'CNT_CHILDREN',
       'OBS_30_CNT_SOCIAL_CIRCLE', 'sex_M', 'educ_level_Lower secondary',
       'educ_level_Secondary / secondary special', 'income_type_Working',
       'orga_type_Business Entity Type 3', 'orga_type_Construction',
       'orga_type_Industry: type 3', 'orga_type_Restaurant',
       'orga_type_Trade: type 3', 'orga_type_Trade: type 7',
       'orga_type_Transport: type 3', 'name_contract_Cash loans',
       'hous

In [10]:
import dash_table
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div([
    
    dcc.Tabs([
        # Premier onglet: Solvability Client
        dcc.Tab(label='Solvability Client', children=[
            # Permet de séléctionner dans une liste déroulante le numéro du client
            html.Div([
                html.H3("Id Client"),
                dcc.Dropdown(
                id='id-client',
                options=[{'label': i, 'value': i} for i in data.index],
                value=data.index[0]
                ),
            ]),
            html.Div([
                # Affiche la probabilité de solvabilité d'un client
                # sous forme de pie plot
                html.Div([
                    html.H3("Probability of Solvability Client"),
                    dcc.Graph(id='proba',
                              figure={},
                              style={"height": 500,
                                     "width": 500}
                             ),
                ], className='six columns'),
                # Affiche pour l'id client séléctionné
                # l'importance des features qui ont eu le plus d'impacte
                # sur la solvabilité d'un client ou non
                html.Div([
                    html.H3("Feature Importances"), 
                    dcc.Graph(id='graph',
                              figure={},
                              style={"height":500,
                                     "width":800}
                             ),       
                ], className='six columns'),        
            ], className="row"),
            # Affiche un tableau contenant les informations relatives
            # au client séléctionné ainsi que les clients similaires
            html.Div([
                html.H3("Similary Clients"),
                dash_table.DataTable(
                    id='table',
                    columns=[
                       {"name": i, "id": i} for i in data.reset_index().columns
                    ],
                    filter_action='custom',
                    filter_query='',
                    fixed_rows={'headers': True, 'data': 0 },
                    style_cell={'width': '200px'},
                    style_table={'minWidth': '80%'},
                    style_data_conditional=[
                        {
                            'if': {'row_index': 'odd'},
                            'backgroundColor': 'rgb(248, 248, 248)'
                        }
                    ],
                    style_header={
                                    'backgroundColor': 'rgb(230, 230, 230)',
                                    'fontWeight': 'bold'
                                }, 
                    virtualization=True,
                ), 
            ], className='row'),
                
        ]),
       
      
    dcc.Tab(label='Data exploration', children=[
           html.Div([
                html.Div([
                    dcc.Dropdown(
                        id='xaxis-column',
                        options=[{'label': i, 'value': i} for i in num_columns],
                        value='AMT_CREDIT'
                    ),
                    dcc.RadioItems(
                        id='xaxis-type',
                        options=[{'label': i, 'value': i} for i in ['Linear', 'Log']],
                        value='Linear',
                        labelStyle={'display': 'inline-block'}
                    )
                ],
                style={'width': '48%', 'display': 'inline-block'}),

                html.Div([
                    dcc.Dropdown(
                        id='yaxis-column',
                        options=[{'label': i, 'value': i} for i in num_columns],
                        value='AMT_ANNUITY'
                    ),
                    dcc.RadioItems(
                        id='yaxis-type',
                        options=[{'label': i, 'value': i} for i in ['Linear', 'Log']],
                        value='Linear',
                        labelStyle={'display': 'inline-block'}
                    )
                ],style={'width': '48%', 'float': 'right', 'display': 'inline-block'})
            ]),

            dcc.Graph(id='indicator-graphic'),

        ]),
         ]),
     ])
# Création d'un système de filtre
# operators = [['ge ', '>='],
#              ['le ', '<='],
#              ['lt ', '<'],
#              ['gt ', '>'],
#              ['ne ', '!='],
#              ['eq ', '='],
#              ['contains '],
#              ['datestartswith ']]

# def split_filter_part(filter_part):
#     # Permet d'avoir un outil de filtrage des données
#     for operator_type in operators:
#         for operator in operator_type:
#             if operator in filter_part:
#                 name_part, value_part = filter_part.split(operator, 1)
#                 name = name_part[name_part.find('{') + 1: name_part.rfind('}')]

#                 value_part = value_part.strip()
#                 v0 = value_part[0]
#                 if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
#                     value = value_part[1: -1].replace('\\' + v0, v0)
#                 else:
#                     try:
#                         value = float(value_part)
#                     except ValueError:
#                         value = value_part

#                 # word operators need spaces after them in the filter string,
#                 # but we don't want these later
#                 return name, operator_type[0].strip(), value

#     return [None] * 3


# # Met à jour le tableau de données
# # Le tableau correspond aux clients similaires de l'id client choisie
# @app.callback(
#     Output('table', 'data'),
#     [Input('table', "filter_query"),
#      Input('id-client', "value")])
# def update_table(filter, id_client):
    
#     # Déterminer les individus les plus proches du client dont l'id est séléctionné
#     indices_similary_clients = nbrs.kneighbors(np.array(data.loc[100001]).reshape(1, -1))[1].flatten()
     
#     filtering_expressions = filter.split(' && ')
#     dff = data.iloc[indices_similary_clients].reset_index()
#     for filter_part in filtering_expressions:
#         col_name, operator, filter_value = split_filter_part(filter_part)

#         if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
#             # these operators match pandas series operator method names
#             dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
#         elif operator == 'contains':
#             dff = dff.loc[dff[col_name].str.contains(filter_value)]
#         elif operator == 'datestartswith':
#             # this is a simplification of the front-end filtering logic,
#             # only works with complete fields in standard format
#             dff = dff.loc[dff[col_name].str.startswith(filter_value)]
    
#     return dff.to_dict('records')
 
# Met à jour le pieplot de la solvabilité du client dont l'id est choisie
@app.callback(
    Output('proba', 'figure'),
    [Input('id-client', 'value')])
def proba_pie(id_client):
    
    values = data.loc[id_client]
    values = (values['Solvable'],values['Non Solvable'])

        
    # Retourne le pie plot mis à jour pour l'id client
    return {
        'data': [go.Pie(labels=['Solvable', "Non Solvable"],
                        values=values,
                        marker_colors=["#2ecc71", "#e74c3c"],
                        hole=.5
                       )],
        'layout': go.Layout(margin=dict(b=100)
                           )
    }
    del values
    
    
# Met à jour le graphique de l'importance des features pour 
# le client dont l'id est séléctionné

#@app.callback(
#    Output('graph', 'figure'),
#    [Input('id-client', 'value'),
#    ])
#def update_graphic(id_client) :
     
#    exp = lime1.explain_instance(data.loc[id_client],
#                                 data.adaboost_proba,
#                                 num_samples=100)
    
#    indices, values = [], []
    

#    for ind, val in sorted(exp.as_list(), key=itemgetter(1)):
#        indices.append(ind)
#        values.append(val)
#    dat = pd.DataFrame(values, columns=["values"], index=indices)
#    dat["positive"] = data["values"]>0
#    del indices, values
    
    # Retourne le barplot correspondant aux 'feature importances'
    # du client dont l'id est séléctionné sur le dashboard
#   return {
        
#       'data': [go.Bar(
#                   x=dat["values"],
#                    y=dat.index,
#                   orientation='h',
#                    marker_color=list(dat.positive.map({True: '#e74c3c', False: '#2ecc71'}).values)
#        )],
        
#        'layout': go.Layout(
#                            margin=dict(l=300, r=0, t=30, b=100)
#                           )
#    } ///

@app.callback(
    Output('indicator-graphic', 'figure'),
    [Input('xaxis-column', 'value'),
     Input('yaxis-column', 'value'),
     Input('xaxis-type', 'value'),
     Input('yaxis-type', 'value')])
def update_graph_2(xaxis_column_name, yaxis_column_name,
                 xaxis_type, yaxis_type):
       
    traces = []
    solvable_labels = ["Solvable", "Non Solvable"]
    for i, target in enumerate(data.adaboost_result.unique()):
        filtered_df = data[data['adaboost_result'] == target].reset_index()
        traces.append(dict(
            x=filtered_df[xaxis_column_name],
            y=filtered_df[yaxis_column_name],
            text=filtered_df['SK_ID_CURR'],
            mode='markers',
            opacity=0.7,
            marker={
                'color':list(filtered_df["adaboost_result"].map({0.0: '#e74c3c', 1.0: "#2ecc71"}).values),
                'size': 5,
                'line': {'width': 0.15, 'color': 'white'}
            },
            name=solvable_labels[i]
        ))   
        
    return {
        'data': traces,
        'layout': dict(
            xaxis={
                'title': xaxis_column_name,
                'type': 'linear' if xaxis_type == 'Linear' else 'log'
            },
            yaxis={
                'title': yaxis_column_name,
                'type': 'linear' if yaxis_type == 'Linear' else 'log'
            },
            margin={'l': 40, 'b': 40, 't': 10, 'r': 0},
            hovermode='closest'
        )
    }
            
if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

 in production, use a production WSGI server like gunicorn instead.

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [31/Aug/2020 15:54:45] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [31/Aug/2020 15:54:45] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [31/Aug/2020 15:54:45] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\dbe1MO\.conda\envs\dash\lib\site-packages\pandas\core\indexes\base.py", line 2889, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas\_libs\index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 97, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 1675, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 1683, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'AMT_ANNUITY'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\dbe1MO\.conda\envs\dash\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\dbe1MO\.conda\envs\dash\lib\site-packages\flask\app.py", line 1952, in full_dispat

127.0.0.1 - - [31/Aug/2020 15:54:45] "[35m[1mPOST /_dash-update-component HTTP/1.1[0m" 500 -
127.0.0.1 - - [31/Aug/2020 15:54:45] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
