/
app_dash.py
155 lines (132 loc) · 5.94 KB
/
app_dash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
# We start with the import of standard ML librairies
import pandas as pd
import math
from joblib import load
from src.data_processing import make_full_pipeline
import pickle
# We add all Plotly and Dash necessary librairies
import dash
import plotly.graph_objects as go
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
HOME_PATH = os.getcwd()
DATA_PATH = os.path.join(HOME_PATH, 'data')
MODELS_PATH = os.path.join(HOME_PATH, 'model')
ASSETS_PATH = os.path.join(HOME_PATH, 'assets')
df = pd.read_csv(os.path.join(DATA_PATH, "Customer-Value-Analysis.csv")).set_index('Customer')
sk_best = load(os.path.join(MODELS_PATH, 'best.joblib'))
# full_pipeline = load(os.path.join(MODELS_PATH, 'transformer.joblib'))
full_pipeline = make_full_pipeline(df)
ohe_path = os.path.join(MODELS_PATH, 'ohe_categories.pkl')
perfs_path = os.path.join(MODELS_PATH, 'sk_best_performances.pkl')
with open(ohe_path, 'rb') as input:
ohe_categories = pickle.load(input)
categories = []
for k, l in ohe_categories.items():
categories.append([f'{k}_{cat}' for cat in list(l)])
flatten = lambda l: [item for sublist in l for item in sublist]
categories = flatten(categories)
with open(perfs_path, 'rb') as input:
perfs = pickle.load(input)
# scaling
cats = [var for var, var_type in df.dtypes.items() if var_type == 'object']
nums = [var for var in df.columns if var not in cats]
cats.remove('Response')
TOP = 10
# We create a DataFrame to store the features' importance and their corresponding label
df_feature_importances = pd.DataFrame(sk_best.feature_importances_ * 100, columns=["Importance"],
index=nums + categories)
df_feature_importances = df_feature_importances.sort_values("Importance", ascending=False)
df_feature_importances = df_feature_importances.loc[df_feature_importances.index[:TOP]]
# We create a Features Importance Bar Chart
fig_features_importance = go.Figure()
fig_features_importance.add_trace(go.Bar(x=df_feature_importances.index,
y=df_feature_importances["Importance"],
marker_color='rgb(171, 226, 251)')
)
fig_features_importance.update_layout(title_text='<b>Features Importance of the model<b>', title_x=0.5)
# We create a Features perfomances Bar Chart
fig_perfs = go.Figure()
fig_perfs.add_trace(go.Bar(y=list(perfs.keys()),
x=list(perfs.values()),
marker_color='rgb(171, 226, 251)',
orientation='h')
)
fig_perfs.update_layout(title_text='<b>Best Model Performances<b>', title_x=0.5)
cat_children = []
for var in cats:
# Categorical children
sorted_modalities = list(df[var].value_counts().index)
cat_children.append(html.H4(children=var))
cat_children.append(dcc.Dropdown(
id='{}-dropdown'.format(var),
options=[{'label': value, 'value': value} for value in sorted_modalities],
value=sorted_modalities[0]
))
linear_children = []
for var in nums:
# linear children
linear_children.append(html.H4(children=var))
desc = df[var].describe()
linear_children.append(dcc.Slider(
id='{}-dropdown'.format(var),
min=math.floor(desc['min']),
max=round(desc['max']),
step=None,
value=round(desc['mean']),
marks={i: '{}°'.format(i) for i in
range(int(desc['min']), int(desc['max']) + 1, max(int((desc['std'] / 1.5)), 1))}
))
# The command below can be activated in a standard notebook to display the chart
# fig_features_importance.show()
app = dash.Dash(__name__,
# external CSS stylesheets
external_stylesheets = [
#"https://raw.githack.com/Athena75/IBM-Customer-Value-Dashboarding/main/assets/style.css",
"https://rawcdn.githack.com/Athena75/IBM-Customer-Value-Dashboarding/df971ae38117d85c8512a72643ce6158cde7a4eb/assets/style.css"
]
)
# We apply basic HTML formatting to the layout
app.layout = html.Div(children=[
# first row : Title
html.Div(children=[
html.Div(children=[html.H1(children="Simulation Tool : IBM Customer Churn")],
className='title'),
],
style={"display": "block"}),
# second row :
html.Div(children=[
# first column : fig feature importance + linear + prediction
html.Div(children=[
html.Div(children=[dcc.Graph(figure=fig_features_importance, className='graph')] + linear_children),
# prediction result
html.Div(children=[html.H2(children="Prediction:"),
html.H2(id="prediction_result")],
className='prediction')],
className='column'),
# second column : fig performances categorical
html.Div(children=[dcc.Graph(figure=fig_perfs, className='graph')] + cat_children,
className='column')
],
className='row')
]
)
# The callback function will provide one "Output" in the form of a string (=children)
@app.callback(Output(component_id="prediction_result", component_property="children"),
# The values corresponding to sliders and dropdowns of respectively numerical and categorical features
[Input('{}-dropdown'.format(var), 'value') for var in nums + cats])
# The input variable are set in the same order as the callback Inputs
def update_prediction(*X):
# get the data input and map it to the correponding feature names
payload = dict(zip(nums + cats, X))
# create one line dataframe
frame_X = pd.DataFrame(payload, index=[0])
# pass it through the pre-fitted transformer
X_processed = full_pipeline.transform(frame_X)
prediction = sk_best.predict_proba(X_processed)[0]
# And retuned to the Output of the callback function
return " {}% No , {}% Yes".format("%.2f" % (prediction[0] * 100),
"%.2f" % (prediction[1] * 100))
server = app.server