In [77]:
# все импорты!!

# для dash
import dash
from dash import dcc, Input, Output, State, html, Dash, dash_table, callback_context
import dash_bootstrap_components as dbc
import plotly.express as px     

import pandas as pd     

# для модели машинного обучения
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from catboost import CatBoostClassifier

In [78]:
# Load the dataset and preview the first rows.
# The CSV was saved together with its pandas index, which read_csv returns as
# an 'Unnamed: 0' column. Drop it so the row number is neither offered as a
# feature in the dashboard dropdown nor fed to the model as a predictor.
# errors='ignore' keeps this working for a copy of the file without that column.
df = pd.read_csv('Downloads/heart_2020_cleaned.csv').drop(columns='Unnamed: 0', errors='ignore')
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


## Тут находятся все общие графики и всякие тесты 

In [79]:
# Pie chart with the overall split of HeartDisease answers.
# Rows are counted per HeartDisease value via the BMI column.
heart = df.groupby('HeartDisease')['BMI'].count()
heart_pie = px.pie(heart, names=heart.index, values=heart, title='HeartDisease Rate')
# Both update_* calls modify the figure in place, so no chaining is needed.
heart_pie.update_layout(showlegend=False, title_x=0.5)
heart_pie.update_traces(textposition='inside', textinfo='label+percent')

def graph(dfr):
    """Build the two figures shown for a subset of the data.

    Parameters
    ----------
    dfr : DataFrame
        Rows to plot; the 'AgeCategory', 'BMI' and 'HeartDisease' columns are read.

    Returns
    -------
    tuple
        ``(age_pie, bmi_strip)``: a pie chart of row counts per AgeCategory
        and a strip plot of BMI against HeartDisease.
    """
    # Row counts per age bucket (counted via the BMI column).
    age_counts = dfr.groupby('AgeCategory')['BMI'].count()

    age_fig = px.pie(age_counts, names=age_counts.index, values=age_counts, title='Age Rate')
    age_fig.update_layout(showlegend=False, title_x=0.5)
    age_fig.update_traces(textposition='inside', textinfo='label+percent')

    bmi_fig = px.strip(dfr, x="BMI", y="HeartDisease")

    return age_fig, bmi_fig

# Build a separate, model-ready copy of the dataset for predictions.
df_for_pred = df.copy()
yn_to_bool = {'No':0, 'Yes':1}

# Text columns holding exactly two distinct values are treated as flags.
to_boolean = [
    col for col in df_for_pred.select_dtypes(include='object')
    if df_for_pred[col].nunique(dropna=False) == 2
]

# 'Sex' is two-valued but not Yes/No, so it gets its own mapping and is
# taken out of the list handled by the generic Yes/No conversion below.
sex_codes = df_for_pred['Sex'].map({'Male':0, 'Female':1})
df_for_pred['Sex'] = sex_codes.astype('bool')
to_boolean.remove('Sex')

for col in to_boolean:
    yes_no_codes = df_for_pred[col].map(yn_to_bool)
    df_for_pred[col] = yes_no_codes.astype('bool')

# Every text column that is still left becomes a pandas categorical.
text_cols = df_for_pred.select_dtypes(include='object').columns
df_for_pred = df_for_pred.astype({col: 'category' for col in text_cols})
        

# MODEL ------------------------------------------------------------------------------------------------------
# This used to be a function, but running it took too long, so the model is
# trained once here, ahead of time.
features = df_for_pred.drop('HeartDisease', axis=1)
target = df_for_pred['HeartDisease']

# Held-out test split: it is not passed to fit() in any form, so a confusion
# matrix computed on x_test / y_test is an unbiased estimate.
x_train, x_test, y_train, y_test = train_test_split(features, target, shuffle=True,
                                                    stratify=target, random_state=17)

# Separate validation split carved out of the training data. Early stopping and
# use_best_model pick the iteration count on eval_set; using the test split for
# that (as before) tunes the model on the very data it is later scored on.
x_fit, x_val, y_fit, y_val = train_test_split(x_train, y_train, shuffle=True,
                                              stratify=y_train, random_state=17)

# Columns stored with the pandas 'category' dtype are declared categorical to CatBoost.
cat_features = list(features.select_dtypes(include='category').columns)

model_balanced = CatBoostClassifier(random_seed=17,
                                    custom_metric=['AUC', 'Accuracy'],
                                    auto_class_weights='Balanced',
                                    use_best_model=True)
model_balanced.fit(x_fit, y_fit,
                   cat_features=cat_features,
                   eval_set=(x_val, y_val),
                   early_stopping_rounds=10,
                   plot=False, verbose=0)

<catboost.core.CatBoostClassifier at 0x1fda6da9a60>

## Само приложение

In [89]:
# The Dash application, styled with the Bootstrap theme from
# dash-bootstrap-components; `title` is the caption shown in the browser tab.
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    title='Выпускной проект!!:)',
)

In [90]:
def _build_layout():
    """Assemble the dashboard page as a single ``html.Div``.

    Sections, top to bottom: page title, a preview table with a click
    read-out, the overall HeartDisease pie next to a per-feature histogram,
    numeric-feature statistics next to the HeartDisease selector, an empty row
    for the selector-driven figures, and the model button with its results row.
    Components that carry an ``id`` are the ones wired to the app's callback.
    """
    page_title = dbc.Row(
        dbc.Col(html.H1("Аналитика Сердечных Заболеваний с помощью Dash!"), width={'offset':1})
    )

    # First rows of the dataset plus an alert showing the clicked cell.
    preview_table = dbc.Container([
        dbc.Label('Кликни на ячейку таблички:)'),
        dash_table.DataTable(
            data=df.head().to_dict('records'),
            columns=[{"name": name, "id": name} for name in df.columns],
            id='tbl',
            page_size=10,
        ),
        dbc.Alert(id='tbl_out'),
    ])

    overview_heading = dbc.Row(
        dbc.Col(html.H3("Общая статистика заболевших:"), width={"offset": 4})
    )

    # Static pie on the left, feature picker and its histogram on the right.
    overall_pie_col = dbc.Col(
        dcc.Graph(figure=heart_pie),
        width=8,
        lg={'size': 6,  "offset": 0},
    )
    feature_picker_col = dbc.Col([
        dbc.Label('* Выбери признак:'),
        dcc.Dropdown(
            id="feature",
            value="Race",
            options=[{'label': name, 'value': name} for name in df.columns],
            clearable=False,
        ),
        html.Div(children=dcc.Graph(id="feature-graph")),
    ])
    overview_row = dbc.Row([overall_pie_col, feature_picker_col])

    # Numeric-feature statistics on the left, HeartDisease selector on the right.
    numeric_stats_col = dbc.Col(
        [
            dcc.Markdown('* Выбери числовой признак, чтобы посмотреть на его распределение'),
            dcc.Dropdown(
                id='num-feature',
                value='BMI',
                options=[{'label': name, 'value': name}
                         for name in df.select_dtypes(include='number').columns],
                clearable=False,
            ),
            html.Div(id='table_num'),
        ],
        width={'offset':1, 'size':5},
    )
    disease_selector_col = dbc.Col([
        dcc.Markdown('''* Выбери нужное значение HeartDisease, чтобы посмотреть на графики.'''),
        dcc.RadioItems(
            id='dis_yes',
            options=['Yes', 'No'],
            value='Yes',
            inline=True,
        ),
    ])
    controls_row = dbc.Row([numeric_stats_col, disease_selector_col])

    model_heading = dbc.Row(
        dbc.Col(html.H3('НАТРЕНИРОВАТЬ МОДЕЛЬ МАШИННОГО ОБУЧЕНИЯ?)'), width={'offset':3})
    )
    model_button_row = dbc.Row([
        dbc.Col(
            html.Button("YEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE", id='model', n_clicks=0),
            width={'offset':5},
        ),
    ])

    return html.Div([
        page_title,
        html.Hr(),
        preview_table,
        overview_heading,
        overview_row,
        controls_row,
        html.Hr(),
        dbc.Row(id='button_respond'),
        html.Hr(),
        html.Hr(),
        model_heading,
        model_button_row,
        dbc.Row(id='model_resp'),
    ])


app.layout = _build_layout()


@app.callback([Output('tbl_out', 'children'),
               Output("feature-graph", "figure"),
               Output('table_num', 'children'),
               Output('button_respond', 'children'),
               Output('model_resp', 'children')],
              [Input('tbl', 'active_cell'),
               Input('feature', 'value'),
               Input('num-feature', 'value'),
               Input('dis_yes', 'value'),
               Input('model', 'n_clicks')])
def update_graphs(active_cell, feature, num_feature, btn_y, model):
    """Refresh every dynamic part of the dashboard.

    Parameters
    ----------
    active_cell : dict or None
        Currently selected cell of the preview table ('tbl').
    feature : str
        Column chosen in the 'feature' dropdown; plotted as a histogram.
    num_feature : str
        Numeric column chosen in the 'num-feature' dropdown; summarised with
        ``describe()``.
    btn_y : str
        HeartDisease value picked in the 'dis_yes' radio items; selects the
        rows used for the age pie and the BMI strip plot.
    model : int
        Click counter of the 'model' button.

    Returns
    -------
    tuple
        Values for the five outputs, in declaration order: cell read-out text,
        feature histogram, statistics table, the two subset figures, and the
        model results. The model results are ``dash.no_update`` unless the
        button itself fired the callback.
    """
    # Histogram of the selected feature, bars ordered by frequency.
    feature_hist = px.histogram(df, title=feature + ' distribution', x=feature)
    feature_hist.update_xaxes(categoryorder="total descending")

    # One-row table with describe() statistics, formatted to 3 decimals.
    # describe() is computed once and reused for both the header and the row.
    stats = df[num_feature].describe().apply("{0:.3f}".format)
    table = dash_table.DataTable(
        data=[stats.to_dict()],
        columns=[{"name": name, "id": name} for name in stats.index],
    )

    # Figures for the rows with the selected HeartDisease value.
    hd = df.loc[df['HeartDisease'] == btn_y]
    age_pie, bmi = graph(hd)
    answer = [
        dbc.Col([dcc.Graph(figure=age_pie)]),
        dbc.Col([dcc.Graph(figure=bmi)]),
    ]

    # Model results are rebuilt only when the button fired this callback.
    # Component ids are compared exactly, and an empty `triggered` list is
    # handled instead of being indexed blindly.
    triggered_ids = [item['prop_id'].split('.')[0] for item in callback_context.triggered]
    if model and 'model' in triggered_ids:
        scores = model_balanced.best_score_['validation']
        # The labels are stringified before comparison, as the original code did;
        # presumably predict() returns string labels for this target - TODO confirm.
        matrix = confusion_matrix(y_test.astype(str), model_balanced.predict(x_test), normalize='all')
        fig = px.imshow(matrix, text_auto=True, color_continuous_scale='viridis')
        model_ans = [
            dbc.Col([dcc.Markdown("* Результаты предсказаний:"),
                     dash_table.DataTable([scores], [{"name": name, "id": name} for name in scores.keys()]),
                     html.H2('* Матрица ошибок:'),
                     dcc.Graph(figure=fig)], width={'offset':1}),
        ]
    else:
        # Leave the row untouched. Returning [] here wiped already displayed
        # model results as soon as any other control was used.
        model_ans = dash.no_update

    cell_text = str(active_cell) if active_cell else "Click the table"
    return cell_text, feature_hist, table, answer, model_ans

In [91]:
# Start the Dash server with debug mode off when this cell/script is run directly.
if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [25/May/2022 13:58:19] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:20] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:20] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:20] "GET /_dash-component-suites/dash/dash_table/async-highlight.js HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:20] "GET /_dash-component-suites/dash/dash_table/async-table.js HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:20] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:20] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:20] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:21] "GET /_dash-component-suites/dash/dcc/async-markdown.js HTTP/1.1" 200 -
127.0.0.1 - - [25/May/2022 13:58:21] "GET /_dash-component-suites/d