Making a Dashboard using Dash and Plotly to research the representation that UMAP gives of a dataset

In [1]:
import pandas as pd
import numpy as np
from jupyter_dash import JupyterDash
from dash import html
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate

In [2]:
from sklearn.preprocessing import StandardScaler

In [3]:
import sys
sys.version

'3.9.12 (main, Apr  5 2022, 06:56:58) \n[GCC 7.5.0]'

In [4]:
import plotly.express as px

In [5]:
import dash
from jupyter_dash import JupyterDash
from dash import dcc, ctx
import dash_daq as daq

In [6]:
import plotly
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [7]:
from sklearn.datasets import load_breast_cancer, load_iris

In [8]:
bc = load_iris()
breast_cancer_df = pd.DataFrame(bc.data, columns=bc.feature_names)
breast_cancer_df['target'] = bc.target

In [9]:
toy_df = breast_cancer_df#pd.read_csv('./data/archive/diabetes.csv')

In [10]:
import umap

In [11]:
# categorical and numeric columns differently

In [14]:
app = JupyterDash(__name__)
label_value_pairs = [{'label': x, 'value': x} for x in toy_df.columns]
colors = ['blue', 'red', 'green', 'purple', 'orange', 'cyan', 'magenta']


app.layout = html.Div([
    dcc.Store(id='umap_data'),
    html.Div([
        html.Div(dcc.Graph(id='main_graph'), className="six columns",
                style={'display': 'inline-block'}
                ),
        html.Div(dcc.Graph(id='corr_graph', figure=px.imshow(toy_df.corr())), className="six columns",
                style={'display': 'inline-block'}
            )], className="row"),
    dcc.Dropdown(
            id = 'graph_x', 
            options = label_value_pairs,
            value = toy_df.columns[0]
        ),
    dcc.Dropdown(
            id = 'graph_y', 
            options = label_value_pairs,
            value = toy_df.columns[1]
        ),
    dcc.Dropdown(
            id = 'maingraph_color', 
            options = label_value_pairs,
            #value = 'Age'
        ),
    dcc.Checklist(
        options=list(toy_df.columns),
        value=list(toy_df.columns),
        id='umap_input'
    ),
    html.Div(dcc.Graph(id='umapped_graph'), 
            style={}
            ),
    daq.ToggleSwitch(
        id='switch-2d3d',
        label='2D - 3D',
        labelPosition='bottom'
    ),
    dcc.Dropdown(
            id = 'umap_color', 
            options = label_value_pairs,
            #value = 'Age'
        ),
    dcc.Slider(1,100, 1, value=12, 
        marks=None, id='n_neighbors_slider',  
        tooltip={"placement": "bottom", "always_visible": True}
    ),
    html.Div(dcc.Graph(
        id='violin_plot'
        ), 
            style={}
            ),
    dcc.Checklist(
        options=list(toy_df.columns),
        value=list(toy_df.columns),
        id='violin_colums'
    ),
    dcc.Dropdown(
        id='boxplot_color',
        options = label_value_pairs
    )
])

@app.callback(
    Output('corr_graph', 'figure'),
    Input('maingraph_color', 'value')
)
def plot_heatmap(maingraph_value):
    if maingraph_value is None:
        raise PreventUpdate

    categories = toy_df[maingraph_value].unique()
    print('lala')
    if len(categories)<7:
        fig = make_subplots(
            cols=len(toy_df[maingraph_value].unique())+1, 
            rows=1, 
            #subplot_titles=[' '] + categories,
            #horizontal_spacing=0.05,
            #shared_yaxes=True,
            specs=[[{"colspan": 0},{"colspan": 1}, {"colspan": 1}, {"colspan": 1}]]
            )
        for i, cat in enumerate(categories):
            corr_df = toy_df[toy_df[maingraph_value] == cat].corr()
            fig.add_trace(
                go.Heatmap(
                    z=corr_df, 
                    y=corr_df.columns,
                    x=corr_df.columns,
                    coloraxis = "coloraxis"
                ),
                col=2+i, row = 1)

    #categories = toy_df[maingraph_value].unique()
    #if len(categories)<7:
    #    fig = make_subplots(
    #        cols=len(toy_df[maingraph_value].unique()), 
    #        rows=1, 
    #        subplot_titles=categories
    #        )
    #    for i, cat in enumerate(categories):
    #        fig.add_trace(
    #            go.Heatmap(
    #                z=toy_df[toy_df[maingraph_value] == cat].corr(), 
    #                y=toy_df[toy_df[maingraph_value] == cat].columns,
    #                x = toy_df[toy_df[maingraph_value] == cat].columns
    #            ),
    #            col=1+i, row = 1)
        return fig
    else:
        return px.imshow(toy_df.corr())



@app.callback(
    Output('graph_y', 'value'),
    Input('corr_graph', 'clickData')
)
def update_y_box(click_data):
    if click_data:
        return click_data['points'][0]['y']
    else:
        raise PreventUpdate
@app.callback(
    Output('graph_x', 'value'),
    Input('corr_graph', 'clickData')
)
def update_y_box(click_data):
    if click_data:
        return click_data['points'][0]['x']
    else:
        raise PreventUpdate



@app.callback(
    Output('violin_plot', 'figure'),
    [
        Input('violin_colums', 'value'), 
        Input('boxplot_color', 'value'),
        Input("umapped_graph", "hoverData")
        ]
)
def make_violin_plot(columns, color, hover):
    
    

    fig = make_subplots(rows=1, cols=len(columns), subplot_titles=columns)
    
    if color:
        colornames = toy_df[color].unique()
        if len(colornames) < 6:
        
            for j, column in enumerate(columns):
                for i, c in enumerate(colornames):
                    df_plot = toy_df[toy_df[color] == c][columns]
                    fig.add_trace(
                        go.Box(
                            y=df_plot[column], 
                            name=str(c), 
                            showlegend= j == 0,
                            legendgroup=str(c),
                            marker_color=colors[i]
                            ), row = 1, col = j+1
                )
                if hover:
                    hover_idx = hover['points'][0]['customdata'][0]
                    val = toy_df.loc[hover_idx][column]
                    fig.add_hline(val, row = 1, col = j+1)

            return fig

       
    df = toy_df[columns]

    for j, column in enumerate(columns):

        fig.add_trace(
            go.Box(y=df[column], name=column),
            row = 1, col = j+1
        )

        if hover:
            hover_idx = hover['points'][0]['customdata'][0]
            val = toy_df.loc[hover_idx][column]
            fig.add_hline(val, row = 1, col = j+1)
    return fig 


@app.callback(
    Output('main_graph', 'figure'),
    [
        Input("umapped_graph", "hoverData"), 
        Input("umapped_graph", "selectedData"),
        Input('graph_x', 'value'), 
        Input('graph_y', 'value'),
        Input('maingraph_color', 'value')
        ],
    State('main_graph', 'figure')
)
def update_sampletext(hover, selected_data, valuex, valuey, c, fig):
    """update the main graph to display the dataframe based on columns (valuex) and {valuey}"""

    triggered_id = ctx.triggered_id if not None else 'No clicks yet'
    df= toy_df
    if c and len(df[c].unique())<7:
        df[c] = df[c].astype(str)


    if (fig is None) or (triggered_id in ['graph_x', 'graph_y', 'maingraph_color']):
        fig = px.scatter(toy_df, x=valuex, y =valuey, color=c)
    elif selected_data:
        fig = go.Figure(fig)
        df['idx'] = df.index
        df['selected'] = df['idx'].isin(selected_data)

        #print(selected_data)
        selected_data = [x['customdata'][0] for x in selected_data['points']]
        print(selected_data)

        #todo: don't want to alter df
        df['selected'] = df['idx'].isin(selected_data)
        df['opacity'] = df['selected'].apply(lambda x: 1 if x else 0.15)

        print(df['selected'])
        fig = px.scatter(toy_df, x=valuex, y =valuey, color=c, opacity='opacity')

        #todo: this part does not work
        #fig.update_traces(
        #    selectedpoints=selected_data,
            #customdata=df.index,
            #mode='markers+text', 
            #marker={ 'color': 'rgba(0, 116, 217, 0.7)', 'size': 20 }, 
        #    unselected={'marker': { 'opacity': 0.15 }}
        #    )
    else:
        hover_idx = hover['points'][0]['customdata'][0]
        hover_x, hover_y = toy_df.loc[hover_idx][[valuex, valuey]]

        if len(fig['data']) > 1 and triggered_id=='umapped_graph' and len(fig['data'][-1]['x'])==1:

            del fig['data'][-1]

        fig = go.Figure(fig)
        fig.add_trace(
            go.Scatter(
                x=[hover_x], 
                y=[hover_y], 
                mode='markers', 
                marker_symbol =  'star', 
                marker_size=12)
            )



    
    return fig


@app.callback(
    Output('umap_data', 'data'),
    [
        Input('n_neighbors_slider', 'value'),
        Input('umap_input', 'value'),
        Input('switch-2d3d', 'value')
        ]
)
def store_umap_transfo(n_neighbors, umap_input, switch_3d):
    """get the projection from the umap algorithm and store it"""

    df = toy_df[umap_input]


    scaled_array = StandardScaler().fit_transform(df)
    df = pd.DataFrame(scaled_array, columns=df.columns)

    mapper = umap.UMAP(
        n_neighbors=n_neighbors,
        n_components=3 if switch_3d else 2, 
        random_state=88
        ).fit(df)
        
    umapped_df = pd.DataFrame(mapper.transform(df), index=df.index)
    

    umapped_df['idx'] = umapped_df.index
    df = umapped_df.join(toy_df)


    return df.to_dict()


@app.callback(
    Output('umapped_graph', 'figure'),
    [
        Input('umap_color', 'value'), 
        Input('umap_data', 'data')
        ]
)
def update_umapped(umap_color, umap_data):
    """update the umap graph without computing the umap projection"""

    df = pd.DataFrame.from_dict(umap_data)

    if umap_color and len(df[umap_color].unique()) < 6:
        df[umap_color] = df[umap_color].astype(str)

    if '2' in df.columns:
        # 3d rendering because umap gave columns 0, 1, and 2
        fig = px.scatter_3d(
            df,
            x='0', y='1', z='2',
            hover_data=df.columns,
            color = umap_color
        )
    else:
        fig =  px.scatter(
            df, 
            x='0', y='1', 
            hover_data=df.columns,
            color = umap_color)
        fig.update_layout(margin={'l': 20, 'r': 0, 'b': 15, 't': 5}, dragmode='select')

    return fig


if __name__ == '__main__':
    app.run_server(
        debug=True, host='127.0.0.3'
    )


Dash app running on http://127.0.0.3:8050/
lala


In [15]:
toy_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,idx,selected,opacity
0,5.1,3.5,1.4,0.2,0,0,False,0.15
1,4.9,3.0,1.4,0.2,0,1,False,0.15
2,4.7,3.2,1.3,0.2,0,2,False,0.15
3,4.6,3.1,1.5,0.2,0,3,False,0.15
4,5.0,3.6,1.4,0.2,0,4,False,0.15
...,...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2,145,True,1.00
146,6.3,2.5,5.0,1.9,2,146,True,1.00
147,6.5,3.0,5.2,2.0,2,147,True,1.00
148,6.2,3.4,5.4,2.3,2,148,True,1.00


In [13]:
dash.__version__

'2.6.1'

lala
[100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149]
0      False
1      False
2      False
3      False
4      False
       ...  
145     True
146     True
147     True
148     True
149     True
Name: selected, Length: 150, dtype: bool


In [32]:
toy_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [28]:
toy_df.loc[136]

sepal length (cm)    6.3
sepal width (cm)     3.4
petal length (cm)    5.6
petal width (cm)     2.4
target                 2
Name: 136, dtype: object

In [None]:
px.imshow(toy_df.corr())

lala
lala


In [29]:
plotly.__version__

'5.6.0'

In [15]:
categories = toy_df['target'].unique()
if len(categories)<7:
        fig = make_subplots(
            cols=len(toy_df['target'].unique()), 
            rows=1, 
            subplot_titles=categories,
            horizontal_spacing=0.05,
            shared_yaxes=True,
            )
        for i, cat in enumerate(categories):
            corr_df = toy_df[toy_df['target'] == cat].corr()
            fig.add_trace(
                go.Heatmap(
                    z=corr_df, 
                    y=corr_df.columns,
                    x=corr_df.columns,
                    coloraxis = "coloraxis"

                ),
                col=1+i, row = 1)

In [16]:
fig

lala
lala
lala
