In [1]:
import pandas as pd

from bokeh.plotting import figure, ColumnDataSource
from bokeh.io import output_file, show, output_notebook, curdoc
from bokeh.models import HoverTool, Slider, Select, Dropdown, Div, Button, Slider, Range1d
from bokeh.models.widgets import Panel, Tabs
from bokeh.layouts import row, column, gridplot, widgetbox
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20, Spectral10, Turbo256, Turbo

In [2]:
def get_cmap(df,fld:str):
    print(df[fld].dtype)
    cat = sorted(df[fld].unique())
    print(cat)
    return factor_cmap(field_name=fld,palette=Turbo[len(cat)],factors=cat)

def get_source(selected_vars:list):
    df_src = pd.read_csv('./Data/main_dataframe.csv',parse_dates=['record_date'])
    df_src['record_year'] = df_src['record_year'].astype(str)
    return df_src[selected_vars]

def tab1_list_df_vars(var1,var2,var3,var_cat,var_size):
    lst = []
    if var1 == '':
        lst.append(selectable_columns[1])
    else:
        lst.append(var1)

    if var2 == '':
        lst.append(selectable_columns[2])
    else:
        lst.append(var2)

    if var3 == '':
        lst.append(selectable_columns[3])
    else:
        lst.append(var3)
    
    if var_cat != '':
        lst.append(var_cat)
        
    if var_size != '':
        lst.append(var_size)

    return lst


    

In [3]:
cat_columns = ['','country_category','work_station_category','production_line_category','plant_category','division_category','record_day_name','record_month_name','record_year_month','record_year']
idx_columns = ['tenant_id','record_date']

In [14]:
df = pd.read_csv('./Data/main_dataframe.csv',parse_dates=['record_date'])
df['record_year'] = df['record_year'].astype(str)

selectable_columns = df.columns.tolist()
selectable_columns = list(set(selectable_columns) - set(idx_columns)  - set(cat_columns))
selectable_columns.insert(0,'')
selectable_columns.sort()

In [15]:
selectable_columns

['',
 'active_users',
 'activities',
 'connected_once_users',
 'created_forms',
 'created_news',
 'created_problems',
 'created_skills',
 'created_skills_endorsement_requests',
 'created_skills_endorsements',
 'divisions',
 'form_completions',
 'modified_forms',
 'modified_news',
 'modified_problems',
 'modified_skills',
 'modified_skills_endorsements',
 'plants',
 'production_lines',
 'views',
 'work_stations',
 'workinstructions']

In [16]:
get_cmap(df,'record_year')

object
['2016', '2017', '2018', '2019', '2020', '2021']


{'field': 'record_year', 'transform': CategoricalColorMapper(id='2021', ...)}

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102352 entries, 0 to 102351
Data columns (total 32 columns):
 #   Column                               Non-Null Count   Dtype         
---  ------                               --------------   -----         
 0   tenant_id                            102352 non-null  object        
 1   record_date                          102352 non-null  datetime64[ns]
 2   connected_once_users                 102352 non-null  int64         
 3   activities                           102352 non-null  int64         
 4   active_users                         102352 non-null  int64         
 5   created_skills_endorsement_requests  102352 non-null  int64         
 6   created_skills_endorsements          102352 non-null  int64         
 7   created_forms                        102352 non-null  int64         
 8   created_news                         102352 non-null  int64         
 9   created_problems                     102352 non-null  int64         
 

In [18]:
output_notebook()

In [33]:
def update_doc(doc):
    select_val1 = Select(title='Variable 1',options=selectable_columns,value=selectable_columns[1])
    select_val2 = Select(title='Variable 2',options=selectable_columns,value=selectable_columns[2])
    select_val3 = Select(title='Variable 3',options=selectable_columns,value=selectable_columns[3])
    select_size = Select(title='Points Size by',options=selectable_columns,value=None)
    select_cat = Select(title='Points Color by',options=cat_columns,value=None)
    load_graph = Button(label='Load Plots',button_type='success')
    alpha_slide = Slider(start=0.1,end=1,value=0.3,step=0.05,title='Points Transparency')
    plot_1 = figure(tools="pan,wheel_zoom,box_zoom,reset")
    plot_2 = figure(tools="pan,wheel_zoom,box_zoom,reset")
    plot_3 = figure(tools="pan,wheel_zoom,box_zoom,reset")

    def build_plot(p, df, var_x, var_y, transparency, var_cat, var_size):
        if var_size != '':
            print('before ->',df[var_size].max())
            temp = ((df[var_size] - df[var_size].min()) / (df[var_size].max() - df[var_size].min())) * 100
            print('after ->',temp.max())
            df[var_size] = temp.round(0).astype(int)
        src = ColumnDataSource(df)
        if var_cat == '':
            cat_cmap = 'blue'
        else:
            print(var_cat)
            print(df.head())
            cat_cmap = get_cmap(df,var_cat)
        
        p.title.text = '''Variable '{0}' against Variable '{1}' '''.format(var_x,var_y)
        p.renderers = []
        if var_cat != '' and var_size != '':
            c = p.circle(var_x,var_y,source=src,alpha=transparency,fill_color=cat_cmap,legend_field=var_cat,size=var_size)
        elif var_cat != '' and var_size == '':
            c = p.circle(var_x,var_y,source=src,alpha=transparency,fill_color=cat_cmap,legend_field=var_cat) #get_cmap(df,var_cat)
        elif var_cat == '' and var_size != '':
            c = p.circle(var_x,var_y,source=src,alpha=transparency,size=var_size)
        else:
            c = p.circle(var_x,var_y,source=src,alpha=transparency)
        
        p.x_range = Range1d(0, df[var_x].max())
        p.y_range = Range1d(0, df[var_y].max())
        p.xaxis.axis_label = var_x
        p.yaxis.axis_label = var_y
        p.title.align = 'center'
        return c
    
    vars_lst = tab1_list_df_vars(select_val1.value,select_val2.value,select_val3.value,select_cat.value,select_size.value)
#     print(vars_lst)
    df_selected = get_source(vars_lst)
#     print(tst_df.head())
#     source = ColumnDataSource(get_source(vars_lst))
    
    circle1 = build_plot(plot_1, df_selected, select_val1.value, select_val2.value, alpha_slide.value, select_cat.value, select_size.value)
    circle2 = build_plot(plot_2, df_selected, select_val3.value, select_val2.value, alpha_slide.value, select_cat.value, select_size.value)
    circle3 = build_plot(plot_3, df_selected, select_val1.value, select_val3.value, alpha_slide.value, select_cat.value, select_size.value)
    
#     out_legend = figure()
#     plot_1.title.align = 'center'
#     plot_2.title.align = 'center'
#     plot_3.title.align = 'center'
    
#     plot_1.circle(selectable_columns[1],selectable_columns[2],source=source,alpha=alpha_slide.value,fill_color=get_cmap(df,select_cat.value),legend_field=select_cat.value)
#     plot_1.circle(selectable_columns[1],selectable_columns[2],source=source,alpha=alpha_slide.value)
    
    out_legend = None
#     out_legend = figure()
#     for fig_component in [out_legend.grid[0],out_legend.ygrid[0],out_legend.xaxis[0],out_legend.yaxis[0]]:
#         fig_component.visible = False
#     out_legend.renderers += [plot_1] #.renderers
#     out_legend.add_layout(plot_1.legend[0])
#     plot_1.legend.items = []
    
#     plot_2.circle(selectable_columns[3],selectable_columns[2],source=source,alpha=alpha_slide.value)
#     plot_3.circle(selectable_columns[1],selectable_columns[3],source=source,alpha=alpha_slide.value)
    
#     plot_1.xaxis.axis_label = selectable_columns[0]
#     plot_1.yaxis.axis_label = selectable_columns[1]
#     plot_2.xaxis.axis_label = selectable_columns[2]
#     plot_2.yaxis.axis_label = selectable_columns[1]
#     plot_3.xaxis.axis_label = selectable_columns[0]
#     plot_3.yaxis.axis_label = selectable_columns[2]
    
    plot_1.x_range = plot_3.x_range
    plot_1.y_range = plot_2.y_range
    plot_2.x_range = plot_3.y_range
    
#     out_legend = None
    
    def select_on_change(event):
        global circle1
        global circle2
        global circle3
        
        vars_lst = tab1_list_df_vars(select_val1.value,select_val2.value,select_val3.value,select_cat.value,select_size.value)
        df_selected = get_source(vars_lst)
        
        circle1 = build_plot(plot_1, df_selected, select_val1.value, select_val2.value, alpha_slide.value, select_cat.value, select_size.value)
        circle2 = build_plot(plot_2, df_selected, select_val3.value, select_val2.value, alpha_slide.value, select_cat.value, select_size.value)
        circle3 = build_plot(plot_3, df_selected, select_val1.value, select_val3.value, alpha_slide.value, select_cat.value, select_size.value)
#         if select_val1.value == '':
#             val1 = selectable_columns[0]
#         else:
#             val1 = select_val1.value

#         if select_val2.value == '':
#             val2 = selectable_columns[1]
#         else:
#             val2 = select_val2.value

#         if select_val3.value == '':
#             val3 = selectable_columns[2]
#         else:
#             val3 = select_val3.value

#         if select_cat.value == '':
#             cat_cmap = 'blue'
#         else:
#             cat_cmap = get_cmap(df,select_cat.value)
            
        
#         plot_1.renderers = []
#         plot_2.renderers = []
#         plot_3.renderers = []
#         plot_1.circle(val1,val2,source=source,fill_color=cat_cmap,legend_field=select_cat.value)
#         plot_2.circle(val3,val2,source=source,fill_color=cat_cmap)
#         plot_3.circle(val1,val3,source=source,fill_color=cat_cmap)
#         plot_1.xaxis.axis_label = val1
#         plot_1.yaxis.axis_label = val2
#         plot_2.xaxis.axis_label = val3
#         plot_2.yaxis.axis_label = val2
#         plot_3.xaxis.axis_label = val1
#         plot_3.yaxis.axis_label = val3
        
#         if select_cat.value == '':
#             out_legend = None
#         else:
# #             plot_1.legend = select_cat.value
#             out_legend = figure()
#             for fig_component in [out_legend.grid[0],out_legend.ygrid[0],out_legend.xaxis[0],out_legend.yaxis[0]]:
#                 fig_component.visible = False
#             out_legend.renderers += plot_1.renderers
#             out_legend.add_layout(plot_1.legend[0])
#         plot_1.legend.items = []
        print('---')
        
    def change_transparency(attr, old, new):
#         glyph = circle1.glyph
        for glyph in [circle1.glyph, circle2.glyph, circle3.glyph]:
            glyph.fill_alpha = alpha_slide.value
#         circle1.fill_alpha = alpha_slide.value
#         glyph = plot_1.renderers.glyph
#         glyph.fill_alpha = alpha_slide.value
#         plot_2.circle.alpha = alpha_slide.value
#         plot_3.circle.alpha = alpha_slide.value
        print('+++')
        
        
    load_graph.on_click(select_on_change)
    alpha_slide.on_change('value',change_transparency)
    
    page_title = Div(text='<h1>Exploration des données brutes</h1>')
    widget_select_val = column(select_val1,select_val2,select_val3,select_size,select_cat,load_graph,Div(),Div(),alpha_slide)
    plot_grid = gridplot([[plot_1,plot_2],[plot_3,out_legend]],merge_tools=True)
    row_1 = row(widget_select_val,plot_grid)
#     row_2 = row(Div(),plot_3,Div())
    layout = column(page_title,row_1)
    
    doc.add_root(layout)

show(update_doc)

before -> 82
after -> 100.0
plant_category
   active_users  activities  connected_once_users       plant_category  \
0             0           0                  2272  More than 50 Plants   
1             0           0                   495      11 to 20 Plants   
2             0           0                    79             2 Plants   
3             0           0                   987      11 to 20 Plants   
4             0           0                   275      11 to 20 Plants   

   modified_forms  
0               0  
1               0  
2               0  
3               0  
4               0  
object
['1 Plant', '11 to 20 Plants', '2 Plants', '21 to 50 Plants', '3 Plants', '4 Plants', '5 to 10 Plants', 'More than 50 Plants', 'No Plant']
before -> 100
after -> 100.0
plant_category
   active_users  activities  connected_once_users       plant_category  \
0             0           0                  2272  More than 50 Plants   
1             0           0                   495     

In [14]:
show(column(
    row(Div(text='test text')),
    row(Div(text='r2-c1'), Div(text='r2-c2'), Div(text='r2-c3')),
    row(Div(text='r3-c1'), Div(text='r3-c2'), Div(text='r3-c3'))
))