# 4D Graph Template

Click **Cell** in the toolbar above, then click **Run All** to get started.


In [91]:
%%html
<script>
    // AUTORUN ALL CELLS ON NOTEBOOK-LOAD!
    // Loads the notebook namespace and jQuery through RequireJS, then registers a
    // handler that runs the cells below this one and saves the notebook.
    // NOTE(review): these module names look specific to the classic Notebook
    // front end - confirm before relying on this in JupyterLab.
    require(
        ['base/js/namespace', 'jquery'], 
        function(jupyter, $) {
            // The handler fires on the "kernel_ready.Kernel" event.
            $(jupyter.events).on("kernel_ready.Kernel", function () {
                console.log("Auto-running all cells-below...");
                // Run everything below this cell, then save the notebook.
                jupyter.actions.call('jupyter-notebook:run-all-cells-below');
                jupyter.actions.call('jupyter-notebook:save-notebook');
            });
        }
    );
</script>

In [92]:
#import warnings
#warnings.filterwarnings("ignore")

In [93]:
# This cell renders a small HTML/JavaScript snippet that hides every code input
# (`div.input`) once the page is ready and shows a link that toggles them on and off.
# The snippet uses the global jQuery `$`, so it needs a front end that exposes it.
from IPython.display import HTML
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The code for this jupyter notebook has been hidden by default for easier reading.
To toggle on/off the code, click <a href="javascript:code_toggle()">here</a>.''')

In [94]:
# Stretch the notebook container to the full browser width so wide plots fit.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [95]:
#Import necessary packages
import os
import numpy as np
import pandas as pd

from bokeh.models import Select, ColorBar, ContinuousColorMapper, LinearColorMapper, HoverTool, Button, NumeralTickFormatter, Toggle
from bokeh.layouts import row, widgetbox, column
from bokeh.io import show, output_notebook, push_notebook, output_file
from bokeh.io.saving import save
from bokeh.palettes import all_palettes
from bokeh.plotting import curdoc, figure, show, ColumnDataSource, save
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.models.grids import Grid
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler

from sklearn.preprocessing import MinMaxScaler

import ipywidgets as widgets
from IPython.display import clear_output

# Direct Bokeh output to the notebook so plots render inline in cell outputs.
output_notebook()

In [96]:
#Widgets

#style and layout

# Shared style options handed to the widgets below through their `style` argument.
# Each key appears exactly once (a repeated key in a dict literal is silently
# overwritten and only hides typos).
style = {
    'description_width': 'auto'
    ,'SelectionSlider_width':'auto'
    ,'IntRangeSlider_width':'auto'
    ,'slider_width':'auto'
    ,'select_width':'auto'
}

# Shared layout options handed to the widgets below through their `layout` argument.
layout = dict(
    width='auto',
    height='auto',
    align_content='stretch',
    align_items='stretch',
    justify_content='space-around',
)

# Text box in which the user enters the location of the CSV file to import.
file_path = widgets.Text(
    value='',
    placeholder=r'e.g. C:\Users\Dan\Desktop\file.csv',
    description='Enter file path of csv:',
    disabled=False,
    style=style,
    layout=layout,
)

#dimensions
# BoundedIntText is used instead of IntText: plain IntText has no `min` trait, so
# the intended lower bound of 0 was never enforced. BoundedIntText defaults to
# max=100, hence the explicit (generous) upper bound.
plot_height = widgets.BoundedIntText(
    value=1000
    ,min=0
    ,max=10000
    ,step=100
    ,description='Plot height:'
    ,disabled=False
    ,style=style
)

plot_width = widgets.BoundedIntText(
    value=1600
    ,min=0
    ,max=10000
    ,step=100
    ,description='Plot width:'
    ,disabled=False
    ,style=style
)

#Colour

# Colour of the title, axis text, ticks, grid lines and circle outlines.
text_colour = widgets.ColorPicker(
    concise=False,
    description='Text and axis colour:',
    value='black',
    continuous_update=True,
    disabled=False,
    style=style,
)

# Fill colour of a circle while the mouse hovers over it.
default_hover_colour = widgets.ColorPicker(
    concise=False,
    description='Hover colour:',
    value='black',
    continuous_update=True,
    disabled=False,
    style=style,
)

# Fill colour of the plotting area.
default_plot_colour = widgets.ColorPicker(
    concise=False,
    description='Plot colour:',
    value='whitesmoke',
    continuous_update=True,
    disabled=False,
    style=style,
)

# Fill colour of the border around the plotting area (and of the colour bar).
default_bg_colour = widgets.ColorPicker(
    concise=False,
    description='Background colour:',
    value='whitesmoke',
    continuous_update=True,
    disabled=False,
    style=style,
)

#colour palette
# One entry per Bokeh palette family; the description embeds a link to the palette
# reference. HTML attributes are separated by whitespace only (no comma between
# `href` and `target`).
colour_palette = widgets.Select(
    options = list(all_palettes.keys())
    ,value='RdYlGn'
    ,description='Select colour palette:<br><a href="https://bokeh.pydata.org/en/latest/docs/reference/palettes.html#built-in-palettes" target="_blank">Colour scheme info.</a>'
    ,disabled=False
    ,continuous_update=True
    ,style=style
    ,layout=layout
)

#colour scheme info
# Stand-alone copy of the palette reference link, shown in the Style tab.
colour_scheme_info = widgets.HTML(
    value='<a href="https://bokeh.pydata.org/en/latest/docs/reference/palettes.html#built-in-palettes" target="_blank">Colour scheme info.</a>'
)

# Slider over the palette sizes offered by the currently selected palette family;
# it starts on the third available size.
colour_category_number = widgets.SelectionSlider(
    options=list(all_palettes.get(colour_palette.value)),
    value=list(all_palettes.get(colour_palette.value))[2],
    description='Number of colour categories:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    style=style,
)

# Tick box deciding whether the chosen palette is applied in reverse order.
colour_palette_reverse = widgets.Checkbox(
    value=True,
    description='Reverse colour scheme?',
    disabled=False,
)

# Circle fill colour used when no colour variable is selected.
default_c = widgets.ColorPicker(
    concise=False,
    description='Default circle color:',
    value='white',
    disabled=False,
    style=style,
)

#Transparency

# Alpha applied to the fill of the plotting area (0 = transparent, 1 = opaque).
default_transparency = widgets.FloatSlider(
    value=1,
    min=0,
    max=1.0,
    step=0.1,
    description='Background transparency:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='.1f',
    style=style,
)

# Alpha applied to a circle while the mouse hovers over it.
hover_transparency = widgets.FloatSlider(
    value=0.8,
    min=0,
    max=1.0,
    step=0.1,
    description='Hover transparency:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='.1f',
    style=style,
)

# Circle size used for every point when no size variable is selected.
default_sz = widgets.BoundedIntText(
    value=20,
    min=0,
    max=50,
    step=1,
    description='Default circle size:',
    disabled=False,
    style=style,
)

#min and max of circle size
# The initial range must lie inside [min, max]: a lower value below `min` is
# silently clamped to `min`, so the starting range is written as the value that
# actually takes effect. The slider width is a layout property, not a slider
# argument, so it is passed through `layout`.
min_max_circle = widgets.IntRangeSlider(
    value=[15, 80]
    ,min=15
    ,max=200
    ,step=5
    ,description='Circle size limits:'
    ,disabled=False
    ,continuous_update=False
    ,orientation='horizontal'
    ,readout=True
    ,readout_format='d'
    ,style=style
    ,layout={'width': '100%'}
)

# Font sizes (in points) for the plot title, the tick/colour-bar labels and the
# axis labels. All three accept values from 4 to 30 in steps of 2.
title_font_size = widgets.BoundedIntText(
    value=18,
    min=4,
    max=30,
    step=2,
    description='Title font size:',
    disabled=False,
    layout=layout,
    style=style,
)

axis_font_size = widgets.BoundedIntText(
    value=10,
    min=4,
    max=30,
    step=2,
    description='Axis and colour bar font size:',
    disabled=False,
    layout=layout,
    style=style,
)

axis_label_font_size = widgets.BoundedIntText(
    value=14,
    min=4,
    max=30,
    step=2,
    description='Axis label font size:',
    disabled=False,
    layout=layout,
    style=style,
)


In [97]:
def scaling(data, min_size, max_size):
    """Rescale `data` linearly into the range [min_size, max_size].

    The values are turned into a single-column array, passed through a
    MinMaxScaler configured with ``feature_range=(min_size, max_size)`` and
    returned flattened to one dimension.

    Parameters:
        data: array-like of values to rescale.
        min_size: lower end of the target range.
        max_size: upper end of the target range.

    Returns:
        A 1-D numpy array holding one rescaled value per input value.
    """
    # MinMaxScaler expects a 2-D (n_samples, n_features) input.
    column = np.array(data).reshape(-1, 1)
    size_scaler = MinMaxScaler(copy=True, feature_range=(min_size, max_size))
    return size_scaler.fit_transform(column).reshape(-1)

#this function runs dynamically, so the number of options for colours remains accurate whenever the user selects a colour scheme
def update_colour_category_number(*args):
    """Refresh the colour-count slider after the colour palette selection changes.

    Parameters:
        *args: change notification supplied by the widget observer; unused.
    """
    available_sizes = list(all_palettes[colour_palette.value])
    # Options must be replaced before the value: assigning a value that is not
    # among the slider's current options is rejected.
    colour_category_number.options = available_sizes
    # Prefer the third size, falling back to the last one for shorter palettes.
    colour_category_number.value = available_sizes[min(2, len(available_sizes) - 1)]
#this line ensures that whenever the colour palette widget is changed, the function above will run.
colour_palette.observe(update_colour_category_number, 'value')

In [98]:
#this function runs when a user attempts to import a file
def import_file():
    """Read the CSV named in the `file_path` widget and ask for the discrete columns.

    Stores the loaded frame in the global `df`, creates the global
    `discrete_variables` multi-select over its columns, shows the first rows and
    the selector, and adds a button that runs `discrete_variables_func`.

    Raises:
        ImportError: if the file cannot be read; the underlying error is chained
            as the cause so it stays visible in the traceback.
    """
    global df
    global discrete_variables

    # Paths pasted from a file manager often carry surrounding whitespace or quotes.
    csv_path = file_path.value.strip().strip('"\'')

    try:
        df = pd.read_csv(csv_path, encoding='raw_unicode_escape')
    #if this import doesn't work, the custom error message below lets the user know it's probably due to a file path error.
    except Exception as err:
        raise ImportError("Error on import. Check you've entered a valid file path") from err

    #this widget is used so users can select discrete variables
    discrete_variables = widgets.SelectMultiple(
        options=list(df.columns)
        ,description='Select discrete variables, select multiple variables by holding Ctrl/Cmd:'
        ,disabled=False
        ,style=style
        ,layout=layout
    )

    display(df.head())
    display(discrete_variables)

    #feeds into next function
    widgets.interact_manual(discrete_variables_func)

def discrete_variables_func():
    """Fix column types from the user's selection and show the plot-settings tabs.

    Columns chosen in `discrete_variables` are converted to strings and every
    other column to float. The column partitions (`columns`, `discrete`,
    `continuous`, `quantileable`) and the four default-variable selectors are
    stored as globals, the settings widgets are laid out in a tabbed view, and a
    button that runs `show_plot` is added.

    Raises:
        TypeError: if a column not marked as discrete cannot be converted to
            float; the underlying error is chained as the cause.
    """
    global columns
    global discrete
    global continuous
    global quantileable
    global default_x
    global default_y
    global default_colour
    global default_size

    selected_discrete = list(discrete_variables.value)
    non_discrete_columns = list(df.columns.difference(selected_discrete))

    #Declare data types
    if selected_discrete:
        df[selected_discrete] = df[selected_discrete].astype(str) #discrete variables are strings

    for non_discrete_column in non_discrete_columns:
        try:
            df[non_discrete_column] = df[non_discrete_column].astype(float)
        except Exception as err:
            raise TypeError("Error in converting the '{}' column to a float, check for non-numeric values.".format(non_discrete_column)) from err

    #Partition the columns into data types
    columns = sorted(df.columns)
    # Discreteness follows the user's selection directly, so it does not depend on
    # which dtype pandas uses to store strings.
    discrete = [x for x in columns if x in selected_discrete]
    continuous = [x for x in columns if x not in discrete]
    quantileable = [x for x in continuous if len(df[x].unique()) > 20]

    #widgets for variables
    all_column_names = list(df.columns)

    default_x = widgets.Select(
        options=all_column_names
        ,description='Select default x-axis variable:'
        ,disabled=False
        ,style=style
        ,layout=layout
    )

    default_y = widgets.Select(
        options=all_column_names
        ,description='Select default y-axis variable:'
        ,disabled=False
        ,style=style
        ,layout=layout
    )

    # Colour and size only make sense for numeric columns; pass the column names
    # themselves rather than a DataFrame.
    default_colour = widgets.Select(
        options=non_discrete_columns
        ,description='Select default colour variable:'
        ,disabled=False
        ,style=style
        ,layout=layout
    )

    default_size = widgets.Select(
        options=non_discrete_columns
        ,description='Select default size variable:'
        ,disabled=False
        ,style=style
        ,layout=layout
    )

    #layout dictionaries for tab structure
    #each tab maps to a list of widget groups; every group becomes one vertical box
    layout_dict = {'Default Variables':[[default_x, default_y]
                                        ,[default_size]
                                        ,[default_colour]]
                   ,'Style':[[colour_scheme_info, colour_palette, default_bg_colour, default_plot_colour, default_c, text_colour, default_hover_colour],
                             [colour_category_number, colour_palette_reverse]
                             ,[default_transparency, hover_transparency]]
                   ,'Size':[[plot_height, plot_width]
                            ,[title_font_size, axis_label_font_size, axis_font_size]
                            ,[default_sz, min_max_circle]]
                  }

    #one horizontal box per tab, holding that tab's vertical boxes side by side
    tab_pages = [
        widgets.HBox([widgets.VBox(list(group)) for group in groups], layout=layout)
        for groups in layout_dict.values()
    ]

    #make tab
    tab = widgets.Tab()
    tab.children = tab_pages

    #assign the correct title to each tab; enumerate supplies the tab position
    for index, tab_title in enumerate(layout_dict):
        tab.set_title(index, str(tab_title))

    #display tab
    display(tab)

    widgets.interact_manual(show_plot)

In [99]:
#this cell contains functions which create and maintain the bokeh plot
def modify_doc(doc):
    """Build the interactive scatter plot application inside a Bokeh document.

    Adds a column layout to `doc` made of the axis/size/colour selectors, the
    figure and a data table that shares the figure's data source. Changing any
    selector rebuilds the figure and the table in place.

    Parameters:
        doc: the Bokeh document the layout is added to.
    """
    def create_figure():
        """Create the figure for the current selector and settings-widget values.

        Stores the data source, the axis/size/colour titles and the figure in
        globals (`source`, `x_title`, `y_title`, `c_title`, `sz_title`, `p`) and
        returns the figure.

        Raises:
            ValueError: if the selected palette does not provide the requested
                number of colours.
        """
        #declare global variables
        global source
        global x_title
        global y_title
        global c_title
        global sz_title
        global p

        try:
            COLORS = all_palettes[colour_palette.value][colour_category_number.value]
        except KeyError as err:
            palette_error = 'Colour scheme import failed, check if the number of colours is available for the scheme you\'ve chosen'
            display(palette_error)
            # Without a palette nothing below can work, so fail here with a clear
            # error instead of a NameError further down.
            raise ValueError(palette_error) from err

        if colour_palette_reverse.value:
            COLORS = COLORS[::-1]

        #dynamic variables for plotting
        xs = df[x.value].values
        ys = df[y.value].values
        x_title = x.value.title()
        y_title = y.value.title()
        c_title = colour.value.title()
        sz_title = size.value.title()
        n_rows = len(df)

        #size
        if size.value != 'None':
            sz = scaling(df[size.value], min(min_max_circle.value), max(min_max_circle.value))
            sz_data = df[size.value].values
        else:
            # one flat entry per row, matching the shape of the other columns
            sz = np.full(n_rows, default_sz.value)
            sz_data = ['No size selected'] * n_rows

        #colour
        if colour.value != 'None':
            c_data = df[colour.value].values
            groups = pd.qcut(c_data, len(COLORS), duplicates = 'drop')
            # qcut assigns code -1 to missing values; give those rows the default
            # circle colour instead of letting -1 index the last palette colour.
            c = [COLORS[code] if code >= 0 else default_c.value for code in groups.codes]
        else:
            c = [default_c.value] * n_rows
            c_data = ['No colour selected'] * n_rows

        #axes range
        kw = dict()
        if x.value in discrete:
            kw['x_range'] = sorted(set(xs))
        if y.value in discrete:
            kw['y_range'] = sorted(set(ys))

        #title
        kw['title'] = "%s vs %s \t \t Size: %s \t \t Colour: %s" % (x_title, y_title, sz_title, c_title)

        #make the figure
        p = figure(plot_height=plot_height.value
                   ,plot_width=plot_width.value
                   ,tools='pan,box_zoom,reset,save'
                   ,toolbar_location="above"
                   ,sizing_mode = 'scale_width'
                   ,**kw)

        p.xaxis.axis_label = x_title
        p.yaxis.axis_label = y_title

        #axis label rotation if variable is discrete
        if x.value in discrete:
            # numpy is used directly; the `pd.np` alias is not available in current pandas
            p.xaxis.major_label_orientation = np.pi / 4

        #ColumnDataSource
        source = ColumnDataSource(data=dict(
            xs=xs
            ,ys=ys
            ,c=c
            ,sz=sz
            ,c_data=c_data
            ,sz_data=sz_data
        ))

        #glyphs
        p.circle(
            x='xs'
            ,y='ys'
            ,color ='c'
            ,size='sz'
            ,line_color = text_colour.value
            ,alpha=1
            ,hover_fill_color=default_hover_colour.value
            ,hover_line_color = text_colour.value
            ,hover_alpha=hover_transparency.value
            ,source=source
        )

        #style
        p.title.align = 'center'
        p.border_fill_color = default_bg_colour.value
        p.background_fill_color = default_plot_colour.value
        p.background_fill_alpha = default_transparency.value
        p.min_border_right = 150

        #text and grid lines colours
        p.title.text_color = text_colour.value
        p.xaxis.axis_label_text_color = text_colour.value
        p.yaxis.axis_label_text_color = text_colour.value
        p.xaxis.major_label_text_color = text_colour.value
        p.yaxis.major_label_text_color = text_colour.value
        p.xaxis.major_tick_line_color = text_colour.value
        p.yaxis.major_tick_line_color = text_colour.value
        p.xaxis.minor_tick_line_color = text_colour.value
        p.yaxis.minor_tick_line_color = text_colour.value
        p.xaxis.axis_line_color = text_colour.value
        p.yaxis.axis_line_color = text_colour.value
        if x.value in discrete:
            p.xaxis.group_text_color = text_colour.value
            p.xaxis.subgroup_text_color = text_colour.value
        if y.value in discrete:
            p.yaxis.group_text_color = text_colour.value
            p.yaxis.subgroup_text_color = text_colour.value
        p.xgrid.grid_line_color = text_colour.value
        p.ygrid.grid_line_color = text_colour.value
        p.outline_line_color = text_colour.value

        #font sizes
        p.title.text_font_size =  str(title_font_size.value) + 'pt'
        p.xaxis.axis_label_text_font_size = str(axis_label_font_size.value) + 'pt'
        p.yaxis.axis_label_text_font_size = str(axis_label_font_size.value) + 'pt'
        p.xaxis.major_label_text_font_size = str(axis_font_size.value) + 'pt'
        p.yaxis.major_label_text_font_size = str(axis_font_size.value) + 'pt'

        #hover
        hover = HoverTool(tooltips=[
            (str(x.value), '@xs')
            ,(str(y.value), '@ys')
            ,(str(colour.value), '@c_data')
            ,(str(size.value), '@sz_data')
            ,('Row number', '$index')])
        p.add_tools(hover)

        #colorbar
        if colour.value != 'None':
            # nanmin/nanmax keep the bar's range well defined when values are missing
            mapper = LinearColorMapper(palette=COLORS, low=np.nanmin(c_data), high=np.nanmax(c_data))
            color_bar = ColorBar(color_mapper=mapper, label_standoff=15, location=(20,0)
                                 ,background_fill_color = default_bg_colour.value)
            color_bar.formatter.use_scientific = False
            color_bar.major_label_text_font_size = str(axis_font_size.value) + 'pt'
            color_bar.major_label_text_color = text_colour.value
            color_bar.major_tick_line_color = text_colour.value
            p.add_layout(color_bar, 'right')

        #returns p
        return p

    #function for linked datatable
    def datatable_func():
        """Build a data table over the global `source` written by `create_figure`."""
        t_columns = [TableColumn(field='xs', title=x_title)
                     ,TableColumn(field='ys', title=y_title)
                     ,TableColumn(field='c_data', title=c_title)
                     ,TableColumn(field='sz_data', title=sz_title)
                    ]
        data_table = DataTable(
            source=source
            ,columns=t_columns
            ,scroll_to_selection=True
            ,fit_columns=True
        )
        data_table = widgetbox(data_table, sizing_mode='scale_width')
        return data_table

    def update(attr, old, new):
        """Selector callback: rebuild the figure, then the table that reads its source."""
        root_layout.children[1] = create_figure()
        root_layout.children[2] = datatable_func()

    #defining callback actions
    x = Select(title='X-Axis', value=default_x.value, options=columns)
    x.on_change('value', update)

    y = Select(title='Y-Axis', value=default_y.value, options=columns)
    y.on_change('value', update)

    # Quantileable columns are listed first; every quantileable column is also
    # continuous, so repeated names are dropped while keeping that order.
    numeric_options = ['None'] + list(dict.fromkeys(quantileable + continuous))

    # Fall back to 'None' when no default is available (no numeric columns).
    size = Select(title='Size', value=default_size.value or 'None', options=numeric_options)
    size.on_change('value', update)

    colour = Select(title='Colour', value=default_colour.value or 'None', options=numeric_options)
    colour.on_change('value', update)

    #bringing it all together
    controls = row(widgetbox([x, y], width=300), widgetbox([size, colour]), height = 150)
    # create_figure() must run before datatable_func(), which reads the globals it sets
    root_layout = column(controls, create_figure(), datatable_func())
    doc.add_root(root_layout)

    #--------------------------------------------------------------------------------------
    #optional commands if you want to go down the bokeh server route
    #curdoc().add_root(layout)
    #curdoc().title = title
    #--------------------------------------------------------------------------------------

In [100]:
#this cell wraps the plot-building function in a bokeh application and shows it in the notebook
def show_plot():
    """Wrap `modify_doc` in a Bokeh application and display it in the notebook."""
    plot_app = Application(FunctionHandler(modify_doc))

    #Make sure the URL matches the port of your Jupyter instance
    show(plot_app, notebook_url="localhost:8888")

In [101]:
# Entry point of the user interface: show the file-path box, then a manual
# "Run Interact" button that calls import_file() when clicked.
display(file_path)
widgets.interact_manual(import_file)

Text(value='', description='Enter file path of csv:', layout=Layout(align_content='stretch', align_items='stre…

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

<function __main__.import_file()>