In [None]:
#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## INTERACTIVEVISUALIZATIONWITHBOKEH/INTERACTIVEPLOTS/INTERACTIVEVISUALIZATIONWITHBOKEH INTERACTIVEPLOTS 1 ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs




In [None]:
#=================================================-
#### Slide 9: Load the libraries  ####

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

from bokeh.io import output_notebook
from bokeh.plotting import figure, output_file, show, output_notebook, save
from bokeh.transform import factor_cmap, factor_mark
from bokeh.layouts import column, row, gridplot
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter, GroupFilter, CDSView
import ipywidgets as widgets
from ipywidgets import interact, interact_manual




In [None]:
#=================================================-
#### Slide 10: Bokeh: simple plot  ####

# Sample coordinates used by the simple plots that follow.
x_values = list(range(1, 7))
y_values = [
    6, 7, 2,
    3, 6, 4,
]





In [None]:
#=================================================-
#### Slide 11: Bokeh: simple plot  ####

# Send Bokeh output to the notebook.
output_notebook()

# Start from an empty figure, add one circle per (x, y) pair, then display it.
p = figure()
p.circle(
    x=x_values,
    y=y_values,
)
show(p)




In [None]:
#=================================================-
#### Slide 12: Bokeh: add size, color, and opacity  ####

# Empty 400x400 figure.
p = figure(width=400, height=400)

# Circle glyphs with an explicit marker size, color and opacity.
p.circle(x=x_values, y=y_values, size=20, color="red", alpha=0.7)
show(p)




In [None]:
#=================================================-
#### Slide 13: Bokeh: triangle glyph  ####

p = figure(width=400, height=400)

# Red line through the points ...
p.line(x=x_values, y=y_values, color='red')

# ... with a semi-transparent triangle marker added at every point.
p.triangle(x=x_values, y=y_values, size=20, color="darkseagreen", alpha=0.7)
show(p)


#######################################################
####  CONGRATULATIONS ON COMPLETING THIS MODULE!   ####
#######################################################


In [None]:
#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## INTERACTIVEVISUALIZATIONWITHBOKEH/INTERACTIVEPLOTS/INTERACTIVEVISUALIZATIONWITHBOKEH INTERACTIVEPLOTS 2 ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs




In [None]:
#=================================================-
#### Slide 2: Directory settings  ####

# Set 'main_dir' to location of the project folder
from pathlib import Path 
# main_dir is taken to be two levels above the current working directory.
home_dir = Path(".").resolve()
main_dir = home_dir.parent.parent
print(main_dir)

# The sub-folders are built as plain strings (not Path objects).
data_dir = f"{main_dir}/data"
print(data_dir)

plot_dir = f"{main_dir}/plots"
print(plot_dir)




In [None]:
#=================================================-
#### Slide 5: Load data into Python  ####

# Read the stroke dataset from the data folder and preview the first rows.
df = pd.read_csv(f"{data_dir}/healthcare-dataset-stroke-data.csv")
print(df.head())




In [None]:
#=================================================-
#### Slide 6: Subset data  ####

# Keep only the columns used in this module.
# .copy() makes the subset an independent DataFrame, so adding the
# 'Target_class' column later does not trigger pandas' SettingWithCopyWarning.
df = df[['age', 'avg_glucose_level', 'heart_disease', 'ever_married', 'hypertension', 'Residence_type', 'gender', 'smoking_status', 'work_type', 'stroke']].copy()
print(df.head())




In [None]:
#=================================================-
#### Slide 7: Convert target to binary  ####

# Inspect the target: count how many rows fall under each 'stroke' value.
print(df.stroke.value_counts())




In [None]:
#=================================================-
#### Slide 10: The data at first glance  ####

# Printed in order, one after the other: the first 3 rows, the column
# data types, and the number of rows per 'stroke' value.
print(df.head(3), df.dtypes, df['stroke'].value_counts(), sep="\n")




In [None]:
#=================================================-
#### Slide 11: Data prep: label target data  ####

# Text label for the target: rows whose 'stroke' equals 1 become 'affected',
# every other row becomes 'not_affected'.
df['Target_class'] = np.where(df['stroke'].eq(1),
                              'affected',
                              'not_affected')




In [None]:
#=================================================-
#### Slide 12: Data prep: check for NAs  ####

# Check for NAs.
# Number of missing values in each column.
print(df.isna().sum())

# The same counts expressed as a percentage of the number of rows.
percent_missing = df.isna().sum() * 100 / df.shape[0]
print(percent_missing)




In [None]:
#=================================================-
#### Slide 13: Data prep: check for NAs  ####

# Drop every column in which `perc` percent or more of the values are NaN.
# `thresh` is the minimum number of non-NaN values a column needs to be kept.
perc = 50.0
min_count = int(((100 - perc) / 100) * len(df) + 1)
df = df.dropna(axis="columns", thresh=min_count)
print(df.shape)
# Function to impute NA in both numeric and categorical columns
def fillna(df):
    """Return a copy of ``df`` with missing values imputed column by column.

    Numeric columns are filled with the column mean. All other columns are
    filled with their most frequent value (the first mode when several values
    tie). A column that holds no usable value at all (entirely NaN) is left
    unchanged.

    The column order of ``df`` is preserved, and a frame that has no
    non-numeric column (or no numeric column) is handled without error.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to impute. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as ``df``.
    """
    filled_df = df.copy()

    # Fill numerical columns with the mean (NaN values are skipped by mean()).
    for column in df.select_dtypes(include=['number']).columns:
        mean_value = df[column].mean()
        # The mean is NaN when the whole column is NaN: nothing to fill with.
        if pd.notna(mean_value):
            filled_df[column] = df[column].fillna(mean_value)

    # Fill the remaining (categorical) columns with the mode.
    for column in df.select_dtypes(exclude=['number']).columns:
        modes = df[column].mode()
        # mode() is empty when the whole column is NaN: nothing to fill with.
        if not modes.empty:
            filled_df[column] = df[column].fillna(modes.iloc[0])

    return filled_df
  
# Impute the remaining missing values; df is rebound to the imputed frame.
df = fillna(df)




In [None]:
#=================================================-
#### Slide 15: Use stroke data for plots  ####

# Age against average glucose level, drawn with large diamonds at alpha 0.2.
p = figure(
    title="age vs avg_glucose_level",
    x_axis_label='age',
    y_axis_label='avg_glucose_level',
    width=600,
    height=600,
)

p.diamond(x=df['age'], y=df['avg_glucose_level'], size=20, color="plum", alpha=0.2)
show(p)





In [None]:
#=================================================-
#### Slide 16: vbar() and hbar()  ####

# Number of rows per 'stroke' value. value_counts() orders its result by
# frequency, so the counts are sorted by class label and the bar positions are
# taken from that same index. This guarantees that every bar is drawn at the
# x position of the class it belongs to, whichever class is the most frequent.
stroke_counts = df.stroke.value_counts().sort_index()

p = figure(width=400, height=400)

p.vbar(x = stroke_counts.index.tolist(),
       width = 0.2,
       bottom = 0,
       top = stroke_counts.tolist(),
       color = "firebrick")

show(p)




In [None]:
#=================================================-
#### Slide 17: vbar() and hbar() (cont'd)  ####

# Number of rows per 'stroke' value, sorted by class label so that every bar
# is drawn at the y position of the class it belongs to (value_counts() alone
# orders by frequency, not by label).
stroke_counts = df.stroke.value_counts().sort_index()

p = figure(width = 400, height = 400)

p.hbar(y = stroke_counts.index.tolist(),
       height = 0.2,
       left = 0,
       right = stroke_counts.tolist(),
       color = "navy")

show(p)




In [None]:
#=================================================-
#### Slide 18: Markers for categorical data  ####

# Target classes and the marker shape paired with each of them (same position
# in both lists).
LEVELS = [
    'not_affected',
    'affected',
]
MARKERS = [
    'hex',
    'triangle',
]

# Empty, labelled figure; the glyphs are added in the next cell.
p = figure(
    title="Age vs average glucose level",
    x_axis_label='age',
    y_axis_label='avg_glucose_level',
)





In [None]:
#=================================================-
#### Slide 19: Markers for categorical data (cont'd)  ####

# One marker shape and one color per value of 'Target_class'.
# legend_group (not legend_label) is used so that the legend gets one entry per
# distinct value of the 'Target_class' column; legend_label would instead show
# the literal text "Target_class" as a single entry for all points.
p.scatter("age", "avg_glucose_level",
           source = df,
           legend_group = "Target_class",
           fill_alpha = 0.1,
           size = 6,
           marker = factor_mark('Target_class',
                               MARKERS,
                               LEVELS),
           color = factor_cmap('Target_class',
                             'Category10_7',
                              LEVELS))
show(p)


#######################################################
####  CONGRATULATIONS ON COMPLETING THIS MODULE!   ####
#######################################################


In [None]:
#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## INTERACTIVEVISUALIZATIONWITHBOKEH/INTERACTIVEPLOTS/INTERACTIVEVISUALIZATIONWITHBOKEH INTERACTIVEPLOTS 3 ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs




In [None]:
#=================================================-
#### Slide 4: Laying out plots and plot tools  ####

# Send Bokeh output to the notebook.
output_notebook()

# Toolbar tools passed to the figures of this section.
tools = ["box_select", "hover", "reset"]

# First plot: age against average glucose level.
p1 = figure(
    title="age vs avg_glucose_level",
    width=400,
    height=400,
    tools=tools,
    x_axis_label='age',
    y_axis_label='avg_glucose_level',
)

p1.diamond(x=df['age'], y=df['avg_glucose_level'], size=20, color="plum", alpha=0.2)





In [None]:
#=================================================-
#### Slide 5: Laying out plots and widgets  ####

# Create another one.
LEVELS = ['not_affected', 'affected']
MARKERS = ['hex', 'triangle']
p2 = figure(width = 400, height = 400, tools = tools)

# Number of rows per 'stroke' value, sorted by class label so that every bar
# is drawn at the y position of the class it belongs to (value_counts() alone
# orders by frequency, not by label).
stroke_counts = df.stroke.value_counts().sort_index()

p2.hbar(y = stroke_counts.index.tolist(),
        height = 0.2,
        left = 0,
        right = stroke_counts.tolist(),
        color = "navy")

# Create another graph: one marker shape and one color per 'Target_class' value.
p3 = figure(title = "Age vs average glucose level",
            width = 400,
            height = 400,
            tools = tools)
p3.xaxis.axis_label = 'age'
p3.yaxis.axis_label = 'avg_glucose_level'

p3.scatter("age", "avg_glucose_level",
            source = df,
            legend_group = "Target_class",
            fill_alpha = 0.1, size = 12,
            marker = factor_mark('Target_class',
                                MARKERS, LEVELS),
            color = factor_cmap('Target_class',
                            'Category10_7',
                             LEVELS))





In [None]:
#=================================================-
#### Slide 6: Laying out plots and widgets (cont'd)  ####

# Stack the three plots vertically and display the layout.
show(column([p1, p2, p3]))




In [None]:
#=================================================-
#### Slide 7: Laying out plots and widgets (cont'd)  ####

# Place the three plots side by side and display the layout.
show(row([p1, p2, p3]))




In [None]:
#=================================================-
#### Slide 8: Laying out plots and widgets (cont'd)  ####

# 2x2 grid filled row by row: p1 and p2 on the first row; an empty cell (None)
# and p3 on the second row.
grid = gridplot([p1, p2, None, p3], ncols=2)

show(grid)




In [None]:
#=================================================-
#### Slide 9: ColumnDataSource  ####

# Import the ColumnDataSource class.
from bokeh.models import ColumnDataSource

# Wrap the DataFrame in a ColumnDataSource so that glyphs can refer to its
# columns by name.
src = ColumnDataSource(data=df)




In [None]:
#=================================================-
#### Slide 10: Customizing HoverTool  ####

# In a tooltip, '@name' reads a column of the data source and '$x' / '$y'
# read the position on the graph.
hover = HoverTool(
    tooltips=[
        ('Age', '@age'),
        ('Average glucose level', '@avg_glucose_level'),
        ('(x,y)', '($x, $y)'),
    ]
)

p = figure(
    title="age vs avg_glucose_level",
    width=400,
    height=400,
    x_axis_label='age',
    y_axis_label='avg_glucose_level',
)

p.diamond(x='age', y='avg_glucose_level', source=src, size=20, color="plum", alpha=0.2)

# Attach the hover tool to the figure.
p.add_tools(hover)




In [None]:
#=================================================-
#### Slide 11: Customizing HoverTool (cont'd)  ####

# Display the figure `p`.
show(p)




In [None]:
#=================================================-
#### Slide 12: Customizing HoverTool (cont'd)  ####

# Hover tool refers to our own data field using @ and
# a position on the graph using $.
hover = HoverTool(tooltips = [('Age', '@age'),
                          ('Average glucose level', '@avg_glucose_level'),
                          ('(x,y)', '($x, $y)')])

# The axis labels name the fields that are actually plotted below
# ('age' on x, 'avg_glucose_level' on y).
p = figure(title = "age vs avg_glucose_level",
           width = 400, height = 400,
          x_axis_label = 'age',
          y_axis_label = 'avg_glucose_level')


# hover_fill_alpha / hover_fill_color restyle the diamond under the cursor.
p.diamond('age','avg_glucose_level', source = src, size=20, color = "plum", alpha=0.2,
         hover_fill_alpha = 1.0, hover_fill_color = 'navy')

# Add the hover tool to the graph.
p.add_tools(hover)




In [None]:
#=================================================-
#### Slide 13: Customizing HoverTool (cont'd)  ####

# Display the figure `p`.
show(p)




In [None]:
#=================================================-
#### Slide 15: Highlighting data using HoverTool  ####

# Store the data in a ColumnDataSource.
stroke_cds = ColumnDataSource(df)
# Specify the selection tools to be made available.
select_tools = ['box_select', 'lasso_select', 'poly_select', 'tap', 'reset']
# Create the figure.
fig = figure(height = 400,
             width = 600,
             x_axis_label = 'age',
             y_axis_label = 'avg_glucose_level',
             title = 'Interactive scatterplot',
             toolbar_location = 'below',
             tools = select_tools)
# Add a square for each row of the data source. The ColumnDataSource created
# above is passed as the source (instead of the raw DataFrame) so that it is
# the object backing the glyph. selection_* / nonselection_* control how the
# points look once a selection has been made.
fig.square(x = 'age',
           y = 'avg_glucose_level',
           source = stroke_cds,
           color = 'royalblue',
           selection_color = 'deepskyblue',
           nonselection_color = 'lightgray',
           nonselection_alpha = 0.3)




In [None]:
#=================================================-
#### Slide 16: Customizing HoverTool  ####

# Tooltip rows as (label, value) pairs; '@name' reads a data source column.
tooltips = [('Age', '@age'), ('Average glucose level', '@avg_glucose_level')]

# Attach a HoverTool that uses these tooltips, then display the figure.
fig.add_tools(
    HoverTool(tooltips=tooltips)
)
show(fig)




In [None]:
#=================================================-
#### Slide 18: Customizing HoverTool (cont'd)  ####

# Separate ColumnDataSource backing the hover-only glyph below.
costa_cds = ColumnDataSource(df)

# Tooltip rows as (label, value) pairs; '@name' reads a data source column.
tooltips = [('Age', '@age'), ('Average glucose level', '@avg_glucose_level')]

# Circles drawn with alpha 0; they get a yellow fill at alpha 0.2 while hovered.
hover_glyph = fig.circle(
    x='age',
    y='avg_glucose_level',
    source=costa_cds,
    size=15,
    alpha=0,
    hover_fill_color='yellow',
    hover_alpha=0.2,
)

# Attach a HoverTool restricted to that glyph, then display the figure.
fig.add_tools(
    HoverTool(tooltips=tooltips, renderers=[hover_glyph])
)
show(fig)




In [None]:
#=================================================-
#### Slide 19: Highlighting data using labels  ####

# One shared data source; each view below exposes only the rows of one class.
stroke_labels = ColumnDataSource(df)

# Rows whose 'Target_class' is 'affected'.
vul_filters = [
    GroupFilter(column_name='Target_class', group='affected'),
]
vul_view = CDSView(source=stroke_labels, filters=vul_filters)

# Rows whose 'Target_class' is 'not_affected'.
nonvul_filters = [
    GroupFilter(column_name='Target_class', group='not_affected'),
]
nonvul_view = CDSView(source=stroke_labels, filters=nonvul_filters)




In [None]:
#=================================================-
#### Slide 20: Highlighting data using labels (cont'd)  ####

# Keyword arguments shared by the figures and glyphs created afterwards.

# Passed to figure(): size, axis labels, and no toolbar.
common_figure_kwargs = dict(
    width=400,
    height=500,
    x_axis_label='age',
    y_axis_label='avg_glucose_level',
    toolbar_location=None,
)

# Passed to every glyph call: fields, data source, marker size and opacity.
common_circle_kwargs = dict(
    x='age',
    y='avg_glucose_level',
    source=stroke_labels,
    size=12,
    alpha=0.7,
)

# Per-class settings: the view selecting the rows, the color, the legend entry.
common_vul_kwargs = dict(
    view=vul_view,
    color='#002859',
    legend_label='affected',
)
common_non_kwargs = dict(
    view=nonvul_view,
    color='#FFC324',
    legend_label='not_affected',
)





In [None]:
#=================================================-
#### Slide 21: Highlighting data using labels (cont'd)  ####

# Two figures showing the same data, one glyph per target class in each.
# Both are drawn with scatter() so that they differ only in their title and in
# the muted_alpha given to the second one (circle() with a `size` is
# deprecated in recent Bokeh releases in favor of scatter()).
hide_fig = figure(**common_figure_kwargs,
                  title = 'Click Legend to HIDE Data')
hide_fig.scatter(**common_circle_kwargs, **common_vul_kwargs)
hide_fig.scatter(**common_circle_kwargs, **common_non_kwargs)

# muted_alpha is the opacity a glyph takes once it has been muted.
mute_fig = figure(**common_figure_kwargs, title = 'Click Legend to MUTE Data')
mute_fig.scatter(**common_circle_kwargs, **common_vul_kwargs,
                 muted_alpha = 0.1)
mute_fig.scatter(**common_circle_kwargs, **common_non_kwargs,
                 muted_alpha = 0.1)




In [None]:
#=================================================-
#### Slide 22: Highlighting data using labels (cont'd)  ####

# What a click on a legend entry does in each figure: hide the glyph or mute it.
hide_fig.legend.click_policy = 'hide'
mute_fig.legend.click_policy = 'mute'

# Display both figures side by side.
show(row([hide_fig, mute_fig]))


#######################################################
####  CONGRATULATIONS ON COMPLETING THIS MODULE!   ####
#######################################################


In [None]:
#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## INTERACTIVEVISUALIZATIONWITHBOKEH/INTERACTIVEPLOTS/INTERACTIVEVISUALIZATIONWITHBOKEH INTERACTIVEPLOTS 4 ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs




In [None]:
#=================================================-
#### Slide 4: Adding widgets to graphs (cont'd)  ####

@interact_manual
def scatter_plot(x = list(df.columns), y = list(df.columns)):
    """Draw and show a circle scatter plot of two columns of ``df``.

    ``x`` and ``y`` are the names of the columns placed on the horizontal and
    vertical axes; they are also used for the axis labels and the title.
    Both defaults are the full list of column names of ``df`` at the time the
    function is defined (presumably what ``interact_manual`` offers as the
    choices for each argument -- confirm against the ipywidgets documentation).
    """
    plot_title = f'{x} vs {y}'
    p = figure(title=plot_title, x_axis_label=x, y_axis_label=y)

    p.circle(x=x, y=y, source=df, size=20, color="thistle", alpha=0.2)
    show(p)





In [None]:
#=================================================-
#### Slide 7: Saving your graph  ####

x_values = [1, 2, 3, 4, 5, 6]
y_values = [6, 7, 2, 3, 6, 4]

# Create figure.
p = figure(width = 400, height = 400)

# Add glyphs to it.
p.triangle(x_values, y_values, size = 20, color = "darkseagreen", alpha = 0.7)

# Save your plot.
# The file name and the INLINE resources are passed directly to save() instead
# of calling output_file() first: output_file() changes the session-wide output
# settings, so every later show() call would also write to this HTML file.
from bokeh.resources import INLINE

# Create the plots folder if it is missing so that saving cannot fail on it.
os.makedirs(plot_dir, exist_ok = True)
save(p,
     filename = plot_dir + "/bokeh-simple-plot.html",
     resources = INLINE,
     title = "Bokeh Plot")




In [None]:
#=================================================-
#### Slide 10: Exercise   ####




#######################################################
####  CONGRATULATIONS ON COMPLETING THIS MODULE!   ####
#######################################################
