# Plotting for average daily users

### Imports

In [1]:
import os
import pandas as pd
import numpy as np

from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, FactorRange, HoverTool
from bokeh.plotting import figure, show
from bokeh.transform import dodge

from IPython.display import display_javascript, clear_output

output_notebook()

### Initialization: load the data and define the month order

In [2]:
# Month labels in the order they should sort and appear on the x-axis
months = ('Dec', 'Jan', 'Feb', 'Mar', 'Apr')

# The processed CSV sits in ../data/processed relative to the working directory
file_path = os.path.abspath(
    os.path.join(os.pardir, "data", "processed", "daily_user_averages.csv")
)
adu_df = pd.read_csv(file_path)

# Store month as a categorical so it sorts in the order given by `months`
adu_df['month'] = pd.Categorical(adu_df['month'], categories=months)

adu_df.head()

Unnamed: 0,site,season,month,dayOfWeek,n,adu
0,BCC Butler,2017-2018,Apr,all,26,46.230769
1,BCC Cardiff,2017-2018,Apr,all,28,421.857143
2,BCC Mill D,2017-2018,Apr,all,24,17.541667
3,LCC Summer Road,2017-2018,Apr,all,30,123.866667
4,BCC Butler,2017-2018,Dec,all,14,38.428571


### Convert data to Bokeh format

In [3]:
# Wrap the whole table in a Bokeh data source and list the columns it exposes
src = ColumnDataSource(data=adu_df)
src.data.keys()

dict_keys(['index', 'site', 'season', 'month', 'dayOfWeek', 'n', 'adu'])

In [4]:
# Seasons to include. This is a list rather than a bare string because `in`
# inside DataFrame.query is backed by `isin`, which only accepts list-like values.
seasons = ['2017-2018']
temp_df = adu_df.query("site == 'LCC Our Lady' and dayOfWeek == 'all' and season in @seasons")

# One value per month label in `months` (defined with the data load above):
# the first matching `adu`, or 0 when the month has no rows for this selection.
adu = []
for month in months:
    month_adu = temp_df.loc[temp_df['month'] == month, 'adu']
    adu.append(0 if month_adu.empty else month_adu.values[0])

src = ColumnDataSource(data=dict(x=months, counts=adu))

In [5]:
clear_output()

# Figure with a categorical month axis; the toolbar itself is hidden
p = figure(
    x_range=FactorRange(*months),
    plot_height=250,
    title='Average Daily Trailhead Users',
    toolbar_location=None,
    tools="save",
)

# One bar per month, slightly transparent until hovered
p.vbar(x='x', top='counts', source=src, width=0.9, fill_alpha=0.75, hover_fill_alpha=1.0)

# Tooltip showing the bar's value
p.add_tools(HoverTool(tooltips=[("Average Daily Users", "@counts{int}")]))

# Text styling: bold 12pt for the title and for both axes' tick labels
p.title.text_font_style = 'bold'
p.title.text_font_size = '12pt'
p.xaxis.major_label_text_font_style = 'bold'
p.xaxis.major_label_text_font_size = '12pt'
p.yaxis.major_label_text_font_style = 'bold'
p.yaxis.major_label_text_font_size = '12pt'

# Tick marks: none on the x-axis, no minor ticks on the y-axis
p.xaxis.major_tick_line_color = None
p.xaxis.minor_tick_line_color = None
p.yaxis.minor_tick_line_color = None

# x-axis layout: no range padding, no vertical grid lines, horizontal labels
# pushed slightly away from the axis
p.x_range.range_padding = 0
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 0
p.xaxis.major_label_standoff = 10

# Bars start at zero on the y-axis
p.y_range.start = 0

show(p)

In [6]:
def make_data_set(site, seasons, dayOfWeek, adu_df):
    """
    Select the rows of adu_df for one site, one day-of-week value and the chosen
    seasons, sorted by season and then by month.

    site      : value matched against the 'site' column
    seasons   : a single season label or an iterable of season labels matched
                against the 'season' column
    dayOfWeek : value matched against the 'dayOfWeek' column
    adu_df    : DataFrame with 'site', 'season', 'month' and 'dayOfWeek' columns

    Returns a new, sorted DataFrame; adu_df itself is left untouched. Months sort
    according to the dtype of the 'month' column (category order when categorical).
    """
    # `in` inside query is backed by isin, which rejects a bare string, so a
    # single season label is wrapped into a one-element list.
    season_list = [seasons] if isinstance(seasons, str) else list(seasons)

    selected = adu_df.query(
        "site == @site and dayOfWeek == @dayOfWeek and season in @season_list"
    )

    # One sort on both keys is enough; returning the sorted copy avoids
    # mutating the query result in place.
    return selected.sort_values(['season', 'month'])


In [247]:
def _season_offsets(n_seasons, width):
    """Return one horizontal dodge offset per season, centred on the month tick."""
    # Gap left between neighbouring bars. Two seasons use a slightly wider gap
    # so the result matches the spacing that was hand-tuned for that case.
    gap = 0.04 if n_seasons == 2 else 0.02
    step = width + gap
    middle = (n_seasons - 1) / 2
    return [(position - middle) * step for position in range(n_seasons)]


def make_plot(src_df, months=('Dec', 'Jan', 'Feb', 'Mar', 'Apr')):
    """
    Generate a grouped bar chart of average daily users per month, with one bar
    per season found in src_df.

    src_df : DataFrame with 'site', 'season', 'month' and 'adu' columns. The
             title uses the site of the first row.
    months : month labels for the x-axis, in display order.

    Returns the Bokeh figure (not yet shown).
    Raises ValueError if src_df has no rows.
    """
    if src_df.empty:
        raise ValueError("make_plot needs at least one row of data to plot")

    months = tuple(months)

    # Base colors - blues in increasing darkness
    colors = ('#0EBFE9', '#0BB5FF', '#009ACD', '#00688B', '#0D4F8B')

    # Create main figure; the y-range leaves 10% headroom above the tallest bar
    p = figure(x_range=FactorRange(*months), y_range=(0, src_df['adu'].max() * 1.1),
               plot_height=400, plot_width=800, toolbar_location='right', tools="save",
               title="Average Daily Trailhead Users at Site: {}".format(src_df['site'].values[0]))

    # Determine how many seasons there are
    seasons = sorted(src_df['season'].unique())
    n_seasons = len(seasons)

    # The bars of one month share 0.8 of its slot. With many seasons (roughly
    # more than ten) the added gaps make groups spill into neighbouring months.
    width = 0.8 / n_seasons
    offsets = _season_offsets(n_seasons, width)

    for position, (offset, season) in enumerate(zip(offsets, seasons)):
        # Reuse the palette from the start when there are more seasons than colors
        color = colors[position % len(colors)]

        season_df = src_df[src_df['season'] == season]

        # First matching adu per month; months without data plot as 0
        counts = []
        for month in months:
            month_adu = season_df.loc[season_df['month'] == month, 'adu']
            counts.append(0 if month_adu.empty else month_adu.values[0])
        bar_source = ColumnDataSource(data=dict(x=months, counts=counts))

        p.vbar(x=dodge('x', offset, range=p.x_range), top='counts', width=width, source=bar_source,
               fill_alpha=0.9, hover_fill_alpha=1.0, legend_label=season, name=season, color=color)

    # Add hover tool; $name is the renderer name, i.e. the season set on each vbar
    p.add_tools(HoverTool(tooltips=[("Season", "$name"),
                                    ("Average Daily Users", "@counts{int}")]))

    # Title
    p.title.text_font_size = '12pt'
    p.title.text_font_style = 'bold'

    # x-axis modifications
    p.x_range.range_padding = 0
    p.xgrid.grid_line_color = None
    p.xaxis.major_tick_line_color = None  # turn off x-axis major ticks
    p.xaxis.minor_tick_line_color = None  # turn off x-axis minor ticks
    p.xaxis.major_label_orientation = 0
    p.xaxis.major_label_text_font_size = '12pt'
    p.xaxis.major_label_text_font_style = 'bold'
    p.xaxis.major_label_standoff = 10

    # y-axis modifications (the range already starts at 0, see figure() above)
    p.yaxis.minor_tick_line_color = None
    p.yaxis.major_label_text_font_size = '12pt'
    p.yaxis.major_label_text_font_style = 'bold'

    # legend
    p.legend.location = "top_left"
    p.legend.orientation = "horizontal"

    return p

In [248]:
# Selection to plot: one site, two seasons side by side, all days of the week pooled
site, dayOfWeek = 'LCC Our Lady', 'all'
seasons = ['2017-2018', '2018-2019']

src = make_data_set(site, seasons, dayOfWeek, adu_df)

In [249]:
# Clear any previous output, then build and render the multi-season chart
clear_output()
p = make_plot(src_df=src)
show(p)