# Week 8 - Visualizations with Bokeh
### 8.1 Initialization
#### 8.1.1 Import Python Libraries

In [1]:
import numpy as np
import pandas as pd
import os  # used to build the dataset path
from IPython.display import display
from IPython import get_ipython
# Programmatic equivalent of the `%matplotlib inline` line magic.
get_ipython().run_line_magic('matplotlib', 'inline')

#### 8.1.2 Import Bokeh Library

In [2]:
from bokeh.models import ColumnDataSource # wraps tabular data so Bokeh glyphs can read it
from bokeh.models import FactorRange # categorical (string-factor) axis range
from bokeh.plotting import figure # creates a figure instance
from bokeh.io import show # displays Bokeh figures
from bokeh.io import output_notebook # renders output inside the notebook instead of a separate HTML page
from bokeh.palettes import Category20
from bokeh.models import Legend
# Route all subsequent show() calls to inline notebook output.
output_notebook()

### 8.2 Datasets Loading
#### 8.2.1 Add Focus Crimes List

In [3]:
# Crime categories this notebook concentrates on.
focuscrimes = {
    'WEAPON LAWS',
    'PROSTITUTION',
    'DRIVING UNDER THE INFLUENCE',
    'ROBBERY',
    'BURGLARY',
    'ASSAULT',
    'DRUNKENNESS',
    'DRUG/NARCOTIC',
    'TRESPASS',
    'LARCENY/THEFT',
    'VANDALISM',
    'VEHICLE THEFT',
    'STOLEN PROPERTY',
    'DISORDERLY CONDUCT',
}

# Alphabetically ordered list form, so iteration order is deterministic.
focuscrimes_lst = sorted(focuscrimes)

#### 8.2.2 Import Crimes Dataset

In [4]:
# The CSV is expected in a `Datasets` directory that sits one level above the
# current working directory.
fileName = 'Police_Department_Incident_Reports__Historical_2003_to_May_2018.csv'
datasets_dir = os.path.join(os.getcwd(), '..', 'Datasets')
filePath = os.path.abspath(os.path.join(datasets_dir, fileName))

# Read only the listed columns into the raw dataframe.
wanted_columns = ['Category', 'Date', 'Time', 'PdDistrict', 'PdId']
df_raw = pd.read_csv(filePath, usecols=wanted_columns)

#### 8.2.3 Preprocess Crime Dataframe

In [5]:
# Derive the calendar year of every incident (stored as a new column on df_raw).
df_raw['Year'] = pd.to_datetime(df_raw['Date']).dt.year

# Row filters: years from 2010 onwards except 2018, and focus categories only.
# NOTE(review): 2018 is presumably dropped because the file only runs to May 2018
# (per its name) -- confirm.
year_ok = df_raw['Year'].ge(2010) & df_raw['Year'].ne(2018)
category_ok = df_raw['Category'].isin(focuscrimes_lst)

# Copy so the new column below is written to an independent frame, not a view.
df = df_raw[year_ok & category_ok].copy()

# Hour of day (0-23) parsed from the time-of-day column.
df['Hour'] = pd.to_datetime(df['Time']).dt.hour

# Discard rows with any missing value and renumber the index from zero.
df = df.dropna().reset_index(drop=True)

#### 8.2.4 Extract Focus Crimes Relative Frequency Per Hour

In [6]:
# Incident totals per category, and per (category, hour) pair. The counts are
# cast to float64 so the normalised table below is floating point throughout.
category_count = df.groupby('Category')['PdId'].count().astype('float64')
per_hour_count = df.groupby(['Category', 'Hour'])['PdId'].count().astype('float64')

# Pivot to one row per hour and one column per category.
hourly_counts = per_hour_count.unstack().T

# Every focus crime must have a column, otherwise the plotting code that
# looks columns up by crime name would fail later with a less obvious error.
missing_crimes = [crime for crime in focuscrimes_lst if crime not in hourly_counts.columns]
if missing_crimes:
    raise KeyError(f'No incidents found for focus crimes: {missing_crimes}')

# Divide each category column by that category's total in a single aligned
# operation, giving the share of the category's incidents that fall in each hour.
plt_data = hourly_counts.div(category_count, axis='columns')

### 8.3 Interactive Plotting with Bokeh
#### 8.3.1 Data Preparation

In [9]:
# A categorical x-axis needs string factors, so label every hour as text.
x = [str(hour) for hour in plt_data.index]

# Build the data source with the same string labels in its index column.
# With a FactorRange, numeric x values are treated as raw axis coordinates
# rather than category names, which draws every bar half a category to the
# left of its tick; matching strings keep each bar centred on its hour.
hourly_source_frame = plt_data.copy()
hourly_source_frame.index = pd.Index(x, name=plt_data.index.name)
source = ColumnDataSource(hourly_source_frame)

# `width`/`height` replace `plot_width`/`plot_height`, which Bokeh 3.0 removed.
# The toolbar is disabled; interaction happens through the legend only.
p = figure(width=970,
           height=500,
           x_range=FactorRange(factors=x),
           toolbar_location=None,
           tools='',
           title='Crimes per Hour',
           x_axis_label='Hours of the Day',
           y_axis_label='Relative Frequency')

#### 8.3.2 Create the Interactive Plot

In [10]:
# One palette colour per focus crime.
colors = Category20[len(focuscrimes_lst)]

# Renderer of each crime's bars, keyed by crime name.
bar = {}
for crime, color in zip(focuscrimes_lst, colors):
    # Vertical bars for this crime: hour on x, the crime's column as bar height.
    # Each series starts muted (almost transparent) until toggled in the legend.
    bar[crime] = p.vbar(
        x='Hour',
        top=crime,
        source=source,
        fill_color=color,
        line_color=color,
        alpha=0.62,
        muted_alpha=0.03,
        muted=True,
        width=0.75,
    )

# Legend entries pair each crime label with its renderer.
legend_items = [(crime, [bar[crime]]) for crime in focuscrimes_lst]

# Place the legend in the panel to the left of the plot area.
legend = Legend(items=legend_items, location="top")
p.add_layout(legend, 'left')

# Clicking a legend entry toggles the muted state of that series
# ('hide' is the alternative policy).
p.legend.click_policy = "mute"

show(p)  # display plot