> Created by Teodora Szasz, tszasz@uchicago.edu

# <h1 align="center">2. Visualize reported incidents of crime in Chicago (3) </h1>

In [None]:
# import libraries
import pandas as pd

In [None]:
# import Bokeh libraries
from bokeh.io import output_file, show
from bokeh.plotting import figure

# import ColumnDataSource from Bokeh
from bokeh.models import ColumnDataSource

# categorical color mapper
from bokeh.models import CategoricalColorMapper

In [None]:
# hover tool - 3rd phase
from bokeh.models import HoverTool

In [None]:
# send the rendered plot to a standalone HTML file
output_file(filename='crimes_categorical.html')

In [None]:
# path of the crimes CSV, relative to this notebook
file = "../Dataset/Crimes_-_2016_to_present.csv"

In [None]:
# load the CSV into a DataFrame
crimes = pd.read_csv(filepath_or_buffer=file)

In [None]:
# lift the column display limit so wide frames are shown in full
pd.options.display.max_columns = None

In [None]:
# preview the first five rows (head() defaults to 5)
crimes.head()

In [None]:
# keep only the two columns this analysis needs
columns_of_interest = ['District', 'Arrest']
crimes_all = crimes[columns_of_interest]

In [None]:
# rows where an arrest was made
made_arrest = crimes_all['Arrest'].eq(True)
c_d_arrest = crimes_all[made_arrest]

In [None]:
# rows where no arrest was made
no_arrest = crimes_all['Arrest'].eq(False)
c_d_non_arrest = crimes_all[no_arrest]

In [None]:
# Count the arrests recorded in each district.
# `c_d_arrest` is a filtered slice of `crimes_all`, so the new column is attached
# with `assign` (which returns a new frame) rather than item assignment; assigning
# onto the slice triggers pandas' SettingWithCopyWarning.
c_d_arrest = c_d_arrest.assign(
    Frequency_arrest=c_d_arrest.groupby('District')['District'].transform('count')
)
# every row of a district carries the same count, so keep one row per district
c_d_arrest_district = c_d_arrest.drop_duplicates('District')

In [None]:
# Count the crimes WITHOUT an arrest recorded in each district.
# `c_d_non_arrest` is a filtered slice of `crimes_all`, so the new column is
# attached with `assign` (which returns a new frame) rather than item assignment;
# assigning onto the slice triggers pandas' SettingWithCopyWarning.
c_d_non_arrest = c_d_non_arrest.assign(
    Frequency_non_arrest=c_d_non_arrest.groupby('District')['District'].transform('count')
)
# every row of a district carries the same count, so keep one row per district
c_d_non_arrest_district = c_d_non_arrest.drop_duplicates('District')

In [None]:
# display the per-district arrest counts (one row per district after drop_duplicates)
c_d_arrest_district

In [None]:
# preview the per-district non-arrest counts (head() defaults to 5 rows)
c_d_non_arrest_district.head()

In [None]:
def _district_label(value):
    """Return a district id as text, dropping the '.0' of whole-number floats."""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


# Combine the per-district arrest and non-arrest counts into one table
# (inner join: only districts present in both inputs are kept).
# - Rows with a missing District are dropped: pandas matches NaN keys with each
#   other in a merge, which would otherwise yield a bogus "NaN district" row.
# - District is converted to text because Bokeh's categorical color mapper
#   works with string factors.
c_d_all = (
    pd.merge(c_d_arrest_district, c_d_non_arrest_district, on='District')
    .dropna(subset=['District'])
    .assign(District=lambda merged: merged['District'].map(_district_label))
)

In [None]:
# One color per district. Every palette entry is distinct so that no two
# districts end up with the same color.
district_palette = [
    '#00FF00', '#FFD343', 'darkgray', 'brown', 'cyan', 'crimson', 'red',
    '#0000FF', 'purple', '#FFFF00', '#808000', 'teal', '#FF00FF',
    '#4000ff', '#00ff80', '#ff8000', '#660099', '#82E3BA', '#A3993D',
    '#2657AD', '#781C9E', '#F7D966',
]

# Factors are sorted so the district -> color assignment is reproducible instead
# of following the arbitrary iteration order of a set.
# NOTE(review): recent Bokeh releases only accept string factors - confirm the
# District column holds strings for the installed Bokeh version.
color_mapper = CategoricalColorMapper(
    palette=district_palette,
    factors=sorted(set(c_d_all['District'])),
)

In [None]:
# wrap the merged table so Bokeh glyphs can reference its columns by name
c_d_all_data = ColumnDataSource(data=c_d_all)

In [None]:
# create a figure object - earlier version without tools, superseded by the next cell
#plot = figure(x_axis_label='Frequency_arrest', y_axis_label='Frequency_non_arrest')

In [None]:
# create the figure with an explicit toolbar - 3rd phase
# 'hover' is listed in `tools` so that a HoverTool exists on the figure and can
# be looked up and customized afterwards
plot = figure(
    x_axis_label='Number of Arrested Crimes',
    y_axis_label='Number of Non-Arrested Crimes',
    tools='pan, wheel_zoom, box_zoom, reset, hover, save',
    title='Arrested versus Non-Arrested Crimes in Chicago',
)

In [None]:
# draw one diamond per row of the source: x = arrests, y = non-arrests,
# colored through the categorical mapper keyed on 'District'
# NOTE(review): newer Bokeh releases replace `legend=` with `legend_field=` -
# confirm against the installed version before changing
district_color = {'field': 'District', 'transform': color_mapper}
plot.diamond(
    x='Frequency_arrest',
    y='Frequency_non_arrest',
    source=c_d_all_data,
    size=10,
    color=district_color,
    legend='District',
)

In [None]:
# legend placement and background - 2nd phase
legend_box = plot.legend
legend_box.location = 'bottom_right'
legend_box.background_fill_color = 'lightgrey'

Hover tool documentation (for the 4th phase): http://bokeh.pydata.org/en/0.9.3/docs/user_guide/tools.html#hover-tool

In [None]:
# customize the Hover tool - 4th phase
# each entry is (label shown in the tooltip, '@column' read from the data source)
tooltip_rows = [
    ('District Number', '@District'),
    ('Number of Arrested Crimes', '@Frequency_arrest'),
    ('Number of Non Arrested Crimes', '@Frequency_non_arrest'),
]
# fetch the HoverTool attached to the figure and give it the tooltips
hover = plot.select_one(HoverTool)
hover.tooltips = tooltip_rows

In [None]:
# render the finished figure; output goes to the HTML file configured with output_file()
show(plot)