> Created by Teodora Szasz, tszasz@uchicago.edu

# <h1 align="center">2. Visualize reported incidents of crime in Chicago (3) </h1>

In [1]:
# import libraries
import pandas as pd

In [2]:
# import Bokeh libraries
from bokeh.io import output_file, show
from bokeh.plotting import figure

# import ColumnDataSource from Bokeh
from bokeh.models import ColumnDataSource

# categorical color mapper
from bokeh.models import CategoricalColorMapper

In [3]:
# categorical color mapper - 3rd phase
from bokeh.models import HoverTool

In [4]:
# create output file
output_file('crimes_categorical.html')

In [5]:
# locate the file
file = '../Dataset/Crimes_-_2016_to_present.csv'

In [6]:
# use pandas' read_csv() method
crimes = pd.read_csv(file)

In [7]:
# option to be able to see all the columns
pd.set_option('display.max_columns', None)

In [8]:
crimes.head(5)

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,10842630,JA122203,01/19/2017 06:28:15 PM,059XX S ROCKWELL ST,2022,NARCOTICS,POSS: COCAINE,APARTMENT,True,False,824,8,16.0,66,18,,,2017,08/05/2017 03:50:08 PM,,,
1,10842633,JA122240,01/19/2017 03:18:38 PM,006XX N ST LOUIS AVE,2024,NARCOTICS,POSS: HEROIN(WHITE),APARTMENT,True,False,1121,11,27.0,23,18,,,2017,08/05/2017 03:50:08 PM,,,
2,10842652,JA122480,01/20/2017 12:52:00 AM,025XX E 106TH ST,2024,NARCOTICS,POSS: HEROIN(WHITE),RESIDENCE,True,False,434,4,10.0,51,18,,,2017,08/05/2017 03:50:08 PM,,,
3,10885990,JA185288,03/13/2017 08:00:00 AM,033XX W FILLMORE ST,1812,NARCOTICS,POSS: CANNABIS MORE THAN 30GMS,POLICE FACILITY/VEH PARKING LOT,True,False,1134,11,24.0,29,18,,,2017,08/05/2017 03:50:08 PM,,,
4,10886000,JA185322,03/13/2017 08:00:00 AM,033XX W FILLMORE ST,2024,NARCOTICS,POSS: HEROIN(WHITE),POLICE FACILITY/VEH PARKING LOT,True,False,1134,11,24.0,29,18,,,2017,08/05/2017 03:50:08 PM,,,


In [9]:
# extract the District and Arrest columns
crimes_all = crimes[['District', 'Arrest']]

In [10]:
# extract only the dataset with arrests
c_d_arrest = crimes_all.loc[crimes_all['Arrest'] == True]

In [11]:
# extract only the dataset with arrests
c_d_non_arrest = crimes_all.loc[crimes_all['Arrest'] == False]

In [12]:
# as we did in previous example, count the number of arrests per district and remove the duplicates
c_d_arrest['Frequency_arrest'] = c_d_arrest.groupby('District')['District'].transform('count')
c_d_arrest_district = c_d_arrest.drop_duplicates('District')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [13]:
# as we did in previous example, count the number of arrests per district and remove the duplicates
c_d_non_arrest['Frequency_non_arrest'] = c_d_non_arrest.groupby('District')['District'].transform('count')
c_d_non_arrest_district = c_d_non_arrest.drop_duplicates('District')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [14]:
c_d_arrest_district

Unnamed: 0,District,Arrest,Frequency_arrest
0,8,True,2532
1,11,True,6317
2,4,True,2778
13,16,True,1390
26,3,True,2167
42,5,True,2926
58,2,True,1643
60,7,True,3404
61,18,True,2149
62,19,True,1665


In [15]:
c_d_non_arrest_district.head(5)

Unnamed: 0,District,Arrest,Frequency_non_arrest
9,20,False,3687
10,24,False,6813
11,25,False,11713
14,7,False,10223
15,8,False,14155


In [16]:
c_d_all=pd.merge(c_d_arrest_district, c_d_non_arrest_district, on='District')

In [17]:
color_mapper = CategoricalColorMapper(palette=['#00FF00', '#FFD343', 'darkgray', 'brown', 'cyan', 'crimson', 'red', '#0000FF', 'purple', '#FFFF00','#808000', '#00FF00', '#FF00FF', 
                                               '#4000ff', '#00ff80', '#ff8000', '#660099', '#82E3BA', '#A3993D', '#2657AD', '#781C9E', '#F7D966'], factors=list(set(c_d_all['District'])))

In [18]:
c_d_all_data = ColumnDataSource(c_d_all)

In [19]:
# create a figure object
#plot = figure(x_axis_label='Frequency_arrest', y_axis_label='Frequency_non_arrest')

In [20]:
# create a figure object with 'tools' parameter defined - 3rd phase
plot = figure(x_axis_label='Number of Arrested Crimes', y_axis_label='Number of Non-Arrested Crimes', 
              tools = 'pan, wheel_zoom, box_zoom, reset, hover, save',
              title = 'Arested versus Non-Arested Crimes in Chicago')

In [21]:
# use diamond glyph for our figure object
plot.diamond(x='Frequency_arrest', y='Frequency_non_arrest', source=c_d_all_data, size=10, color=dict(field='District', transform=color_mapper), legend='District')

In [22]:
# customize the legend - 2nd phase
plot.legend.location = 'bottom_right'
plot.legend.background_fill_color = 'lightgrey'

http://bokeh.pydata.org/en/0.9.3/docs/user_guide/tools.html#hover-tool - for 4rd phase

In [23]:
# customize the Hover tool - 4rd phase
hover = plot.select_one(HoverTool)
hover.tooltips = [('District Number', '@District'),
                 ('Number of Arrested Crimes', '@Frequency_arrest'),
                ('Number of Non Arrested Crimes', '@Frequency_non_arrest')]

In [24]:
# show the result
show(plot)

You can access Timestamp as pandas.Timestamp
  if pd and isinstance(obj, pd.tslib.Timestamp):
