> Created by Teodora Szasz, tszasz@uchicago.edu

# <h1 align="center">2. Visualize reported incidents of crime in Chicago (3) </h1>

In [1]:
# import libraries
import pandas as pd
import numpy as np

In [2]:
# import Bokeh libraries
from bokeh.io import output_file, show
from bokeh.plotting import figure, ColumnDataSource

# import ColumnDataSource from Bokeh
from bokeh.models import Circle

In [3]:
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20b

In [4]:
# categorical color mapper - 3rd phase
from bokeh.models import HoverTool

In [5]:
# create output file
output_file('crimes_categorical.html')

In [6]:
# locate the file
file = '../Dataset/Crimes_-_2016_to_present.csv'

In [7]:
# use pandas' read_csv() method
crimes = pd.read_csv(file)

In [8]:
# option to be able to see all the columns
pd.set_option('display.max_columns', None)

In [9]:
crimes.head(5)

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,10842630,JA122203,01/19/2017 06:28:15 PM,059XX S ROCKWELL ST,2022,NARCOTICS,POSS: COCAINE,APARTMENT,True,False,824,8,16.0,66,18,,,2017,08/05/2017 03:50:08 PM,,,
1,10842633,JA122240,01/19/2017 03:18:38 PM,006XX N ST LOUIS AVE,2024,NARCOTICS,POSS: HEROIN(WHITE),APARTMENT,True,False,1121,11,27.0,23,18,,,2017,08/05/2017 03:50:08 PM,,,
2,10842652,JA122480,01/20/2017 12:52:00 AM,025XX E 106TH ST,2024,NARCOTICS,POSS: HEROIN(WHITE),RESIDENCE,True,False,434,4,10.0,51,18,,,2017,08/05/2017 03:50:08 PM,,,
3,10885990,JA185288,03/13/2017 08:00:00 AM,033XX W FILLMORE ST,1812,NARCOTICS,POSS: CANNABIS MORE THAN 30GMS,POLICE FACILITY/VEH PARKING LOT,True,False,1134,11,24.0,29,18,,,2017,08/05/2017 03:50:08 PM,,,
4,10886000,JA185322,03/13/2017 08:00:00 AM,033XX W FILLMORE ST,2024,NARCOTICS,POSS: HEROIN(WHITE),POLICE FACILITY/VEH PARKING LOT,True,False,1134,11,24.0,29,18,,,2017,08/05/2017 03:50:08 PM,,,


In [10]:
# extract the District and Arrest columns
crimes_all = crimes[['District', 'Arrest']]

In [11]:
# extract only the dataset with arrests
c_d_arrest = crimes_all.loc[crimes_all['Arrest'] == True]

In [12]:
# extract only the dataset with arrests
c_d_non_arrest = crimes_all.loc[crimes_all['Arrest'] == False]

In [13]:
# as we did in previous example, count the number of arrests per district and remove the duplicates
c_d_arrest['Frequency_arrest'] = c_d_arrest.groupby('District')['District'].transform('count')
c_d_arrest_district = c_d_arrest.drop_duplicates('District')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [14]:
# as we did in previous example, count the number of arrests per district and remove the duplicates
c_d_non_arrest['Frequency_non_arrest'] = c_d_non_arrest.groupby('District')['District'].transform('count')
c_d_non_arrest_district = c_d_non_arrest.drop_duplicates('District')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [15]:
c_d_all=pd.merge(c_d_arrest_district, c_d_non_arrest_district, on='District')

In [16]:
c_d_all

Unnamed: 0,District,Arrest_x,Frequency_arrest,Arrest_y,Frequency_non_arrest
0,8,True,2532,False,14155
1,11,True,6317,False,11620
2,4,True,2778,False,11810
3,16,True,1390,False,7552
4,3,True,2167,False,10182
5,5,True,2926,False,8874
6,2,True,1643,False,9703
7,7,True,3404,False,10223
8,18,True,2149,False,12104
9,19,True,1665,False,10366


In [17]:
districts=list(set(c_d_all['District']))

In [18]:
districts_str=[str(i) for i in districts]

In [19]:
c_d_all_data = ColumnDataSource(
    data=dict(
        freq_arrests = list(c_d_all['Frequency_arrest']),
        freq_non_arrests = list(c_d_all['Frequency_non_arrest']),
        districts=districts_str))

In [20]:
# create a figure object
#plot = figure(x_axis_label='Frequency_arrest', y_axis_label='Frequency_non_arrest')

In [21]:
# create a figure object with 'tools' parameter defined - 2nd phase
plot = figure(x_axis_label='Number of Arrested Crimes', y_axis_label='Number of Non-Arrested Crimes', 
             tools = 'pan, wheel_zoom, box_zoom, reset, save',
              title = 'Arested versus Non-Arested Crimes in Chicago')

In [22]:
#generate some random colors for each district
N = len(districts)
x = np.random.random(size=N) * 100
y = np.random.random(size=N) * 100
z = np.random.random(size=N) * 100
colors = [
    "#%02x%02x%02x" % (int(r), int(g), int(b)) for r, g, b in zip(50+2*x, 30+2*y, 10+2*z)
]

In [23]:
colors

['#ea4a1c',
 '#503344',
 '#f2db3e',
 '#3cd220',
 '#e76b29',
 '#e17615',
 '#adc2ad',
 '#9dc022',
 '#9c6491',
 '#cde214',
 '#52428f',
 '#4a5d52',
 '#9f9e3f',
 '#43af46',
 '#43a5b1',
 '#e3d134',
 '#5b9e27',
 '#a14768',
 '#762412',
 '#61d75b',
 '#cda051',
 '#d032bd']

http://bokeh.pydata.org/en/latest/docs/reference/models/markers.html

In [24]:
circle=Circle(x="freq_arrests", y="freq_non_arrests", size=20, 
              fill_color=factor_cmap('districts', palette=colors, factors=districts_str), fill_alpha=0.5)

In [25]:
plot.add_glyph(c_d_all_data, circle)

In [26]:
# customize the legend - 2nd phase
plot.legend.location = 'bottom_right'
plot.legend.background_fill_color = 'lightgrey'

http://bokeh.pydata.org/en/0.9.3/docs/user_guide/tools.html#hover-tool - for 2nd phase

In [28]:
# customize the Hover tool - 2nd phase
hover = HoverTool(tooltips = 
                  [("District Number", "@districts"),
                 ("Number of Arrested Crimes", "@freq_arrests"),
                ("Number of Non Arrested Crimes", "@freq_non_arrests"),])
plot.add_tools(hover)

In [29]:
# show the result
show(plot)

In [30]:
from bokeh.io import output_notebook
output_notebook()
show(plot)