In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

import folium
from folium.plugins import HeatMap

In [8]:
cd Chicago_Crime

/Users/connorheraty/Desktop/Chicago_Crime


In [None]:
# Read the crime export from the current working directory
df = pd.read_csv('Crimes_-_2001_to_present.csv')

# Lift pandas' display truncation: a displayed frame shows every column and every row.
# Keep to .head() / small aggregates when displaying, since nothing is truncated any more.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [None]:
# Drop all columns not used in the analysis, then give 'Primary Type' a
# Python-friendly name. Running this cell twice on the same frame fails because
# the columns are already gone.
unused_columns = [
    'ID', 'Case Number', 'Block', 'IUCR', 'FBI Code',
    'Updated On', 'Location', 'Beat', 'X Coordinate', 'Y Coordinate',
    'Ward', 'Domestic', 'District', 'Location Description', 'Arrest',
]
df = df.drop(columns=unused_columns)

df = df.rename(columns={'Primary Type': 'crime_type'})

In [None]:
# Preview the first rows to check the trimmed column set and the renamed crime_type column
df.head()

In [None]:
# Create 3 separate dataframes for 2002, 2010 and 2018 for analysis.
# reset_index() renumbers the rows from 0 and keeps each row's previous label
# in a new 'index' column.
_2010_df = df.loc[df['Year'] == 2010].reset_index()

_2002_df = df.loc[df['Year'] == 2002].reset_index()

_2018_df = df.loc[df['Year'] == 2018].reset_index()

In [None]:
# Create groupby tables for each dataframe totaling crimes by crime type
def _count_by_crime_type(year_df):
    """Return a frame indexed by crime_type with a single 'count' column.

    'count' is the number of rows of ``year_df`` recorded for that crime_type.
    No rename is needed afterwards: the aggregated frame's only column is
    already called 'count'.
    """
    return year_df.groupby('crime_type').crime_type.agg(['count'])


def _rank_crime_types(group):
    """Move crime_type from the index into a column and sort by 'count', largest first."""
    return group.reset_index().sort_values('count', ascending=False)


_2018_group = _count_by_crime_type(_2018_df)
_2018_crime_type = _rank_crime_types(_2018_group)

_2010_group = _count_by_crime_type(_2010_df)
_2010_crime_type = _rank_crime_types(_2010_group)

_2002_group = _count_by_crime_type(_2002_df)
_2002_crime_type = _rank_crime_types(_2002_group)

In [None]:
# Merge, rename, and sort main table.
# The merges keep only crime types present in all three years. pandas suffixes the
# two clashing 'count' columns of the first merge as count_x (2018) and count_y (2002);
# the 2010 'count' column joins without a clash and keeps its name.
df_test = (_2018_crime_type
           .merge(_2002_crime_type, on='crime_type')
           .merge(_2010_crime_type, on='crime_type'))

year_columns = {'count_x': '2018', 'count_y': '2002', 'count': '2010'}

# head(10) runs before the sort, so the ten rows are the first ten of the merged
# table (which starts from the 2018 ranking); they are then ordered by their 2002 totals.
df_top10 = (df_test
            .rename(columns=year_columns)
            .head(10)
            .sort_values('2002', ascending=False))
df_top10

In [None]:
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(30,15))

# One group of bars per row of df_top10; sized from the data so a shorter
# table does not raise a shape mismatch in ax.bar.
n_groups = len(df_top10)
index = np.arange(n_groups)
width = .35

# Three bars per group; the offset (width/1.5) is smaller than the bar width,
# so neighbouring years overlap slightly inside each group.
rects1 = ax.bar(index - width/1.5, df_top10['2002'], width,
                color='SkyBlue', label='2002')
rects2 = ax.bar(index, df_top10['2010'], width,
                color='White', label='2010')
rects3 = ax.bar(index + width/1.5, df_top10['2018'], width,
                color='IndianRed', label='2018')

ax.set_ylabel('Number of Crimes', fontsize=30)
ax.set_yticks([0, 20000, 40000, 60000, 80000, 100000])
ax.set_yticklabels(['0', '20,000','40,000', '60,000', '80,000', '100,000'],
                   fontsize=20)

# Take the category labels from the table itself (title-cased) so every label
# names the crime whose bars sit above it, whatever order df_top10 ends up in.
ax.set_xticks(index)
ax.set_xticklabels(df_top10['crime_type'].str.title().tolist(), fontsize=20)

ax.legend(fontsize='xx-large')

# Title and subtitle are placed in data coordinates above the tallest y tick
ax.text(x = -1.2, y = 110000,
    s ="What crimes are being committed in Chicago?",
    fontsize = 40, weight = 'bold', alpha=.75)

ax.text(x = -1.2, y = 105000,
    s ="Top 10 List",
    fontsize = 30, alpha=.75)

#Add informational bar at bottom of graph
ax.text(x = -1.6, y = -7000, 
    s = '    connorpheraty.github.io                                                                            Source: Chicago Data Portal   ',
    fontsize = 25, color = '#f0f0f0', backgroundcolor = '#585859', family = 'monospace')

ax.set_facecolor('#919191')

plt.show()

In [None]:
#------------------------------------------ Moving on -----------------------------------------------#

In [None]:
# DataFrame containing only possession offenses of cannabis for 30 grams or less
is_small_cannabis = df['Description'] == 'POSS: CANNABIS 30GMS OR LESS'
df_marij_main = df[is_small_cannabis]

# DataFrame containing all other narcotics offenses:
# every NARCOTICS row whose description is not the small-cannabis one
is_narcotics = df['crime_type'] == 'NARCOTICS'
df_narc = df[is_narcotics]
df_narc_main = df_narc[df_narc['Description'] != 'POSS: CANNABIS 30GMS OR LESS']

In [None]:
# Create one row per year for each group. The yearly tally ends up in the
# 'crime_type' column because that is the column being counted.
# Note: df_narc is rebound here from the NARCOTICS rows to this yearly summary.
df_narc = df_narc_main.groupby('Year')['crime_type'].count().reset_index()

df_marij = df_marij_main.groupby('Year')['crime_type'].count().reset_index()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(30,15))

# Put both series on one shared, sorted year axis so bars are paired by year
# rather than by row position; a year missing from one series is drawn as 0.
narc_by_year = df_narc.set_index('Year')['crime_type']
marij_by_year = df_marij.set_index('Year')['crime_type']
years = narc_by_year.index.union(marij_by_year.index).sort_values()
narc_counts = narc_by_year.reindex(years, fill_value=0)
marij_counts = marij_by_year.reindex(years, fill_value=0)

# Number of bar groups follows the data instead of a fixed year count
n_groups = len(years)
index = np.arange(n_groups)
width = .35

rects1 = ax.bar(index - width/2, narc_counts.to_numpy(), width,
                color='IndianRed')
rects2 = ax.bar(index + width/2, marij_counts.to_numpy(), width,
                color='SkyBlue')

ax.set_ylabel('Number of Offenses', fontsize=30)
ax.set_yticks([0, 5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000])
ax.set_yticklabels(['0', '5,000','10,000', '15,000', '20,000', '25,000',
                    '30,000', '35,000', '40,000'],
                   fontsize=20)

# Year labels come from the same index that positions the bars
ax.set_xticks(index)
ax.set_xticklabels([str(int(year)) for year in years], fontsize=20)

ax.legend((rects1[0], rects2[0]), ('Other Narcotics Offenses', 'Marijuana Offenses'), fontsize = 'xx-large')

# Title and subtitle are placed in data coordinates above the tallest y tick
ax.text(x = -2, y = 45000,
    s ="An Armistice in Chicago's Drug War",
    fontsize = 40, weight = 'bold', alpha=.75)

ax.text(x = -2, y = 43000,
    s ="The Windy City has seen drug offenses decrease year over year since 2007",
    fontsize = 30, alpha=.75)

#Add informational bar at bottom of graph
ax.text(x = -2.5, y = -2500, 
    s = '      connorpheraty.github.io                                                                         Source: Chicago Data Portal   ',
    fontsize = 25, color = '#f0f0f0', backgroundcolor = '#585859', family ='monospace')

plt.show()

In [None]:
#------------------------------------------ Moving on -----------------------------------------------#

In [None]:
# First and last comparison years for each offense group
marij_year = df_marij_main['Year']
df_marij_2002 = df_marij_main.loc[marij_year == 2002]
df_marij_2018 = df_marij_main.loc[marij_year == 2018]

narc_year = df_narc_main['Year']
df_narc_2002 = df_narc_main.loc[narc_year == 2002]
df_narc_2018 = df_narc_main.loc[narc_year == 2018]

In [None]:
# Function to generate folium map
def generateBaseMap(default_location=(41.896494, -87.690561), default_zoom_start=12):
    """Create a folium map with a scale control.

    Parameters
    ----------
    default_location : sequence of two floats
        Latitude and longitude of the map centre. The default is an immutable
        tuple so the shared default can never be modified between calls.
    default_zoom_start : int
        Initial zoom level handed to folium.

    Returns
    -------
    folium.Map
        A new map object; nothing is added to it here.
    """
    # folium receives a fresh list on every call, as it did with the old list default
    base_map = folium.Map(location=list(default_location), control_scale=True, zoom_start=default_zoom_start)
    return base_map

In [None]:
# Build a map with the default centre/zoom and add the light 'cartodbpositron' tiles.
# Each plotting cell further down builds its own base_map again before drawing.
base_map = generateBaseMap()
positron_tiles = folium.TileLayer('cartodbpositron')
positron_tiles.add_to(base_map)

In [None]:
# Plotting 'Non-Marijuana' possession offenses for 2002
# Every incident gets weight 1; summing per coordinate pair gives the number of
# incidents recorded at that exact point.
df_copy = df_narc_2002.assign(count=1)
heat_points = (df_copy[['Latitude', 'Longitude', 'count']]
               .groupby(['Latitude', 'Longitude'])
               .sum()
               .reset_index()
               .values
               .tolist())  # rows of [latitude, longitude, weight]

base_map = generateBaseMap()
folium.TileLayer('cartodbpositron').add_to(base_map)
HeatMap(data=heat_points, radius=8, max_zoom=13).add_to(base_map)

base_map

In [None]:
# Plotting 'Non-Marijuana' possession offenses for 2018
# Every incident gets weight 1; summing per coordinate pair gives the number of
# incidents recorded at that exact point.
df_copy = df_narc_2018.assign(count=1)
heat_points = (df_copy[['Latitude', 'Longitude', 'count']]
               .groupby(['Latitude', 'Longitude'])
               .sum()
               .reset_index()
               .values
               .tolist())  # rows of [latitude, longitude, weight]

base_map = generateBaseMap()
folium.TileLayer('cartodbpositron').add_to(base_map)
HeatMap(data=heat_points, radius=8, max_zoom=13).add_to(base_map)

base_map

In [None]:
# Plotting Marijuana possession offenses for 2002
# Every incident gets weight 1; summing per coordinate pair gives the number of
# incidents recorded at that exact point.
df_copy = df_marij_2002.assign(count=1)
heat_points = (df_copy[['Latitude', 'Longitude', 'count']]
               .groupby(['Latitude', 'Longitude'])
               .sum()
               .reset_index()
               .values
               .tolist())  # rows of [latitude, longitude, weight]

base_map = generateBaseMap()
folium.TileLayer('cartodbpositron').add_to(base_map)
HeatMap(data=heat_points, radius=8, max_zoom=13).add_to(base_map)

base_map

In [None]:
# Plotting Marijuana possession offenses for 2018
# Every incident gets weight 1; summing per coordinate pair gives the number of
# incidents recorded at that exact point.
df_copy = df_marij_2018.assign(count=1)
heat_points = (df_copy[['Latitude', 'Longitude', 'count']]
               .groupby(['Latitude', 'Longitude'])
               .sum()
               .reset_index()
               .values
               .tolist())  # rows of [latitude, longitude, weight]

base_map = generateBaseMap()
folium.TileLayer('cartodbpositron').add_to(base_map)
HeatMap(data=heat_points, radius=8, max_zoom=13).add_to(base_map)

base_map

In [None]:
#------------------------------------------ Moving on -----------------------------------------------#

In [None]:
# Dictionary containing neighborhood name, neighborhood location, and 2010 population.
# Keys are the community-area number written as a float string ('1.0' ... '76.0'),
# which is the form the lookup column takes once it has been cast to str.
# Values are (community name, side of the city, population).

chi_dict = {
    '1.0': ('Rogers Park', 'Far North Side', 54991),
    '2.0': ('West Ridge', 'Far North Side', 71942),
    '3.0': ('Uptown', 'Far North Side', 56362),
    '4.0': ('Lincoln Square', 'Far North Side', 39493),
    '5.0': ('North Center', 'Far North Side', 31867),
    '6.0': ('Lake View', 'North Side', 94368),
    '7.0': ('Lincoln Park', 'North Side', 64116),
    '8.0': ('Near North Side', 'Central', 80484),
    '9.0': ('Edison Park', 'Far North Side', 11187),
    '10.0': ('Norwood Park', 'Far North Side', 37023),
    '11.0': ('Jefferson Park','Far North Side', 25448),
    '12.0': ('Forest Glen', 'Far North Side', 18508),
    '13.0': ('North Park', 'Far North Side', 17931),
    '14.0': ('Albany Park', 'Far North Side', 51542),
    '15.0': ('Portage Park', 'Northwest Side', 64124),
    '16.0': ('Irving Park', 'Northwest Side', 53359),
    '17.0': ('Dunning', 'Northwest Side', 41932),
    '18.0': ('Montclare', 'Northwest Side', 13426),
    '19.0': ('Belmont Cragin', 'Northwest Side', 78743),
    '20.0': ('Hermosa', 'Northwest Side', 25010),
    '21.0': ('Avondale', 'North Side', 39262),
    '22.0': ('Logan Square', 'North Side', 73595),
    '23.0': ('Humboldt Park', 'West Side', 56323),
    '24.0': ('West Town', 'West Side', 81432),
    '25.0': ('Austin', 'West Side', 98514),
    '26.0': ('W. Garfield Park', 'West Side', 18001),
    '27.0': ('E. Garfield Park', 'West Side', 20567),
    '28.0': ('Near West Side', 'West Side', 54881),
    '29.0': ('North Lawndale', 'West Side', 35912),
    '30.0': ('South Lawndale', 'West Side', 79288),
    '31.0': ('Lower West Side', 'West Side', 35769),
    '32.0': ('The Loop', 'Central', 29283),
    '33.0': ('Near South Side', 'Central', 21390),
    '34.0': ('Armour Square', 'South Side', 13391),
    '35.0': ('Douglas', 'South Side', 18238),
    '36.0': ('Oakland', 'South Side', 5918),
    '37.0': ('Fuller Park', 'South Side', 2876),
    '38.0': ('Grand Boulevard', 'South Side', 21929),
    '39.0': ('Kenwood', 'South Side', 17841),
    '40.0': ('Washington Park', 'South Side', 11717),
    '41.0': ('Hyde Park', 'South Side', 25681),
    '42.0': ('Woodlawn', 'South Side', 25983),
    '43.0': ('South Shore', 'South Side', 49767),
    '44.0': ('Chatham', 'Far Southeast Side', 31028),
    '45.0': ('Avalon Park', 'Far Southeast Side', 10185),
    '46.0': ('South Chicago', 'Far Southeast Side', 31198),
    '47.0': ('Burnside', 'Far Southeast Side', 2916),
    '48.0': ('Calumet Heights', 'Far Southeast Side', 13812),
    '49.0': ('Roseland', 'Far Southeast Side', 44619),
    '50.0': ('Pullman', 'Far Southeast Side', 7325),
    '51.0': ('South Deering', 'Far Southeast Side', 15109),
    '52.0': ('East Side', 'Far Southeast Side', 23042),
    '53.0': ('West Pullman', 'Far Southeast Side', 29651),
    '54.0': ('Riverdale', 'Far Southeast Side', 6482),
    '55.0': ('Hegewisch', 'Far Southeast Side', 9426),
    '56.0': ('Garfield Ridge', 'Southwest Side', 34513),
    '57.0': ('Archer Heights', 'Southwest Side', 13393),
    '58.0': ('Brighton Park', 'Southwest Side', 45368),
    '59.0': ('McKinley Park', 'Southwest Side', 15612),
    '60.0': ('Bridgeport', 'South Side', 31977),
    '61.0': ('New City', 'Southwest Side', 44377),
    '62.0': ('West Elsdon', 'Southwest Side', 18109),
    '63.0': ('Gage Park', 'Southwest Side', 39894),
    '64.0': ('Clearing', 'Southwest Side', 23139),
    '65.0': ('West Lawn', 'Southwest Side', 33355),
    '66.0': ('Chicago Lawn', 'Southwest Side', 55628),
    '67.0': ('West Englewood', 'Southwest Side', 35505),
    '68.0': ('Englewood', 'Southwest Side', 30654),
    '69.0': ('Greater Grand Crossing', 'South Side', 32602),
    '70.0': ('Ashburn', 'Far Southwest Side', 41081),
    '71.0': ('Auburn Gresham', 'Far Southwest Side', 48743),
    '72.0': ('Beverly', 'Far Southwest Side', 20034),
    '73.0': ('Washington Heights', 'Far Southwest Side', 26493),
    '74.0': ('Mount Greenwood', 'Far Southwest Side', 19093),
    '75.0': ('Morgan Park', 'Far Southwest Side', 22544),
    '76.0': ("O'Hare", 'Far North Side', 12756),
}

In [None]:
# Add column of 1's for counting.
# df_marij_main was produced by boolean indexing, so build a new frame with
# assign() instead of writing a column into it (avoids SettingWithCopyWarning).
df_marij_main = df_marij_main.assign(Count=1)

In [None]:
# Eliminate years 2001 and 2019 (incomplete)
incomplete_year = df_marij_main['Year'].isin([2001, 2019])
df_marij_main = df_marij_main[~incomplete_year]

In [None]:
# Total the per-row Count for every (Community Area, Year) pair.
# The result is a Series indexed by those two keys.
df_test = df_marij_main.groupby(['Community Area', 'Year'])['Count'].sum()

In [None]:
# Apply chi_dict to community area values
df_test = pd.DataFrame(df_test).reset_index()

# chi_dict is keyed by strings such as '1.0', so the area number is cast to str before the lookup
df_test['Community Area'] = df_test['Community Area'].astype(str)

# Each match is a (name, side, population) tuple; apply(pd.Series) spreads it over three columns.
# Areas with no chi_dict entry get missing values in all three.
community_info = df_test['Community Area'].map(chi_dict).apply(pd.Series)
df_test[['Neighborhood', 'Side', 'Population']] = community_info

In [None]:
# Eliminate community area 0's.
# Filter on the area value instead of slicing off the first two rows: the result no
# longer depends on how many area-0 rows exist, and re-running the cell cannot
# remove valid rows. to_numeric handles the area whether it is stored as '0.0' or 0.
area_number = pd.to_numeric(df_test['Community Area'], errors='coerce')
df_test = df_test[area_number != 0].copy()

In [None]:
# Create column showing incidences per 100,000 residents (per capita).
# assign() returns a new frame, so no column is written into a sliced frame
# (avoids SettingWithCopyWarning).
df_test = df_test.assign(
    PerCapita=(df_test['Count'] / df_test['Population']) * 100000
)

In [None]:
# Groupby dataframe on geographical location ('Side'): add up the PerCapita values of
# all communities in a side for each year; the total lands in a column named 'sum'.
# NOTE(review): this sums per-community rates. It is not sum(Count) / sum(Population)
# for the side, so values grow with the number of communities in a side. Confirm
# this is the intended metric.
side_year_rates = df_test.groupby(['Side', 'Year'])['PerCapita']
df_group = side_year_rates.agg(['sum']).reset_index()
df_group

In [None]:
# One frame per side of the city, each holding that side's yearly rows for plotting
row_side = df_group['Side']

df_group_central = df_group.loc[row_side == 'Central']
df_group_far_north = df_group.loc[row_side == 'Far North Side']
df_group_north = df_group.loc[row_side == 'North Side']
df_group_northwest = df_group.loc[row_side == 'Northwest Side']
df_group_south = df_group.loc[row_side == 'South Side']
df_group_southwest = df_group.loc[row_side == 'Southwest Side']
df_group_west = df_group.loc[row_side == 'West Side']
df_group_far_southeast = df_group.loc[row_side == 'Far Southeast Side']
df_group_far_southwest = df_group.loc[row_side == 'Far Southwest Side']

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(30,15))

#Each individual lineplot (thicker lines mark the sides emphasised in the chart)
central = ax.plot(df_group_central['Year'], df_group_central['sum'], color = 'Green')
northwest = ax.plot(df_group_northwest['Year'], df_group_northwest['sum'], color = '#6141f4')
north = ax.plot(df_group_north['Year'], df_group_north['sum'], color = 'Blue')
west = ax.plot(df_group_west['Year'], df_group_west['sum'], linewidth=8, color ='#581845')
far_north = ax.plot(df_group_far_north['Year'], df_group_far_north['sum'], color = '#D8E0BB')
south = ax.plot(df_group_south['Year'], df_group_south['sum'], linewidth = 8, color = '#900C3F')
southwest = ax.plot(df_group_southwest['Year'], df_group_southwest['sum'], linewidth = 8, color= '#FF5733')
far_southwest = ax.plot(df_group_far_southwest['Year'], df_group_far_southwest['sum'], linewidth = 8, color = '#FFC300')
far_southeast = ax.plot(df_group_far_southeast['Year'], df_group_far_southeast['sum'], linewidth = 8, color= '#C70039')

# Labeling
ax.set_ylabel('Marijuana Offenses (per 100,000 inhabitants)', fontsize=30)
ax.set_yticks([0, 4000, 8000, 12000, 16000, 20000])
ax.set_yticklabels(['0', '4,000','8,000', '12,000', '16,000', '20,000'], fontsize=20)

# Fix the tick positions before labelling them: set_xticklabels on its own attaches
# the text to whatever ticks matplotlib picked, which can mislabel the axis.
# Every second year across the plotted range (2001 and 2019 were removed upstream).
tick_years = list(range(2002, 2019, 2))
ax.set_xticks(tick_years)
ax.set_xticklabels([str(year) for year in tick_years], fontsize=20)

# Legend (listed in a chosen order rather than plotting order)
ax.legend((west[0], south[0], far_southeast[0],
           southwest[0], far_southwest[0], far_north[0],
           central[0], northwest[0], north[0]),
          ('West Side', 'South Side', 'Far Southeast Side',
           'Southwest Side', 'Far Southwest Side', 'Far North Side',
           'Central Side', 'Northwest Side', 'North Side'), fontsize = 'xx-large')

# Descriptive Text (positions are in data coordinates: x is a year, y an offense rate)
ax.text(x = 2000.5, y = 22500,
    s ="An Armistice in Chicago's Drug War",
    fontsize = 40, weight = 'bold', alpha=.75)

ax.text(x = 2000.5, y = 21500,
    s ="Marijuana Offenses per Capita",
    fontsize = 30, alpha=.75)

#Informational bar at bottom of graph
ax.text(x = 2000, y = -2500, 
    s = '   connorpheraty.github.io                                                                              Source: Chicago Data Portal   ',
    fontsize = 25, color = '#f0f0f0', backgroundcolor = '#585859', family ='monospace')

plt.show()