In [1]:
# Setup dependencies
import pandas as pd
import os

## Import population data
* 2011 data
* 2015 data
* 2021 data

In [2]:
# Load the per-zip-code population table (one column per year).
# Missing population figures are replaced with 0; the merge cells below
# filter those zero rows out before computing any rate.
url_pop = "chicago_population_by_year.csv"
pop_df = pd.read_csv(url_pop, encoding='utf-8').fillna(0)
pop_df.head()

Unnamed: 0,Zip Code,2011,2015,2021
0,60602,0.0,57368.0,33273.0
1,60605,75030.0,87668.0,102572.0
2,60607,79242.0,85917.0,92790.0
3,60608,35477.0,36216.0,36940.0
4,60609,34725.0,32284.0,30024.0


# 2011 Crime Data
* Import crime data
* Group by zip code
* Merge with population data
* Calculate crime rate per 100 people
* Export as a CSV

In [68]:
# Read the cleaned 2011 crime records (each row carries a 'Zip Code')
url_crimes = "crimes_2011_clean_zc.csv"
crimes_df = pd.read_csv(filepath_or_buffer=url_crimes, encoding="utf-8")
crimes_df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Primary Type,Description,Latitude,Longitude,Zip Code
0,0,0,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,41.747362,-87.708424,60652
1,1,3,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,41.891766,-87.766554,60644
2,2,4,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,41.895419,-87.766632,60644
3,3,6,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,41.875944,-87.743451,60644
4,4,7,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,41.987282,-87.787652,60631


In [69]:
# Count the crime records in each zip code.
# groupby(...).size() counts rows directly, so the result no longer depends on
# which leftover columns (e.g. 'Unnamed: 0') the input CSV happens to carry,
# and nothing is mutated in place.
crimes_group_df = (
    crimes_df
    .groupby('Zip Code')
    .size()
    .reset_index(name='Count of Crimes')
)
crimes_group_df.head()

Unnamed: 0,Zip Code,Count of Crimes
0,46320,733
1,60018,21
2,60068,29
3,60077,3
4,60106,671


In [70]:
# Attach the 2011 population to each zip code's crime count.
# Only the 2011 population column is brought in, so the other year columns of
# pop_df never have to be listed and dropped afterwards.
merge_df = crimes_group_df.merge(pop_df[['Zip Code', '2011']], on='Zip Code')
# pop_df stores missing population as 0; remove those rows so the rate is not
# divided by zero. .copy() makes the result an independent frame, so adding
# columns to it later does not raise SettingWithCopyWarning.
merge_df = merge_df[merge_df['2011'] != 0].copy()
merge_df.head()

Unnamed: 0,Zip Code,Count of Crimes,2011
1,60605,2394,75030.0
2,60607,3946,79242.0
3,60608,4324,35477.0
4,60609,4936,34725.0
5,60610,3396,70268.0


In [71]:
# Crime rate per 100 residents = crimes / 2011 population * 100.
# merge_df was produced by a boolean filter; assign() builds a new frame
# instead of writing a column into that filtered result, which avoids
# SettingWithCopyWarning.
merge_df = merge_df.assign(
    **{'Crime Rate Per 100 People': merge_df['Count of Crimes'] / merge_df['2011'] * 100}
)
merge_df.head()

Unnamed: 0,Zip Code,Count of Crimes,2011,Crime Rate Per 100 People
1,60605,2394,75030.0,3.190724
2,60607,3946,79242.0,4.979682
3,60608,4324,35477.0,12.188178
4,60609,4936,34725.0,14.214543
5,60610,3396,70268.0,4.832925


In [72]:
# Persist the 2011 crime-rate table to disk for later analysis steps
# (the DataFrame index is written as the first, unnamed column)
output_file = "crimes_rate_2011.csv"
merge_df.to_csv(path_or_buf=output_file, encoding="utf-8")

# 2015 Crime Data
* Import crime data
* Group by zip code
* Merge with population data
* Calculate crime rate per 100 people
* Export as a CSV

In [73]:
# Read the cleaned 2015 crime records (each row carries a 'Zip Code')
url_crimes = "crimes_2015_clean_zc.csv"
crimes_df = pd.read_csv(filepath_or_buffer=url_crimes, encoding="utf-8")
crimes_df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Primary Type,Description,Latitude,Longitude,Zip Code
0,0,0,BATTERY,DOMESTIC BATTERY SIMPLE,41.815117,-87.67,60609
1,1,1,THEFT,POCKET-PICKING,41.89508,-87.7654,60644
2,2,2,NARCOTICS,POSS: HEROIN(BRN/TAN),41.937406,-87.71665,60618
3,3,3,ASSAULT,SIMPLE,41.881903,-87.755121,60644
4,4,4,BURGLARY,FORCIBLE ENTRY,41.744379,-87.658431,60620


In [74]:
# Count the crime records in each zip code.
# groupby(...).size() counts rows directly, so the result no longer depends on
# which leftover columns (e.g. 'Unnamed: 0') the input CSV happens to carry,
# and nothing is mutated in place.
crimes_group_df = (
    crimes_df
    .groupby('Zip Code')
    .size()
    .reset_index(name='Count of Crimes')
)
crimes_group_df.head()

Unnamed: 0,Zip Code,Count of Crimes
0,46320,498
1,60018,23
2,60068,29
3,60106,505
4,60171,16


In [75]:
# Attach the 2015 population to each zip code's crime count.
# Only the 2015 population column is brought in, so the other year columns of
# pop_df never have to be listed and dropped afterwards.
merge_df = crimes_group_df.merge(pop_df[['Zip Code', '2015']], on='Zip Code')
# pop_df stores missing population as 0; remove those rows so the rate is not
# divided by zero. .copy() makes the result an independent frame, so adding
# columns to it later does not raise SettingWithCopyWarning.
merge_df = merge_df[merge_df['2015'] != 0].copy()
merge_df.head()

Unnamed: 0,Zip Code,Count of Crimes,2015
0,60602,5148,57368.0
1,60605,2341,87668.0
2,60607,3016,85917.0
3,60608,3247,36216.0
4,60609,3666,32284.0


In [76]:
# Crime rate per 100 residents = crimes / 2015 population * 100.
# merge_df was produced by a boolean filter; assign() builds a new frame
# instead of writing a column into that filtered result, which avoids
# SettingWithCopyWarning.
merge_df = merge_df.assign(
    **{'Crime Rate Per 100 People': merge_df['Count of Crimes'] / merge_df['2015'] * 100}
)
merge_df.head()

Unnamed: 0,Zip Code,Count of Crimes,2015,Crime Rate Per 100 People
0,60602,5148,57368.0,8.973644
1,60605,2341,87668.0,2.670302
2,60607,3016,85917.0,3.510365
3,60608,3247,36216.0,8.965651
4,60609,3666,32284.0,11.35547


In [77]:
# Persist the 2015 crime-rate table to disk for later analysis steps
# (the DataFrame index is written as the first, unnamed column)
output_file = "crimes_rate_2015.csv"
merge_df.to_csv(path_or_buf=output_file, encoding="utf-8")

# 2021 Crime Data
* Import crime data
* Group by zip code
* Merge with population data
* Calculate crime rate per 100 people
* Export as a CSV

In [3]:
# Read the cleaned 2021 crime records (each row carries a 'Zip Code')
url_crimes = "crimes_2021_clean_zc.csv"
crimes_df = pd.read_csv(filepath_or_buffer=url_crimes, encoding="utf-8")
crimes_df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Primary Type,Description,Latitude,Longitude,Zip Code
0,0,0,BATTERY,DOMESTIC BATTERY SIMPLE,41.766435,-87.635964,60621
1,1,1,THEFT,$500 AND UNDER,41.748474,-87.630607,60620
2,2,2,BATTERY,DOMESTIC BATTERY SIMPLE,41.882224,-87.766076,60644
3,3,3,NARCOTICS,POSSESS - COCAINE,41.790069,-87.654769,60621
4,4,4,HOMICIDE,FIRST DEGREE MURDER,41.771062,-87.586271,60637


In [4]:
# Count the crime records in each zip code.
# groupby(...).size() counts rows directly, so the result no longer depends on
# which leftover columns (e.g. 'Unnamed: 0') the input CSV happens to carry,
# and nothing is mutated in place.
crimes_group_df = (
    crimes_df
    .groupby('Zip Code')
    .size()
    .reset_index(name='Count of Crimes')
)
crimes_group_df.head()

Unnamed: 0,Zip Code,Count of Crimes
0,46320,395
1,60007,4
2,60018,24
3,60068,113
4,60077,1


In [5]:
# Attach the 2021 population to each zip code's crime count.
# Only the 2021 population column is brought in, so the other year columns of
# pop_df never have to be listed and dropped afterwards.
merge_df = crimes_group_df.merge(pop_df[['Zip Code', '2021']], on='Zip Code')
# pop_df stores missing population as 0; remove those rows so the rate is not
# divided by zero. .copy() makes the result an independent frame, so adding
# columns to it later does not raise SettingWithCopyWarning.
merge_df = merge_df[merge_df['2021'] != 0].copy()
merge_df.head()

Unnamed: 0,Zip Code,Count of Crimes,2021
0,60602,2963,33273.0
1,60605,2230,102572.0
2,60607,2892,92790.0
3,60608,2654,36940.0
4,60609,2507,30024.0


In [6]:
# Crime rate per 100 residents = crimes / 2021 population * 100.
# merge_df was produced by a boolean filter; assign() builds a new frame
# instead of writing a column into that filtered result, which avoids
# SettingWithCopyWarning.
merge_df = merge_df.assign(
    **{'Crime Rate Per 100 People': merge_df['Count of Crimes'] / merge_df['2021'] * 100}
)
merge_df.head()

Unnamed: 0,Zip Code,Count of Crimes,2021,Crime Rate Per 100 People
0,60602,2963,33273.0,8.905118
1,60605,2230,102572.0,2.174083
2,60607,2892,92790.0,3.116715
3,60608,2654,36940.0,7.184624
4,60609,2507,30024.0,8.349987


In [7]:
# Persist the 2021 crime-rate table to disk for later analysis steps
# (the DataFrame index is written as the first, unnamed column)
output_file = "crimes_rate_2021.csv"
merge_df.to_csv(path_or_buf=output_file, encoding="utf-8")