In [149]:
import pandas as pd
import altair as alt

In [150]:
# read parquet data with date info
crimes = pd.read_parquet('../data/crimes.parquet')

In [151]:
crimes

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,10351578,HY542217,2015-12-10 08:00:00,019XX S MILLER ST,1130,DECEPTIVE PRACTICE,FRAUD OR CONFIDENCE GAME,RESIDENCE,False,False,...,25.0,31.0,11,1169771.0,1890901.0,2015,2018-02-10 15:50:01,41.856134,-87.652335,"(41.856133986, -87.652334517)"
1,10351580,HY542156,2015-11-16 15:59:00,019XX W CERMAK RD,1110,DECEPTIVE PRACTICE,BOGUS CHECK,CURRENCY EXCHANGE,False,False,...,25.0,31.0,11,1163681.0,1889417.0,2015,2018-02-10 15:50:01,41.852192,-87.674730,"(41.852192114, -87.674729636)"
2,10351582,HY542092,2015-12-18 13:00:00,034XX W 77TH ST,0460,BATTERY,SIMPLE,"SCHOOL, PUBLIC, BUILDING",False,False,...,18.0,70.0,08B,1154616.0,1853337.0,2015,2018-02-10 15:50:01,41.753370,-87.708963,"(41.753369537, -87.708962817)"
3,10351583,HY542284,2015-12-18 17:50:00,040XX W HARRISON ST,2024,NARCOTICS,POSS: HEROIN(WHITE),VEHICLE NON-COMMERCIAL,True,False,...,24.0,26.0,18,1149623.0,1897057.0,2015,2018-02-10 15:50:01,41.873441,-87.726128,"(41.873441474, -87.726128458)"
4,10351584,HY542135,2015-12-18 16:55:00,071XX S BENNETT AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,...,5.0,43.0,08B,1189943.0,1857970.0,2015,2018-02-10 15:50:01,41.765306,-87.579355,"(41.765306435, -87.579354758)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7643702,12830673,JF400391,2022-09-18 00:51:00,068XX S MARSHFIELD AVE,0261,CRIMINAL SEXUAL ASSAULT,AGGRAVATED - HANDGUN,ALLEY,False,False,...,17.0,67.0,02,,,2022,2022-10-07 16:46:51,,,
7643703,12832499,JF402441,2022-09-19 15:10:00,003XX W VAN BUREN ST,0910,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,False,False,...,25.0,32.0,07,,,2022,2022-10-07 16:46:51,,,
7643704,12835537,JF406097,2022-09-21 17:00:00,097XX S PARNELL AVE,0910,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,False,False,...,21.0,73.0,07,,,2022,2022-10-07 16:46:51,,,
7643705,12835278,JF405774,2022-09-21 22:27:00,012XX W 111TH ST,1310,CRIMINAL DAMAGE,TO PROPERTY,GAS STATION,False,False,...,34.0,75.0,14,,,2022,2022-10-07 16:46:51,,,


In [152]:
# get crime counts by primary type
crimes_by_type = crimes.groupby('Primary Type').size().to_frame(name='Total')
crimes_by_type = crimes_by_type.reset_index().sort_values(by=['Total'], ascending=False)
crimes_by_type

Unnamed: 0,Primary Type,Total
34,THEFT,1612023
2,BATTERY,1401115
6,CRIMINAL DAMAGE,871255
19,NARCOTICS,744105
1,ASSAULT,495960
26,OTHER OFFENSE,475452
3,BURGLARY,420155
18,MOTOR VEHICLE THEFT,357647
9,DECEPTIVE PRACTICE,334363
31,ROBBERY,287001


In [153]:
# create crimes by type bar chart
alt.Chart(crimes_by_type[0:20]).mark_bar().encode(
  x={'field': 'Total', 'type': 'quantitative', 'title': 'Reports'},
  y={'field': 'Primary Type', 'type': 'nominal', 'title': 'Crime', 'sort': '-x'},
  tooltip=[
    {'field': 'Primary Type', 'type': 'nominal', 'title': 'Crime Type'},
    {'field': 'Total', 'type': 'quantitative', 'title': 'Reports'}
  ],
  color=alt.value('crimson')
).properties(title='Chicago Crime Reports by Type - 2001-2022')

In [154]:
# set Date index for the time series plots
crimes.index = crimes['Date']
crimes

Unnamed: 0_level_0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-12-10 08:00:00,10351578,HY542217,2015-12-10 08:00:00,019XX S MILLER ST,1130,DECEPTIVE PRACTICE,FRAUD OR CONFIDENCE GAME,RESIDENCE,False,False,...,25.0,31.0,11,1169771.0,1890901.0,2015,2018-02-10 15:50:01,41.856134,-87.652335,"(41.856133986, -87.652334517)"
2015-11-16 15:59:00,10351580,HY542156,2015-11-16 15:59:00,019XX W CERMAK RD,1110,DECEPTIVE PRACTICE,BOGUS CHECK,CURRENCY EXCHANGE,False,False,...,25.0,31.0,11,1163681.0,1889417.0,2015,2018-02-10 15:50:01,41.852192,-87.674730,"(41.852192114, -87.674729636)"
2015-12-18 13:00:00,10351582,HY542092,2015-12-18 13:00:00,034XX W 77TH ST,0460,BATTERY,SIMPLE,"SCHOOL, PUBLIC, BUILDING",False,False,...,18.0,70.0,08B,1154616.0,1853337.0,2015,2018-02-10 15:50:01,41.753370,-87.708963,"(41.753369537, -87.708962817)"
2015-12-18 17:50:00,10351583,HY542284,2015-12-18 17:50:00,040XX W HARRISON ST,2024,NARCOTICS,POSS: HEROIN(WHITE),VEHICLE NON-COMMERCIAL,True,False,...,24.0,26.0,18,1149623.0,1897057.0,2015,2018-02-10 15:50:01,41.873441,-87.726128,"(41.873441474, -87.726128458)"
2015-12-18 16:55:00,10351584,HY542135,2015-12-18 16:55:00,071XX S BENNETT AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,...,5.0,43.0,08B,1189943.0,1857970.0,2015,2018-02-10 15:50:01,41.765306,-87.579355,"(41.765306435, -87.579354758)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-18 00:51:00,12830673,JF400391,2022-09-18 00:51:00,068XX S MARSHFIELD AVE,0261,CRIMINAL SEXUAL ASSAULT,AGGRAVATED - HANDGUN,ALLEY,False,False,...,17.0,67.0,02,,,2022,2022-10-07 16:46:51,,,
2022-09-19 15:10:00,12832499,JF402441,2022-09-19 15:10:00,003XX W VAN BUREN ST,0910,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,False,False,...,25.0,32.0,07,,,2022,2022-10-07 16:46:51,,,
2022-09-21 17:00:00,12835537,JF406097,2022-09-21 17:00:00,097XX S PARNELL AVE,0910,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,False,False,...,21.0,73.0,07,,,2022,2022-10-07 16:46:51,,,
2022-09-21 22:27:00,12835278,JF405774,2022-09-21 22:27:00,012XX W 111TH ST,1310,CRIMINAL DAMAGE,TO PROPERTY,GAS STATION,False,False,...,34.0,75.0,14,,,2022,2022-10-07 16:46:51,,,


In [155]:
# get crimes by type
crimes_by_type = crimes[['Primary Type']]

# create monthly crime report counts
monthly_reports = crimes_by_type.resample('M').count()
monthly_reports.columns = ['Reports'] # rename Primary Type column
monthly_reports = monthly_reports.reset_index()
monthly_reports

Unnamed: 0,Date,Reports
0,2001-01-31,38114
1,2001-02-28,33783
2,2001-03-31,40562
3,2001-04-30,40088
4,2001-05-31,41835
...,...,...
256,2022-05-31,19925
257,2022-06-30,20444
258,2022-07-31,21912
259,2022-08-31,21809


In [156]:
# plot monthly crime reports
alt.Chart(monthly_reports).mark_line().encode(
  x='Date:T',
  y='Reports:Q',
  color=alt.value('crimson')
).properties(title='Monthly Chicago Crime Reports - 2001-2022')

In [157]:
# get arrests
arrests = crimes[crimes['Arrest'] == True]['Arrest']

# sum arrests per month
monthly_arrests = arrests.resample('M').sum().to_frame(name='Arrests')
monthly_arrests = monthly_arrests.reset_index()
monthly_arrests

Unnamed: 0,Date,Arrests
0,2001-01-31,12239
1,2001-02-28,10964
2,2001-03-31,12491
3,2001-04-30,11870
4,2001-05-31,12059
...,...,...
256,2022-05-31,2727
257,2022-06-30,2397
258,2022-07-31,2245
259,2022-08-31,2247


In [158]:
# plot monthly arrests
alt.Chart(monthly_arrests).mark_line().encode(
  x='Date:T',
  y='Arrests:Q',
  color=alt.value('crimson')
).properties(title='Monthly Chicago Arrests - 2001-2022')

In [159]:
# get domestic crime reports
domestic_reports = crimes[crimes['Domestic'] == True]['Domestic']

# sum domestic crime reports per month
monthly_domestic_reports = domestic_reports.resample('M').sum().to_frame(name='Reports')
monthly_domestic_reports = monthly_domestic_reports.reset_index()
monthly_domestic_reports

Unnamed: 0,Date,Reports
0,2001-01-31,4720
1,2001-02-28,4242
2,2001-03-31,5080
3,2001-04-30,5149
4,2001-05-31,5417
...,...,...
256,2022-05-31,3922
257,2022-06-30,3940
258,2022-07-31,3887
259,2022-08-31,3840


In [160]:
# plot monthly domestic crime reports
alt.Chart(monthly_domestic_reports).mark_line().encode(
  x='Date:T',
  y='Reports:Q',
).properties(title='Monthly Domestic Chicago Crime Reports - 2001-2022')

In [161]:
# get crime location counts
crime_locations = crimes.groupby('Location Description').size()
crime_locations = crime_locations.sort_values(
    ascending=False).rename('Reports').reset_index()

# print crime location stats
print(crime_locations.head(10))
print("...\nTotal Locations: {:,}".format(crime_locations.size))

             Location Description  Reports
0                          STREET  1984400
1                       RESIDENCE  1288019
2                       APARTMENT   853137
3                        SIDEWALK   722889
4                           OTHER   270046
5  PARKING LOT/GARAGE(NON.RESID.)   203000
6                           ALLEY   169859
7        SCHOOL, PUBLIC, BUILDING   146392
8              SMALL RETAIL STORE   143299
9                RESIDENCE-GARAGE   135544
...
Total Locations: 430


In [162]:
# plot top 20 crime locations
alt.Chart(crime_locations[0:20]).mark_bar().encode(
    x={'field': 'Reports', 'type': 'quantitative'},
    y={'field': 'Location Description', 'type': 'nominal', 'sort': '-x'},
    tooltip=['Location Description', 'Reports'],
    color=alt.value('crimson')
).properties(title='Top 20 Chicago Crime Locations - 2001-2022')

In [163]:
# load Chicago community areas with pandas
# for plotting crime by Chicago sides and community areas
community_areas = pd.read_csv('../data/chicago-community-areas.csv')
community_areas.head()

# get community crime stats
community_areas['Reports'] = crimes.groupby(
    'Community Area').size().rename('Reports')
community_crime = community_areas.sort_values(
    by='Reports', ascending=False).dropna()

# print community crime stats
print(community_crime.head(10))
print("...\nTotal Communities: {:,}".format(community_crime.Reports.count()))

    CommunityArea    CommunityName                Side  Reports
25             25           Austin           West Side   441438
8               8  Near North Side             Central   246938
43             43      South Shore          South Side   231521
23             23    Humboldt Park           West Side   220210
28             28   Near West Side           West Side   211301
24             24        West Town           West Side   205859
29             29   North Lawndale           West Side   205808
67             67   West Englewood      Southwest Side   202499
71             71   Auburn Gresham  Far Southwest Side   199438
49             49         Roseland  Far Southeast Side   187192
...
Total Communities: 77


In [164]:
# drop unused columns and reindex
community_crime = community_crime.drop(
    ['CommunityArea', 'Side'], axis=1)  # denotes column
community_crime.head(10)

Unnamed: 0,CommunityName,Reports
25,Austin,441438
8,Near North Side,246938
43,South Shore,231521
23,Humboldt Park,220210
28,Near West Side,211301
24,West Town,205859
29,North Lawndale,205808
67,West Englewood,202499
71,Auburn Gresham,199438
49,Roseland,187192


In [165]:
# plot 20 high crime communities
alt.Chart(community_crime[0:20]).mark_bar().encode(
  x={'field': 'Reports', 'type': 'quantitative'},
  y={'field': 'CommunityName', 'type': 'nominal', 'title': 'Community', 'sort': '-x'},
  tooltip=['CommunityName', 'Reports'],
  color=alt.value('crimson')
).properties(title='High Crime Chicago Communities - 2001-2022')

In [166]:
# plot 20 low crime communities
alt.Chart(community_crime.tail(20)).mark_bar().encode(
  x={'field': 'Reports', 'type': 'quantitative'},
  y={'field': 'CommunityName', 'type': 'nominal', 'title': 'Community', 'sort': 'x'},
  tooltip=['CommunityName', 'Reports']
).properties(title='Low Crime Chicago Communities - 2001-2022')

In [173]:
# group crime totals by Chicago sides
crimes_by_side = community_areas.groupby('Side').sum()\
    .drop('CommunityArea', axis=1)  # denotes column
crimes_by_side.sort_values(by='Reports', ascending=False)
crimes_by_side.reset_index()

Unnamed: 0,Side,Reports
0,Central,474346
1,Far North Side,628196
2,Far Southeast Side,827899
3,Far Southwest Side,444103
4,North Side,503479
5,Northwest Side,399135
6,South Side,965119
7,Southwest Side,1051006
8,West Side,1736870


In [183]:
# get narcotics crimes
narcotics = crimes[crimes['Primary Type'] == 'NARCOTICS']
#print(narcotics.head())

# get narcotics crime description counts
narcotics_crimes = narcotics[['Primary Type', 'Description']]\
    .groupby('Description').count()\
    .sort_values(by='Primary Type', ascending=False)
narcotics_crimes.columns = ['Reports']  # rename Primary Type column
narcotics_crimes = narcotics_crimes.reset_index()

print(narcotics_crimes.head(10))
print('...')
print('Total Narcotics Crime Descriptions: {}'.format(len(narcotics_crimes)))

                      Description  Reports
0    POSS: CANNABIS 30GMS OR LESS   278139
1                     POSS: CRACK   121112
2             POSS: HEROIN(WHITE)    95786
3  SOLICIT NARCOTICS ON PUBLICWAY    27854
4    MANU/DELIVER: HEROIN (WHITE)    26446
5                   POSS: COCAINE    25470
6              MANU/DELIVER:CRACK    24949
7    ATTEMPT POSSESSION NARCOTICS    21366
8         FOUND SUSPECT NARCOTICS    19502
9  POSS: CANNABIS MORE THAN 30GMS    19092
...
Total Narcotics Crime Descriptions: 80


In [184]:
# plot top 20 narcotics crime reports by description
alt.Chart(narcotics_crimes[0:20]).mark_bar().encode(
    x={'field': 'Reports', 'type': 'quantitative'},
    y={'field': 'Description', 'type': 'nominal', 'sort': '-x'},
    tooltip=['Description', 'Reports'],
    color=alt.value('crimson')
).properties(title='Top 20 Chicago Narcotics Crime Reports - 2001-2022')

In [188]:
# get other offenses crimes
other_offenses = crimes[crimes['Primary Type'] == 'OTHER OFFENSE']
#print(other_offenses.head())

# get other offense crime description counts
other_offense_crimes = other_offenses[['Primary Type', 'Description']]\
    .groupby('Description').count()\
    .sort_values(by='Primary Type', ascending=False)
other_offense_crimes.columns = ['Reports']  # rename Primary Type column
other_offense_crimes = other_offense_crimes.reset_index()

print(other_offense_crimes.head(10))
print('...')
print('Total Other Offense Crime Descriptions: {}'.format(len(other_offense_crimes)))

                      Description  Reports
0                TELEPHONE THREAT   148121
1         HARASSMENT BY TELEPHONE   109798
2           OTHER VEHICLE OFFENSE    40624
3     VIOLATE ORDER OF PROTECTION    39829
4  HARASSMENT BY ELECTRONIC MEANS    33313
5        FALSE/STOLEN/ALTERED TRP    21866
6  OTHER CRIME INVOLVING PROPERTY    12499
7      OTHER CRIME AGAINST PERSON     8657
8       VEHICLE TITLE/REG OFFENSE     8504
9               LICENSE VIOLATION     8366
...
Total Other Offense Crime Descriptions: 63


In [190]:
# plot top 20 other offense crime reports by description
alt.Chart(other_offense_crimes[0:20]).mark_bar().encode(
  x={'field': 'Reports', 'type': 'quantitative'},
  y={'field': 'Description', 'type': 'nominal', 'sort': '-x'},
  tooltip=['Description', 'Reports'],
  color=alt.value('crimson')
).properties(title='Top 20 Other Offense Chicago Crime Reports - 2001-2022')