# Statistics on crime in the Chicago area

## This is an exploration of when, where, and what kinds of crime occur most often, and whether crime can be predicted with any accuracy from the features available in the data set

In [39]:
import pandas as pd
import seaborn as sns
import matplotlib as plt
import numpy as np
#using this to sort the dictionaries by value
import operator
from scipy.stats import trim_mean, kurtosis
from scipy.stats.mstats import mode, gmean, hmean
pd.set_option('display.float_format', lambda x: '%.5f' % x)
%matplotlib inline
np.random.seed(42)
%cd crimes


[Errno 2] No such file or directory: 'crimes'
/Users/donovanadams/Desktop/GitHub/DS-3-Deep-Learning/notebooks/crimes


## Loading previously cleaned data

In [7]:
# Read the cleaned CSV and discard the 'Unnamed: 0' column in one chained
# expression, so `crime_df` is only ever bound to the final frame.
crime_df = (
    pd.read_csv('CleanedCrimes.csv')
    .drop(columns=['Unnamed: 0'])
)

In [8]:
# Preview only the first ten rows; the frame holds several million records,
# so displaying the whole object is neither useful nor cheap to render.
crime_df.head(10)

Unnamed: 0,Date,Block,Primary Type,Description,Location Description,Arrest,Domestic,Beat,Year,Location
0,2008-10-07 12:39:00,75TH ST,HOMICIDE,FIRST DEGREE MURDER,ALLEY,True,False,323,2008,"(41.758275857, -87.622451031)"
1,2008-10-09 03:30:00,POLK ST,HOMICIDE,FIRST DEGREE MURDER,STREET,True,False,1533,2008,"(41.87025207, -87.746069362)"
2,2008-10-09 08:35:00,MANN DR,HOMICIDE,FIRST DEGREE MURDER,PARK PROPERTY,False,False,831,2008,"(41.770990476, -87.698901469)"
3,2008-10-10 02:33:00,CHICAGO AVE,HOMICIDE,FIRST DEGREE MURDER,RESTAURANT,False,False,1524,2008,"(41.894916924, -87.757358147)"
4,2008-10-10 12:50:00,HOMAN AVE,HOMICIDE,FIRST DEGREE MURDER,GARAGE,False,True,1032,2008,"(41.843826272, -87.709893465)"
5,2008-10-10 08:32:00,14TH ST,HOMICIDE,FIRST DEGREE MURDER,STREET,True,True,1231,2008,"(41.863318307, -87.664923682)"
6,2008-10-11 12:55:00,23RD ST,HOMICIDE,FIRST DEGREE MURDER,STREET,True,False,1034,2008,"(41.850350125, -87.676543351)"
7,2008-10-11 10:25:00,PAXTON AVE,HOMICIDE,FIRST DEGREE MURDER,STREET,False,False,331,2008,"(41.769005966, -87.571485086)"
8,2008-10-11 10:00:00,SAWYER AVE,HOMICIDE,FIRST DEGREE MURDER,PORCH,False,False,823,2008,"(41.783861768, -87.704490821)"
9,2008-10-12 05:47:00,51ST ST,HOMICIDE,FIRST DEGREE MURDER,ALLEY,False,False,911,2008,"(41.801124416, -87.685910818)"


### Getting the shape of the data

In [94]:
# `shape` is a (rows, columns) tuple, so it can be unpacked straight into the
# two placeholders. The original bare `crime_df.shape` statement was dropped:
# it was not the cell's last expression, so its value was silently discarded.
print('The cleaned data set has {} number of total incedents and {} features for each incedent'.format(*crime_df.shape))

The cleaned data set has 7941282 number of total incedents and 10 features for each incedent


## Statistical analysis of the individual categorical features

### Multiclass classification

#### The top 10 types of crime reported

In [83]:
# Report the ten most frequent values of 'Primary Type'.
# value_counts() orders by descending frequency, so head(n=10) is the top ten.
# Series.items() is used because Series.iteritems() was deprecated and then
# removed in pandas 2.0; both yield (label, count) pairs.
for crime, count in crime_df['Primary Type'].value_counts().head(n=10).items():
    print("The type of crime known as {}  has a total number incidents count of {}".format(crime, count))

The type of crime known as THEFT  has a total number incidents count of 1640506
The type of crime known as BATTERY  has a total number incidents count of 1442716
The type of crime known as CRIMINAL DAMAGE  has a total number incidents count of 923000
The type of crime known as NARCOTICS  has a total number incidents count of 885431
The type of crime known as OTHER OFFENSE  has a total number incidents count of 491922
The type of crime known as ASSAULT  has a total number incidents count of 481661
The type of crime known as BURGLARY  has a total number incidents count of 470958
The type of crime known as MOTOR VEHICLE THEFT  has a total number incidents count of 370548
The type of crime known as ROBBERY  has a total number incidents count of 300453
The type of crime known as DECEPTIVE PRACTICE  has a total number incidents count of 280931


#### The top 10 locations (such as restaurant or residence, not a physical address) where crime occurred

In [86]:
# Report the ten most frequent values of 'Location Description'.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for location, count in crime_df['Location Description'].value_counts().head(n=10).items():
    print("Crimes that happened on/in the {}  has/have a total number of incidents of {}".format(location, count))

Crimes that happened on/in the STREET  has/have a total number of incidents of 2101842
Crimes that happened on/in the RESIDENCE  has/have a total number of incidents of 1341749
Crimes that happened on/in the SIDEWALK  has/have a total number of incidents of 815595
Crimes that happened on/in the APARTMENT  has/have a total number of incidents of 812512
Crimes that happened on/in the OTHER  has/have a total number of incidents of 294286
Crimes that happened on/in the PARKING LOT/GARAGE(NON.RESID.)  has/have a total number of incidents of 225454
Crimes that happened on/in the ALLEY  has/have a total number of incidents of 180155
Crimes that happened on/in the SCHOOL, PUBLIC, BUILDING  has/have a total number of incidents of 173750
Crimes that happened on/in the RESIDENCE-GARAGE  has/have a total number of incidents of 158550
Crimes that happened on/in the RESIDENCE PORCH/HALLWAY  has/have a total number of incidents of 138492


#### The top 10 streets on which crime occurred

In [81]:
# Report the ten most frequent values of 'Block' (street names).
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for street, count in crime_df['Block'].value_counts().head(n=10).items():
    print("The street known as {}  has a total number of incidents of {}".format(street, count))

The street known as STATE ST  has a total number of incidents of 131933
The street known as MICHIGAN AVE  has a total number of incidents of 104502
The street known as HALSTED ST  has a total number of incidents of 95625
The street known as ASHLAND AVE  has a total number of incidents of 88620
The street known as WESTERN AVE  has a total number of incidents of 76505
The street known as MADISON ST  has a total number of incidents of 73717
The street known as PULASKI RD  has a total number of incidents of 70749
The street known as DR MARTIN LUTHER KING JR DR  has a total number of incidents of 63924
The street known as CLARK ST  has a total number of incidents of 63579
The street known as KEDZIE AVE  has a total number of incidents of 59688


#### The top 10 Beats that experienced the most crime

In [101]:
# Report the ten 'Beat' ids with the most recorded incidents.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
# The loop variable is named for what it holds (a beat id, not a location).
for beat, count in crime_df['Beat'].value_counts().head(n=10).items():
    print("The Beat with  id number {}  has a total number of incidents of {}".format(beat, count))

The Beat with  id number 423  has a total number of incidents of 63049
The Beat with  id number 421  has a total number of incidents of 60728
The Beat with  id number 624  has a total number of incidents of 56151
The Beat with  id number 823  has a total number of incidents of 54312
The Beat with  id number 1533  has a total number of incidents of 54008
The Beat with  id number 511  has a total number of incidents of 53295
The Beat with  id number 1112  has a total number of incidents of 51962
The Beat with  id number 1522  has a total number of incidents of 50880
The Beat with  id number 414  has a total number of incidents of 50825
The Beat with  id number 2533  has a total number of incidents of 49389


#### The top 10 years in which crime was most prevalent

In [98]:
# Report the ten 'Year' values with the most recorded incidents.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
# The loop variable is named for what it holds (a year, not a location).
for year, count in crime_df['Year'].value_counts().head(n=10).items():
    print("The year of {}  has a total number of incidents of {}".format(year, count))

The year of 2008  has a total number of incidents of 852053
The year of 2006  has a total number of incidents of 794684
The year of 2009  has a total number of incidents of 783900
The year of 2010  has a total number of incidents of 700691
The year of 2007  has a total number of incidents of 621848
The year of 2001  has a total number of incidents of 568517
The year of 2002  has a total number of incidents of 490879
The year of 2003  has a total number of incidents of 475913
The year of 2005  has a total number of incidents of 455811
The year of 2004  has a total number of incidents of 388205


### Binary Classification features

In [91]:
# Count incidents for each value of the boolean 'Domestic' column.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
# head(n=10) is kept for symmetry with the other cells; a boolean column has
# at most two distinct values, so it does not trim anything here.
# NOTE(review): the message describes the flag as a "location" -- confirm
# against the data dictionary what 'Domestic' actually encodes.
for is_domestic, count in crime_df['Domestic'].value_counts().head(n=10).items():
    print("When the crime location , which was listed as somewhere domestic or not, was {}, it had a total number of incidents of {}".format(is_domestic, count))

When the crime location , which was listed as somewhere domestic or not, was False, it had a total number of incidents of 6921831
When the crime location , which was listed as somewhere domestic or not, was True, it had a total number of incidents of 1019451


#### Arrest statistics for the data set

In [88]:
# Count incidents for each value of the boolean 'Arrest' column.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
# The loop variable is named for what it holds (the arrest flag).
for arrested, count in crime_df['Arrest'].value_counts().head(n=10).items():
    print("if the arrest status is {}, it has a total number counts of {}".format(arrested, count))

if the arrest status is False, it has a total number counts of 5691862
if the arrest status is True, it has a total number counts of 2249420


### Time series feature

In [104]:
# Show the 'Date' column; bracket access selects the same Series as the
# attribute form.
crime_df['Date']

0          2008-10-07 12:39:00
1          2008-10-09 03:30:00
2          2008-10-09 08:35:00
3          2008-10-10 02:33:00
4          2008-10-10 12:50:00
5          2008-10-10 08:32:00
6          2008-10-11 12:55:00
7          2008-10-11 10:25:00
8          2008-10-11 10:00:00
9          2008-10-12 05:47:00
10         2008-10-12 10:33:00
11         2008-10-12 10:55:00
12         2008-10-13 02:49:00
13         2008-10-13 07:04:00
14         2008-10-14 09:37:00
15         2008-10-14 01:27:00
16         2008-10-15 04:10:00
17         2008-10-16 09:35:00
18         2008-10-16 04:25:00
19         2008-10-17 05:17:00
20         2008-10-17 10:55:00
21         2008-10-21 07:40:00
22         2008-10-24 03:41:00
23         2008-10-24 03:41:00
24         2008-10-24 08:26:00
25         2008-10-24 10:00:00
26         2008-10-25 05:57:00
27         2008-10-25 12:00:00
28         2008-10-26 06:37:00
29         2008-10-26 06:57:00
                  ...         
7941252    2006-02-25 03:38:00
7941253 

# Theft Statistics

In [75]:
# Preview the first ten THEFT incidents. The filtered frame has over a
# million rows, so the display is bounded instead of showing the whole frame.
crime_df[crime_df['Primary Type'] == 'THEFT'].head(10)

Unnamed: 0,Date,Block,Primary Type,Description,Location Description,Arrest,Domestic,Beat,Year,Location
341,2008-01-01 01:30:00,GRAND AVE,THEFT,POCKET-PICKING,LAKEFRONT/WATERFRONT/RIVERBANK,False,False,1834,2008,
343,2008-01-03 01:45:00,LELAND AVE,THEFT,$500 AND UNDER,STREET,False,False,2312,2008,
345,2008-02-12 08:00:00,MALDEN ST,THEFT,$500 AND UNDER,STREET,False,False,2311,2008,
348,2008-01-02 09:00:00,CENTRAL PARK AVE,THEFT,OVER $500,VEHICLE NON-COMMERCIAL,False,False,1413,2008,
355,2008-02-25 03:00:00,MADISON ST,THEFT,OVER $500,STREET,False,False,113,2008,
370,2008-02-26 03:00:00,HARLEM AVE,THEFT,OVER $500,OTHER,False,False,2511,2008,
374,2008-02-26 01:12:00,WESTERN AVE,THEFT,$500 AND UNDER,TAXICAB,True,False,1434,2008,
376,2008-02-25 04:00:00,KIMBALL AVE,THEFT,POCKET-PICKING,CTA BUS,False,False,1412,2008,
380,2008-02-23 08:10:00,95TH ST,THEFT,POCKET-PICKING,CTA PLATFORM,False,False,633,2008,
385,2008-01-02 06:00:00,GRAND AVE,THEFT,FROM BUILDING,SMALL RETAIL STORE,False,False,1834,2008,


In [76]:
# Summary statistics of the numeric columns for THEFT incidents only.
# NOTE(review): 'Beat' looks like an identifier, so its mean/std are probably
# not meaningful -- confirm before interpreting them.
crime_df.loc[crime_df['Primary Type'] == 'THEFT'].describe()

Unnamed: 0,Beat,Year
count,1640506.0,1640506.0
mean,1269.34687,2007.82242
std,728.53512,4.13344
min,111.0,2001.0
25%,632.0,2005.0
50%,1231.0,2008.0
75%,1833.0,2010.0
max,2535.0,2017.0


# Battery Statistics

In [109]:
# Preview the first ten BATTERY incidents. The filtered frame has over a
# million rows, so the display is bounded instead of showing the whole frame.
crime_df[crime_df['Primary Type'] == 'BATTERY'].head(10)

Unnamed: 0,Date,Block,Primary Type,Description,Location Description,Arrest,Domestic,Beat,Year,Location
95,2008-04-20 02:39:18,HOWARD ST,BATTERY,AGGRAVATED: HANDGUN,SIDEWALK,False,False,2424,2008,"(42.019439266, -87.679652874)"
360,2008-02-22 04:15:00,95TH ST,BATTERY,SIMPLE,CTA BUS,False,False,634,2008,
365,2008-02-23 05:00:00,ELLIS AVE,BATTERY,AGGRAVATED DOMESTIC BATTERY: OTHER DANG WEAPON,APARTMENT,False,True,624,2008,
366,2008-02-23 10:15:00,ASHLAND AVE,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,614,2008,
382,2008-01-01 01:00:00,LUMBER ST,BATTERY,DOMESTIC BATTERY SIMPLE,STREET,False,True,1233,2008,
395,2008-01-01 03:00:00,CONGRESS PKWY,BATTERY,SIMPLE,CHA APARTMENT,False,False,1133,2008,
404,2008-01-07 03:15:00,ONTARIO ST,BATTERY,SIMPLE,STREET,False,False,1832,2008,"(41.893200688, -87.633194742)"
412,2008-02-24 10:30:00,MARYLAND AVE,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,624,2008,
417,2008-02-25 08:25:00,PULASKI RD,BATTERY,SIMPLE,SMALL RETAIL STORE,False,False,1712,2008,
427,2008-02-25 09:14:00,MILWAUKEE AVE,BATTERY,AGG PO HANDS NO/MIN INJURY,"SCHOOL, PUBLIC, BUILDING",True,False,1731,2008,


In [105]:
# Summary statistics of the numeric columns for BATTERY incidents only.
# NOTE(review): 'Beat' looks like an identifier, so its mean/std are probably
# not meaningful -- confirm before interpreting them.
crime_df.loc[crime_df['Primary Type'] == 'BATTERY'].describe()


Unnamed: 0,Beat,Year
count,1442716.0,1442716.0
mean,1140.21593,2007.59997
std,689.38475,4.10695
min,111.0,2001.0
25%,612.0,2004.0
50%,1011.0,2008.0
75%,1623.0,2010.0
max,2535.0,2017.0


# Criminal Damage Statistics

In [110]:
# Preview the first ten CRIMINAL DAMAGE incidents. The filtered frame has
# hundreds of thousands of rows, so the display is bounded instead of showing
# the whole frame.
crime_df[crime_df['Primary Type'] == 'CRIMINAL DAMAGE'].head(10)

Unnamed: 0,Date,Block,Primary Type,Description,Location Description,Arrest,Domestic,Beat,Year,Location
346,2008-01-01 04:00:00,MAGNOLIA AVE,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,1923,2008,
347,2008-01-02 10:00:00,GREENVIEW AVE,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,1922,2008,
359,2008-02-25 03:25:00,LARRABEE ST,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,1823,2008,
362,2008-02-23 02:00:00,NORTH AVE,CRIMINAL DAMAGE,TO VEHICLE,PARKING LOT/GARAGE(NON.RESID.),False,False,1813,2008,
369,2008-01-01 09:00:00,HARDING AVE,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,1122,2008,
372,2008-02-25 07:30:00,81ST ST,CRIMINAL DAMAGE,TO PROPERTY,STREET,False,False,631,2008,
379,2008-02-26 11:00:00,WABASH AVE,CRIMINAL DAMAGE,TO VEHICLE,OTHER,False,False,132,2008,
383,2008-01-01 08:30:00,19TH ST,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,1233,2008,
386,2008-01-01 08:00:00,SEMINARY AVE,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,1924,2008,
390,2008-01-02 10:15:00,INDIANA AVE,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,311,2008,


In [106]:
# Summary statistics of the numeric columns for CRIMINAL DAMAGE incidents only.
# NOTE(review): the recorded output reports a minimum 'Year' of 41, which
# looks like a malformed record that survived cleaning -- confirm and filter.
crime_df.loc[crime_df['Primary Type'] == 'CRIMINAL DAMAGE'].describe()

Unnamed: 0,Beat,Year
count,923000.0,923000.0
mean,1216.42537,2007.56724
std,697.13561,4.48183
min,111.0,41.0
25%,631.0,2005.0
50%,1033.0,2008.0
75%,1731.0,2010.0
max,2535.0,2017.0


# Narcotics Statistics

In [111]:
# Preview the first ten NARCOTICS incidents. The filtered frame has hundreds
# of thousands of rows, so the display is bounded instead of showing the
# whole frame.
crime_df[crime_df['Primary Type'] == 'NARCOTICS'].head(10)

Unnamed: 0,Date,Block,Primary Type,Description,Location Description,Arrest,Domestic,Beat,Year,Location
349,2008-02-19 10:38:20,INDIANA AVE,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,STREET,True,False,513,2008,
350,2008-01-19 08:47:37,DEARBORN ST,NARCOTICS,SOLICIT NARCOTICS ON PUBLICWAY,CTA PLATFORM,False,False,113,2008,
351,2008-02-22 02:24:01,NORTH AVE,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,SIDEWALK,True,False,2532,2008,
354,2008-02-26 02:44:57,PAULINA ST,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,VEHICLE NON-COMMERCIAL,True,False,932,2008,
357,2008-02-25 02:04:34,KAMERLING AVE,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,VEHICLE NON-COMMERCIAL,True,False,2532,2008,
361,2008-02-17 02:20:00,ELSTON AVE,NARCOTICS,POSS: COCAINE,GAS STATION,True,False,1623,2008,
368,2008-02-25 12:10:00,LONG AVE,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,STREET,True,False,1633,2008,
378,2008-02-25 10:40:00,CICERO AVE,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,STREET,True,False,1634,2008,
381,2008-02-24 02:25:00,ADDISON ST,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,PARKING LOT/GARAGE(NON.RESID.),True,False,1632,2008,
393,2008-01-02 08:42:00,PULASKI RD,NARCOTICS,POSS: CRACK,RESIDENCE,True,False,2535,2008,


In [107]:
# Summary statistics of the numeric columns for NARCOTICS incidents only.
# NOTE(review): 'Beat' looks like an identifier, so its mean/std are probably
# not meaningful -- confirm before interpreting them.
crime_df.loc[crime_df['Primary Type'] == 'NARCOTICS'].describe()

Unnamed: 0,Beat,Year
count,885431.0,885431.0
mean,1157.63794,2007.42278
std,640.02355,3.80351
min,111.0,2001.0
25%,711.0,2005.0
50%,1112.0,2007.0
75%,1531.0,2010.0
max,2535.0,2017.0


In [108]:
# Summary statistics of the numeric columns for NARCOTICS incidents only.
# NOTE(review): this repeats the expression of the preceding NARCOTICS
# describe() cell and yields the same table -- likely a leftover duplicate.
crime_df.loc[crime_df['Primary Type'] == 'NARCOTICS'].describe()

Unnamed: 0,Beat,Year
count,885431.0,885431.0
mean,1157.63794,2007.42278
std,640.02355,3.80351
min,111.0,2001.0
25%,711.0,2005.0
50%,1112.0,2007.0
75%,1531.0,2010.0
max,2535.0,2017.0
