# Statistics on crime in the Chicago area

## An exploration of when and where crime occurs most often, which types of crime are most common, and whether crime can be predicted with any accuracy from the available features

In [39]:
import pandas as pd
import seaborn as sns
import matplotlib as plt
import numpy as np
#using this to sort the dictionaries by value
import operator
from scipy.stats import trim_mean, kurtosis
from scipy.stats.mstats import mode, gmean, hmean
pd.set_option('display.float_format', lambda x: '%.5f' % x)
%matplotlib inline
np.random.seed(42)
%cd crimes


[Errno 2] No such file or directory: 'crimes'
/Users/donovanadams/Desktop/GitHub/DS-3-Deep-Learning/notebooks/crimes


## Loading previously cleaned data

In [112]:
# Read the previously cleaned crime records; the path is resolved relative to
# the current working directory.
crime_df = pd.read_csv("CleanedCrimes.csv")


In [113]:
# Preview only the first rows rather than rendering the whole frame.
crime_df.head(10)

Unnamed: 0,Arrest,Beat,Block,Date,Description,Domestic,Location,Location Description,Primary Type,Year
0,True,323,75TH ST,2008-10-07 12:39:00,FIRST DEGREE MURDER,False,"(41.758275857, -87.622451031)",ALLEY,HOMICIDE,2008
1,True,1533,POLK ST,2008-10-09 03:30:00,FIRST DEGREE MURDER,False,"(41.87025207, -87.746069362)",STREET,HOMICIDE,2008
2,False,831,MANN DR,2008-10-09 08:35:00,FIRST DEGREE MURDER,False,"(41.770990476, -87.698901469)",PARK PROPERTY,HOMICIDE,2008
3,False,1524,CHICAGO AVE,2008-10-10 02:33:00,FIRST DEGREE MURDER,False,"(41.894916924, -87.757358147)",RESTAURANT,HOMICIDE,2008
4,False,1032,HOMAN AVE,2008-10-10 12:50:00,FIRST DEGREE MURDER,True,"(41.843826272, -87.709893465)",GARAGE,HOMICIDE,2008
5,True,1231,14TH ST,2008-10-10 20:32:00,FIRST DEGREE MURDER,True,"(41.863318307, -87.664923682)",STREET,HOMICIDE,2008
6,True,1034,23RD ST,2008-10-11 00:55:00,FIRST DEGREE MURDER,False,"(41.850350125, -87.676543351)",STREET,HOMICIDE,2008
7,False,331,PAXTON AVE,2008-10-11 22:25:00,FIRST DEGREE MURDER,False,"(41.769005966, -87.571485086)",STREET,HOMICIDE,2008
8,False,823,SAWYER AVE,2008-10-11 22:00:00,FIRST DEGREE MURDER,False,"(41.783861768, -87.704490821)",PORCH,HOMICIDE,2008
9,False,911,51ST ST,2008-10-12 05:47:00,FIRST DEGREE MURDER,False,"(41.801124416, -87.685910818)",ALLEY,HOMICIDE,2008


### Getting the shape of the data

In [114]:
# crime_df.shape is (rows, columns): one row per incident, one column per feature.
# (A bare `crime_df.shape` above the print displayed nothing, so it was dropped.)
print('The cleaned data set has {} total incidents and {} features for each incident'.format(*crime_df.shape))

The cleaned data set has 15882564 number of total incedents and 10 features for each incedent


## Statistical Analysis of Individual features of the categorical values

### Multiclass classification

#### The top 10 types of crime reported

In [115]:
# Print the ten most frequent values of 'Primary Type' with their counts.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for crime, count in crime_df['Primary Type'].value_counts().head(n=10).items():
    print("The type of crime known as {}  has a total number incidents count of {}".format(crime, count))

The type of crime known as THEFT  has a total number incidents count of 3281012
The type of crime known as BATTERY  has a total number incidents count of 2885432
The type of crime known as CRIMINAL DAMAGE  has a total number incidents count of 1846000
The type of crime known as NARCOTICS  has a total number incidents count of 1770862
The type of crime known as OTHER OFFENSE  has a total number incidents count of 983844
The type of crime known as ASSAULT  has a total number incidents count of 963322
The type of crime known as BURGLARY  has a total number incidents count of 941916
The type of crime known as MOTOR VEHICLE THEFT  has a total number incidents count of 741096
The type of crime known as ROBBERY  has a total number incidents count of 600906
The type of crime known as DECEPTIVE PRACTICE  has a total number incidents count of 561862


#### The top 10 location types (such as restaurant or residence, not a physical address) where crime occurred

In [116]:
# Print the ten most frequent values of 'Location Description' with their counts.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for location, count in crime_df['Location Description'].value_counts().head(n=10).items():
    print("Crimes that happened on/in the {}  has/have a total number of incidents of {}".format(location, count))

Crimes that happened on/in the STREET  has/have a total number of incidents of 4203684
Crimes that happened on/in the RESIDENCE  has/have a total number of incidents of 2683498
Crimes that happened on/in the SIDEWALK  has/have a total number of incidents of 1631190
Crimes that happened on/in the APARTMENT  has/have a total number of incidents of 1625024
Crimes that happened on/in the OTHER  has/have a total number of incidents of 588572
Crimes that happened on/in the PARKING LOT/GARAGE(NON.RESID.)  has/have a total number of incidents of 450908
Crimes that happened on/in the ALLEY  has/have a total number of incidents of 360310
Crimes that happened on/in the SCHOOL, PUBLIC, BUILDING  has/have a total number of incidents of 347500
Crimes that happened on/in the RESIDENCE-GARAGE  has/have a total number of incidents of 317100
Crimes that happened on/in the RESIDENCE PORCH/HALLWAY  has/have a total number of incidents of 276984


#### The top 10 streets on which crime occurred

In [1]:
# Print the ten most frequent values of 'Block' (street name) with their counts.
# Requires crime_df from the loading cell; run the notebook top to bottom.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for street, count in crime_df['Block'].value_counts().head(n=10).items():
    print("The street known as {}  has a total number of incidents of {}".format(street, count))

NameError: name 'crime_df' is not defined

#### The top 10 Beats that experienced the most crime

In [121]:
# Print the ten most frequent values of 'Beat' with their counts.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for beat, count in crime_df['Beat'].value_counts().head(n=10).items():
    print("The Beat with  id number {}  has a total number of incidents of {}".format(beat, count))

The Beat with  id number 423  has a total number of incidents of 126098
The Beat with  id number 421  has a total number of incidents of 121456
The Beat with  id number 624  has a total number of incidents of 112302
The Beat with  id number 823  has a total number of incidents of 108624
The Beat with  id number 1533  has a total number of incidents of 108016
The Beat with  id number 511  has a total number of incidents of 106590
The Beat with  id number 1112  has a total number of incidents of 103924
The Beat with  id number 1522  has a total number of incidents of 101760
The Beat with  id number 414  has a total number of incidents of 101650
The Beat with  id number 2533  has a total number of incidents of 98778


#### Top 10 years where crime was most prevalent

In [122]:
# Print the ten most frequent values of 'Year' with their counts.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for year, count in crime_df['Year'].value_counts().head(n=10).items():
    print("The year of {}  has a total number of incidents of {}".format(year, count))

The year of 2008  has a total number of incidents of 1704106
The year of 2006  has a total number of incidents of 1589368
The year of 2009  has a total number of incidents of 1567800
The year of 2010  has a total number of incidents of 1401382
The year of 2007  has a total number of incidents of 1243696
The year of 2001  has a total number of incidents of 1137034
The year of 2002  has a total number of incidents of 981758
The year of 2003  has a total number of incidents of 951826
The year of 2005  has a total number of incidents of 911622
The year of 2004  has a total number of incidents of 776410


### Binary Classification features

In [123]:
# Print how many incidents carry each value of the boolean 'Domestic' column.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
# NOTE(review): the message describes 'Domestic' as a kind of location, yet the
# sample rows include Domestic=True incidents on a STREET - confirm the wording.
for is_domestic, count in crime_df['Domestic'].value_counts().head(n=10).items():
    print("When the crime location , which was listed as somewhere domestic or not, was {}, it had a total number of incidents of {}".format(is_domestic, count))

When the crime location , which was listed as somewhere domestic or not, was False, it had a total number of incidents of 13843662
When the crime location , which was listed as somewhere domestic or not, was True, it had a total number of incidents of 2038902


#### Arrest statistics for the data set

In [124]:
# Print how many incidents carry each value of the boolean 'Arrest' column.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for arrested, count in crime_df['Arrest'].value_counts().head(n=10).items():
    print("if the arrest status is {}, it has a total number counts of {}".format(arrested, count))

if the arrest status is False, it has a total number counts of 11383724
if the arrest status is True, it has a total number counts of 4498840


### Time series feature

In [125]:
# Preview the first timestamps of the 'Date' column instead of the full series.
crime_df['Date'].head(30)

0           2008-10-07 12:39:00
1           2008-10-09 03:30:00
2           2008-10-09 08:35:00
3           2008-10-10 02:33:00
4           2008-10-10 12:50:00
5           2008-10-10 20:32:00
6           2008-10-11 00:55:00
7           2008-10-11 22:25:00
8           2008-10-11 22:00:00
9           2008-10-12 05:47:00
10          2008-10-12 22:33:00
11          2008-10-12 22:55:00
12          2008-10-13 14:49:00
13          2008-10-13 19:04:00
14          2008-10-14 09:37:00
15          2008-10-14 13:27:00
16          2008-10-15 16:10:00
17          2008-10-16 21:35:00
18          2008-10-16 16:25:00
19          2008-10-17 05:17:00
20          2008-10-17 22:55:00
21          2008-10-21 19:40:00
22          2008-10-24 15:41:00
23          2008-10-24 15:41:00
24          2008-10-24 20:26:00
25          2008-10-24 22:00:00
26          2008-10-25 05:57:00
27          2008-10-25 12:00:00
28          2008-10-26 06:37:00
29          2008-10-26 18:57:00
                   ...         
15882534

# Statistics based on Primary Type (the type of crime that was committed)

## Theft Statistics

In [126]:
# Preview the first incidents whose 'Primary Type' is THEFT rather than
# rendering every matching row.
crime_df[crime_df['Primary Type'] == 'THEFT'].head(10)

Unnamed: 0,Arrest,Beat,Block,Date,Description,Domestic,Location,Location Description,Primary Type,Year
341,False,1834,GRAND AVE,2008-01-01 01:30:00,POCKET-PICKING,False,,LAKEFRONT/WATERFRONT/RIVERBANK,THEFT,2008
343,False,2312,LELAND AVE,2008-01-03 01:45:00,$500 AND UNDER,False,,STREET,THEFT,2008
345,False,2311,MALDEN ST,2008-02-12 20:00:00,$500 AND UNDER,False,,STREET,THEFT,2008
348,False,1413,CENTRAL PARK AVE,2008-01-02 21:00:00,OVER $500,False,,VEHICLE NON-COMMERCIAL,THEFT,2008
355,False,113,MADISON ST,2008-02-25 15:00:00,OVER $500,False,,STREET,THEFT,2008
370,False,2511,HARLEM AVE,2008-02-26 03:00:00,OVER $500,False,,OTHER,THEFT,2008
374,True,1434,WESTERN AVE,2008-02-26 13:12:00,$500 AND UNDER,False,,TAXICAB,THEFT,2008
376,False,1412,KIMBALL AVE,2008-02-25 16:00:00,POCKET-PICKING,False,,CTA BUS,THEFT,2008
380,False,633,95TH ST,2008-02-23 08:10:00,POCKET-PICKING,False,,CTA PLATFORM,THEFT,2008
385,False,1834,GRAND AVE,2008-01-02 18:00:00,FROM BUILDING,False,,SMALL RETAIL STORE,THEFT,2008


In [127]:
# Summary statistics of the numeric columns for THEFT incidents only.
crime_df.loc[crime_df['Primary Type'].eq('THEFT')].describe()

Unnamed: 0,Beat,Year
count,3281012.0,3281012.0
mean,1269.34687,2007.82242
std,728.53501,4.13344
min,111.0,2001.0
25%,632.0,2005.0
50%,1231.0,2008.0
75%,1833.0,2010.0
max,2535.0,2017.0


## Battery Statistics

In [128]:
# Preview the first incidents whose 'Primary Type' is BATTERY rather than
# rendering every matching row.
crime_df[crime_df['Primary Type'] == 'BATTERY'].head(10)

Unnamed: 0,Arrest,Beat,Block,Date,Description,Domestic,Location,Location Description,Primary Type,Year
95,False,2424,HOWARD ST,2008-04-20 02:39:18,AGGRAVATED: HANDGUN,False,"(42.019439266, -87.679652874)",SIDEWALK,BATTERY,2008
360,False,634,95TH ST,2008-02-22 16:15:00,SIMPLE,False,,CTA BUS,BATTERY,2008
365,False,624,ELLIS AVE,2008-02-23 05:00:00,AGGRAVATED DOMESTIC BATTERY: OTHER DANG WEAPON,True,,APARTMENT,BATTERY,2008
366,False,614,ASHLAND AVE,2008-02-23 10:15:00,DOMESTIC BATTERY SIMPLE,True,,RESIDENCE,BATTERY,2008
382,False,1233,LUMBER ST,2008-01-01 01:00:00,DOMESTIC BATTERY SIMPLE,True,,STREET,BATTERY,2008
395,False,1133,CONGRESS PKWY,2008-01-01 15:00:00,SIMPLE,False,,CHA APARTMENT,BATTERY,2008
404,False,1832,ONTARIO ST,2008-01-07 03:15:00,SIMPLE,False,"(41.893200688, -87.633194742)",STREET,BATTERY,2008
412,False,624,MARYLAND AVE,2008-02-24 22:30:00,DOMESTIC BATTERY SIMPLE,True,,APARTMENT,BATTERY,2008
417,False,1712,PULASKI RD,2008-02-25 08:25:00,SIMPLE,False,,SMALL RETAIL STORE,BATTERY,2008
427,True,1731,MILWAUKEE AVE,2008-02-25 09:14:00,AGG PO HANDS NO/MIN INJURY,False,,"SCHOOL, PUBLIC, BUILDING",BATTERY,2008


In [129]:
# Summary statistics of the numeric columns for BATTERY incidents only.
crime_df.loc[crime_df['Primary Type'].eq('BATTERY')].describe()


Unnamed: 0,Beat,Year
count,2885432.0,2885432.0
mean,1140.21593,2007.59997
std,689.38463,4.10695
min,111.0,2001.0
25%,612.0,2004.0
50%,1011.0,2008.0
75%,1623.0,2010.0
max,2535.0,2017.0


## Criminal Damage Statistics (this refers to property damage and the like)

In [130]:
# Preview the first incidents whose 'Primary Type' is CRIMINAL DAMAGE rather
# than rendering every matching row.
crime_df[crime_df['Primary Type'] == 'CRIMINAL DAMAGE'].head(10)

Unnamed: 0,Arrest,Beat,Block,Date,Description,Domestic,Location,Location Description,Primary Type,Year
346,False,1923,MAGNOLIA AVE,2008-01-01 16:00:00,TO VEHICLE,False,,STREET,CRIMINAL DAMAGE,2008
347,False,1922,GREENVIEW AVE,2008-01-02 22:00:00,TO VEHICLE,False,,STREET,CRIMINAL DAMAGE,2008
359,False,1823,LARRABEE ST,2008-02-25 15:25:00,TO VEHICLE,False,,STREET,CRIMINAL DAMAGE,2008
362,False,1813,NORTH AVE,2008-02-23 14:00:00,TO VEHICLE,False,,PARKING LOT/GARAGE(NON.RESID.),CRIMINAL DAMAGE,2008
369,False,1122,HARDING AVE,2008-01-01 09:00:00,TO VEHICLE,False,,STREET,CRIMINAL DAMAGE,2008
372,False,631,81ST ST,2008-02-25 19:30:00,TO PROPERTY,False,,STREET,CRIMINAL DAMAGE,2008
379,False,132,WABASH AVE,2008-02-26 11:00:00,TO VEHICLE,False,,OTHER,CRIMINAL DAMAGE,2008
383,False,1233,19TH ST,2008-01-01 20:30:00,TO VEHICLE,False,,STREET,CRIMINAL DAMAGE,2008
386,False,1924,SEMINARY AVE,2008-01-01 20:00:00,TO VEHICLE,False,,STREET,CRIMINAL DAMAGE,2008
390,False,311,INDIANA AVE,2008-01-02 22:15:00,TO VEHICLE,False,,STREET,CRIMINAL DAMAGE,2008


In [132]:
# Summary statistics of the numeric columns for CRIMINAL DAMAGE incidents only.
# NOTE(review): the recorded output reports a minimum Year of 41.0, which looks
# like at least one malformed record - confirm against the cleaning step.
crime_df.loc[crime_df['Primary Type'].eq('CRIMINAL DAMAGE')].describe()

Unnamed: 0,Beat,Year
count,1846000.0,1846000.0
mean,1216.42537,2007.56724
std,697.13543,4.48183
min,111.0,41.0
25%,631.0,2005.0
50%,1033.0,2008.0
75%,1731.0,2010.0
max,2535.0,2017.0


## Narcotics Statistics

In [133]:
# Preview the first incidents whose 'Primary Type' is NARCOTICS rather than
# rendering every matching row.
crime_df[crime_df['Primary Type'] == 'NARCOTICS'].head(10)

Unnamed: 0,Arrest,Beat,Block,Date,Description,Domestic,Location,Location Description,Primary Type,Year
349,True,513,INDIANA AVE,2008-02-19 22:38:20,POSS: CANNABIS 30GMS OR LESS,False,,STREET,NARCOTICS,2008
350,False,113,DEARBORN ST,2008-01-19 20:47:37,SOLICIT NARCOTICS ON PUBLICWAY,False,,CTA PLATFORM,NARCOTICS,2008
351,True,2532,NORTH AVE,2008-02-22 02:24:01,POSS: CANNABIS 30GMS OR LESS,False,,SIDEWALK,NARCOTICS,2008
354,True,932,PAULINA ST,2008-02-26 02:44:57,POSS: CANNABIS 30GMS OR LESS,False,,VEHICLE NON-COMMERCIAL,NARCOTICS,2008
357,True,2532,KAMERLING AVE,2008-02-25 14:04:34,POSS: CANNABIS 30GMS OR LESS,False,,VEHICLE NON-COMMERCIAL,NARCOTICS,2008
361,True,1623,ELSTON AVE,2008-02-17 14:20:00,POSS: COCAINE,False,,GAS STATION,NARCOTICS,2008
368,True,1633,LONG AVE,2008-02-25 00:10:00,POSS: CANNABIS 30GMS OR LESS,False,,STREET,NARCOTICS,2008
378,True,1634,CICERO AVE,2008-02-25 22:40:00,POSS: CANNABIS 30GMS OR LESS,False,,STREET,NARCOTICS,2008
381,True,1632,ADDISON ST,2008-02-24 14:25:00,POSS: CANNABIS 30GMS OR LESS,False,,PARKING LOT/GARAGE(NON.RESID.),NARCOTICS,2008
393,True,2535,PULASKI RD,2008-01-02 20:42:00,POSS: CRACK,False,,RESIDENCE,NARCOTICS,2008


In [134]:
# Summary statistics of the numeric columns for NARCOTICS incidents only.
crime_df.loc[crime_df['Primary Type'].eq('NARCOTICS')].describe()

Unnamed: 0,Beat,Year
count,1770862.0,1770862.0
mean,1157.63794,2007.42278
std,640.02336,3.80351
min,111.0,2001.0
25%,711.0,2005.0
50%,1112.0,2007.0
75%,1531.0,2010.0
max,2535.0,2017.0


In [108]:
# Summary statistics of the numeric columns for NARCOTICS incidents only.
# NOTE(review): this cell repeats the NARCOTICS describe() cell directly above,
# but its execution count (In [108]) is lower and its recorded row count
# (885431) is exactly half of the one above (1770862). The recorded output
# appears to come from an earlier kernel state - possibly before rows were
# duplicated in the data; confirm and re-run the notebook top to bottom.
crime_df[crime_df['Primary Type']=='NARCOTICS'].describe()

Unnamed: 0,Beat,Year
count,885431.0,885431.0
mean,1157.63794,2007.42278
std,640.02355,3.80351
min,111.0,2001.0
25%,711.0,2005.0
50%,1112.0,2007.0
75%,1531.0,2010.0
max,2535.0,2017.0
