# Machine Learning Project
# Kansas City Crime Data Deep Dive

### Import Dependancies

In [1]:
import warnings
warnings.simplefilter('ignore')

# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

### Getting our KCMO crime data (Raw data came from data.kcmo.org)

In [2]:
file_name = os.path.join('Resources', 'KCPD_Crime_Data_2017.csv')
kc_crime = pd.read_csv(file_name)
kc_crime.reset_index()
kc_crime.head(2)

Unnamed: 0,Report_No,Reported_Date,Reported_Time,From_Date,From_Time,To_Date,To_Time,Offense,IBRS,Description,...,Rep_Dist,Area,DVFlag,Invl_No,Involvement,Race,Sex,Age,Firearm Used Flag,Location
0,100080848,5/28/2017,3:44,5/28/2017,2:00,,,1352,280,Stolen Property OFFE,...,PJ3255,CPD,U,1,VIC,W,F,29.0,N,"4000 MILL ST\nKANSAS CITY 64111\n(39.053635, -..."
1,120046817,11/21/2017,13:30,11/20/2017,9:00,,,101,09A,HOMICIDE/Non Neglige,...,PJ1029,CPD,U,1,SUS,B,M,,Y,"1100 LOCUST ST\nKANSAS CITY 64105\n(39.10091, ..."


### Filter for male and female victims in KCMO

In [6]:
# kc_crime1 = kc_crime[kc_crime.City.str.contains("KANSAS CITY") == True]
# kc_crime2 = kc_crime1[kc_crime1.Involvement.str.contains("VIC") == True]
# kc_crime3 = kc_crime2[kc_crime2.Sex.str.contains("NaN") == False]
# kc_crime4 = kc_crime3[kc_crime3.Sex.str.contains("U") == False]
kc_crime4b = kc_crime4[kc_crime4.Zip_Code.str.contains("99999") == False]

NameError: name 'kc_crime4' is not defined

### Drop unnecessary columns

In [4]:
kc_crime5 = kc_crime4b.drop(['To_Date', 'From_Date', 'To_Time', 'From_Time',
                          'Offense', 'IBRS', 'Rep_Dist', 'Area', 'Beat' ], axis=1)

### Remove abnormal age values

In [5]:
kc_crime6 = kc_crime5[kc_crime5['Age'] < 91]  

In [6]:
kc_crime6.count()

Report_No            43908
Reported_Date        43908
Reported_Time        43908
Description          43908
Address              43908
City                 43908
Zip Code             43908
DVFlag               43908
Invl_No              43908
Involvement          43908
Race                 43908
Sex                  43908
Age                  43908
Firearm Used Flag    43908
Location             43908
dtype: int64

### Parse out latitude and longitude

In [7]:
kc_crime6_copy = kc_crime6.copy()

In [8]:
#copy the dataframe to a work df
# acc_drugtemp_df = acc_drug_deaths_df.copy()

In [9]:
kc_crime6_copy.head(2)

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Description,Address,City,Zip Code,DVFlag,Invl_No,Involvement,Race,Sex,Age,Firearm Used Flag,Location
0,100080848,5/28/2017,3:44,Stolen Property OFFE,4000 MILL ST,KANSAS CITY,64111,U,1,VIC,W,F,29.0,N,"4000 MILL ST\nKANSAS CITY 64111\n(39.053635, -..."
2,120046817,11/21/2017,13:30,HOMICIDE/Non Neglige,1100 LOCUST ST,KANSAS CITY,64105,N,1,VIC,B,F,69.0,Y,"1100 LOCUST ST\nKANSAS CITY 64105\n(39.10091, ..."


In [10]:
#split the DeathLoc column into two columns 
# acc_drugtemp3_df = acc_drugtemp_df['DeathLoc'].str[2:-1].str.split('\n', expand=True)

In [11]:
kc_crime7 = kc_crime6_copy['Location'].str[2:-1].str.split('\n', expand=True)

In [12]:
kc_crime7.head(2)

Unnamed: 0,0,1,2
0,00 MILL ST,KANSAS CITY 64111,"(39.053635, -94.595998"
2,00 LOCUST ST,KANSAS CITY 64105,"(39.10091, -94.577328"


In [13]:
test = kc_crime7[2][0]
test

'(39.053635, -94.595998'

In [14]:
#create a column called geo getting rid of the left (
# acc_drugtemp3_df['Geo'] = acc_drugtemp3_df[1][1][1:-1]
kc_crime7['Geo'] = kc_crime7[2][0:][0:]

In [15]:
kc_crime7.head(2)

Unnamed: 0,0,1,2,Geo
0,00 MILL ST,KANSAS CITY 64111,"(39.053635, -94.595998","(39.053635, -94.595998"
2,00 LOCUST ST,KANSAS CITY 64105,"(39.10091, -94.577328","(39.10091, -94.577328"


In [16]:
kc_crime7['Geo'] = kc_crime7['Geo'].str[1:]

In [17]:
kc_crime7.head(2)

Unnamed: 0,0,1,2,Geo
0,00 MILL ST,KANSAS CITY 64111,"(39.053635, -94.595998","39.053635, -94.595998"
2,00 LOCUST ST,KANSAS CITY 64105,"(39.10091, -94.577328","39.10091, -94.577328"


In [18]:
kc_crime8 = kc_crime7['Geo'].str[0:].str.split(',', expand=True)
kc_crime8.head(2)

Unnamed: 0,0,1
0,39.053635,-94.595998
2,39.10091,-94.577328


In [19]:
kc_crime8['Lat'] = kc_crime8[0]
kc_crime8['Lon'] = kc_crime8[1]
kc_crime8 = kc_crime8.drop([0], axis=1)
kc_crime8 = kc_crime8.drop([1], axis=1)
kc_crime8.head()

Unnamed: 0,Lat,Lon
0,39.053635,-94.595998
2,39.10091,-94.577328
33,39.17744,-94.572069
36,39.033505,-94.547812
46,,


In [23]:
kc_crime9 = kc_crime6.append(kc_crime8, ignore_index=True)

In [24]:
kc_crime9.head(20)

Unnamed: 0,Address,Age,City,DVFlag,Description,Firearm Used Flag,Invl_No,Involvement,Lat,Location,Lon,Race,Report_No,Reported_Date,Reported_Time,Sex,Zip Code
0,4000 MILL ST,29.0,KANSAS CITY,U,Stolen Property OFFE,N,1.0,VIC,,"4000 MILL ST\nKANSAS CITY 64111\n(39.053635, -...",,W,100080848.0,5/28/2017,3:44,F,64111.0
1,1100 LOCUST ST,69.0,KANSAS CITY,N,HOMICIDE/Non Neglige,Y,1.0,VIC,,"1100 LOCUST ST\nKANSAS CITY 64105\n(39.10091, ...",,B,120046817.0,11/21/2017,13:30,F,64105.0
2,4600 N HOLMES ST,31.0,KANSAS CITY,U,Auto Theft,N,1.0,VIC,,4600 N HOLMES ST\nKANSAS CITY 64119\n(39.17744...,,W,160028258.0,8/2/2017,13:50,F,64119.0
3,E 51 ST and WALROND AV,19.0,KANSAS CITY,U,Intimidation,N,1.0,VIC,,51 ST and WALROND AV\nKANSAS CITY 64130\n(39.0...,,W,160034107.0,8/24/2017,16:22,F,64130.0
4,3400 PALMER AV,36.0,KANSAS CITY,U,Agg Assault - Domest,N,1.0,VIC,,3400 PALMER AV\nKANSAS CITY 64138\n,,B,160047600.0,6/28/2017,12:00,F,64138.0
5,9400 NE 79 ST,62.0,KANSAS CITY,U,Auto Theft,N,1.0,VIC,,"9400 NE 79 ST\nKANSAS CITY 64157\n(39.235881, ...",,W,160063139.0,3/15/2017,14:06,F,64157.0
6,E 41 ST and PROSPECT AV,70.0,KANSAS CITY,U,Possession/Sale/Dist,N,1.0,VIC,,41 ST and PROSPECT AV\nKANSAS CITY 64131\n(39....,,B,160065976.0,4/1/2017,19:21,F,64131.0
7,1100 LOCUST ST,35.0,KANSAS CITY,U,HOMICIDE/Non Neglige,N,1.0,VIC,,"1100 LOCUST ST\nKANSAS CITY 64106\n(39.10091, ...",,W,160069964.0,2/4/2017,13:41,M,64106.0
8,E MISSOURI AV and PASEO,26.0,KANSAS CITY,U,Stolen Property OFFE,N,1.0,VIC,,E MISSOURI AV\nKANSAS CITY 64106\n,,B,160077054.0,5/24/2017,14:44,M,64106.0
9,E 8 ST and VAN BRUNT BL,25.0,KANSAS CITY,U,Justifiable Homicide,Y,1.0,VIC,,8 ST\nand VAN BRUNT BL KANSAS CITY 99999\n,,B,160090081.0,3/6/2017,10:52,M,99999.0


In [36]:
# test = acc_drugtemp4_df['Coord'][0]
# test.split(',')[1]
test = kc_crime7['Geo'][0]
test.split(',')[0]

'39.053635'

In [95]:
kc_crime8 = kc_crime7['Geo'].str[0:].str.split(',', expand=True)

In [80]:
#Add a new column to the dataframe with the latitude and longitude
# acc_drugtemp4_df = acc_drugtemp_df.assign(Coord = acc_drugtemp3_df[1][:-1][0:-1])
kc_crime8 = kc_crime6_copy.assign(Coord = kc_crime7[2][:-1][0:-1])

In [57]:
#create a column called geo
# acc_drugtemp3b_df['Lat'] = acc_drugtemp3_df[1][1][1:-1]
kc_crime8['Geo'] = kc_crime7[2][:-1][0:-1]

In [19]:
# test = acc_drugtemp4_df['Coord'][0]
# test.split(',')[1]
test = kc_crime8['Geo'][0]
test.split(',')[1]

' -94.57732'

In [20]:
# split the Coord column into two at the ,
# acc_drugtemp5_df = acc_drugtemp4_df['Coord'].str[1:].str.split(',', expand=True)
kc_crime9 = kc_crime8['Geo'].str[0:].str.split(',', expand=True)

In [22]:
#create a column called geo
# acc_drugtemp3_df['Lat'] = acc_drugtemp5_df[0][:-1][0:-1]
kc_crime8['Latitude'] = kc_crime9[0][:-1][0:-1]
kc_crime8['Longitude'] = kc_crime9[1][:-1][0:-1]

In [24]:
kc_crime9 =  kc_crime8.drop(['Location', 'Coord', 'Geo'], axis=1)
kc_crime9.head(1)

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Description,Address,City,Zip Code,DVFlag,Invl_No,Involvement,Race,Sex,Age,Firearm Used Flag,Latitude,Longitude
0,100080848,5/28/2017,3:44,Stolen Property OFFE,4000 MILL ST,KANSAS CITY,64111,U,1,VIC,W,F,29.0,N,39.10091,-94.57732


### Label encode description

In [29]:
Description_columns = pd.get_dummies(kc_crime['Description'],prefix = "Description")
kc_crime = pd.concat([kc_crime, Description_columns], axis=1)
kc_crime.drop('Description', axis=1, inplace=True)