### KCPD 2023 Crime Data: Loading and Preprocessing

In [72]:
import pandas as pd
import numpy as np
from pathlib import Path
import sklearn 
import matplotlib.pyplot as plt

In [73]:
# Load the KCPD crime data CSV into a pandas DataFrame
csv_path = Path("./Resources/KCPD_Crime_Data_2023.csv")
KC_crimes = pd.read_csv(csv_path)

# Preview the first few rows
KC_crimes.head()

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Offense,IBRS,Description,Address,City,Zip Code,Area,DVFlag,Involvement,Race,Sex,Age,Fire Arm Used Flag,Location
0,KC23000049,01/01/2023,02:55,Casualty,,,1900 NE 49TH ST,KANSAS CITY,64118.0,SCP,False,VIC,B,M,37.0,False,POINT (-94.557504996 39.183924016)
1,KC23000346,01/02/2023,14:00,Assault (Aggravated),13A,Aggravated Assault,I 35 HWY and PASEO,KANSAS CITY,64106.0,CPD,False,SUS,W,M,,False,POINT (-94.567090038 39.063079981)
2,KC23000490,01/03/2023,07:39,Stealing - Shoplift,23C,Shoplifting,3800 INDEPENDENCE AVE,KANSAS CITY,64124.0,EPD,False,VIC,,,,False,POINT (-94.537153514 39.105905137)
3,KC23000819,01/04/2023,15:23,Loss,,,7600 MONROE AVE,KANSAS CITY,64128.0,MPD,False,VIC,B,M,28.0,False,POINT (-94.540035187 39.073080307)
4,KC23001555,01/07/2023,23:10,Property Damage,120,Robbery,3000 VAN BRUNT BLVD,KANSAS CITY,64128.0,EPD,False,VIC,,,,True,POINT (-94.520866347 39.070814751)


In [74]:
# Summarize each column's dtype and non-null count to spot missing values
KC_crimes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75292 entries, 0 to 75291
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Report_No           75292 non-null  object 
 1   Reported_Date       75292 non-null  object 
 2   Reported_Time       75292 non-null  object 
 3   Offense             75292 non-null  object 
 4   IBRS                68096 non-null  object 
 5   Description         68096 non-null  object 
 6   Address             75292 non-null  object 
 7   City                75292 non-null  object 
 8   Zip Code            70894 non-null  float64
 9   Area                75292 non-null  object 
 10  DVFlag              75292 non-null  bool   
 11  Involvement         75292 non-null  object 
 12  Race                64700 non-null  object 
 13  Sex                 65587 non-null  object 
 14  Age                 54604 non-null  float64
 15  Fire Arm Used Flag  75292 non-null  bool   
 16  Loca

In [76]:
# Reload the CSV, this time asking pandas to parse Reported_Date into datetimes
crime_csv_path = Path("./Resources/KCPD_Crime_Data_2023.csv")
KC_crimes = pd.read_csv(crime_csv_path, parse_dates=["Reported_Date"])
KC_crimes.head()

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Offense,IBRS,Description,Address,City,Zip Code,Area,DVFlag,Involvement,Race,Sex,Age,Fire Arm Used Flag,Location
0,KC23000049,2023-01-01,02:55,Casualty,,,1900 NE 49TH ST,KANSAS CITY,64118.0,SCP,False,VIC,B,M,37.0,False,POINT (-94.557504996 39.183924016)
1,KC23000346,2023-01-02,14:00,Assault (Aggravated),13A,Aggravated Assault,I 35 HWY and PASEO,KANSAS CITY,64106.0,CPD,False,SUS,W,M,,False,POINT (-94.567090038 39.063079981)
2,KC23000490,2023-01-03,07:39,Stealing - Shoplift,23C,Shoplifting,3800 INDEPENDENCE AVE,KANSAS CITY,64124.0,EPD,False,VIC,,,,False,POINT (-94.537153514 39.105905137)
3,KC23000819,2023-01-04,15:23,Loss,,,7600 MONROE AVE,KANSAS CITY,64128.0,MPD,False,VIC,B,M,28.0,False,POINT (-94.540035187 39.073080307)
4,KC23001555,2023-01-07,23:10,Property Damage,120,Robbery,3000 VAN BRUNT BLVD,KANSAS CITY,64128.0,EPD,False,VIC,,,,True,POINT (-94.520866347 39.070814751)


In [77]:
# Check the dtype of Reported_Date after re-reading the file with parse_dates
KC_crimes.Reported_Date.dtype

dtype('<M8[ns]')

In [78]:
# Inspect the last ten report dates in the DataFrame's current row order
KC_crimes.Reported_Date.tail(10)

75282   2023-07-30
75283   2023-06-19
75284   2023-06-23
75285   2023-06-15
75286   2023-07-22
75287   2023-06-08
75288   2023-07-25
75289   2023-08-09
75290   2023-09-08
75291   2023-07-27
Name: Reported_Date, dtype: datetime64[ns]

In [79]:
# Order the rows chronologically by report date, then check that the latest dates come last
KC_crimes = KC_crimes.sort_values(by="Reported_Date", ascending=True)
KC_crimes.Reported_Date.tail(10)


66923   2023-09-10
64395   2023-09-10
64412   2023-09-10
66913   2023-09-10
57162   2023-09-10
56027   2023-09-10
56026   2023-09-10
57203   2023-09-10
62190   2023-09-10
61909   2023-09-10
Name: Reported_Date, dtype: datetime64[ns]

In [88]:
# Make a working copy of the sorted, date-parsed DataFrame so that later edits
# are applied to KC_crimes_tmp and KC_crimes itself is left unchanged
KC_crimes_tmp = KC_crimes.copy()

In [89]:
# Count the distinct report numbers (a report number can appear on more than one row)
KC_crimes.Report_No.nunique()

43277

In [97]:
# Turn Report_No into a numeric column by stripping the literal "KC" characters.
# The vectorized .str accessor replaces the row-by-row apply/lambda, and
# regex=False makes "KC" a plain substring rather than a pattern.
# The source is KC_crimes (which still holds the original strings), so this cell
# can be re-run without failing on already-converted integers.
KC_crimes_tmp["Report_No"] = (
    KC_crimes["Report_No"]
    .str.replace("KC", "", regex=False)
    .astype("int64")
)
KC_crimes_tmp.head()

Unnamed: 0,Report_No,Reported_Time,Offense,IBRS,Description,Address,City,Zip Code,Area,DVFlag,Involvement,Race,Sex,Age,Fire Arm Used Flag,Location,crimeMonth,crimeDay
0,23000049,02:55,Casualty,,,1900 NE 49TH ST,KANSAS CITY,64118.0,SCP,False,VIC,B,M,37.0,False,POINT (-94.557504996 39.183924016),1,1
3862,23000188,20:25,Stolen Auto,240,Motor Vehicle Theft,10600 S JEFFERSON ST,KANSAS CITY,64114.0,SPD,False,CMP VIC,W,M,40.0,False,POINT (-94.599253645 38.935477958),1,1
3905,23000065,04:14,Domestic Violence Assault (Aggravated),13B,Simple Assault,6800 E 13TH ST,KANSAS CITY,64126.0,EPD,True,VIC,W,F,,False,POINT (-94.503343613 39.095555235),1,1
16237,23000224,23:16,Domestic Violence Assault (Aggravated),13A,Aggravated Assault,800 E 33RD ST,KANSAS CITY,64109.0,CPD,True,ARR CHA SUS,W,F,41.0,False,POINT (-94.575439 39.067178983),1,1
16224,23000099,10:23,Property Damage,290,Vandalism/Destruction of Property,1300 E 81ST TER,KANSAS CITY,64131.0,MPD,False,CMP VIC,W,F,27.0,False,POINT (-94.573644015 38.980798986),1,1


In [100]:
# Reported_Time is currently an object (string) column in "HH:MM" form. The plan is to
# map it to named time-of-day buckets before applying one-hot encoding:
#   Morning:   5 am to 12 pm (noon)
#   Afternoon: 12 pm to 5 pm
#   Evening:   5 pm to 9 pm
#   Night:     9 pm to 4 am
# NOTE(review): 4 am to 5 am is not covered by any bucket above -- confirm the intended boundary.

# NOTE(review): the conversion below is disabled. It turns "HH:MM" strings into integers
# (e.g. "02:55" -> 255); it does not implement the bucketing described above.
#KC_crimes_tmp["Reported_Time"] = KC_crimes_tmp.Reported_Time.apply(lambda x: int(x.replace(':','')))
KC_crimes_tmp.head()

Unnamed: 0,Report_No,Reported_Time,Offense,IBRS,Description,Address,City,Zip Code,Area,DVFlag,Involvement,Race,Sex,Age,Fire Arm Used Flag,Location,crimeMonth,crimeDay
0,23000049,255,Casualty,,,1900 NE 49TH ST,KANSAS CITY,64118.0,SCP,False,VIC,B,M,37.0,False,POINT (-94.557504996 39.183924016),1,1
3862,23000188,2025,Stolen Auto,240,Motor Vehicle Theft,10600 S JEFFERSON ST,KANSAS CITY,64114.0,SPD,False,CMP VIC,W,M,40.0,False,POINT (-94.599253645 38.935477958),1,1
3905,23000065,414,Domestic Violence Assault (Aggravated),13B,Simple Assault,6800 E 13TH ST,KANSAS CITY,64126.0,EPD,True,VIC,W,F,,False,POINT (-94.503343613 39.095555235),1,1
16237,23000224,2316,Domestic Violence Assault (Aggravated),13A,Aggravated Assault,800 E 33RD ST,KANSAS CITY,64109.0,CPD,True,ARR CHA SUS,W,F,41.0,False,POINT (-94.575439 39.067178983),1,1
16224,23000099,1023,Property Damage,290,Vandalism/Destruction of Property,1300 E 81ST TER,KANSAS CITY,64131.0,MPD,False,CMP VIC,W,F,27.0,False,POINT (-94.573644015 38.980798986),1,1


### Derive month and day columns from the Reported_Date column

In [91]:
# Derive month and day-of-month columns from the report date.
# The year is skipped because all of the data is from 2023.
report_dates = KC_crimes_tmp["Reported_Date"]
KC_crimes_tmp["crimeMonth"] = report_dates.dt.month
KC_crimes_tmp["crimeDay"] = report_dates.dt.day

In [92]:
# Preview the last rows, including the new crimeMonth and crimeDay columns
KC_crimes_tmp.tail()

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Offense,IBRS,Description,Address,City,Zip Code,Area,DVFlag,Involvement,Race,Sex,Age,Fire Arm Used Flag,Location,crimeMonth,crimeDay
56027,23061929,2023-09-10,13:16,Stealing from Building/Residence,23D,Theft From Building,1200 E EMANUEL CLEAVER II BLVD,KANSAS CITY,64110.0,MPD,False,VIC,B,M,28.0,False,POINT (-94.571662702 39.041714418),9,10
56026,23061821,2023-09-10,01:26,Assault (Aggravated),90C,Disorderly Conduct,1300 GRAND BLVD,KANSAS CITY,64106.0,CPD,False,ARR CHA SUS,B,F,27.0,False,POINT (-94.581620989 39.097889983),9,10
57203,23061815,2023-09-10,00:43,Trespass of Real Property,90J,Trespass of Real Property,1300 GRAND BLVD,KANSAS CITY,64106.0,CPD,False,VIC,,,,False,POINT (-94.581620989 39.097889983),9,10
62190,23061907,2023-09-10,10:35,Miscellaneous Investigation,,,3800 MERSINGTON AVE,KANSAS CITY,64128.0,EPD,False,VIC,B,M,,False,POINT (-94.53879399 39.056648002),9,10
61909,23061846,2023-09-10,03:25,State Warrant Arrest,,,NE VIVION RD and N NORTON AVE,KANSAS CITY,,SCP,False,SUS,W,M,24.0,False,POINT (-94.531870035 39.188020001),9,10


In [93]:
# Reported_Date is no longer needed now that the month and day have been derived from it
KC_crimes_tmp = KC_crimes_tmp.drop(columns=["Reported_Date"])
KC_crimes_tmp.head()

Unnamed: 0,Report_No,Reported_Time,Offense,IBRS,Description,Address,City,Zip Code,Area,DVFlag,Involvement,Race,Sex,Age,Fire Arm Used Flag,Location,crimeMonth,crimeDay
0,23000049,02:55,Casualty,,,1900 NE 49TH ST,KANSAS CITY,64118.0,SCP,False,VIC,B,M,37.0,False,POINT (-94.557504996 39.183924016),1,1
3862,23000188,20:25,Stolen Auto,240,Motor Vehicle Theft,10600 S JEFFERSON ST,KANSAS CITY,64114.0,SPD,False,CMP VIC,W,M,40.0,False,POINT (-94.599253645 38.935477958),1,1
3905,23000065,04:14,Domestic Violence Assault (Aggravated),13B,Simple Assault,6800 E 13TH ST,KANSAS CITY,64126.0,EPD,True,VIC,W,F,,False,POINT (-94.503343613 39.095555235),1,1
16237,23000224,23:16,Domestic Violence Assault (Aggravated),13A,Aggravated Assault,800 E 33RD ST,KANSAS CITY,64109.0,CPD,True,ARR CHA SUS,W,F,41.0,False,POINT (-94.575439 39.067178983),1,1
16224,23000099,10:23,Property Damage,290,Vandalism/Destruction of Property,1300 E 81ST TER,KANSAS CITY,64131.0,MPD,False,CMP VIC,W,F,27.0,False,POINT (-94.573644015 38.980798986),1,1


In [94]:
# Collect the labels of every column pandas treats as string-typed, then print one per line
string_columns = [
    column_name
    for column_name, column_values in KC_crimes_tmp.items()
    if pd.api.types.is_string_dtype(column_values)
]
for column_name in string_columns:
    print(column_name)

Reported_Time
Offense
IBRS
Description
Address
City
Area
Involvement
Race
Sex
Location
