In [52]:
import pandas as pd
import numpy as np
from pathlib import Path
import sklearn 
import matplotlib.pyplot as plt

In [53]:
# Load the 2023 KCPD crime data CSV into a pandas DataFrame
KC_crimes = pd.read_csv(
    filepath_or_buffer=Path("./Resources/KCPD_Crime_Data_2023.csv"),
)

# Preview the first five rows of the loaded data
KC_crimes.head(n=5)

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Offense,IBRS,Description,Address,City,Zip Code,Area,DVFlag,Involvement,Race,Sex,Age,Fire Arm Used Flag,Location
0,KC23000049,01/01/2023,02:55,Casualty,,,1900 NE 49TH ST,KANSAS CITY,64118.0,SCP,False,VIC,B,M,37.0,False,POINT (-94.557504996 39.183924016)
1,KC23000346,01/02/2023,14:00,Assault (Aggravated),13A,Aggravated Assault,I 35 HWY and PASEO,KANSAS CITY,64106.0,CPD,False,SUS,W,M,,False,POINT (-94.567090038 39.063079981)
2,KC23000490,01/03/2023,07:39,Stealing - Shoplift,23C,Shoplifting,3800 INDEPENDENCE AVE,KANSAS CITY,64124.0,EPD,False,VIC,,,,False,POINT (-94.537153514 39.105905137)
3,KC23000819,01/04/2023,15:23,Loss,,,7600 MONROE AVE,KANSAS CITY,64128.0,MPD,False,VIC,B,M,28.0,False,POINT (-94.540035187 39.073080307)
4,KC23001555,01/07/2023,23:10,Property Damage,120,Robbery,3000 VAN BRUNT BLVD,KANSAS CITY,64128.0,EPD,False,VIC,,,,True,POINT (-94.520866347 39.070814751)


In [56]:
# Summarize the DataFrame: column names, non-null counts and dtypes
KC_crimes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75292 entries, 0 to 75291
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Report_No           75292 non-null  object 
 1   Reported_Date       75292 non-null  object 
 2   Reported_Time       75292 non-null  object 
 3   Offense             75292 non-null  object 
 4   IBRS                68096 non-null  object 
 5   Description         68096 non-null  object 
 6   Address             75292 non-null  object 
 7   City                75292 non-null  object 
 8   Zip Code            70894 non-null  float64
 9   Area                75292 non-null  object 
 10  DVFlag              75292 non-null  bool   
 11  Involvement         75292 non-null  object 
 12  Race                64700 non-null  object 
 13  Sex                 65587 non-null  object 
 14  Age                 54604 non-null  float64
 15  Fire Arm Used Flag  75292 non-null  bool   
 16  Location            75267 non-null  object 

In [57]:
# Count the missing values in each column
KC_crimes.isna().sum(axis=0)

Report_No                 0
Reported_Date             0
Reported_Time             0
Offense                   0
IBRS                   7196
Description            7196
Address                   0
City                      0
Zip Code               4398
Area                      0
DVFlag                    0
Involvement               0
Race                  10592
Sex                    9705
Age                   20688
Fire Arm Used Flag        0
Location                 25
dtype: int64

In [61]:
# Read in the CSV file again and convert Reported_Date to datetime64.
# The column is converted after loading with an explicit MM/DD/YYYY format
# (raw values look like "01/07/2023"). read_csv's parse_dates infers the format
# and hands the column back unchanged as object dtype when a value cannot be
# parsed; to_datetime with a fixed format raises on a malformed date instead.
KC_crimes = pd.read_csv(Path("./Resources/KCPD_Crime_Data_2023.csv"))
KC_crimes["Reported_Date"] = pd.to_datetime(
    KC_crimes["Reported_Date"], format="%m/%d/%Y"
)
KC_crimes.head()

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Offense,IBRS,Description,Address,City,Zip Code,Area,DVFlag,Involvement,Race,Sex,Age,Fire Arm Used Flag,Location
0,KC23000049,2023-01-01,02:55,Casualty,,,1900 NE 49TH ST,KANSAS CITY,64118.0,SCP,False,VIC,B,M,37.0,False,POINT (-94.557504996 39.183924016)
1,KC23000346,2023-01-02,14:00,Assault (Aggravated),13A,Aggravated Assault,I 35 HWY and PASEO,KANSAS CITY,64106.0,CPD,False,SUS,W,M,,False,POINT (-94.567090038 39.063079981)
2,KC23000490,2023-01-03,07:39,Stealing - Shoplift,23C,Shoplifting,3800 INDEPENDENCE AVE,KANSAS CITY,64124.0,EPD,False,VIC,,,,False,POINT (-94.537153514 39.105905137)
3,KC23000819,2023-01-04,15:23,Loss,,,7600 MONROE AVE,KANSAS CITY,64128.0,MPD,False,VIC,B,M,28.0,False,POINT (-94.540035187 39.073080307)
4,KC23001555,2023-01-07,23:10,Property Damage,120,Robbery,3000 VAN BRUNT BLVD,KANSAS CITY,64128.0,EPD,False,VIC,,,,True,POINT (-94.520866347 39.070814751)


In [49]:
# Check the dtype of the Reported_Date column after parsing
KC_crimes["Reported_Date"].dtype

dtype('<M8[ns]')

In [66]:
# Inspect the report dates of the last 20 rows
KC_crimes["Reported_Date"].tail(n=20)

75272   2023-06-30
75273   2023-08-01
75274   2023-07-25
75275   2023-08-12
75276   2023-08-18
75277   2023-06-10
75278   2023-07-07
75279   2023-08-05
75280   2023-06-29
75281   2023-06-26
75282   2023-07-30
75283   2023-06-19
75284   2023-06-23
75285   2023-06-15
75286   2023-07-22
75287   2023-06-08
75288   2023-07-25
75289   2023-08-09
75290   2023-09-08
75291   2023-07-27
Name: Reported_Date, dtype: datetime64[ns]

In [69]:
# Sort the DataFrame by Reported_Date, oldest first.
# The name is rebound to the sorted result instead of using inplace=True, so no
# existing object is mutated; the original index labels are kept.
KC_crimes = KC_crimes.sort_values(by=["Reported_Date"], ascending=True)

# Show the report dates of the last 100 rows of the sorted frame
KC_crimes.Reported_Date.tail(100)


70092   2023-09-10
55584   2023-09-10
70071   2023-09-10
65769   2023-09-10
60680   2023-09-10
           ...    
56026   2023-09-10
69794   2023-09-10
61390   2023-09-10
71162   2023-09-10
61909   2023-09-10
Name: Reported_Date, Length: 100, dtype: datetime64[ns]

In [73]:
# Take an independent (deep) copy of the DataFrame; edits are made on the copy
KC_crimes_tmp = KC_crimes.copy(deep=True)

### Add datetime features derived from the Reported_Date column

In [80]:
# Year component of every report date
KC_crimes_tmp["Reported_Date"].dt.year

0        2023
3862     2023
3905     2023
9275     2023
16224    2023
         ... 
56026    2023
69794    2023
61390    2023
71162    2023
61909    2023
Name: Reported_Date, Length: 75292, dtype: int64

In [82]:
# Find the columns which contain strings.
# Object dtype is accepted explicitly: from pandas 2.0 on, is_string_dtype
# returns False for an object column whose elements are not all inferred as
# strings, which includes columns with missing values (NaN). Relying on it alone
# would skip string columns that have gaps.
for label, content in KC_crimes_tmp.items():
    is_text_column = (
        pd.api.types.is_object_dtype(content)
        or pd.api.types.is_string_dtype(content)
    )
    if is_text_column:
        print(label)

Report_No
Reported_Time
Offense
IBRS
Description
Address
City
Area
Involvement
Race
Sex
Location
