In [1]:
import pandas as pd

### Request relevant columns from dataset:

In [2]:
original = pd.read_csv("../data/lrpd.csv")

requested_columns = ["INCIDENT_DATE", "INCIDENT_NUMBER", "LOCATION_DISTRICT", "OFFENSE_DESCRIPTION", "WEAPON_TYPE", "ZIP", "LATITUDE", "LONGITUDE"]
lrpd = pd.read_csv("../data/lrpd.csv", usecols=requested_columns)
lrpd.set_index("INCIDENT_NUMBER", inplace=True)

original.shape

(88202, 14)

### Drop duplicate rows:

In [20]:
prev_shape = lrpd.shape
lrpd = lrpd.drop_duplicates(keep='last')
new_shape = lrpd.shape

print(f'Dropped {(prev_shape[0] - new_shape[0])} duplicates!')

lrpd.shape

Dropped 0 duplicates!


(82691, 15)

### Fill missing values with Number 0:

In [4]:
lrpd = lrpd.fillna(0)

### Parse Number 0 and String 1 to UNKNOWN weapon type:

In [5]:
lrpd["WEAPON_TYPE"] = lrpd["WEAPON_TYPE"].replace(0, "NO WEAPON")
lrpd["WEAPON_TYPE"] = lrpd["WEAPON_TYPE"].replace("1", "UNKNOWN")

In [6]:
lrpd.head()

Unnamed: 0_level_0,INCIDENT_DATE,LOCATION_DISTRICT,OFFENSE_DESCRIPTION,WEAPON_TYPE,ZIP,LATITUDE,LONGITUDE
INCIDENT_NUMBER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-092971,08/04/2021 10:21:00 AM,72.0,THEFT FROM MOTOR VEHICLE,NO WEAPON,72204.0,0.0,0.0
2017-029450,03/16/2017 06:30:00 PM,82.0,THEFT OF MOTOR VEHICLE PARTS,NO WEAPON,72206.0,34.649362,-92.301959
2017-156453,09/01/2017 12:00:00 PM,53.0,RAPE,NO WEAPON,0.0,0.0,0.0
2019-130717,10/17/2019 09:00:00 PM,51.0,RAPE,NO WEAPON,0.0,0.0,0.0
2020-073313,07/02/2020 07:10:00 PM,50.0,RAPE,NO WEAPON,0.0,0.0,0.0


### Convert columns to correct types:

In [7]:
lrpd["ZIP"] = pd.to_numeric(lrpd["ZIP"])
lrpd["LATITUDE"] = pd.to_numeric(lrpd["LATITUDE"])
lrpd["LONGITUDE"] = pd.to_numeric(lrpd["LONGITUDE"])
lrpd["INCIDENT_DATE"] = pd.to_datetime(lrpd["INCIDENT_DATE"])
lrpd["LOCATION_DISTRICT"] = pd.to_numeric(lrpd["LOCATION_DISTRICT"])

lrpd.head().T

INCIDENT_NUMBER,2021-092971,2017-029450,2017-156453,2019-130717,2020-073313
INCIDENT_DATE,2021-08-04 10:21:00,2017-03-16 18:30:00,2017-09-01 12:00:00,2019-10-17 21:00:00,2020-07-02 19:10:00
LOCATION_DISTRICT,72.0,82.0,53.0,51.0,50.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON
ZIP,72204.0,72206.0,0.0,0.0,0.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0


In [8]:
lrpd["WEEK_OF_MONTH"] = lrpd["INCIDENT_DATE"].dt.day // 7
lrpd["YEAR"] = lrpd["INCIDENT_DATE"].dt.year
lrpd["DAY"] = lrpd["INCIDENT_DATE"].dt.day
lrpd["DAY_OF_YEAR"] = lrpd["INCIDENT_DATE"].dt.dayofyear
lrpd["MONTH"] = lrpd["INCIDENT_DATE"].dt.month

lrpd.head().T

INCIDENT_NUMBER,2021-092971,2017-029450,2017-156453,2019-130717,2020-073313
INCIDENT_DATE,2021-08-04 10:21:00,2017-03-16 18:30:00,2017-09-01 12:00:00,2019-10-17 21:00:00,2020-07-02 19:10:00
LOCATION_DISTRICT,72.0,82.0,53.0,51.0,50.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON
ZIP,72204.0,72206.0,0.0,0.0,0.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0
WEEK_OF_MONTH,0,2,0,2,0
YEAR,2021,2017,2017,2019,2020
DAY,4,16,1,17,2


### Crime Risk Analysis

In [9]:
all_crimes = lrpd["OFFENSE_DESCRIPTION"].unique()
all_weapons = lrpd["WEAPON_TYPE"].unique()

In [10]:
violent_crimes = ['RAPE', 'AGGRAVATED ASSAULT', 'ALL OTHER LARCENY', 'ROBBERY', 'BURGLARY/B&E',
                  'MURDER & NONNEGLIGENT MANSLAUGHTER']
nonviolent_crimes = ['THEFT FROM MOTOR VEHICLE', 'MOTOR VEHICLE THEFT', 'THEFT OF MOTOR VEHICLE PARTS',
                     'SHOPLIFTING', 'THEFT FROM BUILDING', 'POCKET-PICKING', 'THEFT FROM COIN-OPERATED MACHINE',
                     'PURSE-SNATCHING']

In [11]:
def determine_crime_type(crime):
    if crime in violent_crimes:
        return 'Violent Crime'
    elif crime in nonviolent_crimes:
        return 'Non-Violent Crime'
    else:
        return 'Crime Type Unknown'

In [12]:
def determine_risk_type(crime):
    print(crime)

In [14]:
lrpd["CRIME_TYPE"] = lrpd["OFFENSE_DESCRIPTION"].apply(determine_crime_type)

In [15]:
# Risk ASSESSEMENT
lrpd.loc[(lrpd['CRIME_TYPE'] == 'Violent Crime') | (
        lrpd['WEAPON_TYPE'] != 'NO WEAPON'), "RISK_TYPE"] = "High Risk"
lrpd.loc[(lrpd['CRIME_TYPE'] == 'Violent Crime') & (
        lrpd['CRIME_TYPE'] != 'NO WEAPON'), "RISK_TYPE"] = "High Risk"
lrpd.loc[(lrpd['CRIME_TYPE'] == 'Non-Violent Crime') & (
        lrpd['WEAPON_TYPE'] != 'NO WEAPON'), "RISK_TYPE"] = "High Risk"
lrpd.loc[(lrpd['CRIME_TYPE'] == 'Non-Violent Crime') & (
        lrpd['WEAPON_TYPE'] == 'NO WEAPON'), "RISK_TYPE"] = "Low Risk"

lrpd[:20].T

INCIDENT_NUMBER,2021-092971,2017-029450,2017-156453,2019-130717,2020-073313,2020-103105,2020-127857,2021-002311,2021-113605,2021-135637,2022-014768,2022-301030,2022-302467,2021-027276,2020-121047,2021-107177,2018-126330,2017-158478,2017-012201,2017-122878
INCIDENT_DATE,2021-08-04 10:21:00,2017-03-16 18:30:00,2017-09-01 12:00:00,2019-10-17 21:00:00,2020-07-02 19:10:00,2020-09-05 23:29:00,2020-11-01 00:09:00,2021-01-10 22:08:00,2021-09-16 14:37:00,2021-11-04 01:22:00,2022-02-07 10:58:00,2022-04-22 20:30:00,2022-09-03 16:45:00,2021-03-17 18:12:00,2020-10-16 12:23:00,2021-09-02 18:35:00,2018-10-03 12:44:00,2017-12-17 20:00:00,2017-02-01 07:10:00,2017-10-01 19:32:00
LOCATION_DISTRICT,72.0,82.0,53.0,51.0,50.0,40.0,54.0,71.0,92.0,80.0,80.0,62.0,39.0,82.0,93.0,83.0,92.0,91.0,80.0,90.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE,RAPE,RAPE,RAPE,RAPE,ALL OTHER LARCENY,RAPE,ALL OTHER LARCENY,SHOPLIFTING,THEFT OF MOTOR VEHICLE PARTS,ALL OTHER LARCENY,ROBBERY,ROBBERY,THEFT OF MOTOR VEHICLE PARTS,BURGLARY/B&E,THEFT FROM BUILDING
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,"PERSONAL WEAPONS (HANDS, FISTS, ETC)",UNKNOWN,UNKNOWN,"PERSONAL WEAPONS (HANDS, FISTS, ETC)",NO WEAPON,UNKNOWN,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,FIREARM,KNIFE/CUTTING INSTRUMENT,NO WEAPON,NO WEAPON,NO WEAPON
ZIP,72204.0,72206.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72211.0,72202.0,72209.0,72210.0,72209.0,72103.0,72209.0,72209.0,72209.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34.673893,34.652869,34.667065,34.627232,34.673161,34.692613,34.679524
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-92.334594,-92.43048,-92.35237,-92.390674,-92.372574,-92.335765,-92.353965
WEEK_OF_MONTH,0,2,0,2,0,0,0,1,2,0,1,3,0,2,2,0,0,2,0,0
YEAR,2021,2017,2017,2019,2020,2020,2020,2021,2021,2021,2022,2022,2022,2021,2020,2021,2018,2017,2017,2017
DAY,4,16,1,17,2,5,1,10,16,4,7,22,3,17,16,2,3,17,1,1


### Binary Classification

In [17]:
lrpd['RISK_TYPE_BC'] = lrpd["RISK_TYPE"].map({'High Risk': 1, 'Low Risk': 0})
lrpd[:20].T

INCIDENT_NUMBER,2021-092971,2017-029450,2017-156453,2019-130717,2020-073313,2020-103105,2020-127857,2021-002311,2021-113605,2021-135637,2022-014768,2022-301030,2022-302467,2021-027276,2020-121047,2021-107177,2018-126330,2017-158478,2017-012201,2017-122878
INCIDENT_DATE,2021-08-04 10:21:00,2017-03-16 18:30:00,2017-09-01 12:00:00,2019-10-17 21:00:00,2020-07-02 19:10:00,2020-09-05 23:29:00,2020-11-01 00:09:00,2021-01-10 22:08:00,2021-09-16 14:37:00,2021-11-04 01:22:00,2022-02-07 10:58:00,2022-04-22 20:30:00,2022-09-03 16:45:00,2021-03-17 18:12:00,2020-10-16 12:23:00,2021-09-02 18:35:00,2018-10-03 12:44:00,2017-12-17 20:00:00,2017-02-01 07:10:00,2017-10-01 19:32:00
LOCATION_DISTRICT,72.0,82.0,53.0,51.0,50.0,40.0,54.0,71.0,92.0,80.0,80.0,62.0,39.0,82.0,93.0,83.0,92.0,91.0,80.0,90.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE,RAPE,RAPE,RAPE,RAPE,ALL OTHER LARCENY,RAPE,ALL OTHER LARCENY,SHOPLIFTING,THEFT OF MOTOR VEHICLE PARTS,ALL OTHER LARCENY,ROBBERY,ROBBERY,THEFT OF MOTOR VEHICLE PARTS,BURGLARY/B&E,THEFT FROM BUILDING
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,"PERSONAL WEAPONS (HANDS, FISTS, ETC)",UNKNOWN,UNKNOWN,"PERSONAL WEAPONS (HANDS, FISTS, ETC)",NO WEAPON,UNKNOWN,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,FIREARM,KNIFE/CUTTING INSTRUMENT,NO WEAPON,NO WEAPON,NO WEAPON
ZIP,72204.0,72206.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72211.0,72202.0,72209.0,72210.0,72209.0,72103.0,72209.0,72209.0,72209.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34.673893,34.652869,34.667065,34.627232,34.673161,34.692613,34.679524
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-92.334594,-92.43048,-92.35237,-92.390674,-92.372574,-92.335765,-92.353965
WEEK_OF_MONTH,0,2,0,2,0,0,0,1,2,0,1,3,0,2,2,0,0,2,0,0
YEAR,2021,2017,2017,2019,2020,2020,2020,2021,2021,2021,2022,2022,2022,2021,2020,2021,2018,2017,2017,2017
DAY,4,16,1,17,2,5,1,10,16,4,7,22,3,17,16,2,3,17,1,1


### Saving Changes

In [18]:
# Save the cleaned dataset.
lrpd.to_csv("../data/lrpd-clean.csv")

In [19]:
lrpd.shape

(82691, 15)