In [223]:
import pandas as pd

### Request relevant columns from dataset:

In [224]:
# Only these columns are read from the raw CSV; everything else is ignored.
requested_columns = [
    "INCIDENT_DATE",
    "INCIDENT_NUMBER",
    "LOCATION_DISTRICT",
    "OFFENSE_DESCRIPTION",
    "WEAPON_TYPE",
    "INCIDENT_LOCATION",
    "ZIP",
    "LATITUDE",
    "LONGITUDE",
]
# Load the subset and use the incident number as the row index.
lrpd = pd.read_csv("../data/lrpd.csv", usecols=requested_columns).set_index("INCIDENT_NUMBER")

### Drop duplicate rows:

In [225]:
# Remember the shape before de-duplicating so the number of removed rows can be reported.
prev_shape = lrpd.shape
# DataFrame.duplicated / drop_duplicates compare column values only and ignore
# the index. INCIDENT_NUMBER is the index at this point, so it is temporarily
# turned back into a column for the comparison; otherwise two different
# incidents that happen to share every other field would be collapsed into one.
is_duplicate = lrpd.reset_index().duplicated(keep='last').to_numpy()
# Keep the last occurrence of each fully identical row (same as keep='last').
lrpd = lrpd.loc[~is_duplicate].copy()
new_shape = lrpd.shape

print(f'Dropped {(prev_shape[0] - new_shape[0])} duplicates!')

Dropped 5501 duplicates!


### Fill missing values with the number 0:

In [226]:
# Replace every missing value, in all columns, with the integer 0.
# NOTE(review): this also touches text and coordinate columns, so later cells
# see 0 / 0.0 placeholders (e.g. in INCIDENT_LOCATION and LATITUDE/LONGITUDE).
lrpd = lrpd.fillna(value=0)

### Map the number 0 and the string "1" to the UNKNOWN weapon type:

In [227]:
# Two placeholder values stand for "no weapon recorded": the integer 0 and the
# string "1". Both are rewritten to the label "UNKNOWN", one replacement after
# the other.
weapon_type = lrpd["WEAPON_TYPE"]
lrpd["WEAPON_TYPE"] = weapon_type.replace(0, "UNKNOWN").replace("1", "UNKNOWN")

### Convert columns to correct types:

In [228]:
# Columns that should hold numbers are converted one at a time.
for numeric_column in ("ZIP", "LATITUDE", "LONGITUDE", "LOCATION_DISTRICT"):
    lrpd[numeric_column] = pd.to_numeric(lrpd[numeric_column])

# Incident timestamps become proper datetime values.
lrpd["INCIDENT_DATE"] = pd.to_datetime(lrpd["INCIDENT_DATE"])

# Preview the first rows to confirm the conversions.
lrpd.head()

Unnamed: 0_level_0,INCIDENT_DATE,LOCATION_DISTRICT,OFFENSE_DESCRIPTION,WEAPON_TYPE,INCIDENT_LOCATION,ZIP,LATITUDE,LONGITUDE
INCIDENT_NUMBER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-092971,2021-08-04 10:21:00,72.0,THEFT FROM MOTOR VEHICLE,UNKNOWN,15601 KANIS RD,72204.0,0.0,0.0
2017-029450,2017-03-16 18:30:00,82.0,THEFT OF MOTOR VEHICLE PARTS,UNKNOWN,10801 IRONTON CUTOFF RD,72206.0,34.649362,-92.301959
2017-156453,2017-09-01 12:00:00,53.0,RAPE,UNKNOWN,0,0.0,0.0,0.0
2019-130717,2019-10-17 21:00:00,51.0,RAPE,UNKNOWN,0,0.0,0.0,0.0
2020-073313,2020-07-02 19:10:00,50.0,RAPE,UNKNOWN,0,0.0,0.0,0.0


In [229]:
# Distinct offence descriptions and weapon types present in the data, in order
# of first appearance.
all_crimes, all_weapons = (
    lrpd[column_name].unique()
    for column_name in ("OFFENSE_DESCRIPTION", "WEAPON_TYPE")
)

In [230]:
# Offence groupings used to label each incident as violent or non-violent.
#
# 'ALL OTHER LARCENY' belongs with the other larceny-theft offences
# (shoplifting, pocket-picking, theft from building, ...), which are all
# non-violent; it was previously listed as violent, inconsistent with its
# sibling categories.
# NOTE(review): 'BURGLARY/B&E' is kept as violent to preserve the existing
# labelling, although it is commonly classified as a property offence —
# confirm which definition this project intends.
violent_crimes = ['RAPE', 'AGGRAVATED ASSAULT', 'ROBBERY', 'BURGLARY/B&E',
                  'MURDER & NONNEGLIGENT MANSLAUGHTER']
nonviolent_crimes = ['THEFT FROM MOTOR VEHICLE', 'MOTOR VEHICLE THEFT', 'THEFT OF MOTOR VEHICLE PARTS',
                     'SHOPLIFTING', 'THEFT FROM BUILDING', 'POCKET-PICKING', 'THEFT FROM COIN-OPERATED MACHINE',
                     'PURSE-SNATCHING', 'ALL OTHER LARCENY']

In [231]:
def determine_crime_type(crime):
    """Return the crime-type label for a single offence description.

    The offence is looked up first in ``violent_crimes`` and then in
    ``nonviolent_crimes``; the label of the first list containing it is
    returned. Anything found in neither list yields 'Crime Type Unknown'.
    """
    labelled_groups = (
        ('Violent Crime', violent_crimes),
        ('Non-Violent Crime', nonviolent_crimes),
    )
    for label, offences in labelled_groups:
        if crime in offences:
            return label
    return 'Crime Type Unknown'

In [232]:
def determine_risk_type(crime):
    """Unimplemented placeholder: ignores ``crime`` and returns None.

    TODO: implement or remove; nothing in this notebook calls it.
    """
    return None

In [233]:
# Label every incident by passing its offence description through
# determine_crime_type, element by element.
lrpd["CRIME_TYPE"] = lrpd["OFFENSE_DESCRIPTION"].map(determine_crime_type)

In [234]:
# Row masks used by the risk rules below.
is_violent = lrpd['CRIME_TYPE'] == 'Violent Crime'
is_nonviolent = lrpd['CRIME_TYPE'] == 'Non-Violent Crime'
weapon_known = lrpd['WEAPON_TYPE'] != 'UNKNOWN'

# High risk: the offence is violent OR a weapon was recorded. This single rule
# already covers "violent with weapon" and "non-violent with weapon", so the
# two extra assignments that repeated those cases were removed as no-ops.
lrpd.loc[is_violent | weapon_known, "RISK_TYPE"] = "High Risk"
# Low risk: a non-violent offence with no recorded weapon.
lrpd.loc[is_nonviolent & ~weapon_known, "RISK_TYPE"] = "Low Risk"
# Rows labelled 'Crime Type Unknown' with no recorded weapon match neither rule
# and are left without a RISK_TYPE value (NaN), exactly as before.

In [235]:
# Show the labelled frame, now including CRIME_TYPE and RISK_TYPE; the cell
# output is a truncated preview.
lrpd

Unnamed: 0_level_0,INCIDENT_DATE,LOCATION_DISTRICT,OFFENSE_DESCRIPTION,WEAPON_TYPE,INCIDENT_LOCATION,ZIP,LATITUDE,LONGITUDE,CRIME_TYPE,RISK_TYPE
INCIDENT_NUMBER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-092971,2021-08-04 10:21:00,72.0,THEFT FROM MOTOR VEHICLE,UNKNOWN,15601 KANIS RD,72204.0,0.000000,0.000000,Non-Violent Crime,Low Risk
2017-029450,2017-03-16 18:30:00,82.0,THEFT OF MOTOR VEHICLE PARTS,UNKNOWN,10801 IRONTON CUTOFF RD,72206.0,34.649362,-92.301959,Non-Violent Crime,Low Risk
2017-156453,2017-09-01 12:00:00,53.0,RAPE,UNKNOWN,0,0.0,0.000000,0.000000,Violent Crime,High Risk
2019-130717,2019-10-17 21:00:00,51.0,RAPE,UNKNOWN,0,0.0,0.000000,0.000000,Violent Crime,High Risk
2020-073313,2020-07-02 19:10:00,50.0,RAPE,UNKNOWN,0,0.0,0.000000,0.000000,Violent Crime,High Risk
...,...,...,...,...,...,...,...,...,...,...
2018-066314,2018-05-31 17:50:00,70.0,THEFT FROM BUILDING,UNKNOWN,1601 N SHACKLEFORD RD,72211.0,34.767624,-92.395836,Non-Violent Crime,Low Risk
2020-040599,2020-04-14 07:30:00,60.0,THEFT FROM MOTOR VEHICLE,UNKNOWN,225 KEIGHTLEY DR,72207.0,0.000000,0.000000,Non-Violent Crime,Low Risk
2020-091056,2020-08-10 11:43:00,71.0,AGGRAVATED ASSAULT,"PERSONAL WEAPONS (HANDS, FISTS, ETC)",501 NAPA VALLEY DR,72207.0,34.767513,-92.350911,Violent Crime,High Risk
2021-068606,2021-06-15 14:38:00,71.0,THEFT FROM MOTOR VEHICLE,UNKNOWN,13121 SAINT CHARLES BLVD,72211.0,34.758569,-92.419065,Non-Violent Crime,Low Risk


In [236]:
# Write the cleaned, labelled frame to disk. The INCIDENT_NUMBER index is
# written as the first CSV column.
lrpd.to_csv("../data/lrpd-clean.csv", index=True)