In [115]:
import pandas as pd

### Read in the original DataSet for comparsion:

In [116]:
original = pd.read_csv("../data/lrpd.csv")
print(f"Successfully read in Original Little Rock Crime Data. DataFrame Size: {original.shape[0]} rows x {original.shape[1]} cols")

Successfully read in Original Little Rock Crime Data. DataFrame Size: 88202 rows x 14 cols


### Request relevant columns from dataset:

In [117]:
requested_columns = ["INCIDENT_DATE", "INCIDENT_NUMBER", "LOCATION_DISTRICT", "OFFENSE_DESCRIPTION", "WEAPON_TYPE", "ZIP", "LATITUDE", "LONGITUDE"]

lrpd = pd.read_csv("../data/lrpd.csv", usecols=requested_columns, index_col="INCIDENT_NUMBER")

print(f"Successfully read in Little Rock Crime Data. DataFrame Size: {lrpd.shape[0]} rows x {lrpd.shape[1]} cols")

Successfully read in Little Rock Crime Data. DataFrame Size: 88202 rows x 7 cols


### Drop duplicate rows based on index values:

In [118]:
prev_shape = lrpd.shape
lrpd = lrpd.drop_duplicates(keep='first')
new_shape = lrpd.shape

print(f'Dropped {(prev_shape[0] - new_shape[0])} duplicates!')
print(f'Little Rock Crime Data DataFrame Size: {lrpd.shape[0]} rows x {lrpd.shape[1]} cols')

Dropped 5511 duplicates!
Little Rock Crime Data DataFrame Size: 82691 rows x 7 cols


### Fill missing values with Number 0:

In [119]:
lrpd = lrpd.fillna(0)

### Parse Number 0 and String 1 to weapon type:

In [120]:
lrpd["WEAPON_TYPE"] = lrpd["WEAPON_TYPE"].replace(0, "NO WEAPON")
lrpd["WEAPON_TYPE"] = lrpd["WEAPON_TYPE"].replace("1", "UNKNOWN")

### Convert columns to correct types:

In [121]:
lrpd["INCIDENT_DATE"] = pd.to_datetime(lrpd["INCIDENT_DATE"])
lrpd["LOCATION_DISTRICT"] = pd.to_numeric(lrpd["LOCATION_DISTRICT"])
lrpd["OFFENSE_DESCRIPTION"] = lrpd["OFFENSE_DESCRIPTION"] # todo: ensure this is parsed as a string
lrpd["WEAPON_TYPE"] = lrpd["WEAPON_TYPE"] # todo: ensure this is parsed as a string
lrpd["ZIP"] = pd.to_numeric(lrpd["ZIP"])
lrpd["LATITUDE"] = pd.to_numeric(lrpd["LATITUDE"])
lrpd["LONGITUDE"] = pd.to_numeric(lrpd["LONGITUDE"])

lrpd.head().T

INCIDENT_NUMBER,2021-092971,2017-029450,2017-156453,2019-130717,2020-073313
INCIDENT_DATE,2021-08-04 10:21:00,2017-03-16 18:30:00,2017-09-01 12:00:00,2019-10-17 21:00:00,2020-07-02 19:10:00
LOCATION_DISTRICT,72.0,82.0,53.0,51.0,50.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON
ZIP,72204.0,72206.0,0.0,0.0,0.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0


### Appending new columns to DataFrame:

In [122]:
lrpd["WEEK_OF_MONTH"] = lrpd["INCIDENT_DATE"].dt.day // 7
lrpd["YEAR"] = lrpd["INCIDENT_DATE"].dt.year
lrpd["DAY"] = lrpd["INCIDENT_DATE"].dt.day
lrpd["DAY_OF_YEAR"] = lrpd["INCIDENT_DATE"].dt.dayofyear
lrpd["MONTH"] = lrpd["INCIDENT_DATE"].dt.month

### Visualizing current modifications:

In [123]:
lrpd.head().T

INCIDENT_NUMBER,2021-092971,2017-029450,2017-156453,2019-130717,2020-073313
INCIDENT_DATE,2021-08-04 10:21:00,2017-03-16 18:30:00,2017-09-01 12:00:00,2019-10-17 21:00:00,2020-07-02 19:10:00
LOCATION_DISTRICT,72.0,82.0,53.0,51.0,50.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON
ZIP,72204.0,72206.0,0.0,0.0,0.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0
WEEK_OF_MONTH,0,2,0,2,0
YEAR,2021,2017,2017,2019,2020
DAY,4,16,1,17,2


In [124]:
lrpd.tail().T

INCIDENT_NUMBER,2018-066314,2020-040599,2020-091056,2021-068606,2017-123107
INCIDENT_DATE,2018-05-31 17:50:00,2020-04-14 07:30:00,2020-08-10 11:43:00,2021-06-15 14:38:00,2017-10-02 10:10:00
LOCATION_DISTRICT,70.0,60.0,71.0,71.0,82.0
OFFENSE_DESCRIPTION,THEFT FROM BUILDING,THEFT FROM MOTOR VEHICLE,AGGRAVATED ASSAULT,THEFT FROM MOTOR VEHICLE,THEFT FROM BUILDING
WEAPON_TYPE,NO WEAPON,NO WEAPON,"PERSONAL WEAPONS (HANDS, FISTS, ETC)",NO WEAPON,NO WEAPON
ZIP,72211.0,72207.0,72207.0,72211.0,72202.0
LATITUDE,34.767624,0.0,34.767513,34.758569,34.765292
LONGITUDE,-92.395836,0.0,-92.350911,-92.419065,-92.310104
WEEK_OF_MONTH,4,2,1,2,0
YEAR,2018,2020,2020,2021,2017
DAY,31,14,10,15,2


### Determining crime risk and violence level:

In [125]:
all_crimes = lrpd["OFFENSE_DESCRIPTION"].unique()
all_weapons = lrpd["WEAPON_TYPE"].unique()

Pre-defining which crimes are considered violent and nonviolent:

In [126]:
violent_crimes = ['RAPE', 'AGGRAVATED ASSAULT', 'ALL OTHER LARCENY', 'ROBBERY', 'BURGLARY/B&E',
                  'MURDER & NONNEGLIGENT MANSLAUGHTER']
nonviolent_crimes = ['THEFT FROM MOTOR VEHICLE', 'MOTOR VEHICLE THEFT', 'THEFT OF MOTOR VEHICLE PARTS',
                     'SHOPLIFTING', 'THEFT FROM BUILDING', 'POCKET-PICKING', 'THEFT FROM COIN-OPERATED MACHINE',
                     'PURSE-SNATCHING']

In [127]:
def determine_crime_type(crime):
    if crime in violent_crimes:
        return 'Violent Crime'
    elif crime in nonviolent_crimes:
        return 'Non-Violent Crime'
    else:
        return 'Crime Type Unknown'

In [128]:
def determine_risk_type(crime):
    print(crime)

In [129]:
lrpd["CRIME_TYPE"] = lrpd["OFFENSE_DESCRIPTION"].apply(determine_crime_type)

In [130]:
# Risk ASSESSEMENT
lrpd.loc[(lrpd['CRIME_TYPE'] == 'Violent Crime') | (
        lrpd['WEAPON_TYPE'] != 'NO WEAPON'), "RISK_TYPE"] = "High Risk"

lrpd.loc[(lrpd['CRIME_TYPE'] == 'Violent Crime') & (
        lrpd['CRIME_TYPE'] != 'NO WEAPON'), "RISK_TYPE"] = "High Risk"

lrpd.loc[(lrpd['CRIME_TYPE'] == 'Non-Violent Crime') & (
        lrpd['WEAPON_TYPE'] != 'NO WEAPON'), "RISK_TYPE"] = "High Risk"

lrpd.loc[(lrpd['CRIME_TYPE'] == 'Non-Violent Crime') & (
        lrpd['WEAPON_TYPE'] == 'NO WEAPON'), "RISK_TYPE"] = "Low Risk"

In [131]:
lrpd['RISK_TYPE_BC'] = lrpd["RISK_TYPE"].map({'High Risk': 1, 'Low Risk': 0})

### Visualizing current modifications:

In [132]:
lrpd.head().T

INCIDENT_NUMBER,2021-092971,2017-029450,2017-156453,2019-130717,2020-073313
INCIDENT_DATE,2021-08-04 10:21:00,2017-03-16 18:30:00,2017-09-01 12:00:00,2019-10-17 21:00:00,2020-07-02 19:10:00
LOCATION_DISTRICT,72.0,82.0,53.0,51.0,50.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON
ZIP,72204.0,72206.0,0.0,0.0,0.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0
WEEK_OF_MONTH,0,2,0,2,0
YEAR,2021,2017,2017,2019,2020
DAY,4,16,1,17,2


In [133]:
lrpd.tail().T

INCIDENT_NUMBER,2018-066314,2020-040599,2020-091056,2021-068606,2017-123107
INCIDENT_DATE,2018-05-31 17:50:00,2020-04-14 07:30:00,2020-08-10 11:43:00,2021-06-15 14:38:00,2017-10-02 10:10:00
LOCATION_DISTRICT,70.0,60.0,71.0,71.0,82.0
OFFENSE_DESCRIPTION,THEFT FROM BUILDING,THEFT FROM MOTOR VEHICLE,AGGRAVATED ASSAULT,THEFT FROM MOTOR VEHICLE,THEFT FROM BUILDING
WEAPON_TYPE,NO WEAPON,NO WEAPON,"PERSONAL WEAPONS (HANDS, FISTS, ETC)",NO WEAPON,NO WEAPON
ZIP,72211.0,72207.0,72207.0,72211.0,72202.0
LATITUDE,34.767624,0.0,34.767513,34.758569,34.765292
LONGITUDE,-92.395836,0.0,-92.350911,-92.419065,-92.310104
WEEK_OF_MONTH,4,2,1,2,0
YEAR,2018,2020,2020,2021,2017
DAY,31,14,10,15,2


### Saving the modified DataSet to a new CSV:

In [134]:
lrpd.to_csv("../data/lrpd-clean.csv")

In [135]:
lrpd.shape

(82691, 15)