# Police Use Of Force Script

## Import Dependencies

In [1]:
import pandas as pd
import datetime as dt
import numpy as np

## Load Source Data CSV

In [2]:
#load csv into dataframe
load_csv = ("../Source_Data/Police_Use_of_Force_Source_Data.csv")

## Read In Source Data CSV
#### Produce Head Of DataFrame

In [3]:
source_data_df = pd.read_csv(load_csv)
source_data_df.head()

Unnamed: 0,X,Y,PoliceUseOfForceID,CaseNumber,ResponseDate,Problem,Is911Call,PrimaryOffense,SubjectInjury,ForceReportNumber,...,TotalCityCallsForYear,TotalPrecinctCallsForYear,TotalNeighborhoodCallsForYear,CenterGBSID,CenterLatitude,CenterLongitude,CenterX,CenterY,DateAdded,OBJECTID
0,-93.291183,45.018526,11224233,08-005343,2008/01/05 22:57:23+00,Suspicious Person,No,FLEEFT,Yes,1,...,322402,80434.0,4226,18266,45.018526,-93.291183,-10385126.96,5624438.454,2020/06/02 08:18:33+00,1
1,-93.265782,44.943185,11224234,08-010291,2008/01/10 22:21:53+00,Motor Vehicle Chase,No,FLEE,Yes,1,...,322402,84018.0,10923,11907,44.943185,-93.265782,-10382299.34,5612581.534,2020/06/02 08:18:33+00,2
2,-93.268029,44.979702,11224235,08-024156,2008/01/25 18:04:32+00,Disturbance,No,DISCON,No,1,...,322402,46998.0,23458,18612,44.979702,-93.268029,-10382549.52,5618326.52,2020/06/02 08:18:33+00,3
3,-93.271954,44.982469,11224236,08-025819,2008/01/27 02:44:26+00,Disturbance,No,DISCON,,1,...,322402,46998.0,23458,17873,44.982469,-93.271954,-10382986.45,5618762.022,2020/06/02 08:18:33+00,4
4,-93.277025,44.978019,11224237,08-039928,2008/02/10 04:54:24+00,Traffic Law Enforcement,No,FLEE,Yes,3,...,322402,46998.0,23458,25506,44.978019,-93.277025,-10383550.97,5618061.726,2020/06/02 08:18:33+00,5


## Drop Unwanted Fields From DataFrame
#### Produce Head Of DataFrame

In [4]:
edited_source_data_df = source_data_df.drop(columns = {"X", "Y", "Is911Call", "ForceReportNumber",
                                  "SubjectRoleNumber", "TotalCityCallsForYear",
                                  "TotalPrecinctCallsForYear", "TotalNeighborhoodCallsForYear",
                                  "CenterGBSID", "CenterLatitude", "CenterLongitude",
                                  "CenterX", "CenterY", "DateAdded", "OBJECTID"})

edited_source_data_df.head(5)

Unnamed: 0,PoliceUseOfForceID,CaseNumber,ResponseDate,Problem,PrimaryOffense,SubjectInjury,SubjectRole,ForceType,ForceTypeAction,Race,Sex,EventAge,TypeOfResistance,Precinct,Neighborhood
0,11224233,08-005343,2008/01/05 22:57:23+00,Suspicious Person,FLEEFT,Yes,A,Bodily Force,Punches,Black,Male,29.0,Fled on Foot,4,McKinley
1,11224234,08-010291,2008/01/10 22:21:53+00,Motor Vehicle Chase,FLEE,Yes,A,Bodily Force,Punches,White,Male,22.0,Fled in Vehicle,3,Central
2,11224235,08-024156,2008/01/25 18:04:32+00,Disturbance,DISCON,No,A,Taser,Firing Darts,Black,Male,40.0,Commission of Crime,1,Downtown West
3,11224236,08-025819,2008/01/27 02:44:26+00,Disturbance,DISCON,,A,Chemical Irritant,Crowd Control Mace,Black,Male,29.0,Commission of Crime,1,Downtown West
4,11224237,08-039928,2008/02/10 04:54:24+00,Traffic Law Enforcement,FLEE,Yes,A,Bodily Force,Knees,Black,Male,28.0,Fled in Vehicle,1,Downtown West


## Rename Fields To Match ERD
#### Produce Head Of DataFrame

In [5]:
renamed_columns_df = edited_source_data_df.rename(columns={'PoliceUseOfForceID': 'police_use_of_force_id',
                                                          'CaseNumber': 'case_number', 'ResponseDate': 'response_date',
                                                          'PrimaryOffense': 'primary_offense', 'SubjectInjury': 'subject_injury',
                                                          'SubjectRole': 'subject_role', 'ForceType': 'police_use_of_force_type',
                                                          'ForceTypeAction': 'force_type_action', 'Race': 'subject_race', 
                                                          'Sex': 'subject_sex', 'EventAge': 'subject_age', 'TypeOfResistance': 'type_of_resistance',
                                                          'Problem': 'problem', 'Precinct': 'precinct', 'Neighborhood': 'neighborhood'})

renamed_columns_df.head(5)

Unnamed: 0,police_use_of_force_id,case_number,response_date,problem,primary_offense,subject_injury,subject_role,police_use_of_force_type,force_type_action,subject_race,subject_sex,subject_age,type_of_resistance,precinct,neighborhood
0,11224233,08-005343,2008/01/05 22:57:23+00,Suspicious Person,FLEEFT,Yes,A,Bodily Force,Punches,Black,Male,29.0,Fled on Foot,4,McKinley
1,11224234,08-010291,2008/01/10 22:21:53+00,Motor Vehicle Chase,FLEE,Yes,A,Bodily Force,Punches,White,Male,22.0,Fled in Vehicle,3,Central
2,11224235,08-024156,2008/01/25 18:04:32+00,Disturbance,DISCON,No,A,Taser,Firing Darts,Black,Male,40.0,Commission of Crime,1,Downtown West
3,11224236,08-025819,2008/01/27 02:44:26+00,Disturbance,DISCON,,A,Chemical Irritant,Crowd Control Mace,Black,Male,29.0,Commission of Crime,1,Downtown West
4,11224237,08-039928,2008/02/10 04:54:24+00,Traffic Law Enforcement,FLEE,Yes,A,Bodily Force,Knees,Black,Male,28.0,Fled in Vehicle,1,Downtown West


## Check If response_date is datetime

In [6]:
# Use .dtypes to figure out if response_date is datetime
renamed_columns_df.dtypes

police_use_of_force_id        int64
case_number                  object
response_date                object
problem                      object
primary_offense              object
subject_injury               object
subject_role                 object
police_use_of_force_type     object
force_type_action            object
subject_race                 object
subject_sex                  object
subject_age                 float64
type_of_resistance           object
precinct                     object
neighborhood                 object
dtype: object

## Load MLS_Neighborhood.csv

In [7]:
# Load and read in MLS_Neighborhood.csv
load_second_csv = ("../target_files/MLS_Neighborhoods.csv")
nbhd_keys_df = pd.read_csv(load_second_csv)
nbhd_keys_df.head(5)

Unnamed: 0,neighborhood_id,name,community_id
0,1,Armatage,10
1,2,East Harriet,10
2,3,Fulton,10
3,4,Kenny,10
4,5,King Field,10


## Change Field Name From New DataFrame To Match First DataFrame

In [8]:
# Use rename function to change the field name that is name to neighborhood.
nbhd_keys_df_edited = nbhd_keys_df.rename(columns= ({'name': 'neighborhood'}))

In [9]:
# Verify field name changed to neighborhood
nbhd_keys_df_edited.head(5)

Unnamed: 0,neighborhood_id,neighborhood,community_id
0,1,Armatage,10
1,2,East Harriet,10
2,3,Fulton,10
3,4,Kenny,10
4,5,King Field,10


## Adjust Punctuation And Spelling Of Neighborhood Name From First DB To Match New DF

In [10]:
# Use replace function to change the name of the ten neighborhoods in the first DF that don't match the second DF
renamed_columns_df = renamed_columns_df.replace(to_replace = "St. Anthony West", value ="St Anthony West")
renamed_columns_df = renamed_columns_df.replace(to_replace = "St. Anthony East", value ="St Anthony East")
renamed_columns_df = renamed_columns_df.replace(to_replace = "Humboldt Industrial Area", value ="Humboldt Indust Area")
renamed_columns_df = renamed_columns_df.replace(to_replace = "CARAG", value ="South Uptown")
renamed_columns_df = renamed_columns_df.replace(to_replace = "Bryn - Mawr", value ="Bryn Mawr")
renamed_columns_df = renamed_columns_df.replace(to_replace = "Columbia Park", value ="Columbia")
renamed_columns_df = renamed_columns_df.replace(to_replace = "Mid - City Industrial", value ="Mid City Industrial Ar")
renamed_columns_df = renamed_columns_df.replace(to_replace = "Nicollet Island - East Bank", value ="Nicollet Is/East Bank")
renamed_columns_df = renamed_columns_df.replace(to_replace = "University of Minnesota", value ="University")
renamed_columns_df = renamed_columns_df.replace(to_replace = "McKinley", value ="Mckinley")


In [11]:
# Produce DF to verify neighborhood names have been changed.
renamed_columns_df

Unnamed: 0,police_use_of_force_id,case_number,response_date,problem,primary_offense,subject_injury,subject_role,police_use_of_force_type,force_type_action,subject_race,subject_sex,subject_age,type_of_resistance,precinct,neighborhood
0,11224233,08-005343,2008/01/05 22:57:23+00,Suspicious Person,FLEEFT,Yes,A,Bodily Force,Punches,Black,Male,29.0,Fled on Foot,4,Mckinley
1,11224234,08-010291,2008/01/10 22:21:53+00,Motor Vehicle Chase,FLEE,Yes,A,Bodily Force,Punches,White,Male,22.0,Fled in Vehicle,3,Central
2,11224235,08-024156,2008/01/25 18:04:32+00,Disturbance,DISCON,No,A,Taser,Firing Darts,Black,Male,40.0,Commission of Crime,1,Downtown West
3,11224236,08-025819,2008/01/27 02:44:26+00,Disturbance,DISCON,,A,Chemical Irritant,Crowd Control Mace,Black,Male,29.0,Commission of Crime,1,Downtown West
4,11224237,08-039928,2008/02/10 04:54:24+00,Traffic Law Enforcement,FLEE,Yes,A,Bodily Force,Knees,Black,Male,28.0,Fled in Vehicle,1,Downtown West
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30049,11254282,20-Lynnhurst,2020/01/01 00:00:00+00,,,,,,0,,,,,,Lynnhurst
30050,11254283,12-St. Anthony East,2012/01/01 00:00:00+00,,,,,,0,,,,,,St Anthony East
30051,11254284,19-Columbia Park,2019/01/01 00:00:00+00,,,,,,0,,,,,,Columbia
30052,11254285,19-Kenwood,2019/01/01 00:00:00+00,,,,,,0,,,,,,Kenwood


## Merge The Two DataFrames

In [12]:
# Use the merge function to merge the two DataFrames and produce head.
merged_df_x = pd.merge(renamed_columns_df, nbhd_keys_df_edited, on='neighborhood', how= 'left')
merged_df_x.head(5)

Unnamed: 0,police_use_of_force_id,case_number,response_date,problem,primary_offense,subject_injury,subject_role,police_use_of_force_type,force_type_action,subject_race,subject_sex,subject_age,type_of_resistance,precinct,neighborhood,neighborhood_id,community_id
0,11224233,08-005343,2008/01/05 22:57:23+00,Suspicious Person,FLEEFT,Yes,A,Bodily Force,Punches,Black,Male,29.0,Fled on Foot,4,Mckinley,45.0,2.0
1,11224234,08-010291,2008/01/10 22:21:53+00,Motor Vehicle Chase,FLEE,Yes,A,Bodily Force,Punches,White,Male,22.0,Fled in Vehicle,3,Central,25.0,9.0
2,11224235,08-024156,2008/01/25 18:04:32+00,Disturbance,DISCON,No,A,Taser,Firing Darts,Black,Male,40.0,Commission of Crime,1,Downtown West,73.0,3.0
3,11224236,08-025819,2008/01/27 02:44:26+00,Disturbance,DISCON,,A,Chemical Irritant,Crowd Control Mace,Black,Male,29.0,Commission of Crime,1,Downtown West,73.0,3.0
4,11224237,08-039928,2008/02/10 04:54:24+00,Traffic Law Enforcement,FLEE,Yes,A,Bodily Force,Knees,Black,Male,28.0,Fled in Vehicle,1,Downtown West,73.0,3.0


## Rearrange Fields To Match ERD

In [13]:
# Use double brackets and tic marks to rearrange fields and produce head of DF
rearranged_df = merged_df_x[['police_use_of_force_id','response_date',
                             'case_number','problem','subject_race',
                             'subject_sex', 'subject_age',
                             'subject_role', 'primary_offense',
                             'type_of_resistance', 'police_use_of_force_type',
                             'force_type_action', 'subject_injury', 
                             'neighborhood_id', 'neighborhood', 'precinct']]

rearranged_df.head()

Unnamed: 0,police_use_of_force_id,response_date,case_number,problem,subject_race,subject_sex,subject_age,subject_role,primary_offense,type_of_resistance,police_use_of_force_type,force_type_action,subject_injury,neighborhood_id,neighborhood,precinct
0,11224233,2008/01/05 22:57:23+00,08-005343,Suspicious Person,Black,Male,29.0,A,FLEEFT,Fled on Foot,Bodily Force,Punches,Yes,45.0,Mckinley,4
1,11224234,2008/01/10 22:21:53+00,08-010291,Motor Vehicle Chase,White,Male,22.0,A,FLEE,Fled in Vehicle,Bodily Force,Punches,Yes,25.0,Central,3
2,11224235,2008/01/25 18:04:32+00,08-024156,Disturbance,Black,Male,40.0,A,DISCON,Commission of Crime,Taser,Firing Darts,No,73.0,Downtown West,1
3,11224236,2008/01/27 02:44:26+00,08-025819,Disturbance,Black,Male,29.0,A,DISCON,Commission of Crime,Chemical Irritant,Crowd Control Mace,,73.0,Downtown West,1
4,11224237,2008/02/10 04:54:24+00,08-039928,Traffic Law Enforcement,Black,Male,28.0,A,FLEE,Fled in Vehicle,Bodily Force,Knees,Yes,73.0,Downtown West,1


## Check The Type Of Data In subject_age field

In [14]:
# Check if subject_age field is int by use of .dtypes
rearranged_df.dtypes

police_use_of_force_id        int64
response_date                object
case_number                  object
problem                      object
subject_race                 object
subject_sex                  object
subject_age                 float64
subject_role                 object
primary_offense              object
type_of_resistance           object
police_use_of_force_type     object
force_type_action            object
subject_injury               object
neighborhood_id             float64
neighborhood                 object
precinct                     object
dtype: object

## Change NaN(s) To Zero In subject_age Field To Allow For Conversion To Int

In [15]:
# Change all NaN(s) Within The subject_age field to 0 via fillna
rearranged_df["subject_age"].fillna("0", inplace = True) 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [16]:
# Confirm NaN(s) have been changed to 0 by producing df
rearranged_df

Unnamed: 0,police_use_of_force_id,response_date,case_number,problem,subject_race,subject_sex,subject_age,subject_role,primary_offense,type_of_resistance,police_use_of_force_type,force_type_action,subject_injury,neighborhood_id,neighborhood,precinct
0,11224233,2008/01/05 22:57:23+00,08-005343,Suspicious Person,Black,Male,29,A,FLEEFT,Fled on Foot,Bodily Force,Punches,Yes,45.0,Mckinley,4
1,11224234,2008/01/10 22:21:53+00,08-010291,Motor Vehicle Chase,White,Male,22,A,FLEE,Fled in Vehicle,Bodily Force,Punches,Yes,25.0,Central,3
2,11224235,2008/01/25 18:04:32+00,08-024156,Disturbance,Black,Male,40,A,DISCON,Commission of Crime,Taser,Firing Darts,No,73.0,Downtown West,1
3,11224236,2008/01/27 02:44:26+00,08-025819,Disturbance,Black,Male,29,A,DISCON,Commission of Crime,Chemical Irritant,Crowd Control Mace,,73.0,Downtown West,1
4,11224237,2008/02/10 04:54:24+00,08-039928,Traffic Law Enforcement,Black,Male,28,A,FLEE,Fled in Vehicle,Bodily Force,Knees,Yes,73.0,Downtown West,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30049,11254282,2020/01/01 00:00:00+00,20-Lynnhurst,,,,0,,,,,0,,7.0,Lynnhurst,
30050,11254283,2012/01/01 00:00:00+00,12-St. Anthony East,,,,0,,,,,0,,19.0,St Anthony East,
30051,11254284,2019/01/01 00:00:00+00,19-Columbia Park,,,,0,,,,,0,,13.0,Columbia,
30052,11254285,2019/01/01 00:00:00+00,19-Kenwood,,,,0,,,,,0,,36.0,Kenwood,


## Convert subject_age Field To int64 From float64

In [17]:
# Convert subject_age field to int64 via astype function 
rearranged_df['subject_age'] = rearranged_df['subject_age'].astype(np.int64)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [18]:
# Confirm task from cell above was completed via .dtypes
rearranged_df.dtypes

police_use_of_force_id        int64
response_date                object
case_number                  object
problem                      object
subject_race                 object
subject_sex                  object
subject_age                   int64
subject_role                 object
primary_offense              object
type_of_resistance           object
police_use_of_force_type     object
force_type_action            object
subject_injury               object
neighborhood_id             float64
neighborhood                 object
precinct                     object
dtype: object

## Change NaN(s) To Zero In neighborhood_id Field To Allow For Conversion To Int

In [19]:
# Change all NaN(s) Within The neighborhood_id field to 0 via fillna
rearranged_df["neighborhood_id"].fillna("0", inplace = True) 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [20]:
# Confirm NaN(s) have been changed to 0 by producing df
rearranged_df

Unnamed: 0,police_use_of_force_id,response_date,case_number,problem,subject_race,subject_sex,subject_age,subject_role,primary_offense,type_of_resistance,police_use_of_force_type,force_type_action,subject_injury,neighborhood_id,neighborhood,precinct
0,11224233,2008/01/05 22:57:23+00,08-005343,Suspicious Person,Black,Male,29,A,FLEEFT,Fled on Foot,Bodily Force,Punches,Yes,45,Mckinley,4
1,11224234,2008/01/10 22:21:53+00,08-010291,Motor Vehicle Chase,White,Male,22,A,FLEE,Fled in Vehicle,Bodily Force,Punches,Yes,25,Central,3
2,11224235,2008/01/25 18:04:32+00,08-024156,Disturbance,Black,Male,40,A,DISCON,Commission of Crime,Taser,Firing Darts,No,73,Downtown West,1
3,11224236,2008/01/27 02:44:26+00,08-025819,Disturbance,Black,Male,29,A,DISCON,Commission of Crime,Chemical Irritant,Crowd Control Mace,,73,Downtown West,1
4,11224237,2008/02/10 04:54:24+00,08-039928,Traffic Law Enforcement,Black,Male,28,A,FLEE,Fled in Vehicle,Bodily Force,Knees,Yes,73,Downtown West,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30049,11254282,2020/01/01 00:00:00+00,20-Lynnhurst,,,,0,,,,,0,,7,Lynnhurst,
30050,11254283,2012/01/01 00:00:00+00,12-St. Anthony East,,,,0,,,,,0,,19,St Anthony East,
30051,11254284,2019/01/01 00:00:00+00,19-Columbia Park,,,,0,,,,,0,,13,Columbia,
30052,11254285,2019/01/01 00:00:00+00,19-Kenwood,,,,0,,,,,0,,36,Kenwood,


## Convert neighborhood_id Field To int64 From float64

In [21]:
# Convert  neighborhood_id field to int64 via astype function
rearranged_df['neighborhood_id'] = rearranged_df['neighborhood_id'].astype(np.int64)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [22]:
# Check if neighborhood_id field is int64
rearranged_df.dtypes

police_use_of_force_id       int64
response_date               object
case_number                 object
problem                     object
subject_race                object
subject_sex                 object
subject_age                  int64
subject_role                object
primary_offense             object
type_of_resistance          object
police_use_of_force_type    object
force_type_action           object
subject_injury              object
neighborhood_id              int64
neighborhood                object
precinct                    object
dtype: object

## Rename Final DF

In [23]:
# Rename final df to something clearer
police_use_of_force_final_df = rearranged_df

## Export Newest DF As CSV To target_files Folder

In [24]:
# Export useing to_csv function
police_use_of_force_final_df.to_csv("../target_files/police_use_of_force_cleaned_data.csv", index=False, header=True)