# Police Use Of Force Script

## Import Dependencies

In [1]:
import pandas as pd
import datetime as dt
import numpy as np

## Load Source Data CSV

In [2]:
# Relative path to the raw Police Use of Force CSV export
load_csv = "../source_files/Police_Use_of_Force_Source_Data.csv"

## Read In Source Data CSV
#### Produce Head Of DataFrame

In [3]:
# Read the source CSV into a DataFrame, then preview its first five rows
source_data_df = pd.read_csv(filepath_or_buffer=load_csv)
source_data_df.head(n=5)

FileNotFoundError: [Errno 2] File ../Source_Data/Police_Use_of_Force_Source_Data.csv does not exist: '../Source_Data/Police_Use_of_Force_Source_Data.csv'

## Drop Unwanted Fields From DataFrame
#### Produce Head Of DataFrame

In [None]:
# Fields from the source export that are not wanted in the cleaned output
unwanted_columns = [
    "X",
    "Y",
    "Is911Call",
    "ForceReportNumber",
    "SubjectRoleNumber",
    "TotalCityCallsForYear",
    "TotalPrecinctCallsForYear",
    "TotalNeighborhoodCallsForYear",
    "CenterGBSID",
    "CenterLatitude",
    "CenterLongitude",
    "CenterX",
    "CenterY",
    "DateAdded",
    "OBJECTID",
]
edited_source_data_df = source_data_df.drop(columns=unwanted_columns)

edited_source_data_df.head()

## Rename Fields To Match ERD
#### Produce Head Of DataFrame

In [None]:
# Map each source field name to its snake_case name from the ERD
erd_column_names = {
    'PoliceUseOfForceID': 'police_use_of_force_id',
    'CaseNumber': 'case_number',
    'ResponseDate': 'response_date',
    'PrimaryOffense': 'primary_offense',
    'SubjectInjury': 'subject_injury',
    'SubjectRole': 'subject_role',
    'ForceType': 'police_use_of_force_type',
    'ForceTypeAction': 'force_type_action',
    'Race': 'subject_race',
    'Sex': 'subject_sex',
    'EventAge': 'subject_age',
    'TypeOfResistance': 'type_of_resistance',
    'Problem': 'problem',
    'Precinct': 'precinct',
    'Neighborhood': 'neighborhood',
}
renamed_columns_df = edited_source_data_df.rename(columns=erd_column_names)

renamed_columns_df.head()

## Check If response_date is datetime

In [None]:
# Display the dtype of every column so the type of response_date
# (datetime or not) can be read off the output.
renamed_columns_df.dtypes

## Load MLS_Neighborhoods.csv

In [None]:
# Read MLS_Neighborhoods.csv into a DataFrame and preview its first five rows
load_second_csv = "../target_files/MLS_Neighborhoods.csv"
nbhd_keys_df = pd.read_csv(filepath_or_buffer=load_second_csv)
nbhd_keys_df.head()

## Change Field Name From New DataFrame To Match First DataFrame

In [None]:
# Rename the 'name' field to 'neighborhood' so both DataFrames share that field name
neighborhood_field_rename = {'name': 'neighborhood'}
nbhd_keys_df_edited = nbhd_keys_df.rename(columns=neighborhood_field_rename)

In [None]:
# Preview the first five rows to verify the field is now named 'neighborhood'
nbhd_keys_df_edited.head(5)

## Adjust Punctuation And Spelling Of Neighborhood Names From First DF To Match New DF

In [None]:
# Spellings of the ten neighborhoods in the first DF that don't match the second DF,
# mapped to the spelling used by the second DF.
neighborhood_name_fixes = {
    "St. Anthony West": "St Anthony West",
    "St. Anthony East": "St Anthony East",
    "Humboldt Industrial Area": "Humboldt Indust Area",
    "CARAG": "South Uptown",
    "Bryn - Mawr": "Bryn Mawr",
    "Columbia Park": "Columbia",
    "Mid - City Industrial": "Mid City Industrial Ar",
    "Nicollet Island - East Bank": "Nicollet Is/East Bank",
    "University of Minnesota": "University",
    "McKinley": "Mckinley",
}

# The nested-dict form restricts the replacement to the 'neighborhood' column, so an
# identical string in any other field is left untouched, and the data is scanned once
# instead of once per name across the whole frame.
renamed_columns_df = renamed_columns_df.replace({"neighborhood": neighborhood_name_fixes})


In [None]:
# Display the DataFrame so the corrected neighborhood names can be checked by eye
renamed_columns_df

## Merge The Two DataFrames

In [None]:
# Left-join the neighborhood keys onto the use-of-force records by 'neighborhood'.
# A left join keeps every use-of-force row, including rows with no matching key.
merged_df_x = renamed_columns_df.merge(
    nbhd_keys_df_edited,
    how='left',
    on='neighborhood',
)
merged_df_x.head()

## Rearrange Fields To Match ERD

In [None]:
# Field order required by the ERD
erd_column_order = [
    'police_use_of_force_id',
    'response_date',
    'case_number',
    'problem',
    'subject_race',
    'subject_sex',
    'subject_age',
    'subject_role',
    'primary_offense',
    'type_of_resistance',
    'police_use_of_force_type',
    'force_type_action',
    'subject_injury',
    'neighborhood_id',
    'neighborhood',
    'precinct',
]

# Select the fields in ERD order. The explicit .copy() makes this an independent
# DataFrame, so the column assignments performed on it later do not raise
# SettingWithCopyWarning.
rearranged_df = merged_df_x[erd_column_order].copy()

rearranged_df.head()

## Check The Type Of Data In subject_age field

In [None]:
# Display the dtype of every column to check whether subject_age is an integer type
rearranged_df.dtypes

## Change NaN(s) To Zero In subject_age Field To Allow For Conversion To Int

In [None]:
# Change all NaN(s) within the subject_age field to 0.
# The filled column is assigned back rather than calling fillna(inplace=True) on
# rearranged_df["subject_age"]: an in-place call on a column selection acts on a
# temporary object and does not update the DataFrame when pandas Copy-on-Write is active.
# The fill value is the integer 0 (not the string "0") so the column stays numeric.
# NOTE(review): 0 is used as the placeholder for a missing age, as in the original cell.
rearranged_df = rearranged_df.assign(subject_age=rearranged_df["subject_age"].fillna(0))

In [None]:
# Display the DataFrame to confirm the NaN(s) in subject_age have been changed to 0
rearranged_df

## Convert subject_age Field To int64 From float64

In [None]:
# Cast the subject_age field to int64; every other field keeps its current dtype
rearranged_df = rearranged_df.astype({'subject_age': np.int64})

In [None]:
# Display the column dtypes to confirm subject_age was converted by the cell above
rearranged_df.dtypes

## Change NaN(s) To Zero In neighborhood_id Field To Allow For Conversion To Int

In [None]:
# Change all NaN(s) within the neighborhood_id field to 0.
# The filled column is assigned back rather than calling fillna(inplace=True) on
# rearranged_df["neighborhood_id"]: an in-place call on a column selection acts on a
# temporary object and does not update the DataFrame when pandas Copy-on-Write is active.
# The fill value is the integer 0 (not the string "0") so the column stays numeric.
# NOTE(review): 0 is used as the placeholder id for a missing neighborhood_id,
# as in the original cell.
rearranged_df = rearranged_df.assign(
    neighborhood_id=rearranged_df["neighborhood_id"].fillna(0)
)

In [None]:
# Display the DataFrame to confirm the NaN(s) in neighborhood_id have been changed to 0
rearranged_df

## Convert neighborhood_id Field To int64 From float64

In [None]:
# Cast the neighborhood_id field to int64; every other field keeps its current dtype
rearranged_df = rearranged_df.astype({'neighborhood_id': np.int64})

In [None]:
# Display the column dtypes to check that neighborhood_id is now int64
rearranged_df.dtypes

## Rename Final DF

In [None]:
# Give the final DataFrame a clearer name. This is a plain assignment, so both
# names refer to the same DataFrame object; no copy is made.
police_use_of_force_final_df = rearranged_df

## Export Newest DF As CSV To target_files Folder

In [None]:
# Export the final DataFrame with to_csv: header row included, index column omitted
cleaned_csv_path = "../target_files/MLS_Police_Use_Of_Force_Cleaned_Data.csv"
police_use_of_force_final_df.to_csv(
    cleaned_csv_path,
    header=True,
    index=False,
)