# NYPD Borough and Precinct Analysis

In [13]:
# Render matplotlib figures with the interactive "notebook" backend
%matplotlib notebook

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import sem
from scipy.stats import linregress

# Relative location of the NYPD year-to-date arrest export
nypd_csv = "Resources/NYPD_Arrest_Data__Year_to_Date_.csv"

# Load the arrest records into a DataFrame and preview the first five rows
nypd_data = pd.read_csv(filepath_or_buffer=nypd_csv)
nypd_data.head(n=5)

Unnamed: 0,ARREST_KEY,ARREST_DATE,PD_CD,PD_DESC,KY_CD,OFNS_DESC,LAW_CODE,LAW_CAT_CD,ARREST_BORO,ARREST_PRECINCT,JURISDICTION_CODE,AGE_GROUP,PERP_SEX,PERP_RACE,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,New Georeferenced Column
0,238859078,01/09/2022,,(null),,(null),PL 2650022,M,B,49,0,25-44,M,BLACK HISPANIC,1021536,251417,40.85668,-73.865212,POINT (-73.865212 40.85668)
1,239923883,01/31/2022,,(null),,(null),CPL5700600,9,Q,113,3,25-44,M,BLACK,1046367,186986,40.679701,-73.776047,POINT (-73.77604735 40.67970059)
2,239651234,01/25/2022,153.0,RAPE 3,104.0,RAPE,PL 1302503,F,K,71,0,25-44,M,BLACK,998742,181235,40.664121,-73.947765,POINT (-73.9477648403751 40.664121282631)
3,241524225,03/03/2022,157.0,RAPE 1,104.0,RAPE,PL 1303501,F,K,84,0,18-24,M,BLACK,988902,192641,40.695439,-73.983225,POINT (-73.9832253756043 40.6954388081238)
4,241038267,02/22/2022,157.0,RAPE 1,104.0,RAPE,PL 1303501,F,B,41,0,45-64,M,BLACK,1013037,236657,40.816206,-73.896001,POINT (-73.8960011932583 40.8162058439227)


In [3]:
# Reduce the number of columns.
# Columns are selected by name and the result is rebound to `nypd_data`
# (no in-place mutation), so re-running this cell yields the same frame
# instead of failing on positional indices that no longer exist.
columns_to_keep = [
    "ARREST_KEY",
    "ARREST_BORO",
    "ARREST_PRECINCT",
    "X_COORD_CD",
    "Y_COORD_CD",
    "Latitude",
    "Longitude",
]
nypd_data = nypd_data.loc[:, columns_to_keep].copy()

In [4]:
# Give the borough and precinct columns reader-friendly names
friendly_column_names = {
    "ARREST_BORO": "Borough",
    "ARREST_PRECINCT": "Precinct Number",
}
nypd_data_cleaned = nypd_data.rename(columns=friendly_column_names)

In [5]:
# Count arrests per borough and prepare the result for charting.
# Borough codes are translated to full names through an explicit mapping
# rather than by position, so a missing or unexpected code cannot shift a
# name onto the wrong row (or raise a length-mismatch error).
borough_names = {
    "B": "The Bronx",
    "K": "Brooklyn",
    "M": "Manhattan",
    "Q": "Queens",
    "S": "Staten Island",
}
borough_groups = nypd_data_cleaned.groupby("Borough")
borough_data = (
    borough_groups[["ARREST_KEY"]]
    .count()
    .rename(index=borough_names)
    .rename_axis(None)  # keep the index unnamed, as before
)
borough_data_cleaned = borough_data.rename(columns={"ARREST_KEY": "Number of Arrests"})
# Largest borough first
borough_data_sorted = borough_data_cleaned.sort_values("Number of Arrests", ascending=False)

In [6]:
# Tally the arrests recorded for every precinct
precinct_groups = nypd_data_cleaned.groupby(by="Precinct Number")
precinct_data = precinct_groups[["ARREST_KEY"]].agg("count")
arrest_count_label = {"ARREST_KEY": "Number of Arrests"}
precinct_data_cleaned = precinct_data.rename(columns=arrest_count_label)

In [7]:
# Pair each borough's precinct count with its arrest total.
# Arrest totals come from `borough_data_sorted` instead of hand-copied
# numbers, so they cannot drift out of date when the CSV is refreshed.
# Precinct counts are reference constants keyed by borough name, so each
# count is matched to the right borough regardless of row order.
precincts_per_borough = pd.Series(
    {
        "Brooklyn": 23,
        "Manhattan": 22,
        "The Bronx": 12,
        "Queens": 16,
        "Staten Island": 4,
    },
    name="Number of Precincts",
)
num_precincts = (
    borough_data_sorted
    .join(precincts_per_borough)
    [["Number of Precincts", "Number of Arrests"]]
    .reset_index(drop=True)  # keep the plain 0..n-1 index
)

In [8]:
# Display the table of precinct counts and arrest totals
num_precincts

Unnamed: 0,Number of Precincts,Number of Arrests
0,23,38067
1,22,34588
2,12,32309
3,16,29353
4,4,6247


In [9]:
# Inputs for the precinct/arrest scatter plot and regression
x_values = num_precincts["Number of Precincts"]
y_values = num_precincts["Number of Arrests"]
# Borough label for each row, listed in the same order as the rows of
# `num_precincts` (most arrests first) so points[i] describes
# (x_values[i], y_values[i]).
points = ["Brooklyn", "Manhattan", "The Bronx", "Queens", "Staten Island"]

In [10]:
# Display arrest totals per borough, sorted from most to fewest arrests
borough_data_sorted

Unnamed: 0,Number of Arrests
Brooklyn,38067
Manhattan,34588
The Bronx,32309
Queens,29353
Staten Island,6247


In [15]:
# Pie styling: pull the first wedge (the borough with the most arrests) out
# from the rest and assign one color per borough
explode = (0.1, 0, 0, 0, 0)
colors = ["lightblue", "yellow", "yellowgreen", "coral", "pink"]
pie_options = dict(
    subplots=True,
    legend=False,
    autopct="%1.1f%%",
    ylabel="",
    explode=explode,
    shadow=True,
    colors=colors,
)
borough_pie_chart = borough_data_sorted.plot.pie(**pie_options)

# Title the chart, write it to disk, then render it
plt.title("Percentage of Arrests")
plt.savefig("Images/NYPD_Borough_Pie")
plt.show()

<IPython.core.display.Javascript object>

In [14]:
# Display precinct/arrest scatter plot with a fitted regression line.
# A dedicated figure is created so that, under the interactive notebook
# backend, the scatter is not drawn onto a figure left active by an
# earlier plotting cell.
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, color="lightblue", edgecolors="black")

# Calculate trend line and r-value
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax.plot(x_values, regress_values, "r-")
# Anchor the equation in axes-fraction coordinates (top-left corner).
# A data-space anchor outside the axis limits is clipped by matplotlib and
# the equation would never be drawn.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The r-squared is: {rvalue**2}")

# Label chart
ax.set_ylabel("Number of Arrests")
ax.set_xlabel("Number of Precincts")
ax.set_title("No. of Precincts in a Borough Vs. Arrests")

# Borough names, hand-positioned just beside their markers (data coordinates)
borough_labels = [
    (4.5, 5147, "Staten Island"),
    (16.5, 28353, "Queens"),
    (12.5, 32309, "The Bronx"),
    (22.5, 33588, "Manhattan"),
    (23.5, 37067, "Brooklyn"),
]
for label_x, label_y, borough_name in borough_labels:
    ax.text(label_x, label_y, borough_name)

# Set limiters
ax.set_xlim(0, 30)
ax.set_ylim(4000, 45000)

# Save and display scatter plot graph
fig.savefig("Images/NYPD_Precinct_Scatter")
plt.show()

<IPython.core.display.Javascript object>

The r-squared is: 0.8079196191962024
