In [11]:
#importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd

In [12]:
#reading in the csv file
staten_island_df = pd.read_csv("Resources/NYCCrimeData_Parks_Staten_Island_TableToExcel.csv")
staten_island_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 390 entries, 0 to 389
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   OBJECTID      390 non-null    int64  
 1   Complaint ID  366 non-null    object 
 2   Offense       366 non-null    object 
 3   Latitude      390 non-null    float64
 4   Longitude     390 non-null    float64
 5   Borough       24 non-null     object 
 6   Public Area   24 non-null     object 
dtypes: float64(2), int64(1), object(4)
memory usage: 21.5+ KB


In [13]:
#dividing the df into crimes and public institutions
#the letter r here is used as the official abbreviation for staten island

r_crimes = staten_island_df.iloc[0:365, :]
r_crimes = r_crimes.drop(columns= ["Borough", "Public Area"])

In [14]:
r_crimes.head()

Unnamed: 0,OBJECTID,Complaint ID,Offense,Latitude,Longitude
0,1,261214832,ROBBERY,40.643566,-74.074001
1,2,261233859,ROBBERY,40.635961,-74.161365
2,3,261233841,ROBBERY,40.643569,-74.098338
3,4,261261343,ROBBERY,40.630408,-74.142977
4,5,261347872,ROBBERY,40.630539,-74.076641


In [15]:
#same method for public institutions and reordering columns
r_public = staten_island_df.iloc[366:389, :]
r_public = r_public.drop(columns= ["Complaint ID", "Offense"])

#reordering columns
r_public = r_public[["OBJECTID", "Public Area", "Latitude", "Longitude"]]

In [16]:
r_public.head()

Unnamed: 0,OBJECTID,Public Area,Latitude,Longitude
366,367,PARKS,40.614533,-74.074185
367,368,RECREATION AND WATERFRONT SITES,40.510392,-74.230405
368,369,PUBLIC LIBRARIES,40.595802,-74.063027
369,370,PUBLIC LIBRARIES,40.590395,-74.101138
370,371,PARKS,40.638748,-74.083899


In [17]:
#each location a crime took place

r_crimes_gdf = gpd.GeoDataFrame(
    r_crimes,
    geometry=gpd.points_from_xy(
        r_crimes["Longitude"],
        r_crimes["Latitude"],
    ),
    crs={"init":"EPSG:4326"},
)

# each public institution

r_public_gdf = gpd.GeoDataFrame(
    r_public, 
    geometry=gpd.points_from_xy(
        r_public["Longitude"],
        r_public["Latitude"],
    ),
    crs={"init":"EPSG:4326"},
)

# EPSG:3857 converts it to meters

r_crimes_gdf = r_crimes_gdf.to_crs({"init": "EPSG:3857"})
r_public_gdf = r_public_gdf.to_crs({"init": "EPSG:3857"})

# 1 miles would be 1609 meters

x = r_public_gdf.buffer(1609).unary_union

crimes_per = r_crimes_gdf["geometry"].intersection(x)

# print all the nearby points
all_crimes_near_pi = (r_crimes_gdf[~crimes_per.is_empty])
all_crimes_near_pi

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


Unnamed: 0,OBJECTID,Complaint ID,Offense,Latitude,Longitude,geometry
0,1,261214832,ROBBERY,40.643566,-74.074001,POINT (-8245880.072 4959909.037)
1,2,261233859,ROBBERY,40.635961,-74.161365,POINT (-8255605.388 4958793.377)
4,5,261347872,ROBBERY,40.630539,-74.076641,POINT (-8246173.956 4957998.042)
5,6,261434927,ROBBERY,40.637135,-74.123473,POINT (-8251387.282 4958965.599)
6,7,261431397,ROBBERY,40.643996,-74.076502,POINT (-8246158.482 4959972.122)
...,...,...,...,...,...,...
359,360,279208483,ROBBERY,40.644719,-74.081327,POINT (-8246695.599 4960078.194)
360,361,279411279,ROBBERY,40.635738,-74.132124,POINT (-8252350.295 4958760.665)
362,363,279447847,ROBBERY,40.632740,-74.152082,POINT (-8254572.010 4958320.892)
363,364,279472819,ROBBERY,40.614616,-74.084716,POINT (-8247072.861 4955662.725)


In [18]:
#create empty list to hold the number of crimes commited within a 1 mile radius of each public institution
crimes_per_location = []

# Define the radius in meters
radius = 1609 # about 1 mile

for offense in r_crimes_gdf["Offense"]:
    crimes_per_location = []
    r_crimes_gdf_each = r_crimes_gdf[r_crimes_gdf["Offense"] == offense]

    for i in range(len(r_public_gdf)):

        # Calculate the distances between all pairs of points
        distances = gpd.GeoSeries(r_crimes_gdf_each.geometry).distance(r_public_gdf.geometry.iloc[i])

        # Filter the pairs where the distance is within the radius
        within_radius = distances <= radius

        # Count the number of points within the radius for each point in gdf2
        count_within_radius = within_radius.sum()

        crimes_per_location.append(count_within_radius)
    r_public_gdf[offense] = crimes_per_location

In [19]:
r_public_gdf["Total Crimes"] = r_public_gdf[['ROBBERY', 'MURDER & NON-NEGL. MANSLAUGHTER', 'RAPE']].sum(axis=1)
r_public_gdf

Unnamed: 0,OBJECTID,Public Area,Latitude,Longitude,geometry,ROBBERY,MURDER & NON-NEGL. MANSLAUGHTER,RAPE,Total Crimes
366,367,PARKS,40.614533,-74.074185,POINT (-8245900.534 4955650.513),36,4,0,40
367,368,RECREATION AND WATERFRONT SITES,40.510392,-74.230405,POINT (-8263290.887 4940390.577),0,0,0,0
368,369,PUBLIC LIBRARIES,40.595802,-74.063027,POINT (-8244658.412 4952904.109),8,0,0,8
369,370,PUBLIC LIBRARIES,40.590395,-74.101138,POINT (-8248900.989 4952111.454),3,0,0,3
370,371,PARKS,40.638748,-74.083899,POINT (-8246981.890 4959202.210),76,1,15,92
371,372,PARKS,40.636564,-74.160471,POINT (-8255505.889 4958881.789),21,3,0,24
372,373,PRESERVES AND CONSERVATION AREAS,40.576267,-74.203381,POINT (-8260282.637 4950040.586),0,0,0,0
373,374,PARKS,40.60869,-74.119578,POINT (-8250953.689 4954793.762),16,1,0,17
374,375,CEMETERIES,40.639297,-74.120934,POINT (-8251104.670 4959282.811),18,2,0,20
375,376,GARDENS,40.526901,-74.163615,POINT (-8255855.877 4942808.144),0,0,0,0
