# Imports

First, we'll import the necessary libraries.

In [60]:
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import spacy

Next, we'll query the City of Philadelphia data via the ArcGIS REST API using the `requests` library. We'll also use the `json` library to parse the response.

Finally, we'll use the `geopandas` library to create a geodataframe from the response.

We have three different datasets to import from the City's ArcGIS server. These are:

### 1. Vacant Land

In [61]:
# Define the URL for the Vacant_Indicators_Land feature service
land_url = 'https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/Vacant_Indicators_Land/FeatureServer/0/query'

# Define the parameters for the Vacant_Indicators_Land API request
land_params = {
    'where': '1=1',
    'outFields': '*',
    'returnGeometry': 'true',
    'f': 'json'
}

# Make the Vacant_Indicators_Land API request
land_response = requests.get(land_url, params=land_params)

# Check if the Vacant_Indicators_Land request was successful
if land_response.status_code == 200:
    # Convert the Vacant_Indicators_Land JSON data to a geopandas geodataframe; convert to CRS 3857
    land_data = land_response.json()

    # convert the JSON data to a pandas dataframe
    land_df = pd.DataFrame(land_data['features'])

    # separate the attributes column into one column per attribute
    land_df = pd.concat([land_df.drop(['attributes'], axis=1), land_df['attributes'].apply(pd.Series)], axis=1)

else:
    print('Vacant_Indicators_Land Request failed with status code:', land_response.status_code)


# make the `geometry` column a shapely geometry object
land_df['geometry'] = land_df['geometry'].apply(lambda x: Polygon(x['rings'][0]))


 # convert the pandas dataframe to a geopandas geodataframe
land_gdf = gpd.GeoDataFrame(land_df, geometry='geometry', crs='EPSG:3857')

land_gdf.to_crs(epsg=2272, inplace=True)

### 2. Vacant Buildings

In [62]:
# Define the URL for the Vacant_Indicators_Bldg feature service
bldg_url = 'https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/Vacant_Indicators_Bldg/FeatureServer/0/query'

# Define the parameters for the Vacant_Indicators_Bldg API request
bldg_params = {
    'where': '1=1',
    'outFields': '*',
    'returnGeometry': 'true',
    'f': 'json'
}

# Make the Vacant_Indicators_Bldg API request
bldg_response = requests.get(bldg_url, params=bldg_params)

# Check if the Vacant_Indicators_Bldg request was successful
if bldg_response.status_code == 200:
    # Convert the Vacant_Indicators_Bldg JSON data to a geopandas geodataframe; convert to CRS 3857
    bldg_data = bldg_response.json()

    # convert the JSON data to a pandas dataframe
    bldg_df = pd.DataFrame(bldg_data['features'])

    # separate the attributes column into one column per attribute
    bldg_df = pd.concat([bldg_df.drop(['attributes'], axis=1), bldg_df['attributes'].apply(pd.Series)], axis=1)

else:
    print('Vacant_Indicators_bldg Request failed with status code:', bldg_response.status_code)


# make the `geometry` column a shapely geometry object
bldg_df['geometry'] = bldg_df['geometry'].apply(lambda x: Polygon(x['rings'][0]))


 # convert the pandas dataframe to a geopandas geodataframe
bldg_gdf = gpd.GeoDataFrame(bldg_df, geometry='geometry', crs='EPSG:3857')

bldg_gdf.to_crs(epsg=2272, inplace=True)

### Now, combine the two datasets in preparation for string cleaning.

In [63]:
columns = ['geometry', 'ADDRESS', 'BLDG_DESC',
       'OPA_ID','COUNCILDISTRICT',
       'ZIPCODE','OWNER1', 'OWNER2']

# reduce gdfs to only the columns we want
land_gdf = land_gdf[columns]

bldg_gdf = bldg_gdf[columns]

land_gdf['type'] = 'lot'

bldg_gdf['type'] = 'building'

In [64]:
# bind the two geodataframes together using pandas.concat
full_gdf = pd.concat([land_gdf, bldg_gdf], axis=0)

### Now we'll apply string cleaning to the OWNER1 and OWNER2 columns to identify public vs. private ownership.

In [65]:
public_owners = ['PHILADELPHIA LAND BANK',
                'PHILADELPHIA HOUSING AUTH',
                'CITY OF PHILA',
                'REDEVELOPMENT AUTHORITY OF PHILADELPHIA',
                'CITY OF PHILADELPHIA',
                'DEPT OF PUBLC PROP; CITY OF PHILA',
                'DEPT OF PUBLIC PROP; CITY OF PHILA',
                'DEPT PUB PROP; CITY OF PHILA',
                'REDEVELOPMENT AUTHORITY OF PHILA',
                'PHILA REDEVELOPMENT AUTH',
                'PHILADELPHIA LAND INVESTM',
                'REDEVELOPMENT AUTHORITY O',
                'PHILADELPHIA REDEVELOPMEN',
                'PHILA HOUSING AUTHORITY',
                'KENSINGTON HOUSING AUTHOR',
                'DEVELOPMENT CORPORATION; PHILADELPHIA HOUSING',
                'PHILA REDEVELOPMENT AUTHO',
                'DEPT OF PUB PROP; CITY OF PHILA',
                'PHILA HOUSING DEV CORP',
                'DEP OF PUB PROP; CITY OF PHILA',
                'REDEVELOPMENT AUTHORITY',
                'COMMONWEALTH OF PA',
                'COMMONWEALTH OF PENNA',
                'DEPT OF PUBLIC PROP; CITY OF PHILADELPHIA',
                'DEPT PUBLIC PROP R E DIV; CITY OF PHILA',
                'PUBLIC PROP DIV; CITY OF PHILA',
                'PUBLIC PROP REAL ESTATE; CITY OF PHILA',
                'REAL ESTATE DIV; CITY OF PHILA',
                'REAL ESTATE DIVISION; CITY OF PHILA',
                'URBAN DEVELOPMENT; SECRETARY OF HOUSING'
                'URBAN DEVELOPMENT; SECRETARY OF HOUSING AND',
                'PHILADELPHIA REDEVELOPMENT AUTHORITY',
                'PHILADELPHIA REDEVELOPMENT AUTH',
                'PHILADELPHIA HOUSING AUTHORITY',
                'PHILADELPHIA LAND BANK',
                'REDEVELOPMENT AUTHORITY OF PHILADELPHIA',
                'PHILADELPHIA HOUSING AUTHORITY',
                'CITY OF PHILADELPHIA',
                'CITY OF PHILADELPHIA DEPARTMENT OF PUBLIC PROPERTY',
                'REDEVELOPMENT AUTHORITY OF PHILADELPHIA',
                'PHILADELPHIA LAND INVESTMENT',
                'PHILADELPHIA REDEVELOPMENT AUTHORITY',
                'PHILADELPHIA HOUSING AUTHORITY',
                'KENSINGTON HOUSING AUTHORITY',
                'PHILADELPHIA HOUSING DEVELOPMENT CORPORATION',
                'REDEVELOPMENT AUTHORITY OF PHILADELPHIA',
                'CITY OF PHILADELPHIA DEPARTMENT OF PUBLIC PROPERTY',
                'PHILADELPHIA HOUSING DEVELOPMENT CORPORATION',
                'CITY OF PHILADELPHIA DEPARTMENT OF PUBLIC PROPERTY',
                'REDEVELOPMENT AUTHORITY OF PHILADELPHIA',
                'COMMONWEALTH OF PENNSYLVANIA',
                'CITY OF PHILADELPHIA DEPARTMENT OF PUBLIC PROPERTY',
                'SECRETARY OF HOUSING AND URBAN DEVELOPMENT',
                'REDEVELOPMENT AUTHORITY OF PHILADELPHIA',
                'PHILADELPHIA LAND INVESTMENT',
                'CITY OF PHILADELPHIA DEPARTMENT OF PUBLIC PROPERTY',
                'PENNDOT'
]

# return unique public_owners
public_owners = list(set(public_owners))

In [66]:
# create a new column that combines the OWNER1 and OWNER2 columns according to the following rules:
# if OWNER1 is not null and OWNER2 is null, then the new column is OWNER1
# if OWNER1 is null and OWNER2 is not null, then the new column is OWNER2
# if OWNER2 starts with a preposition, then the new column is OWNER1 + OWNER2 separated by a space
# if OWNER2 does not start with a preposition, then the new column is OWNER2 + OWNER1 separated by a se


# define a function to check if a string starts with a preposition
def starts_with_preposition(string):
    prepositions = ['a', 'an', 'and', 'as', 'at', 'but', 'by', 'for', 'from', 'in', 'into', 'nor', 'of', 'on', 'or', 'so', 'the', 'to', 'up', 'yet']
    if string.split(' ')[0].lower() in prepositions:
        return True
    else:
        return False
    
# define a function to combine the OWNER1 and OWNER2 columns
def combine_owners(row):
    if pd.isnull(row['OWNER1']) and pd.isnull(row['OWNER2']):
        return None
    elif pd.isnull(row['OWNER1']) and not pd.isnull(row['OWNER2']):
        return row['OWNER2']
    elif not pd.isnull(row['OWNER1']) and pd.isnull(row['OWNER2']):
        return row['OWNER1']
    elif starts_with_preposition(row['OWNER2']):
        return row['OWNER1'] + ' ' + row['OWNER2']
    else:
        return row['OWNER2'] + '; ' + row['OWNER1']
    
# apply the combine_owners function to the full_gdf dataframe
full_gdf['OWNER'] = full_gdf.apply(combine_owners, axis=1)

# if OWNER is 'PHILADELPHIA HOUSING AUTH' or 'PHILA HOUSING AUTHORITY', replace with 'PHILADELPHIA HOUSING AUTHORITY'
full_gdf.loc[full_gdf['OWNER'] == 'PHILADELPHIA HOUSING AUTH', 'OWNER'] = 'PHILADELPHIA HOUSING AUTHORITY'
full_gdf.loc[full_gdf['OWNER'] == 'PHILA HOUSING AUTHORITY', 'OWNER'] = 'PHILADELPHIA HOUSING AUTHORITY'

# redevelopment authority typos
redev_owner_variations = ['REDEVELOPMENT AUTHORITY OF PHILA', 'PHILA REDEVELOPMENT AUTH',
'REDEVELOPMENT AUTHORITY O', 'PHILADELPHIA REDEVELOPMEN',
'PHILA REDEVELOPMENT AUTHO', 'REDEVELOPMENT AUTHORITY',
'REDEVELOPMENT AUTH']

for var in redev_owner_variations:
    full_gdf.loc[full_gdf['OWNER'] == var, 'OWNER'] = 'REDEVELOPMENT AUTHORITY OF PHILADELPHIA'

# department of public property typos
dpp_owner_variations = ['DEPT OF PUBLIC PROPERTY', 'DEPT OF PUBLIC PROPERT', 'DEPT OF PUBLC PROP; CITY OF PHILA',
                        'DEPT OF PUBLIC PROP; CITY OF PHILA', 'DEPT OF PUBLIC PROPERTY; CITY OF PHILA', 'DEPT PUB PROP; CITY OF PHILA',
                        'DEPT OF PUB PROP; CITY OF PHILA', 'DEP OF PUB PROP; CITY OF PHILA', 'DEPT OF PUBLIC PROP; CITY OF PHILADELPHIA',
                        'DEPT PUBLIC PROP R E DIV; CITY OF PHILA', 'PUBLIC PROP DIV; CITY OF PHILA', 'PUBLIC PROP REAL ESTATE; CITY OF PHILA',
                        'REAL ESTATE DIV; CITY OF PHILA', 'REAL ESTATE DIVISION; CITY OF PHILA']

for var in dpp_owner_variations:
    full_gdf.loc[full_gdf['OWNER'] == var, 'OWNER'] = 'CITY OF PHILADELPHIA DEPARTMENT OF PUBLIC PROPERTY'

# HUD
full_gdf.loc[full_gdf['OWNER'] == 'URBAN DEVELOPMENT; SECRETARY OF HOUSING', 'OWNER'] = 'SECRETARY OF HOUSING AND URBAN DEVELOPMENT'
full_gdf.loc[full_gdf['OWNER'] == 'URBAN DEVELOPMENT; SECRETARY OF HOUSING AND', 'OWNER'] = 'SECRETARY OF HOUSING AND URBAN DEVELOPMENT'

# commonwealth of pennsylvania
full_gdf.loc[full_gdf['OWNER'] == 'COMMONWEALTH OF PA', 'OWNER'] = 'COMMONWEALTH OF PENNSYLVANIA'
full_gdf.loc[full_gdf['OWNER'] == 'COMMONWEALTH OF PENNA', 'OWNER'] = 'COMMONWEALTH OF PENNSYLVANIA'

# phdc
full_gdf.loc[full_gdf['OWNER'] == 'DEVELOPMENT CORPORATION; PHILADELPHIA HOUSING', 'OWNER'] = 'PHILADELPHIA HOUSING DEVELOPMENT CORPORATION'
full_gdf.loc[full_gdf['OWNER'] == 'PHILA HOUSING DEV CORP', 'OWNER'] = 'PHILADELPHIA HOUSING DEVELOPMENT CORPORATION'

# PennDOT
full_gdf.loc[full_gdf['OWNER'] == 'DEPARTMENT OF TRANSPORTAT; COMMONWEALTH OF PENNSYLVA', 'OWNER'] = 'PENNDOT'

# city of Philadelphia
full_gdf.loc[full_gdf['OWNER'] == 'CITY OF PHILADELPHIA', 'OWNER'] = 'CITY OF PHILA'

# create a new column called 'public_owner' that is True if the OWNER column is in the public_owners list
full_gdf['public_owner'] = full_gdf['OWNER'].isin(public_owners)

# drop the OWNER1 and OWNER2 columns
full_gdf = full_gdf.drop(['OWNER1', 'OWNER2'], axis=1)

In [67]:
# randomly sample ten rows from the full_gdf dataframe
full_gdf.sample(10)

Unnamed: 0,geometry,ADDRESS,BLDG_DESC,OPA_ID,COUNCILDISTRICT,ZIPCODE,type,OWNER,public_owner
1405,"POLYGON ((2696199.775 250636.972, 2696191.886 ...",1246 W HUNTINGDON ST,VAC LAND RES < ACRE,371420601,5,19133,lot,CITY OF PHILADELPHIA DEPARTMENT OF PUBLIC PROP...,True
589,"POLYGON ((2705088.155 256253.908, 2705045.704 ...",400-26 E ERIE AVE,VAC LAND IND < ACRE,885110980,7,19134,lot,PA SOCIETY 4 PREVENTION OF CRUELTY TO ANIMALS,False
293,"POLYGON ((2697726.549 251851.531, 2697725.669 ...",2825 GERMANTOWN AVE,VAC LAND COMM. < ACRE,885119004,5,19133,lot,2821 GERMANTOWN AVE LLC,False
1580,"POLYGON ((2702261.763 247683.677, 2702183.920 ...",2010 E DAUPHIN ST,ROW 3 STY MASONRY,313063300,7,19125,building,DAVID GAIL EILEEN,False
755,"POLYGON ((2693871.829 249003.864, 2693869.721 ...",2230 N 17TH ST,ROW CONV/APT 3STY MASONRY,161162901,5,19132,building,PHILADELPHIA HOUSING AUTHORITY,True
746,"POLYGON ((2686835.648 229936.811, 2686828.839 ...",2618 EARP ST,VAC LAND RES < ACRE,362106105,2,19146,lot,REDEVELOPMENT AUTHORITY OF PHILADELPHIA,True
1858,"POLYGON ((2686345.933 272776.392, 2686336.694 ...",10 SLOCUM ST,ROW 2 STY MASONRY,222042000,8,19119,building,GREEN MARIAM,False
868,"POLYGON ((2693986.280 271462.709, 2693950.985 ...",5508 W GODFREY AVE,ROW 2 STY MASONRY,122310400,8,19138,building,JOHNSON RONIQUE,False
1150,"POLYGON ((2694605.615 270968.333, 2694605.522 ...",6121 N NORWOOD ST,VAC LAND RES < ACRE,172472915,8,19138,lot,CITY OF PHILADELPHIA DEPARTMENT OF PUBLIC PROP...,True
1636,"POLYGON ((2699242.509 248689.927, 2699240.385 ...",2303 N REESE ST,VAC LAND RES < ACRE,191323301,7,19133,lot,PHILADELPHIA LAND BANK,True


### 3. PHS Community Landcare Parcels

Now we can import the PHS Community LandCare parcels and spatially join them to our full_gdf, which contains all of the vacant parcels in the city (both lots and buildings).

In [68]:
# Define the URL for the Vacant_Indicators_phs_landcare feature service
phs_landcare_url = 'https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/PHS_CommunityLandcare/FeatureServer/0/query'

# Define the parameters for the Vacant_Indicators_phs_landcare API request
phs_landcare_params = {
    'where': '1=1',
    'outFields': '*',
    'returnGeometry': 'true',
    'f': 'json'
}

# Make the Vacant_Indicators_phs_landcare API request
phs_landcare_response = requests.get(phs_landcare_url, params=phs_landcare_params)

# Check if the Vacant_Indicators_phs_landcare request was successful
if phs_landcare_response.status_code == 200:
    # Convert the Vacant_Indicators_phs_landcare JSON data to a geopandas geodataframe
    phs_landcare_data = phs_landcare_response.json()

    # convert the JSON data to a pandas dataframe
    phs_landcare_df = pd.DataFrame(phs_landcare_data['features'])

    # separate the attributes column into one column per attribute
    phs_landcare_df = pd.concat([phs_landcare_df.drop(['attributes'], axis=1), phs_landcare_df['attributes'].apply(pd.Series)], axis=1)

    # make the `geometry` column a shapely geometry object
    phs_landcare_df['geometry'] = phs_landcare_df['geometry'].apply(lambda x: Polygon(x['rings'][0]))

    # convert the pandas dataframe to a geopandas geodataframe
    phs_landcare_gdf = gpd.GeoDataFrame(phs_landcare_df, geometry='geometry', crs='EPSG:2272')

else:
    print('PHS_CommunityLandcare Request failed with status code:', phs_landcare_response.status_code)


phs_columns = ['geometry', 'ADDRESS', 'COMM_PARTN']

phs_landcare_gdf = phs_landcare_gdf[phs_columns]

In [72]:
# using folium, map phs_landcare_gdf and full_gdf together

# create a folium map object
m = folium.Map(location=[39.9526, -75.1652], zoom_start=11)

# define the style function for the GeoJSON object
def full_style_function(feature):
    return {
        'fillColor': 'red',
        'color': 'None',
        'weight': 2,
        'fillOpacity': 0.7
    }

# add the full_gdf geodataframe to the map with a fill color of 'red'
folium.GeoJson(full_gdf, style_function=full_style_function).add_to(m)


# define the style function for the GeoJSON object
def phs_style_function(feature):
    return {
        'fillColor': 'blue',
        'color': 'None',
        'weight': 2,
        'fillOpacity': 0.7
    }


# add the phs_landcare_gdf geodataframe to the map with a fill color of 'blue'
folium.GeoJson(phs_landcare_gdf, style_function=phs_style_function).add_to(m)


# display the map
m

In [77]:
# spatially join phs_landcare_gdf to full_gdf
joined_gdf = gpd.sjoin(full_gdf, phs_landcare_gdf, how='left', op='intersects')

# drop the index_right column and the ADDRESS_right column
joined_gdf = joined_gdf.drop(['index_right', 'ADDRESS_right'], axis=1)

joined_gdf['COMM_PARTN'] = joined_gdf['COMM_PARTN'].fillna('None')

  if await self.run_code(code, result, async_=asy):


In [81]:
joined_gdf.sample(10)

Unnamed: 0,geometry,ADDRESS_left,BLDG_DESC,OPA_ID,COUNCILDISTRICT,ZIPCODE,type,OWNER,public_owner,COMM_PARTN
1830,"POLYGON ((2668518.088 246084.092, 2668504.529 ...",6410 MORRIS PARK RD,ROW B/GAR 2STY MAS.+OTHER,344096400,4,19151,building,HOYLES FRANCIS X JR,False,
562,"POLYGON ((2711843.246 255580.422, 2711768.437 ...",1901-05 BUCKIUS ST,STR/OFF 2 STY MASONRY,882937070,1,19124,building,GAG 2006 LLC,False,
162,"POLYGON ((2712706.280 255449.374, 2712705.118 ...",3931 ARCADIA ST,ROW 2 STY MASONRY,453166000,1,19124,building,SIBRI INVESTMENTS LLC,False,
1314,"POLYGON ((2699355.870 262795.646, 2699286.482 ...",4715-65 N 10TH ST,VAC LAND COMM. ACRE+,785590020,8,19141,lot,REDEVELOPMENT AUTHORITY OF PHILADELPHIA,True,
1048,"POLYGON ((2693272.465 267544.235, 2693268.703 ...",536 E PENN ST,ROW 3 STY MASONRY,121142100,8,19144,building,WEEMS RAYMOND THOMAS,False,
1421,"POLYGON ((2673671.738 245386.060, 2673654.651 ...",5528 W OXFORD ST,VAC LAND RES < ACRE,41276550,4,19131,lot,CITY OF PHILA,True,
683,"POLYGON ((2692770.877 271293.534, 2692756.479 ...",949 E STAFFORD ST,ROW B/GAR 2STY MASONRY,591049500,8,19138,building,ALLEN VIOLA E,False,
240,"POLYGON ((2678513.545 230684.101, 2678479.865 ...",1516 S WILTON ST,ROW 2 STY MASONRY,512056700,3,19143,building,CITY OF PHILA,True,
591,"POLYGON ((2695340.426 264032.980, 2695331.369 ...",1819 W ROCKLAND ST,ROW B/GAR 2STY MASONRY,171025300,8,19141,building,FERGUSON SHALINA S,False,
1684,"POLYGON ((2675434.570 245472.527, 2675428.659 ...",1664 N WILTON ST,ROW 2 STY MASONRY,521311400,4,19131,building,DELIVERANCE CHURCH; COMMUNITY BIBLE,False,


Now we need to import two more datasets from the City's Carto database (SQL).

### 1. L&I Violations

In [4]:
import requests
import datetime

# Calculate one year ago from today's date
one_year_ago = (datetime.datetime.now() - datetime.timedelta(days=365)).strftime("%Y-%m-%d")

# Create the SQL query
li_sql_query = "SELECT service_request_id, subject, status, service_name, service_code, lat, lon FROM public_cases_fc WHERE requested_datetime >= '{}'".format(one_year_ago)

# Make the GET request
li_response = requests.get("https://phl.carto.com/api/v2/sql", params={"q": li_sql_query})

# Get the data
li_data = li_response.json()["rows"]

# convert li_data to a pandas dataframe
li_df = pd.DataFrame(li_data)

# Convert the data to a geopandas dataframe
li_gdf = gpd.GeoDataFrame(li_df, geometry=gpd.points_from_xy(li_df.lon, li_df.lat), crs='EPSG:2272')

# drop the lat and lon columns
li_gdf.drop(['lat', 'lon'], axis=1, inplace=True)


In [118]:


# Define the URL for the Philadelphia Neighborhoods feature service
hoods_url = 'http://services.arcgis.com/rkitYk91zieQFZov/arcgis/rest/services/Philadelphia_Neighborhoods/FeatureServer/0/query'

# Define the parameters for the Vacant_Indicators_hoods API request
hoods_params = {
    'outFields': '*',
    'returnGeometry': 'false',
    'f': 'json'
}

# Make the Phila Neighborhoods API request
hoods_response = requests.get(hoods_url, params=hoods_params)

# Check if the Phila hoods request was successful
if hoods_response.status_code == 200:
    # Convert the Vacant_Indicators_hoods JSON data to a geopandas geodataframe; convert to CRS 2272
    hoods_data = hoods_response.json()

else:
    print('Philadelphia Neighborhoods Request failed with status code:', hoods_response.status_code)

In [116]:
# print the first 5 rows of the hoods_data
hoods_data

{'error': {'code': 504,
  'message': 'Your request has timed out.',
  'details': []}}

Remaining to pull in:
2. Neighborhoods
3. RCOs

# Gun Crimes

For our gun crime kernel density estimate, we have two steps:

### 1. Import gun crime data from the City's Carto database (SQL):

In [8]:
# Modify the SQL query
guncrimes_sql_query = "SELECT text_general_code, dispatch_date, point_x, point_y FROM incidents_part1_part2 WHERE dispatch_date_time >= '{}' AND text_general_code".format(one_year_ago)

# Make the GET request
guncrimes_response = requests.get("https://phl.carto.com/api/v2/sql", params={"q": guncrimes_sql_query})

# Get the data
guncrimes_data = guncrimes_response.json()["rows"]

# convert guncrimes_data to a pandas dataframe
guncrimes_df = pd.DataFrame(guncrimes_data)



In [9]:
guncrimes_df.head()

Unnamed: 0,text_general_code,dispatch_date,point_x,point_y
0,Thefts,2022-08-22,-75.247645,39.886841
1,Thefts,2022-09-10,-75.046249,40.03362
2,Thefts,2022-09-09,-75.046249,40.03362
3,Thefts,2022-09-09,-75.046249,40.03362
4,Thefts,2022-09-07,-75.046249,40.03362


In [None]:

# Convert the data to a geopandas dataframe
guncrimes_gdf = gpd.GeoDataFrame(guncrimes_df, geometry=gpd.points_from_xy(guncrimes_df.point_x, guncrimes_df.point_y), crs='EPSG:2272')

# drop the lat and lon columns
guncrimes_gdf.drop(['point_x', 'point_y'], axis=1, inplace=True)

### 2. Create a kernel density estimate from the gun crime data:

In [23]:
import sklearn
from sklearn.neighbors import KernelDensity
import numpy as np
import matplotlib.pyplot as plt

# Get X and Y coordinates of well points
x_sk = guncrime_gdf["geometry"].x
y_sk = guncrime_gdf["geometry"].y

# Get minimum and maximum coordinate values of well points
min_x_sk, min_y_sk, max_x_sk, max_y_sk = guncrime_gdf.total_bounds

# Create a cell mesh grid
# Horizontal and vertical cell counts should be the same
XX_sk, YY_sk = np.mgrid[min_x_sk:max_x_sk:100j, min_y_sk:max_y_sk:100j]

# Create 2-D array of the coordinates (paired) of each cell in the mesh grid
positions_sk = np.vstack([XX_sk.ravel(), YY_sk.ravel()]).T

# Create 2-D array of the coordinate values of the well points
Xtrain_sk = np.vstack([x_sk, y_sk]).T

# Get kernel density estimator (can change parameters as desired)
kde_sk = KernelDensity(bandwidth = 5280, metric = 'euclidean', kernel = 'gaussian', algorithm = 'auto')

# Fit kernel density estimator to wells coordinates
kde_sk.fit(Xtrain_sk)

# Evaluate the estimator on coordinate pairs
Z_sk = np.exp(kde_sk.score_samples(positions_sk))

# Reshape the data to fit mesh grid
Z_sk = Z_sk.reshape(XX_sk.shape)

# Plot data
#fig, ax = plt.subplots(1, 1, figsize = (10, 10))
#ax.imshow(np.rot90(Z_sk), cmap = "RdPu", extent = [min_x_sk, max_x_sk, min_y_sk, max_y_sk])
#ax.plot(x_sk, y_sk, 'k.', markersize = 2, alpha = 0.1)
#plt.show()

import rasterio

def export_kde_raster(Z, XX, YY, min_x, max_x, min_y, max_y, proj, filename):
    '''Export and save a kernel density raster.'''

    # Flip array vertically and rotate 270 degrees
    Z_export = np.rot90(np.flip(Z, 0), 3)

    # Get resolution
    xres = (max_x - min_x) / len(XX)
    yres = (max_y - min_y) / len(YY)

    # Set transform
    transform = rasterio.Affine.translation(min_x - xres / 2, min_y - yres / 2) * rasterio.Affine.scale(xres, yres)

    # Export array as raster
    with rasterio.open(
            filename,
            mode = "w",
            driver = "GTiff",
            height = Z_export.shape[0],
            width = Z_export.shape[1],
            count = 1,
            dtype = Z_export.dtype,
            crs = proj,
            transform = transform,
    ) as new_dataset:
            new_dataset.write(Z_export, 1)

# Export raster
export_kde_raster(Z = Z_sk, XX = XX_sk, YY = YY_sk,
                  min_x = min_x_sk, max_x = max_x_sk, min_y = min_y_sk, max_y = max_y_sk,
                  proj = 2272, filename = "C:/Users/Nissim/Desktop/Vacant Lots Project/guncrime_kde_rast.tif")

from matplotlib import pyplot

kde_rast = rasterio.open("C:/Users/Nissim/Desktop/Vacant Lots Project/guncrime_kde_rast.tif")

from rasterio.plot import show


Reclassify data into percentiles.

In [None]:
import rasterstats

vac_lots_gdf3['rast_val'] = rasterstats.point_query(vac_lots_gdf3, "C:/Users/Nissim/Desktop/Vacant Lots Project/guncrime_kde_rast.tif")

import mapclassify

# Define the number of classes
n_classes = 10

# Create a quantiles classifier
classifier = mapclassify.Quantiles.make(k = n_classes)

# Classify the data
vac_lots_gdf3['rast_val'] = vac_lots_gdf3[['rast_val']].apply(classifier)

# scale from 1-5 instead of 0-4
vac_lots_gdf3['rast_val'] = vac_lots_gdf3['rast_val'].replace([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['90th Percentile', '80th Percentile', '70th Percentile', '60th Percentile', '50th Percentile', '40th Percentile', '30th Percentile', '20th Percentile', '10th Percentile', '0th Percentile'])