# Census Data

In [406]:
import cenpy
import pandas as pd
import geopandas as gpd
acs = cenpy.remote.APIConnection("ACSDT5Y2015")

In [407]:
censusRace = acs.query(
    cols=["NAME","B02001_001E","B02001_002E"],
    geo_unit="block group:*",
    geo_filter={
                "state" : "42", 
                "county" : "101"
               },
).rename(columns={"B02001_001E": "totalPop","B02001_002E": "whitePop"}, errors="raise")

In [408]:
acs.set_mapservice("tigerWMS_ACS2015")
where_clause = "STATE = 42 AND COUNTY = 101"
censusTract = acs.mapservice.layers[10].query(where=where_clause)
censusTract = censusTract[["STATE","COUNTY","TRACT",'BLKGRP',"geometry"]]
censusTract = censusTract.to_crs(4326)

  return _prepare_from_string(" ".join(pjargs))


In [409]:
censusTract.shape

(1336, 5)

In [410]:
censusRace2 = censusRace
censusRace2['whitePop'] = pd.to_numeric(censusRace2['whitePop'], errors='ignore')
censusRace2['totalPop'] = pd.to_numeric(censusRace2['totalPop'], errors='ignore')
censusRace2['whitePercep'] = censusRace2['whitePop'] / censusRace2['totalPop'] 
censusRace2['nonwhitePercep'] = 1- censusRace2['whitePercep']
censusRace2.head()

Unnamed: 0,NAME,totalPop,whitePop,state,county,tract,block group,whitePercep,nonwhitePercep
0,"Block Group 1, Census Tract 81.02, Philadelphi...",858,0,42,101,8102,1,0.0,1.0
1,"Block Group 5, Census Tract 81.02, Philadelphi...",894,11,42,101,8102,5,0.012304,0.987696
2,"Block Group 3, Census Tract 81.02, Philadelphi...",543,0,42,101,8102,3,0.0,1.0
3,"Block Group 2, Census Tract 248, Philadelphia ...",1109,26,42,101,24800,2,0.023445,0.976555
4,"Block Group 1, Census Tract 2, Philadelphia Co...",2331,911,42,101,200,1,0.390819,0.609181


In [411]:
censusRace2.shape

(1336, 9)

In [412]:
censusRaceWithGeometry = censusTract.merge(
    censusRace2,
    left_on=["STATE", "COUNTY", "TRACT",'BLKGRP'],
    right_on=["state", "county", "tract",'block group'])
censusRaceWithGeometry = censusRaceWithGeometry.drop(columns=["STATE","COUNTY","TRACT","BLKGRP"])

In [413]:
censusRaceWithGeometry.head()

Unnamed: 0,geometry,NAME,totalPop,whitePop,state,county,tract,block group,whitePercep,nonwhitePercep
0,"POLYGON ((-75.20223 39.96164, -75.20142 39.961...","Block Group 3, Census Tract 91, Philadelphia C...",1529,223,42,101,9100,3,0.145847,0.854153
1,"POLYGON ((-75.24043 39.96354, -75.24011 39.965...","Block Group 3, Census Tract 95, Philadelphia C...",561,0,42,101,9500,3,0.0,1.0
2,"POLYGON ((-75.14683 40.02960, -75.14671 40.030...","Block Group 1, Census Tract 282, Philadelphia ...",1118,21,42,101,28200,1,0.018784,0.981216
3,"POLYGON ((-75.06981 40.07086, -75.06923 40.071...","Block Group 2, Census Tract 337.02, Philadelph...",1598,1010,42,101,33702,2,0.63204,0.36796
4,"POLYGON ((-75.16508 39.91337, -75.16486 39.914...","Block Group 2, Census Tract 372, Philadelphia ...",1454,1196,42,101,37200,2,0.822558,0.177442


In [414]:
censusRaceWithGeometry.to_file('censusRaceWithGeometry.geojson', driver="GeoJSON") 

# Crime

In [415]:
import carto2gpd
import matplotlib
import numpy as np
from sklearn.neighbors import NearestNeighbors
import altair as alt
from vega_datasets import data
from altair import datum

In [416]:
url = "https://phl.carto.com/api/v2/sql"
table_name = "incidents_part1_part2"
where = "dispatch_date >= '2015-01-01' AND dispatch_date < '2016-01-01'"
crime = carto2gpd.get(url, table_name, where=where)

In [417]:
crime = crime.drop(columns=["objectid","dc_dist","psa","dc_key","ucr_general","point_x","point_y"])
crime["count"]=1
crime = crime.rename(columns={"text_general_code": "crimeType"}, errors="raise")

In [418]:
total = crime.groupby("crimeType").sum("count").sort_values(by="count",ascending=False)
total = total.reset_index()

In [419]:
selectedCrimeType = total[0:20].crimeType
selectedCrimeType
selectCrime = crime[crime['crimeType'].isin(selectedCrimeType)]

In [420]:
def get_xy_from_geometry(df): 
    x = df.geometry.centroid.x
    y = df.geometry.centroid.y
    return np.column_stack((x, y))

In [421]:
pointsWithSeg = gpd.read_file("pointsWithSeg.geojson")

pointsWithSegXY = get_xy_from_geometry(pointsWithSeg.to_crs(epsg=3857))
crimeXY = get_xy_from_geometry(selectCrime.to_crs(epsg=3857))

nbrs = NearestNeighbors(n_neighbors=1)
nbrs.fit(pointsWithSegXY)

Dists, Indices = nbrs.kneighbors(crimeXY)

In [422]:
selectCrime["nearestSVPoint"] = Indices
selectCrime2 = selectCrime[['geometry','cartodb_id','crimeType','count','nearestSVPoint']].merge(pointsWithSeg.drop(columns=["geometry","fid","SEG_ID"]),how="left",left_on="nearestSVPoint",right_index=True)
selectCrime2 = selectCrime2.drop(columns="nearestSVPoint").rename(columns={"pointId": "nearestSVPoint"}, errors="raise")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [423]:
# selectCrime2 = round(selectCrime2,3)
selectCrime2["crimeTypeValue"] = selectCrime2.crimeType.rank(method="first",pct=True)
selectCrime2 = selectCrime2.rename(columns={"crimeTypeValue": "All"}, errors="raise")

selectCrime2.to_csv('selectCrime2.csv') 