In [25]:
import pandas as pd
import geopandas as gpd
from shapely import wkt

In [26]:
toxicDf = pd.read_csv("../localData/epa_tri_toxic_waste_2019.csv")
toxicDf = toxicDf.loc[toxicDf["6. CITY"] == "CHICAGO"]
toxicDf = gpd.GeoDataFrame(toxicDf, geometry=gpd.points_from_xy(toxicDf["13. LONGITUDE"], toxicDf["12. LATITUDE"])).set_crs("EPSG:4326")

In [27]:
chicagoDf = pd.read_csv("../localdata/censusVars.csv")
chicagoDf['geometry'] = chicagoDf['geometry'].apply(wkt.loads)
chicagoDf = gpd.GeoDataFrame(chicagoDf, crs='epsg:4326')

In [28]:
#cast to projected CRS for distance nearest operations
chicagoNearest = chicagoDf.to_crs("EPSG:2953")
toxicDfNearest = toxicDf.to_crs("EPSG:2953")
#Distance units will be in meters
chicagoNearest = chicagoNearest.sjoin_nearest(toxicDfNearest, how = "left", distance_col = "distance_nearest_facility")
#sort by emissions so we can look at emissions from nearest plant
chicagoNearest = chicagoNearest.sort_values(by = ["62. ON-SITE RELEASE TOTAL"], ascending = False)
#drop duplicates for every tract which is equidistant from more than one facility (default creates more rows)
chicagoNearest = chicagoNearest.drop_duplicates(subset = "GEOID", keep = "first")
chicagoNearest = chicagoNearest[["GEOID", "distance_nearest_facility"]]

In [29]:
polygons = chicagoDf
points = toxicDf

In [30]:
# Spatial Joins
facilities_in_tract = gpd.sjoin(toxicDf, chicagoDf, how="left", op='intersects')

# Add a field with 1 as a constant value
facilities_in_tract['number_of_facilities_in_tract']=1

# Group according to the column by which you want to aggregate data
facilities_in_tract = facilities_in_tract.groupby('GEOID').agg({'number_of_facilities_in_tract':'sum'}).reset_index()



  if await self.run_code(code, result, async_=asy):


In [31]:
chicagoFacilities = chicagoDf.merge(facilities_in_tract, on="GEOID", how="left").fillna(value=0)
chicagoFacilities

Unnamed: 0.1,Unnamed: 0,GEOID,geometry,B01003_001E,B02001_002E,B02001_003E,B02008_001E,B02009_001E,B19013_001E,B25105_001E,...,black_some_population,black_alone_population,median_household_income_past_12_months,median_housing_cost_past_12_months,total_population,proportion_some_black,proportion_alone_black,proportion_some_white,proportion_alone_white,number_of_facilities_in_tract
0,0,17031171100,"POLYGON ((-87.78635 41.94548, -87.78635 41.945...",4013.0,3110.0,34.0,3117.0,34.0,69330.0,1447.0,...,34.0,34.0,69330.0,1447.0,4013.0,0.008472,0.008472,0.776726,0.774981,0.0
1,1,17031191301,"POLYGON ((-87.78544 41.92368, -87.78520 41.923...",5525.0,2640.0,224.0,2989.0,476.0,50046.0,1283.0,...,476.0,224.0,50046.0,1283.0,5525.0,0.086154,0.040543,0.540995,0.477828,0.0
2,2,17031191302,"POLYGON ((-87.77572 41.92383, -87.77547 41.923...",5046.0,2823.0,176.0,2848.0,176.0,45962.0,1186.0,...,176.0,176.0,45962.0,1186.0,5046.0,0.034879,0.034879,0.564407,0.559453,2.0
3,3,17031190701,"POLYGON ((-87.76625 41.93128, -87.76600 41.931...",2735.0,1700.0,29.0,1772.0,77.0,42270.0,1117.0,...,77.0,29.0,42270.0,1117.0,2735.0,0.028154,0.010603,0.647898,0.621572,0.0
4,4,17031252102,"POLYGON ((-87.76486 41.88025, -87.76453 41.880...",6864.0,162.0,6264.0,162.0,6264.0,38500.0,1021.0,...,6264.0,6264.0,38500.0,1021.0,6864.0,0.912587,0.912587,0.023601,0.023601,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659,659,17031838800,"POLYGON ((-87.60195 41.68414, -87.60194 41.684...",3300.0,1389.0,1759.0,1398.0,1768.0,27160.0,597.0,...,1768.0,1759.0,27160.0,597.0,3300.0,0.535758,0.533030,0.423636,0.420909,36.0
660,660,17031460700,"POLYGON ((-87.55559 41.73733, -87.55548 41.737...",2651.0,818.0,1508.0,820.0,1604.0,24517.0,671.0,...,1604.0,1508.0,24517.0,671.0,2651.0,0.605055,0.568842,0.309317,0.308563,0.0
661,661,17031460200,"POLYGON ((-87.55276 41.74464, -87.55263 41.744...",2468.0,1255.0,979.0,1312.0,1000.0,28363.0,761.0,...,1000.0,979.0,28363.0,761.0,2468.0,0.405186,0.396677,0.531605,0.508509,0.0
662,662,17031461000,"POLYGON ((-87.55560 41.72997, -87.55545 41.730...",1288.0,304.0,803.0,304.0,816.0,15956.0,475.0,...,816.0,803.0,15956.0,475.0,1288.0,0.633540,0.623447,0.236025,0.236025,0.0


In [32]:
chicagoDf = chicagoFacilities.merge(chicagoNearest, on="GEOID", how="left")
chicagoDf

Unnamed: 0.1,Unnamed: 0,GEOID,geometry,B01003_001E,B02001_002E,B02001_003E,B02008_001E,B02009_001E,B19013_001E,B25105_001E,...,black_alone_population,median_household_income_past_12_months,median_housing_cost_past_12_months,total_population,proportion_some_black,proportion_alone_black,proportion_some_white,proportion_alone_white,number_of_facilities_in_tract,distance_nearest_facility
0,0,17031171100,"POLYGON ((-87.78635 41.94548, -87.78635 41.945...",4013.0,3110.0,34.0,3117.0,34.0,69330.0,1447.0,...,34.0,69330.0,1447.0,4013.0,0.008472,0.008472,0.776726,0.774981,0.0,2477.360557
1,1,17031191301,"POLYGON ((-87.78544 41.92368, -87.78520 41.923...",5525.0,2640.0,224.0,2989.0,476.0,50046.0,1283.0,...,224.0,50046.0,1283.0,5525.0,0.086154,0.040543,0.540995,0.477828,0.0,597.459824
2,2,17031191302,"POLYGON ((-87.77572 41.92383, -87.77547 41.923...",5046.0,2823.0,176.0,2848.0,176.0,45962.0,1186.0,...,176.0,45962.0,1186.0,5046.0,0.034879,0.034879,0.564407,0.559453,2.0,0.000000
3,3,17031190701,"POLYGON ((-87.76625 41.93128, -87.76600 41.931...",2735.0,1700.0,29.0,1772.0,77.0,42270.0,1117.0,...,29.0,42270.0,1117.0,2735.0,0.028154,0.010603,0.647898,0.621572,0.0,1210.359102
4,4,17031252102,"POLYGON ((-87.76486 41.88025, -87.76453 41.880...",6864.0,162.0,6264.0,162.0,6264.0,38500.0,1021.0,...,6264.0,38500.0,1021.0,6864.0,0.912587,0.912587,0.023601,0.023601,0.0,1173.119840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659,659,17031838800,"POLYGON ((-87.60195 41.68414, -87.60194 41.684...",3300.0,1389.0,1759.0,1398.0,1768.0,27160.0,597.0,...,1759.0,27160.0,597.0,3300.0,0.535758,0.533030,0.423636,0.420909,36.0,0.000000
660,660,17031460700,"POLYGON ((-87.55559 41.73733, -87.55548 41.737...",2651.0,818.0,1508.0,820.0,1604.0,24517.0,671.0,...,1508.0,24517.0,671.0,2651.0,0.605055,0.568842,0.309317,0.308563,0.0,3032.984216
661,661,17031460200,"POLYGON ((-87.55276 41.74464, -87.55263 41.744...",2468.0,1255.0,979.0,1312.0,1000.0,28363.0,761.0,...,979.0,28363.0,761.0,2468.0,0.405186,0.396677,0.531605,0.508509,0.0,3487.781681
662,662,17031461000,"POLYGON ((-87.55560 41.72997, -87.55545 41.730...",1288.0,304.0,803.0,304.0,816.0,15956.0,475.0,...,803.0,15956.0,475.0,1288.0,0.633540,0.623447,0.236025,0.236025,0.0,2410.546519


In [33]:
chicagoDf.to_csv("../localData/toxicAndDistanceAndCensusVars.csv")

In [34]:
distanceVarsOnly = chicagoDf[["GEOID", "number_of_facilities_in_tract", "distance_nearest_facility"]]

In [35]:
distanceVarsOnly.to_csv("../localData/distanceVarsOnly.csv")