In [61]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import numpy as np

In [79]:
# Load the EPA TRI toxic-waste facility table (2019 file) and keep only rows whose city is CHICAGO.
toxicDf = pd.read_csv("../localData/epa_tri_toxic_waste_2019.csv")
toxicDf = toxicDf[toxicDf["6. CITY"].eq("CHICAGO")]
# Turn the longitude/latitude columns into point geometries, tagged as EPSG:4326.
toxicDf = gpd.GeoDataFrame(
    toxicDf,
    geometry=gpd.points_from_xy(x=toxicDf["13. LONGITUDE"], y=toxicDf["12. LATITUDE"]),
    crs="EPSG:4326",
)
# Reproject to EPSG:2953 so that later distance calculations run on planar (cartesian) coordinates.
# NOTE(review): EPSG:2953 looks like a New Brunswick (Canada) projection rather than one covering
# Chicago -- confirm the intended CRS; distances may be distorted this far outside its area of use.
toxicDf = toxicDf.to_crs("EPSG:2953")

In [80]:
# Read the census table; its "geometry" column is parsed below as WKT text.
# The directory is spelled "localData", matching the other read and the CSV export in this
# notebook, so the cell also works on case-sensitive filesystems.
chicagoDf = pd.read_csv("../localData/censusVars.csv")
# Parse the WKT strings into shapely geometries and declare their coordinates as EPSG:4326.
chicagoDf["geometry"] = chicagoDf["geometry"].apply(wkt.loads)
chicagoDf = gpd.GeoDataFrame(chicagoDf, crs="epsg:4326")
# Keep only the GEOID and geometry columns, reprojected to planar (cartesian) EPSG:2953 coordinates.
# NOTE(review): EPSG:2953 looks like a New Brunswick (Canada) projection rather than one covering
# Chicago -- confirm the intended CRS; distances may be distorted this far outside its area of use.
chicagoDf = chicagoDf[["GEOID", "geometry"]].to_crs("EPSG:2953")

In [73]:
def get_sum_of_distances(polygon_data: pd.DataFrame, points_data: pd.DataFrame):
    """Add a "sum_of_distances" column: each polygon's total distance to every point.

    Summing over all facilities captures the fact that some tracts are near more than
    one facility; the column is meant to help build "exposure" variables later.

    Args:
        polygon_data: Frame whose "geometry" column supports ``.distance(geom)``
            (e.g. a GeoDataFrame). It is modified in place.
        points_data: Frame whose "geometry" column holds the facility geometries.

    Returns:
        The same ``polygon_data`` object with "sum_of_distances" added or overwritten.
        When ``points_data`` has no rows the column is simply 0.
    """
    shapes = polygon_data["geometry"]
    facilities = points_data["geometry"].tolist()
    # Start from 0 and add one distance vector per facility, in facility order.
    polygon_data["sum_of_distances"] = sum(
        (shapes.distance(facility) for facility in facilities), 0
    )
    return polygon_data

In [88]:
def get_exposure_vars(polygon_data: pd.DataFrame, points_data: pd.DataFrame):
    """Add distance-based exposure columns to ``polygon_data``.

    For every facility in ``points_data`` the distance ``d`` to each polygon is computed
    and the facility's on-site release total is weighted by ``1 / d**2``. A polygon at
    distance 0 receives the release total unweighted, which also avoids dividing by zero.

    Two columns are written (overwriting any existing ones of the same name):
        * "total_exposure": sum over facilities of the distance-weighted release.
        * "sum_of_distances": sum over facilities of the raw distances.

    Args:
        polygon_data: Frame whose "geometry" column supports ``.distance(geom)``
            (e.g. a GeoDataFrame). It is modified in place.
        points_data: Frame with a "geometry" column of facility geometries and a
            "62. ON-SITE RELEASE TOTAL" column of release amounts.

    Returns:
        The same ``polygon_data`` object with the two columns added. When
        ``points_data`` has no rows both columns are simply 0.
    """
    geometries = polygon_data["geometry"]
    facilities = points_data["geometry"].tolist()
    releases = points_data["62. ON-SITE RELEASE TOTAL"].tolist()

    total_exposure = 0
    sum_of_distances = 0
    for facility, release in zip(facilities, releases):
        distances = geometries.distance(facility)
        # Replace zero distances with 1 so their weight is exactly 1 (exposure == release);
        # this is the vectorised form of the "x != 0" special case and never divides by zero.
        safe_distances = distances.where(distances != 0, 1.0)
        total_exposure = total_exposure + (1.0 / safe_distances**2) * release
        sum_of_distances = sum_of_distances + distances

    polygon_data["total_exposure"] = total_exposure
    polygon_data["sum_of_distances"] = sum_of_distances
    return polygon_data

In [89]:
# TODO (not yet implemented): use "sum_of_distances" to derive "total_exposure_distance", the inverse of the square of each tract's summed distance to all facilities, i.e. 1 / sum_of_distances**2


In [90]:
# Attach "total_exposure" and "sum_of_distances" to every census polygon, then display the frame.
chicagoDf = get_exposure_vars(polygon_data=chicagoDf, points_data=toxicDf)
chicagoDf

Unnamed: 0,GEOID,geometry,total_exposure,sum_of_distances
0,17031171100,"POLYGON ((741882.753 7224318.414, 741883.395 7...",0.003982,5.219851e+06
1,17031191301,"POLYGON ((741319.835 7221913.370, 741339.883 7...",0.005711,4.730344e+06
2,17031191302,"POLYGON ((742118.869 7221717.633, 742138.890 7...",60.376298,4.597057e+06
3,17031190701,"POLYGON ((743109.889 7222326.315, 743130.069 7...",0.005404,4.755254e+06
4,17031252102,"POLYGON ((741734.560 7216713.266, 741762.146 7...",0.025034,3.708454e+06
...,...,...,...,...
659,17031838800,"POLYGON ((749387.431 7191713.746, 749406.178 7...",345965.230751,4.892123e+06
660,17031460700,"POLYGON ((754725.743 7196532.988, 754734.058 7...",0.009427,5.067049e+06
661,17031460200,"POLYGON ((755168.063 7197272.318, 755178.665 7...",0.008968,5.002666e+06
662,17031461000,"POLYGON ((754512.124 7195727.919, 754525.259 7...",0.011263,5.154898e+06


In [91]:
# Write the per-GEOID exposure variables (without the geometry column) to CSV.
# NOTE(review): DataFrame.to_csv returns None when given a file path, so `output` is always
# None here; the name is kept only so any later reference to it keeps working.
output = chicagoDf.loc[:, ["GEOID", "sum_of_distances", "total_exposure"]].to_csv(
    "../localData/exposureVars.csv"
)