## InMAP source-receptor matrix (ISRM)

In [78]:
import time
import numpy as np
import pandas as pd
import geopandas as gpd
import s3fs
import zarr
from shapely.geometry import Polygon

def rect(i, w, s, e, n):
    """Return the closed outline of grid cell `i` as ``(x, y)`` coordinate lists.

    `w`, `s`, `e` and `n` are indexable sequences holding the west, south,
    east and north edge of every cell.  The outline starts at the
    (west, south) corner, visits (east, south), (east, north) and
    (west, north), and repeats the first corner so the ring is closed.
    """
    west, east = w[i], e[i]
    south, north = s[i], n[i]
    xs = [west, east, east, west, west]
    ys = [south, south, north, north, south]
    return xs, ys

def poly(sr, num_cells):
    """Build one rectangular ``Polygon`` per grid cell.

    `sr` must expose the cell edges under the keys ``"W"``, ``"S"``, ``"E"``
    and ``"N"``; each is sliced out in full once and then indexed per cell.
    Returns a list of `num_cells` polygons, ordered by cell index.
    """
    west = sr["W"][:]
    south = sr["S"][:]
    east = sr["E"][:]
    north = sr["N"][:]

    def ring(cell):
        # Pair up the x/y lists from rect() into [x, y] vertices.
        xs, ys = rect(cell, west, south, east, north)
        return [[px, py] for px, py in zip(xs, ys)]

    return [Polygon(ring(cell)) for cell in range(num_cells)]

# Helper for run_sr: unit handling
def _emission_unit_factor(emis_units):
    """Return the factor that scales source-receptor results for `emis_units`.

    Raises:
        ValueError: if no factor is known for `emis_units`.
    """
    # NOTE(review): 28766.639 presumably converts short tons/year to ug/s
    # (907184.74e6 ug / 31,536,000 s) -- confirm against the ISRM documentation.
    factors = {"tons/year": 28766.639}
    if emis_units not in factors:
        raise ValueError(
            "Unsupported emis_units %r; supported units: %s"
            % (emis_units, ", ".join(sorted(factors)))
        )
    return factors[emis_units]


# Define the run_sr function
def run_sr(emis, model, emis_units="tons/year",
           sr_path="/Users/yunhalee/Documents/LOCAETA/RCM/INMAP/source_receptor_model/isrm_v1.2.1.zarr"):
    """Run emissions through the ISRM source-receptor matrix.

    Each emission source is assigned to the grid cell it intersects, emissions
    are summed per cell, and the per-cell totals are multiplied with layer 0
    of the source-receptor arrays to obtain concentrations at every receptor.

    Args:
        emis: GeoDataFrame of emission sources with a CRS set and the numeric
            columns ``VOC``, ``NOx``, ``NH3``, ``SOx`` and ``PM2_5``.
        model: Accepted for interface compatibility; currently unused.
        emis_units: Units of the emission columns.  Only ``"tons/year"`` is
            supported.
        sr_path: Location of the ISRM zarr store.

    Returns:
        GeoDataFrame with one row per grid cell (in the grid CRS) and the
        columns ``SOA``, ``pNO3``, ``pNH4``, ``pSO4``, ``PrimPM25``,
        ``TotalPM25``, ``deathsK`` and ``deathsL``.

    Raises:
        ValueError: if `emis_units` is not supported.
    """
    start = time.time()

    # Validate the units before doing any expensive I/O.
    fact = _emission_unit_factor(emis_units)

    sr = zarr.open(sr_path, mode="r")

    # Number of sources/receptors is fixed at 52411
    num_cells = 52411

    print("num_cells", num_cells)

    # Build the geometry
    p = poly(sr, num_cells)
    print("Making polygons as geometry.")

    # Create a GeoDataFrame for the grid
    df = pd.DataFrame({'Location': range(num_cells)})
    gdf = gpd.GeoDataFrame(df, geometry=p)
    gdf.crs = "+proj=lcc +lat_1=33.000000 +lat_2=45.000000 +lat_0=40.000000 +lon_0=-97.000000 +x_0=0 +y_0=0 +a=6370997.000000 +b=6370997.000000 +to_meter=1"
    emis = emis.to_crs(gdf.crs)
    print("emis after crs", emis.head())
    print(gdf.head())

    # Tag every source with its row position so that sources matching more
    # than one cell (e.g. a point on a shared cell edge) can be found again
    # after the join.  to_crs() returned a new frame, so the caller's table
    # is not modified.
    emis = emis.assign(_src_row=np.arange(len(emis)))

    join_right_df = gdf.sjoin(emis, how="right")
    print("Finished joining the dataframes.")
    print(join_right_df.head())

    print("join_right_df lenght:", len(join_right_df))

    # Drop sources that fall outside the grid, then keep a single cell per
    # source so that no emission is counted twice.
    join_right_df = join_right_df.dropna(subset=['Location'])
    join_right_df = join_right_df.drop_duplicates(subset="_src_row", keep="first")

    # ppl: sorted unique source cells; cell_pos: for every joined row, the
    # position of its cell inside ppl.
    cells = join_right_df['Location'].to_numpy().astype(int)
    ppl, cell_pos = np.unique(cells, return_inverse=True)
    print("sources on grid:", len(cells), "distinct source cells:", len(ppl))

    # (zarr array, emission column, output column).  The emission values are
    # read from the joined rows themselves, so they always line up with the
    # cell each row was assigned to.
    species = (
        ("SOA", "VOC", "SOA"),
        ("pNO3", "NOx", "pNO3"),
        ("pNH4", "NH3", "pNH4"),
        ("pSO4", "SOx", "pSO4"),
        ("PrimaryPM25", "PM2_5", "PrimPM25"),
    )
    conc = {}
    for sr_name, emis_col, out_name in species:
        if ppl.size:
            # Only the rows of layer 0 that belong to cells with emissions.
            block = sr[sr_name].get_orthogonal_selection(([0], ppl, slice(None)))
            print(sr_name + " data is allocated.")
            per_cell = np.bincount(
                cell_pos,
                weights=join_right_df[emis_col].to_numpy(dtype=float),
                minlength=len(ppl),
            )
            # Emission-weighted sum of source rows -> value at every receptor.
            conc[out_name] = per_cell @ block[0]
        else:
            # No source landed on the grid: nothing to read, zero impact.
            conc[out_name] = np.zeros(num_cells)
    data = sum(conc.values())

    print("Accessing the data.")

    TotalPM25 = fact * data
    TotalPop = sr['TotalPop'][0:num_cells]
    MortalityRate = sr['MortalityRate'][0:num_cells]
    # Log-linear response: relative risk of 1.06 (deathsK) and 1.14 (deathsL)
    # per 10 units of TotalPM25.
    # NOTE(review): the constants 1.0465819687408728 and 1.025229357798165
    # look like population / mortality-rate adjustments -- confirm their source.
    deathsK = (np.exp(np.log(1.06)/10 * TotalPM25) - 1) * TotalPop * 1.0465819687408728 * MortalityRate / 100000 * 1.025229357798165
    deathsL = (np.exp(np.log(1.14)/10 * TotalPM25) - 1) * TotalPop * 1.0465819687408728 * MortalityRate / 100000 * 1.025229357798165

    columns = {name: fact * values for name, values in conc.items()}
    columns['TotalPM25'] = TotalPM25
    columns['deathsK'] = deathsK
    columns['deathsL'] = deathsL
    # Carry the grid CRS so the result can be reprojected and written with
    # its projection information.
    ret = gpd.GeoDataFrame(pd.DataFrame(columns), geometry=p[:num_cells], crs=gdf.crs)

    print("Finished (%.0f seconds)" % (time.time() - start))
    return ret


In [79]:
# Load the baseline point-source emission shapefile.
file = '/Users/yunhalee/Documents/LOCAETA/CS_emissions/LA_point_CSS.shp'
emis = gpd.read_file(file)

# Column totals of the five emission species, as a quick check of the input.
species_totals = emis[["VOC", "NOx", "NH3", "SOx", "PM2_5"]].sum()
print(species_totals)

# Row count of the emission table, kept as a notebook-level variable.
num_emis_rows = len(emis)
print(f"Number of rows in emission file: {num_emis_rows}")
print(emis.head())

# Slow step: about 20-25 minutes on the author's machine.  If it is killed
# for lack of memory, enabling virtual memory (swap) may help.
resultsISRM = run_sr(emis, model="isrm", emis_units="tons/year")

# Not the last expression of the cell, so this preview is not displayed.
resultsISRM.head()

# Total deaths over all grid cells for both response functions.
krewski_total = resultsISRM.deathsK.sum()
lepeule_total = resultsISRM.deathsL.sum()
deaths = pd.DataFrame({
    "Model": ["ISRM"],
    "Krewski Deaths": [krewski_total],
    "LePeule Deaths": [lepeule_total],
})
print(deaths)

# Save the gridded results next to the source-receptor model.
resultsISRM.to_file("/Users/yunhalee/Documents/LOCAETA/RCM/INMAP/source_receptor_model/outputs/ISRM_LA_CSS.shp")

VOC      32984.494797
NOx      97488.243049
NH3       7396.842171
SOx      77021.382504
PM2_5    12435.259688
dtype: float64
Number of rows in emission file: 2258
   Unnamed_ 0   FIPS       SCC       VOC     NOx       NH3       SOx  \
0         534  22033  10100501   0.00000    0.00    5.0160  0.000000   
1         535  22033  10100604  21.47000  348.55    0.0000  5.120000   
2         536  22033  20200203  12.84000  882.54  103.9321  5.510000   
3         537  22033  20300203  18.94000  361.80    0.0000  8.520000   
4         543  22109  10100601   0.99805   28.80    0.5246  0.108878   

       PM2_5   height    diam  ...  velocity   EIS_ID  latitude  longitude  \
0   0.000000  45.8724  3.3528  ...  12.92352  5160611  30.48952  -91.18693   
1  29.660000  45.8724  3.3528  ...  12.92352  5160611  30.48952  -91.18693   
2  20.790000  53.3400  4.8768  ...  25.81656  5160611  30.48905  -91.18769   
3  59.520000  53.3400  4.8768  ...  23.89632  5160611  30.48944  -91.18815   
4   1.379123  

In [81]:
# Read the reduced-emission scenario file
file = '/Users/yunhalee/Documents/LOCAETA/CS_emissions/LA_point_CSS_reduced_emis.shp'
emis2 = gpd.read_file(file)
gdf_crs = "+proj=lcc +lat_1=33.000000 +lat_2=45.000000 +lat_0=40.000000 +lon_0=-97.000000 +x_0=0 +y_0=0 +a=6370997.000000 +b=6370997.000000 +to_meter=1"
# Reproject the table that was just loaded.  This line used to reproject
# `emis` from the previous cell, which left emis2 untouched and failed with
# a NameError whenever that cell had not been run.
emis2 = emis2.to_crs(gdf_crs)
print(emis2[["VOC", "NOx", "NH3", "SOx", "PM2_5"]].sum())

num_emis_rows = emis2.shape[0]
print(f"Number of rows in emission file: {num_emis_rows}")
print(emis2.head())

# This step might take a while (20-25 minutes on the author's computer).
# If it is interrupted due to insufficient memory, you can try virtual memory.
resultsISRM_red = run_sr(emis2, model="isrm", emis_units="tons/year")

# Not the last expression of the cell, so this preview is not displayed.
resultsISRM_red.head()

# NOTE(review): these totals come from the ISRM run above, so the
# "apsca_q0" label looks like a leftover from another notebook -- confirm.
deaths_red = pd.DataFrame.from_dict({
    "Model": ["apsca_q0"],
    "Krewski Deaths": [resultsISRM_red.deathsK.sum()],
    "LePeule Deaths": [resultsISRM_red.deathsL.sum()],
})

deaths_red

# Save the gridded results of the reduced-emission scenario.
resultsISRM_red.to_file("/Users/yunhalee/Documents/LOCAETA/RCM/INMAP/source_receptor_model/outputs/ISRM_LA_CSS_reduced_emis.shp")


VOC      33280.482088
NOx      76739.382554
NH3      37981.685645
SOx      56043.412612
PM2_5     6514.109564
dtype: float64
Number of rows in emission file: 2258
   Unnamed_ 0   FIPS       SCC   VOC_old  NOx_old   NH3_old   SOx_old  \
0         534  22033  10100501   0.00000     0.00    5.0160  0.000000   
1         535  22033  10100604  21.47000   348.55    0.0000  5.120000   
2         536  22033  20200203  12.84000   882.54  103.9321  5.510000   
3         537  22033  20300203  18.94000   361.80    0.0000  8.520000   
4         543  22109  10100601   0.99805    28.80    0.5246  0.108878   

   PM2_5_old   height    diam  ...  velocity   EIS_ID  latitude  longitude  \
0   0.000000  45.8724  3.3528  ...  12.92352  5160611  30.48952  -91.18693   
1  29.660000  45.8724  3.3528  ...  12.92352  5160611  30.48952  -91.18693   
2  20.790000  53.3400  4.8768  ...  25.81656  5160611  30.48905  -91.18769   
3  59.520000  53.3400  4.8768  ...  23.89632  5160611  30.48944  -91.18815   
4   1.37

In [None]:
# Map TotalPM25 for the reduced-emission run.  The colour scale is capped at
# the 98.5th percentile so that a few extreme cells do not wash out the map.
pm25_red = resultsISRM_red.TotalPM25
cut = pm25_red.quantile(0.985)
resultsISRM_red.plot(
    column=pm25_red,
    cmap="GnBu",
    legend=True,
    vmin=0,
    vmax=cut,
    figsize=(6, 3),
)

In [None]:
# NOTE(review): vsl is presumably the value of a statistical life in
# dollars -- confirm the unit and the reference year.
vsl = 9.0e6

# Scale both death estimates of the baseline run by vsl.
krewski_damages = deaths["Krewski Deaths"] * vsl
lepeule_damages = deaths["LePeule Deaths"] * vsl

pd.DataFrame({
    "Model": ["ISRM"],
    "Krewski Damages": krewski_damages,
    "LePeule Damages": lepeule_damages,
})

In [None]:
# Map TotalPM25 for the baseline run, with the colour scale capped at the
# 98.5th percentile of the column.
pm25_base = resultsISRM.TotalPM25
cut = pm25_base.quantile(0.985)
resultsISRM.plot(
    column=pm25_base,
    cmap="GnBu",
    legend=True,
    vmin=0,
    vmax=cut,
    figsize=(6, 3),
)

In [None]:
# Same map as for the baseline run, drawn for the reduced-emission results;
# the colour scale is capped at this run's own 98.5th percentile.
pm25_red = resultsISRM_red.TotalPM25
cut = pm25_red.quantile(0.985)
resultsISRM_red.plot(
    column=pm25_red,
    cmap="GnBu",
    legend=True,
    vmin=0,
    vmax=cut,
    figsize=(6, 3),
)

In [None]:
# Write the resultsISRM table to a shapefile.  The path is relative, so the
# file is created in the current working directory.
resultsISRM.to_file("ISRM_LA_CSS.shp")