In [1]:
import json
import os
from collections.abc import Iterable
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import requests

from nad.load import load_ageb_geometry

In [2]:
# Input locations are taken from the environment, read in this order; a missing
# variable raises KeyError right here.
data_path, population_grids_path, naics_path = (
    Path(os.environ[variable])
    for variable in ("DATA_PATH", "POPULATION_GRIDS_PATH", "NAICS_PATH")
)

# Outputs go under ./results and its "platform" subfolder; both are created
# if they do not exist yet.
results_path = Path("./results")
for output_dir in (results_path, results_path / "platform"):
    output_dir.mkdir(exist_ok=True)

In [3]:
# Load the industry-code lookup. JSON object keys are always strings, so they
# are converted to int; later cells look entries up with int codes.
# The encoding is pinned so decoding does not depend on the platform locale.
with open(naics_path / "naics_map.json", encoding="utf-8") as f:
    naics_map = {int(code): label for code, label in json.load(f).items()}

In [4]:
# Load the AGEB geometry layer through the project loader.
# NOTE(review): "08.2.03" presumably identifies the study zone — confirm.
df_geom_agebs = load_ageb_geometry(
    Path(
        population_grids_path,
        "final",
        "zone_agebs",
        "shaped",
        "2020",
        "08.2.03.gpkg",
    ),
)

In [5]:
def process_item(elem: dict, year: str = "2023") -> dict:
    """Extract the on-site / off-site release totals of one report item.

    Args:
        elem: One entry of a report's ``Items`` list. ``elem["Keys"][0]["Name"]``
            supplies the item name and ``elem["Values"][year]`` the measures.
        year: Key of the reporting year inside ``elem["Values"]``.

    Returns:
        A dict with ``"TotalOnSiteReleases"``, ``"TotalOffSiteReleases"`` and
        ``"name"``. A measure that is absent or explicitly null is reported as 0.

    Raises:
        KeyError / IndexError: If ``elem`` lacks ``Keys``, ``Values`` or the
            requested year.
    """
    key = elem["Keys"][0]
    measures = elem["Values"][year]

    wanted_cols = ("TotalOnSiteReleases", "TotalOffSiteReleases")
    emissions = {}
    for col in wanted_cols:
        amount = measures.get(col)
        # ``.get(col, 0)`` only covers a missing key; a JSON null would still
        # come through as None and end up as a missing value instead of a number.
        emissions[col] = 0 if amount is None else amount

    emissions["name"] = key["Name"]
    return emissions


def get_naics(facility_id: int) -> str:
    """Fetch the industry codes reported for one facility in 2023.

    Calls the Taking Stock ``GetReport`` endpoint and reads the comma-separated
    ``Industries`` field from the first key of the first returned item.

    Args:
        facility_id: Facility identifier placed in the ``FacilityIDs`` query
            parameter.

    Returns:
        The codes joined with ``"+"``. Each code is passed through ``int`` first,
        so surrounding whitespace and leading zeros are dropped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError / KeyError: If the response contains no item or lacks the
            ``Industries`` field.
        ValueError: If a code is not an integer.
    """
    url = f"https://takingstock.cec.org/api/GetReport?Culture=en-US&FacilityIDs={facility_id}&IndustryLevel=4&Measure=3&MediaTypes=29&ReportType=1&ResultType=1&Years=2023"
    response = requests.get(url, timeout=100)
    # Surface HTTP failures as such instead of as a JSON/Key error further down.
    response.raise_for_status()
    industries = response.json()["Items"][0]["Keys"][0]["Industries"].split(",")
    return "+".join(str(int(ind)) for ind in industries)


def get_releases_complete(facility_id: int) -> pd.DataFrame:
    """Fetch the 2023 on-site / off-site release totals of one facility.

    Args:
        facility_id: Facility identifier placed in the ``FacilityIDs`` query
            parameter.

    Returns:
        A frame with one row per report item (index named ``"name"``) and the
        columns ``"On-site releases"`` and ``"Off-site releases"`` (column axis
        named ``"index"``). Only rows where at least one of the two values is
        positive are kept. If the report has no items, an empty frame with the
        same axes is returned.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # NOTE(review): the query contains PageSize=10, which presumably caps the
    # number of items returned — confirm that reports with more rows are not
    # truncated.
    url = "https://takingstock.cec.org/Api/GetReport?Culture=en-US&FacilityIDs={facility_id}&IndustryLevel=4&Measure=3&PageSize=10&ReportType=16&ResultType=1&SortDirection=1&SortProperty=Keys[0].Name&Years=2023"
    response = requests.get(url.format(facility_id=facility_id), timeout=100)
    # Surface HTTP failures as such instead of as a JSON/Key error further down.
    response.raise_for_status()

    records = [process_item(it) for it in response.json()["Items"]]
    if not records:
        # Without any record there is no "name" column to index on, so the
        # pipeline below would raise; hand back an empty table of the same shape.
        return pd.DataFrame(
            index=pd.Index([], name="name"),
            columns=pd.Index(["On-site releases", "Off-site releases"], name="index"),
            dtype=float,
        )

    # The transpose / relabel / transpose round trip renames the two measure
    # columns and leaves the column axis named "index"; get_onsite_offsite_risks
    # groups on that name after melting.
    return (
        pd.DataFrame(records)
        .set_index("name")
        .transpose()
        .reset_index(names="index")
        .assign(
            index=lambda df: df["index"].map(
                {
                    "TotalOnSiteReleases": "On-site releases",
                    "TotalOffSiteReleases": "Off-site releases",
                },
            ),
        )
        .set_index("index")
        .transpose()
        .query("(`On-site releases` > 0) | (`Off-site releases` > 0)")
    )


def process_chemical_types(types: Iterable[dict]) -> dict[str, bool]:
    """Flag which of four chemical type groups appear in ``types``.

    Args:
        types: Type records of one chemical; each record's ``"Name"`` is
            compared after stripping surrounding whitespace. ``None`` is treated
            like an empty collection.

    Returns:
        A dict mapping each output label to whether the matching type name was
        found. Keys always come in the same order:
        "Persistent, Bioaccumulative and Toxic",
        "Known or Suspected Carcinogens", "Metals",
        "Developmental/Reproductive Toxins".
    """
    # Type name as it appears in the records -> label used in the output.
    # A dict (insertion-ordered) is used instead of a set: iterating a set of
    # strings yields a different order from one interpreter run to the next,
    # which made the column order built from this result non-reproducible.
    labels_by_type = {
        "Persistent, Bioaccumulative and Toxic": "Persistent, Bioaccumulative and Toxic",
        "Known or Suspected Carcinogens": "Known or Suspected Carcinogens",
        "Metals": "Metals",
        "Developmental/Reproductive Toxins (on California Prop 65 List)": "Developmental/Reproductive Toxins",
    }
    found_types = {t["Name"].strip() for t in (types or ())}
    return {
        label: type_name in found_types
        for type_name, label in labels_by_type.items()
    }


def get_chemicals_table() -> pd.DataFrame:
    """Download the chemical groups list and turn it into a flag table.

    Returns:
        A frame indexed by the ``"Name"`` field of each entry, with one boolean
        column per key returned by ``process_chemical_types`` for that entry's
        ``"Types"`` list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the payload lacks the ``"$type"`` or ``"TypeIds"`` fields
            that are dropped here.
    """
    url = "https://takingstock.cec.org/api/GetChemicalGroups?Culture=en-US&IndustryLevel=4&ResultType=1&Years=2023"
    response = requests.get(url, timeout=100)
    # Surface HTTP failures as such instead of as a JSON/Key error further down.
    response.raise_for_status()

    out = pd.DataFrame(response.json()).drop(
        columns=["$type", "TypeIds"],
    )
    # result_type="expand" spreads each returned dict into one column per key.
    chem_types = out.apply(
        lambda row: process_chemical_types(row["Types"]),
        result_type="expand",
        axis=1,
    )
    return pd.concat([out[["Name"]], chem_types], axis=1).set_index("Name")


def get_onsite_offsite_risks(
    releases: pd.DataFrame,
    chemicals: pd.DataFrame,
) -> pd.DataFrame:
    """Summarise, per release route, which chemical flags are present.

    Args:
        releases: Table indexed by ``"name"`` whose column axis is named
            ``"index"`` (the melted column-label field is grouped on under
            that name).
        chemicals: Flag table indexed by chemical name; its index is matched
            against the ``"name"`` index of ``releases``.

    Returns:
        A frame with one row per flag column of ``chemicals`` and the columns
        ``"On-site releases"`` and ``"Off-site releases"``. A cell is True when
        any matched chemical with a positive value for that route carries the
        flag; a route with no positive matched release is all False.
    """
    # Long format: one row per (name, route) pair, then keep positive amounts.
    positive_releases = (
        releases.reset_index().melt(id_vars="name").query("value > 0")
    )

    # The inner join drops releases whose name has no entry in ``chemicals``.
    chemical_flags = chemicals.reset_index(names="name")
    flagged = positive_releases.merge(chemical_flags, on="name", how="inner")

    flags_by_route = flagged.drop(columns=["name", "value"]).groupby("index").any()

    # Force both routes to be present, in a fixed order, before flipping axes.
    routes = ["On-site releases", "Off-site releases"]
    return flags_by_route.reindex(routes, fill_value=False).transpose()


def get_total_releases(releases: pd.DataFrame) -> pd.Series:
    """Return the sum of every column of ``releases`` (rows are added together)."""
    column_totals = releases.sum(axis="index")
    return column_totals

In [6]:
# Read the facility list up front so the file handle is closed before the
# spatial join and the per-facility HTTP lookups run. The encoding is pinned so
# decoding does not depend on the platform locale.
with open(data_path / "datos" / "takingstock.json", encoding="utf-8") as f:
    industry_records = json.load(f)

# One point geometry per facility, indexed by facility ID.
industry_points = (
    pd.DataFrame(industry_records)
    .assign(geometry=lambda df: gpd.points_from_xy(df["Longitude"], df["Latitude"]))
    .drop(columns=["Longitude", "Latitude", "$type"])
    .set_index("ID")
)

df_industry = (
    gpd.GeoDataFrame(
        industry_points,
        crs="EPSG:4326",
        geometry="geometry",
    )
    .to_crs("EPSG:6372")
    # Inner join with predicate "within": only facilities located inside one of
    # the AGEB polygons are kept.
    .sjoin(df_geom_agebs[["geometry"]], how="inner", predicate="within")
    # NOTE(review): "CVEGEO" is presumably the AGEB index label added by the
    # join — confirm; it is not needed afterwards.
    .drop(columns=["CVEGEO"])
    .assign(
        # One HTTP request per facility (see get_naics).
        naics=lambda df: df.index.map(get_naics),
        # Translate each "+"-separated code through naics_map; an unmapped code
        # raises KeyError.
        industries=lambda df: df["naics"].apply(
            lambda x: "+".join([naics_map[int(i)] for i in x.split("+")]),
        ),
    )
)

In [7]:
# Download the chemical flag table once; the per-facility loop below passes it
# to get_onsite_offsite_risks for every facility.
chems = get_chemicals_table()

In [8]:
# Facility ID -> release summary. Everything is converted to plain dicts so the
# mapping can be dumped as JSON later. One report request is made per facility.
release_map = {}

for industry_id in df_industry.index:
    releases = get_releases_complete(industry_id)
    risk_flags = get_onsite_offsite_risks(releases, chems)
    totals = get_total_releases(releases)
    release_map[industry_id] = {
        "risks": risk_flags.to_dict(),
        "total_releases": totals.to_dict(),
        "releases_by_pollutant": releases.to_dict(),
    }

In [9]:
# Both exports of this cell go into the results/platform folder.
platform_dir = results_path / "platform"

with (platform_dir / "releases.json").open("w") as fh:
    json.dump(release_map, fh, indent=4)

# The facility points are reprojected to EPSG:4326 before being written.
industry_points_wgs84 = df_industry.to_crs("EPSG:4326")
industry_points_wgs84.to_file(platform_dir / "industry_points.geojson")

In [10]:
# Total on-site releases per facility, taken from the summaries in release_map.
on_site_totals = pd.Series(
    {
        industry_id: info["total_releases"]["On-site releases"]
        for industry_id, info in release_map.items()
    },
    name="on_site_releases",
)

# Size classes and the radius factor attached to each of them.
# NOTE(review): the 400 / 10 000 thresholds are in whatever unit the report
# totals use — confirm.
size_labels = ["Microgenerador", "Pequeño generador", "Gran generador"]
radius_by_label = {"Microgenerador": 1, "Pequeño generador": 2, "Gran generador": 3}

# Keep facilities with a positive on-site total, attach their attributes and
# geometry from df_industry, then classify them.
generators = (
    on_site_totals.to_frame()
    .query("on_site_releases > 0")
    .join(df_industry)
    .assign(
        category=lambda df: pd.cut(
            df["on_site_releases"],
            [0, 400, 10_000, np.inf],
            labels=size_labels,
        ),
        radius=lambda df: df["category"].map(radius_by_label).astype(float),
    )
)

# Replace each point by a circle around it. The radius factor is multiplied by
# 1.60934 * 1000.
# NOTE(review): presumably miles converted to metres, assuming the CRS of
# df_industry is in metres — confirm.
harmful_industries = (
    gpd.GeoDataFrame(
        generators,
        crs=df_industry.crs,
        geometry="geometry",
    )
    .assign(
        geometry=lambda df: df["geometry"].buffer(
            df["radius"] * 1.60934 * 1000,
            resolution=32,
        ),
    )
    .drop(columns=["radius"])
)

# GeoPackage in the working CRS, plus a GeoJSON copy reprojected to EPSG:4326.
harmful_industries.to_file(results_path / "harmful_industries.gpkg")

circles_wgs84 = harmful_industries.to_crs("EPSG:4326")
circles_wgs84.to_file(results_path / "platform" / "industry_circles.geojson")