In [None]:
import sqlalchemy as sq
import geopandas as gpd  # type: ignore
import pandas as pd  # type: ignore
from dotenv import load_dotenv
import os, sys

sys.path.append("../")
from Shared.DataService import DataService

In [None]:
# Name of the table that main() writes the aggregated ergot features to.
TABLENAME = "agg_ergot_sample"

# load_dotenv() runs before the lookups so the POSTGRES_* settings can be
# read from the environment; any setting that is absent comes back as None
# and is rejected later by main().
load_dotenv()
PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW = map(
    os.getenv,
    (
        "POSTGRES_DB",
        "POSTGRES_ADDR",
        "POSTGRES_PORT",
        "POSTGRES_USER",
        "POSTGRES_PW",
    ),
)

In [None]:
def pullAgRegions(conn: sq.engine.Connection) -> gpd.GeoDataFrame:
    """Load the agricultural regions table as a GeoDataFrame.

    Args:
        conn: open database connection the query is executed on.

    Returns:
        The ``district`` and ``geometry`` columns of
        ``public.census_ag_regions``; ``geometry`` is used as the geometry
        column and the frame is tagged with CRS EPSG:3347.
    """
    query = sq.text("select district, geometry FROM public.census_ag_regions")
    regions = gpd.GeoDataFrame.from_postgis(
        query,
        conn,
        geom_col="geometry",
        crs="EPSG:3347",
    )
    return regions

In [None]:
def pullErgot(conn: sq.engine.Connection) -> pd.DataFrame:
    """Read every row and column of ``public.ergot_sample`` into a DataFrame.

    Args:
        conn: open database connection the query is executed on.

    Returns:
        The query result as a pandas DataFrame.
    """
    query = sq.text("SELECT * FROM public.ergot_sample")
    samples = pd.read_sql_query(query, conn)
    return samples

In [None]:
def calcUIDs(ergot: pd.DataFrame) -> pd.DataFrame:
    """Fill the ``district`` column from ``province`` and ``crop_district``.

    The rule depends on the province code:

    * ``MB``: ``crop_district + 4600``
    * ``SK``: ``(crop_district - 1) + 4700``
    * ``AB``: ``(crop_district * 10) + 4800``

    Rows of any other province are not touched by these rules. The
    ``district`` column is finally cast to ``int``.

    Args:
        ergot: samples with ``province`` and ``crop_district`` columns.
            The frame is modified in place.

    Returns:
        The same ``ergot`` object, now carrying the integer ``district``
        column.
    """
    # province code -> rule turning a crop district into a district id
    districtRules = {
        "MB": lambda cropDistrict: cropDistrict + 4600,
        "SK": lambda cropDistrict: (cropDistrict - 1) + 4700,
        "AB": lambda cropDistrict: (cropDistrict * 10) + 4800,
    }

    for province, rule in districtRules.items():
        inProvince = ergot["province"] == province
        ergot.loc[inProvince, "district"] = rule(
            ergot.loc[inProvince, "crop_district"]
        )

    ergot["district"] = ergot["district"].astype(int)
    return ergot

In [None]:
def calcNeighbors(agRegions: gpd.GeoDataFrame) -> dict:
    """Map every district to the districts whose geometry touches its own.

    Args:
        agRegions: regions with a ``district`` column and a ``geometry``
            column whose values provide a ``touches`` method.

    Returns:
        A dict keyed by ``str(district)``. Each value is the list of
        ``district`` values (not stringified) of the regions for which
        ``geometry.touches(other_geometry)`` is true, in table order.
    """
    # Read both columns once; this avoids rebuilding a row Series for every
    # pair of regions, which is what nested iterrows() loops would do.
    districts = agRegions["district"].tolist()
    geometries = list(agRegions["geometry"])

    touches = {}
    for district, geometry in zip(districts, geometries):
        touches[str(district)] = [
            otherDistrict
            for otherDistrict, otherGeometry in zip(districts, geometries)
            if geometry.touches(otherGeometry)
        ]

    return touches

In [None]:
def createErgotFeatures(ergot: pd.DataFrame, touches: dict) -> pd.DataFrame:
    """Attach per-(year, district) ergot features to every sample row.

    All rows sharing a ``year`` and ``district`` receive the same values:

    * ``percnt_true``: incidence total / number of samples of the group.
    * ``has_ergot``: whether the incidence total is greater than 0.
    * ``sum_severity``: severity total of the group.
    * ``present_prev1`` .. ``present_prev3``: whether the same district had
      an incidence total greater than 0 one, two and three years earlier.
    * ``severity_prev1`` .. ``severity_prev3``: incidence total / number of
      samples of the same district one, two and three years earlier.
    * ``present_in_neighbor`` / ``severity_in_neighbor``: the same two
      measures over all samples of the neighbouring districts in the same
      year.

    When there are no samples to look back at (or no neighbour samples) the
    ``present_*`` value is ``False`` and the ``severity_*`` value is ``0.0``.

    NOTE(review): the ``severity_*`` columns are derived from ``incidence``,
    not from ``severity``. This is kept as-is; confirm whether the
    ``severity`` column was intended.

    Args:
        ergot: samples with ``year``, ``district``, ``incidence`` and
            ``severity`` columns. The frame is modified in place.
        touches: mapping from ``str(district)`` to the list of neighbouring
            district values, as produced by ``calcNeighbors``.

    Returns:
        The same ``ergot`` object with the feature columns added.

    Raises:
        KeyError: if a district present in ``ergot`` has no entry in
            ``touches``.
    """
    # Column order and the value used for rows that belong to no group.
    # The ratio columns default to floats so their dtype never has to be
    # upcast from int when real ratios are stored.
    featureDefaults = {
        "percnt_true": None,
        "has_ergot": False,
        "sum_severity": 0,
        "present_prev1": False,
        "present_prev2": False,
        "present_prev3": False,
        "present_in_neighbor": False,
        "severity_prev1": 0.0,
        "severity_prev2": 0.0,
        "severity_prev3": 0.0,
        "severity_in_neighbor": 0.0,
    }

    # Single pass over the data: (sample count, incidence total, severity
    # total) for every (year, district) that actually has samples.
    totals = {}
    for (year, uid), samples in ergot.groupby(["year", "district"], sort=False):
        totals[(year, uid)] = (
            len(samples.index),
            samples["incidence"].sum(),
            samples["severity"].sum(),
        )

    def incidenceSummary(groupKeys):
        """Return (any incidence, incidence total / sample count) pooled
        over the given (year, district) keys; (False, 0.0) without samples."""
        sampleCount = 0
        incidenceTotal = 0
        for groupKey in groupKeys:
            count, incidence, _ = totals.get(groupKey, (0, 0, 0))
            sampleCount += count
            incidenceTotal += incidence

        if sampleCount == 0:
            # nothing to divide by: report "absent" instead of producing NaN
            return False, 0.0

        ratio = incidenceTotal / sampleCount
        if pd.isna(ratio):
            ratio = 0.0
        return bool(incidenceTotal > 0), ratio

    # Feature record of every (year, district) group.
    features = {}
    for (year, uid), (count, incidence, severity) in totals.items():
        # a set mirrors membership semantics: a repeated neighbour counts once
        neighbors = set(touches[str(uid)])

        record = {
            "percnt_true": incidence / count,
            "has_ergot": bool(incidence > 0),
            "sum_severity": severity,
        }

        for lag in (1, 2, 3):
            present, ratio = incidenceSummary([(year - lag, uid)])
            record[f"present_prev{lag}"] = present
            record[f"severity_prev{lag}"] = ratio

        present, ratio = incidenceSummary([(year, other) for other in neighbors])
        record["present_in_neighbor"] = present
        record["severity_in_neighbor"] = ratio

        features[(year, uid)] = record

    # Broadcast the group records back onto the individual sample rows.
    rowRecords = [
        features.get(rowKey, featureDefaults)
        for rowKey in zip(ergot["year"], ergot["district"])
    ]
    featureFrame = pd.DataFrame(rowRecords, columns=list(featureDefaults))

    for column in featureFrame.columns:
        # to_numpy() assigns by position, so the index of ergot is irrelevant
        ergot[column] = featureFrame[column].to_numpy()

    return ergot

In [None]:
def main():
    """Build the aggregated ergot feature table and store it in the database.

    Steps: validate the connection settings, connect, load the agricultural
    regions and the ergot samples, derive district ids, neighbours and
    features, then write the result to ``public.<TABLENAME>``.

    Raises:
        ValueError: if any of the POSTGRES_* settings is missing.
    """
    settings = (PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW)
    if any(setting is None for setting in settings):
        raise ValueError("Environment variables not set")

    db = DataService(PG_DB, PG_ADDR, int(PG_PORT), PG_USER, PG_PW)
    conn = db.connect()

    # Release the connection even when a query or the feature computation
    # fails part-way through.
    try:
        agRegions = pullAgRegions(conn)
        ergot = pullErgot(conn)

        ergot = calcUIDs(ergot)
        neighbors = calcNeighbors(agRegions)
        ergot = createErgotFeatures(ergot, neighbors)

        # NOTE(review): if_exists="append" adds to an existing table, so
        # running this twice stores the rows twice - confirm this is intended.
        ergot.to_sql(
            TABLENAME, conn, schema="public", if_exists="append", index=False
        )
    finally:
        db.cleanup()

In [None]:
# Entry point: run the pipeline only when this code executes as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()