In [None]:
import sqlalchemy as sq
import geopandas as gpd  # type: ignore
import pandas as pd  # type: ignore
from dotenv import load_dotenv
import os, sys

sys.path.append("../")
from Shared.DataService import DataService

In [None]:
# Name of the table that receives the aggregated ergot samples.
TABLENAME = "agg_ergot_sample"

# Read the Postgres connection settings from the environment, after letting
# python-dotenv load a .env file if one is present. A setting that is not
# defined comes back as None.
load_dotenv()
PG_DB = os.environ.get("POSTGRES_DB")
PG_ADDR = os.environ.get("POSTGRES_ADDR")
PG_PORT = os.environ.get("POSTGRES_PORT")
PG_USER = os.environ.get("POSTGRES_USER")
PG_PW = os.environ.get("POSTGRES_PW")

In [None]:
# Fail fast when any connection setting is missing. The explicit `is None`
# checks are kept (rather than a loop) so static type checkers can narrow the
# Optional[str] values to str before they are passed to DataService.
if (
    PG_DB is None
    or PG_ADDR is None
    or PG_PORT is None
    or PG_USER is None
    or PG_PW is None
):
    requiredSettings = {
        "POSTGRES_DB": PG_DB,
        "POSTGRES_ADDR": PG_ADDR,
        "POSTGRES_PORT": PG_PORT,
        "POSTGRES_USER": PG_USER,
        "POSTGRES_PW": PG_PW,
    }
    # Name the offending variables so the error is actionable.
    missingSettings = [
        name for name, value in requiredSettings.items() if value is None
    ]
    raise ValueError("Environment variables not set: " + ", ".join(missingSettings))

# Environment values are strings, so the port is converted to an int here;
# a non-numeric POSTGRES_PORT raises ValueError from int().
db = DataService(PG_DB, PG_ADDR, int(PG_PORT), PG_USER, PG_PW)
conn = db.connect()

In [None]:
# Load the identifier (car_uid) and boundary geometry of every row in
# public.census_ag_regions into a GeoDataFrame, declaring the geometries as
# EPSG:3347.
regionQuery = sq.text("select car_uid, geometry FROM public.census_ag_regions")
agRegions = gpd.GeoDataFrame.from_postgis(
    sql=regionQuery,
    con=conn,
    geom_col="geometry",
    crs="EPSG:3347",
)

In [None]:
# Read every column and row of public.ergot_sample into a DataFrame.
ergotQuery = sq.text("SELECT * FROM public.ergot_sample")
ergot = pd.read_sql_query(sql=ergotQuery, con=conn)

In [None]:
# Derive each sample's car_uid from its province and crop district:
#   MB -> crop_district + 4600
#   SK -> (crop_district - 1) + 4700
#   AB -> (crop_district * 10) + 4800
isManitoba = ergot["province"] == "MB"
isSaskatchewan = ergot["province"] == "SK"
isAlberta = ergot["province"] == "AB"

ergot.loc[isManitoba, "car_uid"] = ergot.loc[isManitoba, "crop_district"] + 4600
ergot.loc[isSaskatchewan, "car_uid"] = (
    ergot.loc[isSaskatchewan, "crop_district"] - 1 + 4700
)
ergot.loc[isAlberta, "car_uid"] = ergot.loc[isAlberta, "crop_district"] * 10 + 4800

# Store the identifier as an integer. This raises if any car_uid is missing,
# e.g. for a row whose province is none of MB/SK/AB (assuming the table has no
# car_uid column of its own - TODO confirm).
ergot[["car_uid"]] = ergot[["car_uid"]].astype(int)

In [None]:
# Map each region's car_uid (as a string) to the list of car_uids of the
# regions whose geometry touches it.
#
# Each region is compared against the whole geometry column in one vectorized
# GeoSeries.touches call instead of a nested iterrows() loop, which rebuilt
# every row as a Series n * n times. tolist() yields plain Python ints, so the
# lists are safe to format or compare regardless of the NumPy version.
touches = {}

for regionUid, regionGeometry in zip(agRegions["car_uid"], agRegions.geometry):
    isTouching = agRegions.geometry.touches(regionGeometry)
    touches[str(regionUid)] = agRegions.loc[isTouching, "car_uid"].tolist()

In [None]:
# Create the derived columns with placeholder values; the aggregation loop
# overwrites them per (year, car_uid).
ergot["percnt_true"] = None
ergot["has_ergot"] = False
# Left as an integer placeholder because the dtype of "severity" is not
# visible here - TODO confirm; use 0.0 if severity is floating point.
ergot["sum_severity"] = 0

ergot["present_prev1"] = False
ergot["present_prev2"] = False
ergot["present_prev3"] = False
ergot["present_in_neighbor"] = False

# These four columns receive ratios (sum / row count), i.e. floats. Creating
# them as float columns avoids assigning floats into int64 columns, which
# relies on the silent upcasting that pandas deprecated in 2.1 (PDEP-6).
ergot["severity_prev1"] = 0.0
ergot["severity_prev2"] = 0.0
ergot["severity_prev3"] = 0.0
ergot["severity_in_neighbor"] = 0.0

In [None]:
def _incidence_rate(samples):
    """Return the sum of the "incidence" column divided by the number of rows.

    Returns 0.0 when ``samples`` has no rows instead of dividing by zero.
    """
    sampleCount = len(samples.index)
    if sampleCount == 0:
        return 0.0
    return samples["incidence"].sum() / sampleCount


def _any_incidence(samples):
    """Return whether the "incidence" column of ``samples`` sums to more than 0."""
    return samples["incidence"].sum() > 0


# Split the samples by (year, car_uid) once. Only the pairs that actually have
# samples are visited, so no 0 / 0 is computed for empty combinations, and the
# previous-year lookups become dictionary reads instead of full-frame queries.
# The loop only reads "year", "car_uid", "incidence" and "severity", none of
# which it modifies, so the snapshot stays valid while results are written.
samplesByYearRegion = {
    key: group for key, group in ergot.groupby(["year", "car_uid"])
}
noSamples = ergot.iloc[0:0]  # empty frame with the same columns

for (year, uid), currSamples in samplesByYearRegion.items():
    isSameYear = ergot["year"] == year
    isCurrent = isSameYear & (ergot["car_uid"] == uid)  # rows to update

    # A region that is absent from the adjacency map has no known neighbours
    # rather than aborting the whole run with a KeyError.
    neighborUids = touches.get(str(uid), [])
    neighborSamples = ergot[isSameYear & ergot["car_uid"].isin(neighborUids)]

    ergot.loc[isCurrent, "percnt_true"] = _incidence_rate(currSamples)
    ergot.loc[isCurrent, "has_ergot"] = _any_incidence(currSamples)
    ergot.loc[isCurrent, "sum_severity"] = currSamples["severity"].sum()

    # NOTE(review): every severity_* column below is derived from "incidence"
    # (it is the incidence rate), not from "severity". This matches the
    # previous behaviour - confirm whether "severity" was intended.
    for yearsBack in (1, 2, 3):
        prevSamples = samplesByYearRegion.get((year - yearsBack, uid), noSamples)
        ergot.loc[isCurrent, f"present_prev{yearsBack}"] = _any_incidence(prevSamples)
        ergot.loc[isCurrent, f"severity_prev{yearsBack}"] = _incidence_rate(prevSamples)

    ergot.loc[isCurrent, "present_in_neighbor"] = _any_incidence(neighborSamples)
    ergot.loc[isCurrent, "severity_in_neighbor"] = _incidence_rate(neighborSamples)

In [None]:
# Replace any missing value left in the derived severity columns with 0
for severityColumn in (
    "severity_prev1",
    "severity_prev2",
    "severity_prev3",
    "severity_in_neighbor",
):
    ergot.loc[ergot[severityColumn].isna(), severityColumn] = 0

In [None]:
# Display the aggregated frame for a visual check.
ergot

In [None]:
# Write the frame to public.<TABLENAME> without the DataFrame index. Rows are
# appended to an existing table, so running this cell again inserts the same
# rows a second time.
ergot.to_sql(
    name=TABLENAME,
    con=conn,
    schema="public",
    if_exists="append",
    index=False,
)

In [None]:
# Release the DataService's resources (presumably closes the database
# connection - confirm in Shared.DataService).
db.cleanup()