In [None]:
import os
import xarray as xr
import geopandas as gpd  # type: ignore
from SoilMoistureQueryHandler import SoilMoistureQueryHandler
from dotenv import load_dotenv
import sqlalchemy as sq
import sys

sys.path.append("../")
from DataService import DataService  # type: ignore

In [None]:
# Name of the destination table that the final cell appends rows to.
TABLE: str = "soil_moisture"

In [None]:
# Read the PostgreSQL connection settings from the environment, after
# load_dotenv() has had the chance to populate it from a .env file.
load_dotenv()
PG_USER = os.getenv("POSTGRES_USER", "")
PG_PW = os.getenv("POSTGRES_PW", "")
PG_DB = os.getenv("POSTGRES_DB", "")
PG_ADDR = os.getenv("POSTGRES_ADDR", "")
# The fallback is the string "5432" (not the int 5432) so PG_PORT is always a
# str, like the other settings; it is converted with int() where the
# DataService is constructed.
PG_PORT = os.getenv("POSTGRES_PORT", "5432")

In [None]:
# Path of the NetCDF file this notebook ingests (hard-coded absolute path).
netcdf_file_path = "/data/common/Images/2004/ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-20040605000000-fv07.1.nc"
queryHandler = SoilMoistureQueryHandler()

# Build the data service from the environment settings and open the connection
# that the later cells use for both reading regions and writing results.
db = DataService(PG_DB, PG_ADDR, int(PG_PORT), PG_USER, PG_PW)
conn = db.connect()

# NOTE(review): presumably ensures the soil-moisture table exists before the
# final append — confirm against SoilMoistureQueryHandler.
queryHandler.createSoilMoistureTableReq(db)

In [None]:
# Fetch the region identifiers and geometry of every census agricultural
# region; the geometry is tagged as EPSG:3347 when the frame is built.
query = sq.text("select cr_num, car_uid, geometry FROM public.census_ag_regions")
agRegions = gpd.GeoDataFrame.from_postgis(
    sql=query,
    con=conn,
    geom_col="geometry",
    crs="EPSG:3347",
)

In [None]:
# Read the NetCDF file into a flat table. Opening the dataset in a `with`
# block guarantees the file handle is released even if the conversion raises,
# which the previous manual close() did not.
with xr.open_dataset(netcdf_file_path) as dataset:
    # reset_index() turns the index levels produced by to_dataframe() into
    # ordinary columns.
    df = dataset.to_dataframe().reset_index()

In [None]:
# Discard the variables that are not used by the rest of the notebook.
df = df.drop(columns=["flag", "freqbandID", "dnflag", "mode", "sensor", "t0"])

# Positional renames: the first remaining column becomes "date" and the
# fourth becomes "soil_moisture".
df = df.rename(columns={df.columns[0]: "date", df.columns[3]: "soil_moisture"})

# Keep only the rows that actually carry a soil-moisture value.
has_moisture = df["soil_moisture"].notna()
df = df[has_moisture]

In [None]:
# Build one point per row from its lon/lat pair, declared as EPSG:4326.
df = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(x=df["lon"], y=df["lat"]),
    crs="EPSG:4326",
)

# Reproject the points to EPSG:3347, the CRS assigned to the agriculture regions.
df = df.to_crs(crs="EPSG:3347")  # type: ignore

# Left join: every point is kept, and points lying within a region pick up
# that region's columns.
df = gpd.sjoin(left_df=df, right_df=agRegions, how="left", predicate="within")

In [None]:
# Remove the join bookkeeping column and the point geometry.
df = df.drop(columns=["index_right", "geometry"])

# Keep only the points matched to a region, then store the region number as
# an integer. astype() is applied to the filtered result as a whole instead of
# assigning back into the filtered frame, which can raise pandas'
# SettingWithCopyWarning.
df = df[df["cr_num"].notna()].astype({"cr_num": int})

In [None]:
df = df.reset_index()

In [None]:
df.drop(columns=["index"], inplace=True)
df.drop(columns=["sm_uncertainty"], inplace=True)

In [None]:
# Append the prepared rows to public.<TABLE>; the DataFrame index is not written.
# NOTE(review): if `conn` is a SQLAlchemy 2.x Connection without autocommit, an
# explicit commit may be required for the rows to persist — confirm against
# DataService.connect().
df.to_sql(
    name=TABLE,
    con=conn,
    schema="public",
    if_exists="append",
    index=False,
)