In [None]:
import os
import xarray as xr
import geopandas as gpd  # type: ignore

from QueryHandler import QueryHandler
from dotenv import load_dotenv
import sqlalchemy as sq
import sys
import pandas as pd
import matplotlib.pyplot as plt  # type: ignore

sys.path.append("../")
from Shared.DataService import DataService

In [None]:
def updateLog(fileName: str, message: str) -> None:
    """Append ``message`` as a single line to the log file at ``fileName``.

    Nothing is written (or printed) when ``fileName`` is None. If opening or
    writing the file fails for any reason, the message is printed instead so
    that it is not lost.
    """
    if fileName is None:
        return

    try:
        with open(fileName, "a") as log_handle:
            line = message + "\n"
            log_handle.write(line)
    except Exception:
        # Best-effort logging: fall back to stdout rather than failing the run.
        print(message)

In [None]:
# File that updateLog appends status messages to.
LOG_FILE = "/data/pull_moisture.log"

# Run python-dotenv's load_dotenv() first, then read the Postgres connection
# settings from the process environment; any unset variable comes back as None.
load_dotenv()

PG_DB = os.environ.get("POSTGRES_DB")
PG_ADDR = os.environ.get("POSTGRES_ADDR")
PG_PORT = os.environ.get("POSTGRES_PORT")
PG_USER = os.environ.get("POSTGRES_USER")
PG_PW = os.environ.get("POSTGRES_PW")

In [None]:
# Every connection setting has to be present; otherwise log the problem and
# stop the notebook here.
if not (
    PG_DB is not None
    and PG_ADDR is not None
    and PG_PORT is not None
    and PG_USER is not None
    and PG_PW is not None
):
    updateLog(LOG_FILE, "Missing database credentials")
    raise ValueError("Environment variables are not set")

# connecting to the database (the port arrives as text, so it is cast to int)
db = DataService(PG_DB, PG_ADDR, int(PG_PORT), PG_USER, PG_PW)
conn = db.connect()

In [None]:
# Read every row and column of public.soil_moisture into a DataFrame.
query = sq.text("select * FROM public.soil_moisture")
sm_df = pd.read_sql(sql=query, con=conn)

In [6]:
# Collapse the readings to one row per (date, cr_num, car_uid) holding the
# min / max / mean soil moisture, then expose car_uid under the name "district".
# Named aggregation yields flat column names directly, so no manual relabelling
# of a MultiIndex header is needed.
sm_df = (
    sm_df.groupby(["date", "cr_num", "car_uid"])
    .agg(
        soil_moisture_min=("soil_moisture", "min"),
        soil_moisture_max=("soil_moisture", "max"),
        soil_moisture_mean=("soil_moisture", "mean"),
    )
    .reset_index()
    .rename(columns={"car_uid": "district"})
)

Unnamed: 0,id,lon,lat,date,cr_num,car_uid,soil_moisture
0,1,-117.625,59.875,1999-10-29,10,4870,0.229969
1,2,-117.375,59.875,1999-10-29,10,4870,0.277028
2,3,-117.125,59.875,1999-10-29,10,4870,0.253480
3,4,-116.875,59.875,1999-10-29,10,4870,0.201568
4,5,-116.625,59.875,1999-10-29,10,4870,0.216237
...,...,...,...,...,...,...,...
5240828,5240829,-98.375,49.125,1993-07-24,3,4608,0.205054
5240829,5240830,-98.125,49.125,1993-07-24,3,4608,0.185292
5240830,5240831,-97.875,49.125,1993-07-24,3,4608,0.173992
5240831,5240832,-97.625,49.125,1993-07-24,3,4608,0.194576


In [8]:
# Parse the date column, then break it out into separate day / month / year
# columns (added in that order).
sm_df["date"] = pd.to_datetime(sm_df["date"])
for date_part in ("day", "month", "year"):
    sm_df[date_part] = getattr(sm_df["date"].dt, date_part)

In [9]:
# The date is now carried by the day / month / year columns, so remove the
# original column from the frame in place.
del sm_df["date"]

In [None]:
# Order the columns: date parts first, then the region keys, then the three
# aggregated statistics. Each statistic is listed exactly once; repeating
# "soil_moisture_min" would produce duplicate columns and discard the max
# and mean values.
sm_df = sm_df.reindex(
    columns=[
        "year",
        "month",
        "day",
        "cr_num",
        "district",
        "soil_moisture_min",
        "soil_moisture_max",
        "soil_moisture_mean",
    ]
)

In [None]:
# This cell repeats the earlier date-splitting step. On a top-to-bottom run
# the "date" column has already been dropped by the time it executes, so an
# unguarded access raises KeyError. Only split when the column still exists,
# which makes the cell safe to run in either situation.
if "date" in sm_df.columns:
    sm_df["date"] = pd.to_datetime(sm_df["date"])
    sm_df["day"] = sm_df["date"].dt.day
    sm_df["month"] = sm_df["date"].dt.month
    sm_df["year"] = sm_df["date"].dt.year

In [None]:
# Repeated drop of "date": on a top-to-bottom run the column is already gone,
# so errors="ignore" keeps this cell from raising KeyError.
sm_df.drop(columns="date", inplace=True, errors="ignore")

In [None]:
# Order the columns: date parts first, then the region keys, then the three
# aggregated statistics. Each statistic is listed exactly once; repeating
# "soil_moisture_min" would produce duplicate columns and discard the max
# and mean values.
sm_df = sm_df.reindex(
    columns=[
        "year",
        "month",
        "day",
        "cr_num",
        "district",
        "soil_moisture_min",
        "soil_moisture_max",
        "soil_moisture_mean",
    ]
)

In [None]:
# Display the current frame as the cell output for a visual check.
sm_df

In [None]:
# Inserting into the database: name the target table, then call
# QueryHandler.createAggSoilMoistureTableReq with the DataService instance.
# NOTE(review): presumably this creates the agg_soil_moisture table; its
# return value is not stored here — confirm against QueryHandler.
TABLE = "agg_soil_moisture"
queryHandler = QueryHandler()
queryHandler.createAggSoilMoistureTableReq(db)

In [None]:
# Write the frame to the TABLE table in the "public" schema, replacing any
# existing table of that name.
# NOTE(review): the DataFrame index is written as well (pandas default
# index=True) — confirm that is intended.
sm_df.to_sql(name=TABLE, con=conn, schema="public", if_exists="replace")