# Import Block

In [1]:
import pandas as pd
import geopandas as gpd
# from sqlalchemy import TIMESTAMP, SMALLINT

import os
from dotenv import load_dotenv

import sys
sys.path.append("../src")

from utils.Database import Database

# Load Environment

In [2]:
load_dotenv(".env")

TABLE_WEATHER_METADATA = os.getenv("TABLE_WEATHER_METADATA")
TABLE_WEATHER_DATA_PROCESSED = os.getenv("TABLE_WEATHER_DATA_PROCESSED")

# Constants

In [3]:
WEATHER_READING_GET_MIN_MAX_DATE_BYCLIMATE_ID = f"""
SELECT 
	wdar."CLIMATE_ID",
	min(wdar."DATE") as "FIRST_DATE",
	max(wdar."DATE") + make_interval(days => -1) as "LAST_DATE"
FROM {TABLE_WEATHER_DATA_PROCESSED} as wdar 
GROUP BY wdar."CLIMATE_ID";
"""

In [4]:
WEATHER_METADATA_DB_DTYPES = {
    "LAST_DATE": "TIMESTAMP",
    "FIRST_DATE": "TIMESTAMP",
    "LAST_DAY": "SMALLINT", 
    "FIRST_DAY": "SMALLINT", 
    "LAST_MONTH": "SMALLINT", 
    "FIRST_MONTH": "SMALLINT",
}

# Establish Database Connection

In [5]:
db = Database()

Connection Established!!!
	Engine(postgresql://wireaiadmin:***@localhost:5434/weather_db)


# Read Data

In [6]:
# read metaadata
wather_meta_df = gpd.read_postgis(
    sql = f"SELECT * FROM {TABLE_WEATHER_METADATA};",
    con = db.connection,
    geom_col = 'geometry'
)

In [7]:
# read first and last readings for each station
weather_reading_min_max_date_df = pd.read_sql(
    sql = WEATHER_READING_GET_MIN_MAX_DATE_BYCLIMATE_ID,
    con = db.connection
)

# Data Pre-Processing

In [8]:
# merge first and last readings
weather_merged_meta_gdf = wather_meta_df.merge(
    right = weather_reading_min_max_date_df,
    how = 'inner',
    on = "CLIMATE_ID"
)

In [9]:
assert (weather_merged_meta_gdf['FIRST_DATE'].dt.year == weather_merged_meta_gdf['FIRST_YR']).value_counts()[True] == len(wather_meta_df), f"The first should be the same for extracted values for each station!!!"

In [10]:
assert (weather_merged_meta_gdf['LAST_DATE'].dt.year == weather_merged_meta_gdf['LAST_YR']).value_counts()[True] == len(wather_meta_df), f"The first should be the same for extracted values for each station!!!"

In [11]:
assert len(weather_merged_meta_gdf) == len(wather_meta_df), "The number of stations in both should be the same"

In [12]:
# add exess date to for first and last month and day

weather_merged_meta_gdf['FIRST_MONTH'] = weather_merged_meta_gdf['FIRST_DATE'].dt.month
weather_merged_meta_gdf['FIRST_DAY'] = weather_merged_meta_gdf['FIRST_DATE'].dt.day

weather_merged_meta_gdf['LAST_MONTH'] = weather_merged_meta_gdf['LAST_DATE'].dt.month
weather_merged_meta_gdf['LAST_DAY'] = weather_merged_meta_gdf['LAST_DATE'].dt.day

In [13]:
# add column query 
add_column_alter_query = lambda col, col_type: f'ALTER TABLE "{TABLE_WEATHER_METADATA}" ADD COLUMN "{col}" {col_type};'

for col, col_type in WEATHER_METADATA_DB_DTYPES.items():
    db.execute_sql(add_column_alter_query(col, col_type))

Execution started --> ALTER TABLE "weather_metadata" ADD COLUMN "LAST_DATE" TIMESTAMP;
Exectution completed --> ALTER TABLE "weather_metadata" ADD COLUMN "LAST_DATE" TIMESTAMP;
Execution started --> ALTER TABLE "weather_metadata" ADD COLUMN "FIRST_DATE" TIMESTAMP;
Exectution completed --> ALTER TABLE "weather_metadata" ADD COLUMN "FIRST_DATE" TIMESTAMP;
Execution started --> ALTER TABLE "weather_metadata" ADD COLUMN "LAST_DAY" SMALLINT;
Exectution completed --> ALTER TABLE "weather_metadata" ADD COLUMN "LAST_DAY" SMALLINT;
Execution started --> ALTER TABLE "weather_metadata" ADD COLUMN "FIRST_DAY" SMALLINT;
Exectution completed --> ALTER TABLE "weather_metadata" ADD COLUMN "FIRST_DAY" SMALLINT;
Execution started --> ALTER TABLE "weather_metadata" ADD COLUMN "LAST_MONTH" SMALLINT;
Exectution completed --> ALTER TABLE "weather_metadata" ADD COLUMN "LAST_MONTH" SMALLINT;
Execution started --> ALTER TABLE "weather_metadata" ADD COLUMN "FIRST_MONTH" SMALLINT;
Exectution completed --> ALTER 

In [27]:
# update the new data row by row
update_data_statement = lambda c_id, FIRST_DAY, LAST_DAY, FIRST_MONTH, LAST_MONTH, FIRST_DATE, LAST_DATE: f"""
UPDATE "{TABLE_WEATHER_METADATA}" 
SET 
    "FIRST_DAY" = {FIRST_DAY}, 
    "LAST_DAY" = {LAST_DAY}, 
    "FIRST_MONTH" = {FIRST_MONTH}, 
    "LAST_MONTH" = {LAST_MONTH}, 
    "FIRST_DATE" = '{FIRST_DATE}', 
    "LAST_DATE" = '{LAST_DATE}' 
WHERE "CLIMATE_ID" = '{c_id}';
"""

for index, row in weather_merged_meta_gdf.iterrows():
    statement = update_data_statement(
        row['CLIMATE_ID'],
        row['FIRST_DAY'],
        row['LAST_DAY'],
        row['FIRST_MONTH'],
        row['LAST_MONTH'],
        row['FIRST_DATE'],
        row['LAST_DATE']
    )
    db.execute_sql(statement)

Execution started --> 
UPDATE "weather_metadata" 
SET 
    "FIRST_DAY" = 1, 
    "LAST_DAY" = 31, 
    "FIRST_MONTH" = 1, 
    "LAST_MONTH" = 12, 
    "FIRST_DATE" = '2003-01-01 00:00:00', 
    "LAST_DATE" = '2017-12-31 00:00:00' 
WHERE "CLIMATE_ID" = '3010010';

Exectution completed --> 
UPDATE "weather_metadata" 
SET 
    "FIRST_DAY" = 1, 
    "LAST_DAY" = 31, 
    "FIRST_MONTH" = 1, 
    "LAST_MONTH" = 12, 
    "FIRST_DATE" = '2003-01-01 00:00:00', 
    "LAST_DATE" = '2017-12-31 00:00:00' 
WHERE "CLIMATE_ID" = '3010010';

Execution started --> 
UPDATE "weather_metadata" 
SET 
    "FIRST_DAY" = 1, 
    "LAST_DAY" = 31, 
    "FIRST_MONTH" = 1, 
    "LAST_MONTH" = 12, 
    "FIRST_DATE" = '2003-01-01 00:00:00', 
    "LAST_DATE" = '2017-12-31 00:00:00' 
WHERE "CLIMATE_ID" = '3010237';

Exectution completed --> 
UPDATE "weather_metadata" 
SET 
    "FIRST_DAY" = 1, 
    "LAST_DAY" = 31, 
    "FIRST_MONTH" = 1, 
    "LAST_MONTH" = 12, 
    "FIRST_DATE" = '2003-01-01 00:00:00', 
    "LAST_DA