In [None]:
import ngen
import os
import duckdb
import sys

import holoviews as hv
import geoviews as gv
import geopandas as gpd
import hvplot.pandas #noqa
import pandas as pd
import dask.dataframe as dd
import cartopy.crs as ccrs

from pathlib import Path

In [None]:

# Prepend the evaluation directory (resolved relative to the current working
# directory) to the module search path before importing `utils`.
# NOTE(review): `utils` is presumably the local module living in
# ../../evaluation — confirm; it is not referenced elsewhere in this notebook.
sys.path.insert(0, '../../evaluation')
import utils

In [None]:
# Set some configurations
# The two root directories can be overridden through the NGEN_DIR / STUDY_DIR
# environment variables so the notebook is runnable on machines other than the
# original author's. When a variable is unset the original hard-coded location
# is used, so existing behaviour is unchanged.

# Root of the raw ngen run (config, forcings and model output).
NGEN_DIR = os.environ.get("NGEN_DIR", "/home/matt/Downloads/AWI_03W_113060_001/")
NGEN_CONFIG_DIR = Path(NGEN_DIR, "config")
NGEN_FORCINGS_DIR = Path(NGEN_DIR, "forcings")
NGEN_OUTPUT_DIR = Path(NGEN_DIR, "output")

# Root of the study cache that the converted parquet files are written to.
STUDY_DIR = os.environ.get("STUDY_DIR", "/home/matt/cache/AWI_03W_113060_001/")
STUDY_FORCINGS_DIR = Path(STUDY_DIR, "forcings")
STUDY_OUTPUT_DIR = Path(STUDY_DIR, "output")
STUDY_GEO_DIR = Path(STUDY_DIR, "geo")
STUDY_USGS_DIR = Path(STUDY_DIR, "usgs")

In [None]:
# Read the catchment features from the ngen config directory and reproject
# them to EPSG:3857 so they line up with the map layers built below.
catchment_file_gdf = (
    gpd.read_file(Path(NGEN_CONFIG_DIR, "catchment_data.geojson"))
    .to_crs("EPSG:3857")
)
catchment_file_gdf

In [None]:
# Build the catchment polygon layer (assigned, not displayed, in this cell);
# `id` and `toid` are passed as hover columns.
catchment_polygons = catchment_file_gdf.hvplot(
    crs=ccrs.GOOGLE_MERCATOR,
    hover_cols=["id", "toid"],
)

In [None]:
# Read the nexus features from the ngen config directory and reproject them
# to EPSG:3857, the same CRS used for the catchments.
nexus_file_gdf = (
    gpd.read_file(Path(NGEN_CONFIG_DIR, "nexus_data.geojson"))
    .to_crs("EPSG:3857")
)
nexus_file_gdf

In [None]:
# Build the nexus layer in green (assigned, not displayed, in this cell);
# `id` and `toid` are passed as hover columns.
nexus_points = nexus_file_gdf.hvplot(
    color="green",
    crs=ccrs.GOOGLE_MERCATOR,
    hover_cols=["id", "toid"],
)

In [None]:
# OSM tile source from geoviews, used as the base layer of the map overlay.
tiles = gv.tile_sources.OSM

In [None]:
# Overlay the catchment polygons and nexus points on the tile layer and
# display the combined map as this cell's output.
tiles * catchment_polygons * nexus_points

In [None]:
# Path.glob() returns a one-shot generator: once the conversion loop has
# consumed it, re-running that loop would silently process nothing. Materialise
# the matches as a sorted list so the loop is re-runnable and the processing
# order is deterministic.
cat_forcing_files = sorted(NGEN_FORCINGS_DIR.glob(pattern="cat03w_cat-*.csv"))

In [None]:
# Convert every per-catchment forcing CSV into a parquet file in the study
# forcings directory, keeping only the columns listed below (in this order).
forcing_columns = [
    "reference_time",
    "catchment_id",
    "value_time",
    "value",
    "variable_name",
    "measurement_unit",
    "configuration",
]

# Create the destination up front so to_parquet() does not fail when the
# study cache has not been populated yet.
STUDY_FORCINGS_DIR.mkdir(parents=True, exist_ok=True)

for file in cat_forcing_files:
    # The part of the file stem after the last underscore is used both as the
    # catchment id and as the output file name; compute it once.
    catchment_id = file.stem.split("_")[-1]
    cat_df = (
        pd.read_csv(file)
        # errors="raise" makes a missing "time"/"precip_rate" column fail loudly.
        .rename(columns={"time": "value_time", "precip_rate": "value"}, errors="raise")
        .assign(
            configuration="AWI_03W_113060_001",
            variable_name="precipitation_flux",
            reference_time="",  # no reference time is available; stored as an empty string
            measurement_unit="mm s^-1",
            catchment_id=catchment_id,
        )
    )[forcing_columns]
    cat_df.to_parquet(Path(STUDY_FORCINGS_DIR, f"{catchment_id}.parquet"))

In [None]:
# Path.glob() returns a one-shot generator: once the conversion loop has
# consumed it, re-running that loop would silently process nothing. Materialise
# the matches as a sorted list so the loop is re-runnable and the processing
# order is deterministic.
cat_output_files = sorted(NGEN_OUTPUT_DIR.glob(pattern="cat-*.csv"))

In [None]:
# Convert every per-catchment output CSV into a parquet file in the study
# output directory, keeping only the columns listed below (in this order).
catchment_output_columns = [
    "reference_time",
    "catchment_id",
    "value_time",
    "value",
    "variable_name",
    "measurement_unit",
    "configuration",
]

# Create the destination up front so to_parquet() does not fail when the
# study cache has not been populated yet.
STUDY_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

for file in cat_output_files:
    # The part of the file stem after the last underscore is used both as the
    # catchment id and as the output file name; compute it once.
    catchment_id = file.stem.split("_")[-1]
    cat_out_df = (
        pd.read_csv(file)
        # "Time Step" is renamed to lead_time but not kept in the final
        # selection; it stays in the mapping so errors="raise" still verifies
        # that all three source columns are present.
        .rename(
            columns={"Time Step": "lead_time", "Time": "value_time", "Q_OUT": "value"},
            errors="raise",
        )
        .assign(
            configuration="AWI_03W_113060_001",
            catchment_id=catchment_id,
            variable_name="runoff",
            reference_time="",  # no reference time is available; stored as an empty string
            measurement_unit="m^3/s",
        )
    )[catchment_output_columns]
    cat_out_df.to_parquet(Path(STUDY_OUTPUT_DIR, f"{catchment_id}.parquet"))

In [None]:
# Path.glob() returns a one-shot generator: once the conversion loop has
# consumed it, re-running that loop would silently process nothing. Materialise
# the matches as a sorted list so the loop is re-runnable and the processing
# order is deterministic.
nexus_output_files = sorted(NGEN_OUTPUT_DIR.glob(pattern="nex-*.csv"))

In [None]:
# Convert every nexus output CSV into a parquet file in the study output
# directory, keeping only the columns listed below (in this order).
nexus_output_columns = [
    "reference_time",
    "nexus_id",
    "value_time",
    "value",
    "variable_name",
    "measurement_unit",
    "configuration",
]

# Create the destination up front so to_parquet() does not fail when the
# study cache has not been populated yet.
STUDY_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

for file in nexus_output_files:
    # NOTE(review): with header=1 and explicit `names`, pandas treats file row 1
    # as the header to replace, so rows 0 and 1 of each file are not read as
    # data. If the nexus CSVs have no header line this drops two records
    # (header=None would keep them) — confirm against the actual file format.
    nex_out_df = pd.read_csv(file, header=1, names=["lead_time", "value_time", "value"])

    # The nexus id is the part of the stem before the first underscore.
    nexus_id = file.stem.split("_")[0]
    nex_out_df = nex_out_df.assign(
        configuration="AWI_03W_113060_001",
        nexus_id=nexus_id,
        variable_name="streamflow",
        reference_time="",  # no reference time is available; stored as an empty string
        measurement_unit="m^3/s",
    )[nexus_output_columns]

    # Name the parquet file after the nexus id. The previous code used the
    # token after the LAST underscore of the stem, which for a stem such as
    # "nex-1_output" is the same for every file, so each nexus overwrote the
    # previous one's parquet file.
    nex_out_df.to_parquet(Path(STUDY_OUTPUT_DIR, f"{nexus_id}.parquet"))
In [None]:
# Read the geopackage stored in the study geo directory.
gdf = gpd.read_file(Path(STUDY_GEO_DIR) / "nextgen_03W.gpkg")

In [None]:
# Plot the geopackage contents with hvplot's default settings as this cell's output.
gdf.hvplot()

In [None]:
# Maximum `value` per catchment across all cat-*.parquet files in the study
# output directory (the directory path is interpolated into the SQL text).
query = f"""
    SELECT catchment_id, max(value)
    FROM read_parquet('{STUDY_OUTPUT_DIR}/cat-*.parquet')
    GROUP BY catchment_id;
"""
# Run the query with DuckDB, convert the result to a DataFrame and display it.
df = duckdb.query(query).to_df()
df

In [None]:
# Maximum `value` per catchment across all cat-*.parquet files in the study
# forcings directory (the directory path is interpolated into the SQL text).
# Note: this rebinds `query` and `df`, replacing the values assigned by any
# earlier cell that used the same names.
query = f"""
    SELECT catchment_id, max(value)
    FROM read_parquet('{STUDY_FORCINGS_DIR}/cat-*.parquet')
    GROUP BY catchment_id;
"""
# Run the query with DuckDB and convert the result to a DataFrame.
df = duckdb.query(query).to_df()

In [None]:
# Display the most recently computed query result held in `df`.
df