# Hydrofabric Geopackage Viewer

Reads and extracts data from the NGWPC PyIceberg resources.

In [None]:
import json
import os
from pathlib import Path

import pandas as pd
from ipywidgets import interact
from pyiceberg.catalog import load_catalog

from icefabric.helpers import load_creds, load_pyiceberg_config
from icefabric.hydrofabric import subset_hydrofabric
from icefabric.schemas import IdType
from icefabric.ui import create_time_series_widget, get_hydrofabric_gages, get_streamflow_data

# Move the working directory two levels up so that it points at the project root.
# NOTE: the change is relative to the current directory, so re-running this cell
# moves up another two levels each time.
current_working_dir = Path.cwd()
os.chdir(current_working_dir / "../../")
print(
    f"Changed current working dir from {current_working_dir} to: {Path.cwd()}. This must run at the project root"
)

# Everything below is resolved against the new working directory.
project_root = Path.cwd()

# dir is where the .env file is located
load_creds(dir=project_root)

# Load the local pyiceberg config and build the "sql" catalog from its settings
pyiceberg_config = load_pyiceberg_config(project_root)
sql_catalog_settings = pyiceberg_config["catalog"]["sql"]
catalog = load_catalog(
    name="sql",
    type=sql_catalog_settings["type"],
    uri=sql_catalog_settings["uri"],
    warehouse=sql_catalog_settings["warehouse"],
)

## Getting streamflow observations for different gages


Step 1) Getting the data

In [None]:
from pprint import pprint

# Using a local warehouse for this example. This was created through the following command and NGWPC test AWS account
# python tools/pyiceberg/export_catalog.py --namespace streamflow_observations
catalog_settings = pyiceberg_config["catalog"]
streamflow_obs_df = get_streamflow_data(catalog_name="sql", **catalog_settings)

# Show every column of the observations table. The plotting cell below selects
# a gage ID as the flow column and "time" as the time column.
observation_columns = streamflow_obs_df.columns.tolist()
pprint(observation_columns)

In [None]:
# Select the gage ID you want to use. It is passed as the flow column name
# (`flow_col`) when plotting the streamflow observations.
# NOTE: a later cell reassigns `gage_id` before subsetting the hydrofabric.
gage_id = "12145500"

In [None]:
# Convert the time column with pandas' datetime parser before plotting
time_column = "time"
streamflow_obs_df[time_column] = pd.to_datetime(streamflow_obs_df[time_column])

# Scatter plot of the observations for the selected gage. This stays a bare
# expression so the notebook displays the value returned by the call.
create_time_series_widget(
    streamflow_obs_df,
    point_size=5,
    time_col=time_column,
    flow_col=gage_id,
)

## Geopackage Summary

Print list of layers, number of catchments, and list of hydrolocations.

In [None]:
# Look up the hydrofabric gages through the catalog, then pretty-print the result
hydrofabric_gages = get_hydrofabric_gages(catalog=catalog)
pprint(hydrofabric_gages)

In [None]:
# Using a local warehouse for this example. This was created through the following command and NGWPC test AWS account
# python tools/pyiceberg/export_catalog.py --namespace conus_hf
gage_id = "11280000"
layers = ["flowpaths", "nexus", "divides", "network", "hydrolocations", "pois"]
domain = "conus_hf"

# The upstream connections file is looked up relative to the current working
# directory, one JSON file per domain.
upstream_connections_path = Path.cwd() / f"data/hydrofabric/{domain}_upstream_connections.json"

# Raise explicitly instead of using `assert`: assertions are removed when Python
# runs with -O, which would skip this check and fail later with a less helpful error.
if not upstream_connections_path.exists():
    raise FileNotFoundError(
        f"Upstream Connections missing for {domain}. Please run `icefabric build-upstream-connections` to generate this file"
    )

# Read with an explicit encoding so parsing does not depend on the platform
# default, and keep the file open only for as long as it takes to parse it.
with open(upstream_connections_path, encoding="utf-8") as f:
    data = json.load(f)

print(
    f"Loading upstream connections connected generated on: {data['_metadata']['generated_at']} from snapshot id: {data['_metadata']['iceberg']['snapshot_id']}"
)
upstream_dict = data["upstream_connections"]

In [None]:
# Subset the hydrofabric for the selected gage, addressed by its hydrolocation URI.
# The namespace comes from `domain` (the same value used to pick the upstream
# connections file) so that the connections and the queried tables always refer
# to the same domain.
layers_df = subset_hydrofabric(
    catalog=catalog,
    layers=layers,
    identifier=f"gages-{gage_id}",
    id_type=IdType.HL_URI,
    namespace=domain,
    upstream_dict=upstream_dict,
)

In [None]:
# Names of the layers returned by the subset
print("Layers:", list(layers_df.keys()), sep="\n")

# The row count of the "divides" table is reported as the number of catchments
row, col = layers_df["divides"].shape
print("Number of catchments:", row, sep="\n")

# Comma-separated list of the hydrolocation URIs
print("Hydrolocations:")
hl = layers_df["hydrolocations"]["hl_uri"].tolist()
hl_str = ", ".join(hl)
print(hl_str)

## Map of divides, nexuses, and flowpaths

Plot divides, nexuses, and flowpaths on a map.

In [None]:
# Reproject the three mapped layers to EPSG:3857
target_epsg = 3857
divides, flowpaths, nexus = (
    layers_df[mapped_layer].to_crs(epsg=target_epsg) for mapped_layer in ("divides", "flowpaths", "nexus")
)

# Start the map with the divides, then pass the returned map on via `m` so the
# flowpaths (yellow) and nexus points (red) are added to it. The last call is a
# bare expression so the notebook displays the final map.
div_ex = divides.explore()
fl_ex = flowpaths.explore(m=div_ex, color="yellow")
nexus.explore(m=fl_ex, color="red")

## View Layers

Select a layer and print its table.

In [None]:
import pandas as pd

# Pandas display options for better table formatting
display_options = {
    "display.max_columns": None,
    "display.width": None,
    "display.max_colwidth": 50,
    "display.expand_frame_repr": False,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Interactive display: choose one of the names in `layers` and show the first
# 20 rows of that layer's table
interact(
    lambda layer_name: layers_df[layer_name].head(20),
    layer_name=layers,
)