### Querying RAS XS through PyIceberg

The following notebook is to walk you through the process of querying a RAS XS through PyIceberg. 

For the warehouse path, please put the path to your S3 tables URI


In [10]:
# imports
import os

from dotenv import load_dotenv
from pyiceberg.catalog import load_catalog

from icefabric_tools import to_geopandas

load_dotenv()

os.environ["WAREHOUSE_PATH"] = "INSERT PATH TO URI"

In [None]:
# Load Catalog
catalog_settings = {
    "type": "glue",
    "s3.endpoint": "s3.us-east-1.amazonaws.com",
    "warehouse": os.environ["WAREHOUSE_PATH"],
    "glue_region": "us-east-1",
}
catalog = load_catalog("glue", **catalog_settings)

In [21]:
catalog.list_tables("mip_xs")[40:50]

[('mip_xs', '02030102'),
 ('mip_xs', '02030103'),
 ('mip_xs', '02030104'),
 ('mip_xs', '02030105'),
 ('mip_xs', '02040101'),
 ('mip_xs', '02040102'),
 ('mip_xs', '02040103'),
 ('mip_xs', '02040104'),
 ('mip_xs', '02040105'),
 ('mip_xs', '02040106')]

Using `catalog.load_table()` we can directly call the XS data. Each is stored based on HUC8

In [36]:
# Reading MIP XS
namespace = "mip_xs"
huc_number = "02040106"
df = catalog.load_table(f"{namespace}.{huc_number}").scan().to_pandas()
gdf = to_geopandas(df)
gdf.head()
# gdf.explore()

Unnamed: 0,river,reach,river_reach,river_station,river_reach_rs,thalweg,xs_max_elevation,left_reach_length,right_reach_length,channel_reach_length,ras_data,station_elevation_points,bank_stations,number_of_station_elevation_points,number_of_coords,flows,profile_names,geometry,huc,model_id,us_reach_id,us_network_to_id,ds_reach_id,ds_network_to_id,__index_level_0__
0,Middle Creek,Reach 1,"Middle Creek ,Reach 1",16456.0,Middle Creek Reach 1 16456.0,861.852,922.65,204.9,114.1,156.0,"Type RM Length L Ch R = 1 ,16456 ,204.9,156,...","[(0.0, 922.65), (5.6, 922.16), (15.5, 921.15),...","['406.7', '461.44']",398,2,1605.0\n1037.0\n1453.0\n837.0\n660.0\n458.0,0.2% Chance\n1% Chance\n1+% Chance\n2% Chance\...,"LINESTRING (1699491.379 2176460.083, 1699268.2...",2040106,MiddleCreekAE,4187133.0,4187169.0,,,89158
1,Middle Creek,Reach 1,"Middle Creek ,Reach 1",16300.0,Middle Creek Reach 1 16300.0,860.3,873.03,325.1,281.1,327.0,"Type RM Length L Ch R = 1 ,16300 ,325.1,327,...","[(0.0, 873.03), (1.2, 872.89), (1.8, 872.7), (...","['226.7', '283.62']",199,2,2113.0\n1371.0\n1920.0\n1109.0\n877.0\n611.0,0.2% Chance\n1% Chance\n1+% Chance\n2% Chance\...,"LINESTRING (1699470.635 2176367.49, 1699291.07...",2040106,MiddleCreekAE,,,,,89159
2,Middle Creek,Reach 1,"Middle Creek ,Reach 1",15972.0,Middle Creek Reach 1 15972.0,856.647,865.25,141.8,214.1,191.2,"Type RM Length L Ch R = 1 ,15972 ,141.8,191....","[(0.0, 865.25), (0.757, 865.23), (4.075, 865.0...","['177.08', '192.51']",104,7,2113.0\n1371.0\n1920.0\n1109.0\n877.0\n611.0,0.2% Chance\n1% Chance\n1+% Chance\n2% Chance\...,"LINESTRING (1699527.966 2176278.803, 1699473.4...",2040106,MiddleCreekAE,,,,,89160
3,Middle Creek,Reach 1,"Middle Creek ,Reach 1",15781.0,Middle Creek Reach 1 15781.0,853.81,864.813,128.0,205.3,183.5,"Type RM Length L Ch R = 1 ,15781 ,128,183.5,...","[(0.0, 864.22), (19.555, 864.768), (22.379, 86...","['249.38', '264.58']",129,6,2113.0\n1371.0\n1920.0\n1109.0\n877.0\n611.0,0.2% Chance\n1% Chance\n1+% Chance\n2% Chance\...,"LINESTRING (1699583.967 2176242.774, 1699513.8...",2040106,MiddleCreekAE,,,,,89161
4,Middle Creek,Reach 1,"Middle Creek ,Reach 1",15598.0,Middle Creek Reach 1 15598.0,851.72,861.82,215.3,95.0,209.6,"Type RM Length L Ch R = 1 ,15598 ,215.3,209....","[(0.0, 861.81), (0.4, 861.82), (10.0, 861.02),...","['298.1', '328.6']",354,2,2113.0\n1371.0\n1920.0\n1109.0\n877.0\n611.0,0.2% Chance\n1% Chance\n1+% Chance\n2% Chance\...,"LINESTRING (1699625.84 2176240.53, 1699373.415...",2040106,MiddleCreekAE,,,,,89162


To query individual river reaches, we can use the scan feature to query based on the table schema

In [23]:
catalog.load_table(f"{namespace}.{huc_number}").schema

<bound method Table.schema of 02040102(
  1: river: optional string,
  2: reach: optional string,
  3: river_reach: optional string,
  4: river_station: optional double,
  5: river_reach_rs: optional string,
  6: thalweg: optional string,
  7: xs_max_elevation: optional string,
  8: left_reach_length: optional double,
  9: right_reach_length: optional double,
  10: channel_reach_length: optional double,
  11: ras_data: optional string,
  12: station_elevation_points: optional string,
  13: bank_stations: optional string,
  14: number_of_station_elevation_points: optional long,
  15: number_of_coords: optional long,
  16: flows: optional string,
  17: profile_names: optional string,
  18: geometry: optional binary,
  19: huc: optional string,
  20: model_id: optional string,
  21: us_reach_id: optional string,
  22: us_network_to_id: optional string,
  23: ds_reach_id: optional string,
  24: ds_network_to_id: optional string,
  25: __index_level_0__: optional long
),
partition by: [],
s

Let's query by the river name

In [37]:
from pyiceberg.expressions import EqualTo

df = (
    catalog.load_table(f"{namespace}.{huc_number}")
    .scan(row_filter=EqualTo("river", "Lehigh River"))
    .to_pandas()
)
display(df.tail())
# to_geopandas(df).explore()

Unnamed: 0,river,reach,river_reach,river_station,river_reach_rs,thalweg,xs_max_elevation,left_reach_length,right_reach_length,channel_reach_length,ras_data,station_elevation_points,bank_stations,number_of_station_elevation_points,number_of_coords,flows,profile_names,geometry,huc,model_id,us_reach_id,us_network_to_id,ds_reach_id,ds_network_to_id,__index_level_0__
4916,Lehigh River,Main,"Lehigh River ,Main",970.0563,Lehigh River Main 970.0563,161.24,315.72,205.44,257.81,237.58,"Type RM Length L Ch R = 1 ,970.0563,205.44,237...","[(0.0, 212.27), (10.0, 212.07), (13.99, 212.11...","['1691.53', '1986.6']",396,4,46235.0\n56895.0\n64915.0\n73200.0\n88600.0\n9...,10-yr\n25-yr\n50-yr\n100-yr\n100-yr+\n500-yr,b'\x01\x02\x00\x00\x00\x04\x00\x00\x00\xb7^\xf...,2040106,LehighRiverFEMA_Sup,,,,,99205
4917,Lehigh River,Main,"Lehigh River ,Main",732.4796,Lehigh River Main 732.4796,160.27,307.0,103.3,103.3,103.3,"Type RM Length L Ch R = 1 ,732.4796,103.3,103....","[(0.0, 211.27), (3.99, 210.98), (9.98, 210.82)...","['1649.13', '1981.56']",400,4,46235.0\n56895.0\n64915.0\n73200.0\n88600.0\n9...,10-yr\n25-yr\n50-yr\n100-yr\n100-yr+\n500-yr,"b""\x01\x02\x00\x00\x00\x04\x00\x00\x000\xdc\xe...",2040106,LehighRiverFEMA_Sup,,,,,99206
4918,Lehigh River,Main,"Lehigh River ,Main",640.0,Lehigh River Main 640.0,160.25,211.8,74.2,74.2,74.2,"Type RM Length L Ch R = 1 ,640 ,74.2,74.2,...","[(0.0, 209.86), (7.7, 209.81), (19.2, 209.34),...","['1628.6', '2020.7']",440,9,46235.0\n56895.0\n64915.0\n73200.0\n88600.0\n9...,10-yr\n25-yr\n50-yr\n100-yr\n100-yr+\n500-yr,b'\x01\x02\x00\x00\x00\t\x00\x00\x00\xba!\x0b\...,2040106,LehighRiverFEMA_Sup,,,,,99207
4919,Lehigh River,Main,"Lehigh River ,Main",573.0077,Lehigh River Main 573.0077,160.76,296.63,201.11,302.23,224.98,"Type RM Length L Ch R = 1 ,573.0077,201.11,224...","[(0.0, 210.06), (9.99, 209.84), (15.98, 209.82...","['1657.49', '2078.85']",380,7,46235.0\n56895.0\n64915.0\n73200.0\n88600.0\n9...,10-yr\n25-yr\n50-yr\n100-yr\n100-yr+\n500-yr,b'\x01\x02\x00\x00\x00\x07\x00\x00\x00*e)r\xa7...,2040106,LehighRiverFEMA_Sup,,,,,99208
4920,Lehigh River,Main,"Lehigh River ,Main",348.0278,Lehigh River Main 348.0278,152.95,283.66,140.55,152.84,348.03,"Type RM Length L Ch R = 1 ,348.0278,140.55,348...","[(0.0, 208.28), (9.99, 207.72), (37.95, 206.87...","['1743.75', '2358.16']",400,8,46235.0\n56895.0\n64915.0\n73200.0\n88600.0\n9...,10-yr\n25-yr\n50-yr\n100-yr\n100-yr+\n500-yr,b'\x01\x02\x00\x00\x00\x08\x00\x00\x00\xb0\x8f...,2040106,LehighRiverFEMA_Sup,,,4188251.0,2588461.0,99209


Now, let's query by an individual river station ID

In [38]:
from pyiceberg.expressions import EqualTo

df = (
    catalog.load_table(f"{namespace}.{huc_number}")
    .scan(row_filter=EqualTo("river_station", 573.0077))
    .to_pandas()
)
display(df.head())
# to_geopandas(df).explore()

Unnamed: 0,river,reach,river_reach,river_station,river_reach_rs,thalweg,xs_max_elevation,left_reach_length,right_reach_length,channel_reach_length,ras_data,station_elevation_points,bank_stations,number_of_station_elevation_points,number_of_coords,flows,profile_names,geometry,huc,model_id,us_reach_id,us_network_to_id,ds_reach_id,ds_network_to_id,__index_level_0__
0,Lehigh River,Main,"Lehigh River ,Main",573.0077,Lehigh River Main 573.0077,160.76,296.63,201.11,302.23,224.98,"Type RM Length L Ch R = 1 ,573.0077,201.11,224...","[(0.0, 210.06), (9.99, 209.84), (15.98, 209.82...","['1657.49', '2078.85']",380,7,46235.0\n56895.0\n64915.0\n73200.0\n88600.0\n9...,10-yr\n25-yr\n50-yr\n100-yr\n100-yr+\n500-yr,b'\x01\x02\x00\x00\x00\x07\x00\x00\x00*e)r\xa7...,2040106,LehighRiverFEMA_Sup,,,,,99208


To extend this notebook to other HUCs, just change the HUC number, then change the XS reference