### Icefabric QuickStart
Author: Tadd Bindas

In [None]:
from pathlib import Path

from pyiceberg.catalog import load_catalog

from icefabric_manage import build
from icefabric_tools import find_origin, load_hydrofabric, table_to_geopandas
from icefabric_tools.rnr import get_rnr_segment

First, define where your local data warehouse will live. The warehouse (including its SQLite catalog database) is stored on disk, and building it requires that you already have some GeoParquet files to load.

In [None]:
# Keep every path relative to the repository's data directory so the notebook
# runs on any machine, not only the original author's.
data_dir = Path("../data")
warehouse_path = data_dir / "warehouse"
# Directory holding the GeoParquet files to load; adjust if yours live elsewhere.
parquet_path = data_dir / "parquet"

# parents=True so a fresh checkout without ../data does not raise FileNotFoundError.
warehouse_path.mkdir(parents=True, exist_ok=True)

# SQL-backed catalog: the SQLite database and the table files both live
# inside the warehouse directory.
catalog_settings = {
    "type": "sql",
    "uri": f"sqlite:///{warehouse_path}/pyiceberg_catalog.db",
    "warehouse": f"file://{warehouse_path}",
}

catalog = load_catalog("hydrofabric", **catalog_settings)
# Populate the catalog from the parquet directory.
build(catalog, parquet_path)

In [76]:
# Display the catalog object (name and backing class) to confirm it was loaded.
catalog

hydrofabric (<class 'pyiceberg.catalog.sql.SqlCatalog'>)

Once the catalog is created, you can load a table from it, scan the table (optionally filtering it with a query), and return the result as a pandas DataFrame.

In [3]:
# Table handle for the hydrofabric network; reused by later cells as `network`.
network = catalog.load_table("hydrofabric.network")

# An unfiltered scan reads every row; the resulting DataFrame is the cell's
# displayed output (pandas truncates the rendering).
network_scan = network.scan()
network_scan.to_pandas()

Unnamed: 0,id,toid,divide_id,ds_id,mainstem,hydroseq,hf_source,hf_id,lengthkm,areasqkm,tot_drainage_areasqkm,type,vpuid,hf_hydroseq,hf_lengthkm,hf_mainstem,topo,poi_id,hl_uri
0,wb-20469,tnx-1000000125,cat-20469,,2613576.0,20283.0,NOAA Reference Fabric,166196253.0,7.006821,13.96665,72.707401,terminal,01,2613576.0,7.006821,2613576.0,fl-nex,,
1,wb-20469,tnx-1000000125,cat-20469,,2613576.0,20283.0,NOAA Reference Fabric,166196257.0,7.006821,13.96665,72.707401,terminal,01,2613602.0,0.992107,2613601.0,fl-nex,,
2,wb-20469,tnx-1000000125,cat-20469,,2613576.0,20283.0,NOAA Reference Fabric,166196256.0,7.006821,13.96665,72.707401,terminal,01,2613601.0,0.940432,2613601.0,fl-nex,,
3,wb-20479,tnx-1000000697,cat-20479,,2613603.0,20277.0,NOAA Reference Fabric,4599061.0,1.445138,5.88735,42.698700,terminal,01,2613604.0,0.793282,2613603.0,fl-nex,,
4,wb-20479,tnx-1000000697,cat-20479,,2613603.0,20277.0,NOAA Reference Fabric,4599715.0,1.445138,5.88735,42.698700,terminal,01,2613605.0,0.205191,2613603.0,fl-nex,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3461362,,cnx-3299511,cat-3299511,,,,NOAA Reference Fabric,,,,,,,,,,fl-nex,,
3461363,,cnx-3299510,cat-3299510,,,,NOAA Reference Fabric,,,,,,,,,,fl-nex,,
3461364,,cnx-3299509,cat-3299509,,,,NOAA Reference Fabric,,,,,,,,,,fl-nex,,
3461365,,cnx-3299508,cat-3299508,,,,NOAA Reference Fabric,,,,,,,,,,fl-nex,,


Now that we have these tables loaded, we can use predefined functions/services to gain more insight from our data and methods.

### Finding an Origin point

In [4]:
# Look up the origin record in the network table for the given COMID.
find_origin(
    network_table=network,
    identifier=18471000,
    id_type="comid",
)

Unnamed: 0,id,toid,vpuid,topo,hydroseq
0,wb-824123,nex-824124,5,fl-nex,3565.0


### Reading a geopandas dataframe

In [None]:
# Table handle for the flowpaths layer.
flowpaths = catalog.load_table("hydrofabric.flowpaths")

# Hand the table straight to the converter. A separate scan().to_pandas() call
# is not needed here: only the cell's last expression is displayed, so its
# result would be read in full and then discarded.
table_to_geopandas(flowpaths)

### Getting a Replace and Route Segment as a Geopackage

In [None]:
# Settings for the SQL catalog stored in the on-disk warehouse directory.
catalog_settings = {
    "type": "sql",
    "uri": "sqlite:///../data/warehouse/pyiceberg_catalog.db",
    "warehouse": "file://../data/warehouse",
}
catalog = load_hydrofabric(catalog_settings)

# Same identifier as the find_origin example (presumably a COMID; confirm
# against get_rnr_segment's signature). The last argument is the GeoPackage
# path for the segment.
get_rnr_segment(catalog, 18471000, "../data/test_rnr.gpkg")