### Icefabric QuickStart
Author: Tadd Bindas

In [1]:
import os
from pathlib import Path

import geopandas as gpd
from pyiceberg.catalog import load_catalog

from icefabric_manage import build
from icefabric_tools import find_origin, load_hydrofabric, table_to_geopandas
from icefabric_tools.rnr import get_rnr_segment

First, define where your local data warehouse will live. The warehouse is stored on disk, and building it requires that you already have some GeoParquet files to load.

In [2]:
# Location of the local warehouse, relative to this notebook's directory.
warehouse_path = Path("../data/warehouse")
# parents=True: on a fresh checkout ../data may not exist yet, and without it
# mkdir raises FileNotFoundError instead of creating the directory chain.
warehouse_path.mkdir(parents=True, exist_ok=True)

# Directory holding the geoparquet files that `build` loads into the catalog.
# NOTE(review): assumes the parquet files sit beside the warehouse under
# ../data (i.e. <repo>/data/parquet) -- adjust if yours live elsewhere.
parquet_path = warehouse_path.parent / "parquet"

# SQL catalog settings: a SQLite database file and the table data both live
# under the warehouse directory.
catalog_settings = {
    "type": "sql",
    "uri": f"sqlite:///{warehouse_path}/pyiceberg_catalog.db",
    "warehouse": f"file://{warehouse_path}",
}

catalog = load_catalog("hydrofabric", **catalog_settings)
# Tables that already exist in the catalog are reported and skipped.
build(catalog, parquet_path)

Table network already exists. Skipping build
Table nexus already exists. Skipping build
Table flowpath-attributes already exists. Skipping build
Table divides already exists. Skipping build
Table pois already exists. Skipping build
Table flowpath-attributes-ml already exists. Skipping build
Table divide-attributes already exists. Skipping build
Table flowpaths already exists. Skipping build
Table hydrolocations already exists. Skipping build
Table lakes already exists. Skipping build


In [3]:
# Display the catalog object to confirm it was loaded.
catalog

hydrofabric (<class 'pyiceberg.catalog.sql.SqlCatalog'>)

Once the catalog is created, you can load a table, scan it (optionally filtered by a query), and return the result as a pandas DataFrame.

In [4]:
# Get a handle to the network table, then materialize an unfiltered scan of it
# as a pandas DataFrame for display.
network = catalog.load_table("hydrofabric.network")
network_df = network.scan().to_pandas()
network_df

Unnamed: 0,id,toid,divide_id,ds_id,mainstem,hydroseq,hf_source,hf_id,lengthkm,areasqkm,tot_drainage_areasqkm,type,vpuid,hf_hydroseq,hf_lengthkm,hf_mainstem,topo,poi_id,hl_uri
0,wb-20469,tnx-1000000125,cat-20469,,2613576.0,20283.0,NOAA Reference Fabric,166196253.0,7.006821,13.96665,72.707401,terminal,01,2613576.0,7.006821,2613576.0,fl-nex,,
1,wb-20469,tnx-1000000125,cat-20469,,2613576.0,20283.0,NOAA Reference Fabric,166196257.0,7.006821,13.96665,72.707401,terminal,01,2613602.0,0.992107,2613601.0,fl-nex,,
2,wb-20469,tnx-1000000125,cat-20469,,2613576.0,20283.0,NOAA Reference Fabric,166196256.0,7.006821,13.96665,72.707401,terminal,01,2613601.0,0.940432,2613601.0,fl-nex,,
3,wb-20479,tnx-1000000697,cat-20479,,2613603.0,20277.0,NOAA Reference Fabric,4599061.0,1.445138,5.88735,42.698700,terminal,01,2613604.0,0.793282,2613603.0,fl-nex,,
4,wb-20479,tnx-1000000697,cat-20479,,2613603.0,20277.0,NOAA Reference Fabric,4599715.0,1.445138,5.88735,42.698700,terminal,01,2613605.0,0.205191,2613603.0,fl-nex,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3461362,,cnx-3299511,cat-3299511,,,,NOAA Reference Fabric,,,,,,,,,,fl-nex,,
3461363,,cnx-3299510,cat-3299510,,,,NOAA Reference Fabric,,,,,,,,,,fl-nex,,
3461364,,cnx-3299509,cat-3299509,,,,NOAA Reference Fabric,,,,,,,,,,fl-nex,,
3461365,,cnx-3299508,cat-3299508,,,,NOAA Reference Fabric,,,,,,,,,,fl-nex,,


Now that we have these tables loaded, we can use predefined functions and services to get more insight from our data.

### Finding an Origin point

In [5]:
# Look up the origin record in the network table for the given COMID.
origin = find_origin(
    network_table=network,
    identifier=18471000,
    id_type="comid",
)
origin

Unnamed: 0,id,toid,vpuid,topo,hydroseq
0,wb-824123,nex-824124,5,fl-nex,3565.0


### Reading a geopandas dataframe

In [6]:
flowpaths = catalog.load_table("hydrofabric.flowpaths")
# Convert the table straight to a GeoDataFrame. The earlier stand-alone
# `flowpaths.scan().to_pandas()` call was dropped: its result was discarded,
# so it only read the whole table into memory a second time for nothing.
table_to_geopandas(flowpaths)

Unnamed: 0,id,toid,mainstem,order,hydroseq,lengthkm,areasqkm,tot_drainage_areasqkm,has_divide,divide_id,poi_id,vpuid,geometry
0,wb-20466,nex-20467,2613576.0,1.0,20292,2.443793,4.304250,4.304250,True,cat-20466,19534,01,"MULTILINESTRING ((1873700.565 2684810.523, 187..."
1,wb-20474,nex-20475,2613597.0,1.0,20291,3.982518,4.829400,4.829400,True,cat-20474,,01,"MULTILINESTRING ((1875378.459 2678583.977, 187..."
2,wb-20475,nex-20468,2613597.0,1.0,20290,1.006645,6.713101,11.542501,True,cat-20475,,01,"MULTILINESTRING ((1875781.183 2682056.744, 187..."
3,wb-20473,nex-20467,2613587.0,1.0,20289,1.892781,4.486050,4.486050,True,cat-20473,,01,"MULTILINESTRING ((1875252.558 2684695.906, 187..."
4,wb-20467,nex-20468,2613576.0,2.0,20288,1.616463,2.583900,22.916701,True,cat-20467,1193,01,"MULTILINESTRING ((1875278.618 2683667.5, 18754..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
828283,wb-3054945,nex-3054946,2523667.0,2.0,236,3.515287,9.335699,84.047850,True,cat-3054945,,17,"MULTILINESTRING ((-2219263.794 2463211.731, -2..."
828284,wb-3054946,nex-3054947,2523667.0,2.0,235,4.365587,13.306500,97.354350,True,cat-3054946,,17,"MULTILINESTRING ((-2222330.448 2464615.711, -2..."
828285,wb-3054961,nex-3054958,2523700.0,1.0,234,2.032194,5.235750,5.235750,True,cat-3054961,,17,"MULTILINESTRING ((-2218810.457 2467656.224, -2..."
828286,wb-3054958,nex-3054959,2523693.0,1.0,233,6.466212,10.670850,15.906600,True,cat-3054958,,17,"MULTILINESTRING ((-2217798.39 2468518.649, -22..."


### Getting a Replace and Route Segment as a Geopackage

In [None]:
# Reuse `catalog_settings` from the warehouse setup cell rather than
# re-declaring the same URI and warehouse strings here, so the two cannot drift.
# `load_hydrofabric` and `get_rnr_segment` are imported in the top import cell.
catalog = load_hydrofabric(catalog_settings)
# Export the segment for identifier 18471000 to a GeoPackage file; the next
# cell reads this file back.
get_rnr_segment(catalog, 18471000, "../data/test_rnr.gpkg")

In [8]:
# Read the "flowpaths" layer back from the exported GeoPackage to inspect it.
# `gpd` (geopandas) is imported in the top import cell.
gpd.read_file("../data/test_rnr.gpkg", layer="flowpaths")

Unnamed: 0,id,toid,mainstem,order,hydroseq,lengthkm,areasqkm,tot_drainage_areasqkm,has_divide,divide_id,poi_id,vpuid,geometry
0,wb-824123,nex-824124,1547279.0,5.0,3565,9.516844,17.6634,12878.231399,True,cat-824123,,5,"MULTILINESTRING ((752258.435 1788547.25, 75221..."
1,wb-824124,nex-824125,1547279.0,5.0,3564,4.186372,4.710151,12997.00305,True,cat-824124,93863.0,5,"MULTILINESTRING ((753323.441 1781497.25, 75323..."
2,wb-824125,nex-824126,1547279.0,5.0,3562,7.33588,6.62985,13098.6297,True,cat-824125,,5,"MULTILINESTRING ((751418.443 1779097.245, 7514..."
3,wb-824126,nex-824127,1547279.0,5.0,3561,7.164683,20.9385,13119.5682,True,cat-824126,74964.0,5,"MULTILINESTRING ((751778.441 1774552.25, 75185..."
4,wb-824127,nex-824128,1547279.0,5.0,3558,1.754889,5.896801,13549.954953,True,cat-824127,,5,"MULTILINESTRING ((754178.441 1771642.252, 7542..."
5,wb-824128,nex-824129,1547279.0,5.0,3557,8.993345,21.1329,13571.087853,True,cat-824128,73141.0,5,"MULTILINESTRING ((754396.812 1770074.121, 7543..."
6,wb-824129,nex-824130,1547279.0,5.0,3556,8.903493,8.90055,13680.138604,True,cat-824129,,5,"MULTILINESTRING ((754523.437 1764907.249, 7544..."
7,wb-824130,nex-824131,1547279.0,5.0,3542,2.378809,1.035,13875.044853,True,cat-824130,,5,"MULTILINESTRING ((754338.731 1760185.448, 7544..."
