In [None]:
import os
import shutil
import fsspec
import ujson
from kerchunk.hdf import SingleHdf5ToZarr
from kerchunk.combine import MultiZarrToZarr
import xarray as xr
import dask
import hvplot.xarray
from datetime import datetime, timedelta
from time import time

In [None]:
# Filesystem handle for Google Cloud Storage, opened anonymously (anon=True),
# i.e. without supplying credentials.
fs = fsspec.filesystem(protocol="gcs", anon=True)
# Location and naming pattern of the NWM medium-range channel_rt files used in
# this notebook. Every file shares the same directory and filename stem; only
# the suffix that follows "channel_rt" differs.
NWM_CHANNEL_RT_DIR = (
    "gcs://awi-ciroh-persistent/nwm_transform_nc/medium_range/channel_rt"
)
NWM_FILE_TEMPLATE = "nwm.t2022120100z.medium_range.channel_rt{suffix}.conus.nc"

# The first file carries no suffix; the remaining seven are numbered _1 .. _7.
NWM_FILE_SUFFIXES = [""] + [f"_{n}" for n in range(1, 8)]

# Full URLs, in the same order as the suffix list above.
file_collections = [
    f"{NWM_CHANNEL_RT_DIR}/{NWM_FILE_TEMPLATE.format(suffix=suffix)}"
    for suffix in NWM_FILE_SUFFIXES
]

In [None]:
import sys
import subprocess

# Detect Google Colab by probing for its runtime package. Only a failed import
# means "not on Colab"; any other error (e.g. an interrupt, or a problem while
# cloning below) is allowed to surface instead of being mistaken for a local
# environment.
try:
    import google.colab  # noqa: F401  (imported only as an environment probe)
except ImportError:
    ENV_IS_CL = False
else:
    ENV_IS_CL = True

if ENV_IS_CL:
    # On Colab: fetch the helper repository and make it importable.
    # The exit status of the clone is not checked here.
    subprocess.run(
        [
            "git",
            "clone",
            "https://github.com/AlabamaWaterInstitute/data_access_examples",
        ]
    )
    sys.path.append("/content/data_access_examples")
else:
    # not all of these paths are needed -- the full list results from some
    # trial and error with different environments during testing
    sys.path.append(r"..")
    sys.path.append(r"../data_access_examples")
    sys.path.append(r"git/data_access_examples")

import nwm_filenames.listofnwmfilenames as lnf

from nwm_network.NWM_2_1_outlets import outlets_sorted

In [None]:
%%time
# id = 101
id_list = [22811611]  # Mississippi River outlet 
# id_list = [22811611, 20427622]  # Mississippi River outlet 
id_list = outlets_sorted

# id = 20427622  # Random small interior outlet somewhere in Arizona; see https://github.com/AlabamaWaterInstitute/data_access_examples/blob/main/nwm_network/route_link_fsspec.ipynb
ds_list = []
df_list = []
    
for _i, file_url in enumerate(file_collections):
    st = time()
    print(f"generating jsons for {_i}", end="\t")
    with fs.open(file_url) as f:
        %time    ds_zarr = SingleHdf5ToZarr(f, file_url, inline_threshold=0)
        %time    ds_zarrjson = ds_zarr.translate()
    print(f"{time()-st} elapsed")
    
    backend_args_1 = { "consolidated": False,
                     "storage_options": { "fo": ds_zarrjson,
                                    "remote_protocol": "gcs", 
                                    "remote_options": {'anon':True} }}
    ds_1 = xr.open_dataset(
        "reference://", 
        engine="zarr",
        backend_kwargs=backend_args_1
    )
    print(f"{time()-st} elapsed")

    print(f"slicing dataset to feature for {_i}", end="\t")
    ds_select_1 = ds_1.sel(feature_id=id_list)
    ds_list.append(ds_select_1)
    print(f"{time()-st} elapsed")
    

    print(f"querying/retrieving data and creating dataframe for {_i}", end="\t")
    df_select_1 = ds_select_1["streamflow"].to_dataframe()
    df_list.append(df_select_1)
    print(f"{time()-st} elapsed")

    # print(f"selecting feature for {_i}", end="\t")
    # ds_select_1.plot.scatter("time","streamflow")
    # print(f"{time()-st} elapsed")
    
    print(f"finishing {_i}", end="\t")
    print(f"{time()-st} total time elapsed")
    print(f"\n")
    

In [None]:
# Cross-section of the first file's DataFrame: rows whose level-0 index value
# is 22811611, with that index level retained in the result.
df_list[0].xs(
    key=22811611,
    axis=0,
    level=0,
    drop_level=False,
)
# for more help, see https://stackoverflow.com/questions/53927460/select-rows-in-pandas-multiindex-dataframe