In [1]:
import glob
import numpy as np
import xarray as xr
import pandas as pd

# Input locations, given relative to the notebook's working directory.
routelink_subset_file = "../test/input/geo/wrf-hydro-run/DOMAIN/routeLink_subset.nc"

usgs_timeslices_folder = "../test/input/geo/wrf-hydro-run/nudgingTimeSliceObs/"

# Glob pattern selecting the time-slice files whose names start with "2020-03".
usgs_file_pattern_filter = "2020-03*.usgsTimeSlice.ncdf"

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# usgs_files[0] (used by later cells) the same file on every run.
usgs_files = sorted(glob.glob(usgs_timeslices_folder + usgs_file_pattern_filter))

# Sanity check: number of time-slice files found, and 1/0 for whether the
# route-link file exists.
len(usgs_files), len(glob.glob(routelink_subset_file))


(481, 1)

In [26]:
# TEST the preprocessing steps on a single Dataset
from compose import compose  # imported here because later cells rely on it

ds = xr.open_dataset(usgs_files[0])

# Station ids are byte strings; strip them and keep only the purely numeric
# ones (this is the mask that removes the blank values).
stationId_da = list(map(bytes.strip, ds.stationId.values))
stationId_da_mask = list(map(bytes.isdigit, stationId_da))
stationId = ds.stationId[stationId_da_mask].values.astype(int)

# Unique time labels of this file, with "_" replaced by "T".
unique_times = np.unique(ds.time.values)
for i, t in enumerate(unique_times):
    unique_times[i] = t.replace(b"_", b"T")

data_vars = {}
for v in ("discharge", "discharge_quality"):
    data_vars[v] = (["stationId"], ds[v][stationId_da_mask].values)

# Last expression of the cell so the resulting Dataset is actually displayed.
xr.Dataset(data_vars=data_vars, coords={"stationId": stationId, "time": unique_times})


In [27]:
def preprocess_time_station_index(xd):
    """Reduce one time-slice dataset to its numerically identified stations.

    Parameters
    ----------
    xd : xarray.Dataset
        Dataset exposing ``stationId`` (byte strings), ``time``,
        ``discharge`` and ``discharge_quality``.

    Returns
    -------
    xarray.Dataset
        ``discharge`` and ``discharge_quality`` on a ``stationId`` dimension
        whose coordinate holds the ids converted to ``int``, plus a ``time``
        coordinate holding the unique time labels of ``xd`` as strings.
    """
    # Keep a station only when its id, once stripped, consists solely of
    # digits; this drops the blank entries.  A plain comprehension is used so
    # the function does not depend on the third-party ``compose`` helper
    # having been imported by an earlier cell.
    station_is_numeric = [
        raw_id.strip().isdigit() for raw_id in xd.stationId.values
    ]
    stationId = xd.stationId[station_is_numeric].values.astype(int)

    # Time labels are kept as strings.  To get real dates instead, replace
    # b"_" with b"T" in every label and build the array with
    # dtype="datetime64".
    unique_times = np.array(np.unique(xd.time.values).tolist(), dtype="str")

    data_var_dict = {
        name: (["stationId"], xd[name].values[station_is_numeric])
        for name in ("discharge", "discharge_quality")
    }
    return xr.Dataset(
        data_vars=data_var_dict, coords={"stationId": stationId, "time": unique_times}
    )


# return xr.Dataset(data_vars=data_vars{'discharge': (['stationId'], xd.discharge.values), 'discharge_quality': (['stationId'], xd.discharge_quality.values)},
# coords={'stationId': stationId, 'time': np.unique(xd.time.values)})


In [28]:
#%%timeit
#%%prun
# How long does it take to preprocess one dataset?
# Enable one of the commented cell magics at the top of this cell to time or
# profile the call; with both disabled the cell just displays the result.
preprocess_time_station_index(ds)


In [29]:
#%%time
# %%prun
# Combine the time-slice files through xarray's multi-file reader, applying the
# per-file preprocessing before the pieces are combined by coordinate.
# The `// 1` divisor keeps every file.
# NOTE(review): presumably the divisor is raised by hand to try the cell on a
# fraction of the files — confirm.
# Options tried while tuning and left disabled:
#   data_vars="minimal", coords="minimal", compat="override",
#   chunks={'time': 481, 'stationId': 1}  # 3869*481
file_subset = usgs_files[: len(usgs_files) // 1]
xr.open_mfdataset(
    file_subset,
    combine="by_coords",
    preprocess=preprocess_time_station_index,
    parallel=True,
)


In [30]:
#%%time
# %%prun
# This version appears to be the fastest
def read_netcdfs(files, dim, transform_func=None):
    """Load several netCDF files and concatenate them along ``dim``.

    Parameters
    ----------
    files : str or iterable of paths
        A glob pattern (expanded with ``glob.glob``) or an explicit
        collection of file paths.  Either way the paths are processed in
        sorted order.
    dim : str
        Name of the dimension handed to ``xr.concat``.
    transform_func : callable, optional
        Applied to each opened dataset before it is loaded; intended for a
        selection or aggregation that shrinks the data.

    Returns
    -------
    xarray.Dataset
        The per-file datasets concatenated along ``dim``.

    Raises
    ------
    ValueError
        If no file matches ``files``.
    """

    def process_one_path(path):
        # The context manager guarantees the file is closed after use.
        with xr.open_dataset(path) as ds:
            if transform_func is not None:
                ds = transform_func(ds)
            # Load everything now so the data stays usable once the
            # underlying file has been closed.
            ds.load()
            return ds

    if isinstance(files, (str, bytes)):
        paths = sorted(glob.glob(files))
    else:
        paths = sorted(files)

    # Fail early with a message that names the pattern, instead of letting
    # xr.concat fail on an empty list with a message that does not name it.
    if not paths:
        raise ValueError(f"read_netcdfs: no files match {files!r}")

    return xr.concat([process_one_path(p) for p in paths], dim)


# you might also use indexing operations like .sel to subset datasets
# combined = read_netcdfs('/all/my/files/*.nc', dim='time',
#                         transform_func=lambda ds: ds.mean())

# Read every matching time-slice file, then lay the discharge values out as a
# table with one row per station id and one column per time label.
usgs_glob_pattern = usgs_timeslices_folder + usgs_file_pattern_filter
with read_netcdfs(usgs_glob_pattern, "time", preprocess_time_station_index) as ds2:
    # Transposed so that rows follow stationId and columns follow time.
    discharge_by_station = ds2["discharge"].values.T
    df2 = pd.DataFrame(
        discharge_by_station,
        index=ds2["stationId"].values,
        columns=ds2.time.values,
    )

df2


Unnamed: 0,2020-03-19_18:00:00,2020-03-19_18:15:00,2020-03-19_18:30:00,2020-03-19_18:45:00,2020-03-19_19:00:00,2020-03-19_19:15:00,2020-03-19_19:30:00,2020-03-19_19:45:00,2020-03-19_20:00:00,2020-03-19_20:15:00,...,2020-03-24_15:45:00,2020-03-24_16:00:00,2020-03-24_16:15:00,2020-03-24_16:30:00,2020-03-24_16:45:00,2020-03-24_17:00:00,2020-03-24_17:15:00,2020-03-24_17:30:00,2020-03-24_17:45:00,2020-03-24_18:00:00
8158930,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
2336300,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
8086212,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
2215260,252.019928,251.772156,251.524384,251.276611,251.028839,250.781067,250.533295,250.285522,250.037750,249.789978,...,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810
2439400,58.899040,58.769257,58.639469,58.509682,58.379898,58.250114,58.120327,57.990540,57.860756,57.730972,...,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
3353200,9.202975,9.300904,9.398833,9.496762,9.594691,9.692620,9.790550,9.888479,9.986408,10.084337,...,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023
2303000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
9386950,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [31]:
#%%time
# Build the same station-by-time discharge table through xarray's multi-file
# reader, for comparison with the read_netcdfs version.
with xr.open_mfdataset(
    usgs_files,
    combine="by_coords",
    preprocess=preprocess_time_station_index,
) as ds2:
    # Transposed so that rows follow stationId and columns follow time.
    discharge_by_station = ds2["discharge"].values.T
    df3 = pd.DataFrame(
        discharge_by_station,
        index=ds2["stationId"].values,
        columns=ds2.time.values,
    )

df3


Unnamed: 0,2020-03-19_18:00:00,2020-03-19_18:15:00,2020-03-19_18:30:00,2020-03-19_18:45:00,2020-03-19_19:00:00,2020-03-19_19:15:00,2020-03-19_19:30:00,2020-03-19_19:45:00,2020-03-19_20:00:00,2020-03-19_20:15:00,...,2020-03-24_15:45:00,2020-03-24_16:00:00,2020-03-24_16:15:00,2020-03-24_16:30:00,2020-03-24_16:45:00,2020-03-24_17:00:00,2020-03-24_17:15:00,2020-03-24_17:30:00,2020-03-24_17:45:00,2020-03-24_18:00:00
8158930,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
2336300,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
8086212,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
2215260,252.019928,251.772156,251.524384,251.276611,251.028839,250.781067,250.533295,250.285522,250.037750,249.789978,...,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810,206.429810
2439400,58.899040,58.769257,58.639469,58.509682,58.379898,58.250114,58.120327,57.990540,57.860756,57.730972,...,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404,89.764404
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
3353200,9.202975,9.300904,9.398833,9.496762,9.594691,9.692620,9.790550,9.888479,9.986408,10.084337,...,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023,6.258023
2303000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
9386950,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [32]:
# Open the first time-slice file without any preprocessing and display it to
# inspect its raw layout.
ds_t = xr.open_dataset(usgs_files[0])
ds_t


In [33]:
# Experiment: promote stationId and time to coordinates, then use stationId
# as the index of the stationIdInd dimension, and display the result.
xs = ds_t.set_coords(["stationId", "time"])
xs = xs.set_index(stationIdInd="stationId")
# xr.Dataset(data_vars={"discharge": da_discharge})
xs


In [34]:
# Open the Route-Link File to convert the indexes
with xr.open_dataset(routelink_subset_file) as ds:
    # A gage id is kept only if, once stripped, it consists solely of digits.
    gage_list = [gage.strip() for gage in ds.gages.values]
    gage_mask = [gage.isdigit() for gage in gage_list]

    gage_da = ds.gages[gage_mask].values.astype(int)

    # Keep only these per-link variables, re-dimensioned by the numeric gage id.
    data_vars = ("link", "to", "ascendingIndex")
    data_var_dict = {
        name: (["gages"], ds[name].values[gage_mask]) for name in data_vars
    }
    # From here on `ds` refers to the trimmed, in-memory dataset; the opened
    # file is still closed when the with-block exits.
    ds = xr.Dataset(data_vars=data_var_dict, coords={"gages": gage_da})
df = ds.to_dataframe()
df


Unnamed: 0_level_0,link,to,ascendingIndex
gages,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5059480,14828145,14828071,2723024
5085450,7069483,7069499,1548739
5120500,14172539,14172507,1423703
5061500,6689073,6689049,1349895
5085000,14434343,14434353,1350164
...,...,...,...
378,2851585,2851605,2718843
11390500,2851625,2852741,2719415
136,2858219,2858243,2656953
336,15039173,15039309,2719007


In [35]:
# Attach the discharge table to the route-link table (joined on the gage id
# index), then re-key the rows by link and keep only the discharge columns.
usgs_df = (
    df.join(df2)
    .reset_index()
    .rename(columns={"index": "gages"})
    .set_index("link")
    .drop(["gages", "ascendingIndex", "to"], axis=1)
)
usgs_df


Unnamed: 0_level_0,2020-03-19_18:00:00,2020-03-19_18:15:00,2020-03-19_18:30:00,2020-03-19_18:45:00,2020-03-19_19:00:00,2020-03-19_19:15:00,2020-03-19_19:30:00,2020-03-19_19:45:00,2020-03-19_20:00:00,2020-03-19_20:15:00,...,2020-03-24_15:45:00,2020-03-24_16:00:00,2020-03-24_16:15:00,2020-03-24_16:30:00,2020-03-24_16:45:00,2020-03-24_17:00:00,2020-03-24_17:15:00,2020-03-24_17:30:00,2020-03-24_17:45:00,2020-03-24_18:00:00
link,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
18897408,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
17848800,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
5491577,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19374372,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19408130,1922.713867,1924.247681,1925.781494,1927.315308,1928.849121,1930.383057,1931.916870,1933.450684,1934.984497,1936.518311,...,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7869629,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
1131000447,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
1131000374,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19440431,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000


In [36]:
# Capture the current column labels of usgs_df; uncomment the last line to
# display them.
columns_list = usgs_df.columns
# columns_list


In [37]:
# Upsample the observation columns to a regular 5-minute grid and fill the new
# columns by linear interpolation along each row.
#
# The grid is generated from the first and last column labels instead of from
# hard-coded insert positions, so the cell can be re-run safely and no longer
# requires the series to start at the top of an hour or to contain every
# 15-minute slice.  It assumes every existing label falls on the 5-minute grid
# anchored at the first label; labels off that grid would be dropped.
# NOTE(review): the displayed tables contain -999999.0 values that look like a
# missing-data sentinel; as before, they are interpolated like ordinary
# numbers — confirm whether they should be masked first.
usgs_time_format = "%Y-%m-%d_%H:%M:%S"
observed_times = pd.to_datetime(usgs_df.columns, format=usgs_time_format)
five_minute_columns = pd.date_range(
    observed_times.min(), observed_times.max(), freq="5min"
).strftime(usgs_time_format)

usgs_df = (
    usgs_df.astype("float64")  # interpolate in double precision
    .reindex(columns=five_minute_columns)  # new labels arrive as NaN columns
    .interpolate(method="linear", axis=1)
)
usgs_df
# get_usgs_from_wrf_hydro(routelink_subset_file,usgs_timeslices_folder)


Unnamed: 0_level_0,2020-03-19_18:00:00,2020-03-19_18:05:00,2020-03-19_18:10:00,2020-03-19_18:15:00,2020-03-19_18:20:00,2020-03-19_18:25:00,2020-03-19_18:30:00,2020-03-19_18:35:00,2020-03-19_18:40:00,2020-03-19_18:45:00,...,2020-03-24_17:15:00,2020-03-24_17:20:00,2020-03-24_17:25:00,2020-03-24_17:30:00,2020-03-24_17:35:00,2020-03-24_17:40:00,2020-03-24_17:45:00,2020-03-24_17:50:00,2020-03-24_17:55:00,2020-03-24_18:00:00
link,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
18897408,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
17848800,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
5491577,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19374372,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19408130,1922.713867,1923.225138,1923.736410,1924.247681,1924.758952,1925.270223,1925.781494,1926.292765,1926.804036,1927.315308,...,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7869629,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
1131000447,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
1131000374,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19440431,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000


In [5]:
from compose import compose
def read_netcdfs(files, dim, transform_func=None):
    """Load several netCDF files and concatenate them along ``dim``.

    Parameters
    ----------
    files : str or iterable of paths
        A glob pattern (expanded with ``glob.glob``) or an explicit
        collection of file paths.  Either way the paths are processed in
        sorted order.
    dim : str
        Name of the dimension handed to ``xr.concat``.
    transform_func : callable, optional
        Applied to each opened dataset before it is loaded; intended for a
        selection or aggregation that shrinks the data.

    Returns
    -------
    xarray.Dataset
        The per-file datasets concatenated along ``dim``.

    Raises
    ------
    ValueError
        If no file matches ``files``.
    """

    def process_one_path(path):
        # The context manager guarantees the file is closed after use.
        with xr.open_dataset(path) as ds:
            if transform_func is not None:
                ds = transform_func(ds)
            # Load everything now so the data stays usable once the
            # underlying file has been closed.
            ds.load()
            return ds

    if isinstance(files, (str, bytes)):
        paths = sorted(glob.glob(files))
    else:
        paths = sorted(files)

    # Fail early with a message that names the pattern, instead of letting
    # xr.concat fail on an empty list with a message that does not name it.
    if not paths:
        raise ValueError(f"read_netcdfs: no files match {files!r}")

    return xr.concat([process_one_path(p) for p in paths], dim)

def preprocess_time_station_index(xd):
    """Reduce one time-slice dataset to its numerically identified stations.

    Parameters
    ----------
    xd : xarray.Dataset
        Dataset exposing ``stationId`` (byte strings), ``time``,
        ``discharge`` and ``discharge_quality``.

    Returns
    -------
    xarray.Dataset
        ``discharge`` and ``discharge_quality`` on a ``stationId`` dimension
        whose coordinate holds the ids converted to ``int``, plus a ``time``
        coordinate holding the unique time labels of ``xd`` as strings.
    """
    # Keep a station only when its id, once stripped, consists solely of
    # digits; this drops the blank entries.  A plain comprehension is used so
    # the function does not need the third-party ``compose`` helper.
    station_is_numeric = [
        raw_id.strip().isdigit() for raw_id in xd.stationId.values
    ]
    stationId = xd.stationId[station_is_numeric].values.astype(int)

    # Time labels are kept as strings.
    unique_times = np.array(np.unique(xd.time.values).tolist(), dtype="str")

    data_var_dict = {
        name: (["stationId"], xd[name].values[station_is_numeric])
        for name in ("discharge", "discharge_quality")
    }
    return xr.Dataset(
        data_vars=data_var_dict, coords={"stationId": stationId, "time": unique_times}
    )


# you might also use indexing operations like .sel to subset datasets
# combined = read_netcdfs('/all/my/files/*.nc', dim='time',
#                         transform_func=lambda ds: ds.mean())

# Read every matching time-slice file, then lay the discharge values out as a
# table with one row per station id and one column per time label.
usgs_glob_pattern = usgs_timeslices_folder + usgs_file_pattern_filter
with read_netcdfs(usgs_glob_pattern, "time", preprocess_time_station_index) as ds2:
    # Transposed so that rows follow stationId and columns follow time.
    discharge_by_station = ds2["discharge"].values.T
    df2 = pd.DataFrame(
        discharge_by_station,
        index=ds2["stationId"].values,
        columns=ds2.time.values,
    )

# Open the route-link file and build a table of link attributes keyed by
# numeric gage id.
with xr.open_dataset(routelink_subset_file) as ds:
    # A gage id is kept only if, once stripped, it consists solely of digits.
    gage_list = [gage.strip() for gage in ds.gages.values]
    gage_mask = [gage.isdigit() for gage in gage_list]

    gage_da = ds.gages[gage_mask].values.astype(int)

    data_vars = ("link", "to", "ascendingIndex")
    data_var_dict = {
        name: (["gages"], ds[name].values[gage_mask]) for name in data_vars
    }
    # From here on `ds` refers to the trimmed, in-memory dataset; the opened
    # file is still closed when the with-block exits.
    ds = xr.Dataset(data_vars=data_var_dict, coords={"gages": gage_da})
df = ds.to_dataframe()

# Attach the discharge table to the route-link table (joined on the gage id
# index), then re-key the rows by link and keep only the discharge columns.
usgs_df = (
    df.join(df2)
    .reset_index()
    .rename(columns={"index": "gages"})
    .set_index("link")
    .drop(["gages", "ascendingIndex", "to"], axis=1)
)
# Column labels as they stand before any 5-minute columns are added.
columns_list = usgs_df.columns

# Upsample the observation columns to a regular 5-minute grid and fill the new
# columns by linear interpolation along each row.
#
# The grid is generated from the first and last column labels instead of from
# hard-coded insert positions, so the cell can be re-run safely and no longer
# requires the series to start at the top of an hour or to contain every
# 15-minute slice.  It assumes every existing label falls on the 5-minute grid
# anchored at the first label; labels off that grid would be dropped.
# NOTE(review): the displayed tables contain -999999.0 values that look like a
# missing-data sentinel; as before, they are interpolated like ordinary
# numbers — confirm whether they should be masked first.
usgs_time_format = "%Y-%m-%d_%H:%M:%S"
observed_times = pd.to_datetime(usgs_df.columns, format=usgs_time_format)
five_minute_columns = pd.date_range(
    observed_times.min(), observed_times.max(), freq="5min"
).strftime(usgs_time_format)

usgs_df = (
    usgs_df.astype("float64")  # interpolate in double precision
    .reindex(columns=five_minute_columns)  # new labels arrive as NaN columns
    .interpolate(method="linear", axis=1)
)
usgs_df


Unnamed: 0_level_0,2020-03-19_18:00:00,2020-03-19_18:05:00,2020-03-19_18:10:00,2020-03-19_18:15:00,2020-03-19_18:20:00,2020-03-19_18:25:00,2020-03-19_18:30:00,2020-03-19_18:35:00,2020-03-19_18:40:00,2020-03-19_18:45:00,...,2020-03-24_17:15:00,2020-03-24_17:20:00,2020-03-24_17:25:00,2020-03-24_17:30:00,2020-03-24_17:35:00,2020-03-24_17:40:00,2020-03-24_17:45:00,2020-03-24_17:50:00,2020-03-24_17:55:00,2020-03-24_18:00:00
link,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
18897408,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
17848800,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
5491577,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19374372,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19408130,1922.713867,1923.225138,1923.736410,1924.247681,1924.758952,1925.270223,1925.781494,1926.292765,1926.804036,1927.315308,...,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836,3709.506836
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7869629,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
1131000447,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
1131000374,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
19440431,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,...,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000,-999999.000000
