In [1]:
import xarray as xr
import numpy as np
import glob, os, sys, toml
import pandas as pd

In [2]:
# Per-level CAMELS basin attribute tables; entry k describes the "level{k+1}" basins.
filesinfo = ['/glade/work/guoqiang/CTSM_cases/CAMELS_Calib/data_mesh_surf/HillslopeHydrology/CAMELS_level1_basin_info.csv', 
             '/glade/work/guoqiang/CTSM_cases/CAMELS_Calib/data_mesh_surf/HillslopeHydrology/CAMELS_level2_basin_info.csv',
             '/glade/work/guoqiang/CTSM_cases/CAMELS_Calib/data_mesh_surf/HillslopeHydrology/CAMELS_level3_basin_info.csv']

# Number of basins handled at each level (level number -> basin count).
BASINS_PER_LEVEL = {1: 627, 2: 40, 3: 4}

# (level, within-level index) for every basin, grouped by level in ascending order.
basin_keys = [(level, idx)
              for level, count in BASINS_PER_LEVEL.items()
              for idx in range(count)]

# Basin names follow the pattern "level{level}_{index}", e.g. "level1_0".
basins = [f'level{level}_{idx}' for level, idx in basin_keys]

# Level number and within-level index of each entry of `basins`. Taken from the
# keys directly instead of re-parsing the name by character position, which
# would silently break for multi-digit level numbers.
levelnum = np.array([level for level, _ in basin_keys])
basinnum = np.array([idx for _, idx in basin_keys])

# Attribute columns copied from the per-level tables; column 0 (hru_id) is what
# later identifies each basin.
cols = ['hru_id', 'lon_cen', 'lat_cen', 'elev_mean', 'latoutlet', 'lonoutlet', 'areaUSGS']

# One row per basin (same order as `basins`), one column per entry of `cols`.
basininfo = np.full([len(basins), len(cols)], np.nan)
for level, infofile in enumerate(filesinfo, start=1):
    dfi = pd.read_csv(infofile)
    # Rows of `basininfo` belonging to this level. Using the mask (rather than
    # a running row counter) keeps every row aligned with its entry in
    # `basins` regardless of how `basins` is ordered.
    inlevel = levelnum == level
    # `basinnum` is used as a row label of the level's table, exactly as the
    # table was read (default integer index).
    basininfo[inlevel, :] = dfi.loc[basinnum[inlevel], cols].to_numpy()

In [3]:
# Per-basin datasets, collected in the same order as `basins`.
basin_datasets = []

# Loop over each basin
for i, b in enumerate(basins):

    # Progress marker every 50 basins
    if i % 50 == 0:
        print(i)

    file = f'/glade/campaign/cgd/tss/people/guoqiang/CTSMcases/CAMELS_Calib/Calib_all_HH_Ostrich/{b}_Ostrich/archive_stats/Bestsimu1_QPT.nc'

    # Read the file fully into memory and close it straight away, so the loop
    # does not leave one open file handle per basin behind. The concatenation
    # below needs the values in memory anyway.
    with xr.open_dataset(file) as dsfile:
        dsb = dsfile.load()

    # Tag the dataset with the basin's hru_id (column 0 of `basininfo`) as a
    # scalar 'hru' coordinate
    dsb = dsb.assign_coords(hru=basininfo[i, 0])

    basin_datasets.append(dsb)

# Concatenate all datasets along the new 'hru' dimension; the resulting order
# along 'hru' is the order of `basins`.
dsall = xr.concat(basin_datasets, dim='hru')

0
50
100
150
200
250
300
350
400
450
500
550
600
650


In [4]:
# Load calibration periods
#
# For every basin, read its Ostrich configuration file and derive the
# calibration window: it starts at RUN_STARTDATE and ends STOP_N months later.
# NOTE(review): STOP_N is treated as a number of months here; the
# configuration's stop unit is not checked -- confirm against the config files.

calib_starts, calib_ends = [], []

for basin in basins:

    # Configuration files are named with '-' where the basin name has '_'
    b = basin.replace('_', '-')
    file = f'/glade/work/guoqiang/CTSM_cases/CAMELS_Calib/Calib_all_HH_Ostrich/configuration/_{b}_config_Ostrich.toml'

    config = toml.load(file)

    calibstart = pd.Timestamp(config['RUN_STARTDATE'])
    calibend = calibstart + pd.tseries.offsets.DateOffset(months=config['STOP_N'])

    calib_starts.append(calibstart)
    calib_ends.append(calibend)

# The arrays carry no 'hru' coordinate of their own, so the values are attached
# by position: the order of `dsall` along 'hru' must still match `basins` when
# this cell runs.
dsall['calibstart'] = xr.DataArray(calib_starts, dims='hru')
dsall['calibend'] = xr.DataArray(calib_ends, dims='hru')

In [5]:
# Destination directory for the merged product; created if it does not exist.
outpath = '/glade/campaign/cgd/tss/people/guoqiang/CTSMcases/CAMELS_Calib/Calib_all_HH_Ostrich/analysis'
os.makedirs(outpath, exist_ok=True)

# Order the basins by their 'hru' coordinate before writing.
dsall = dsall.sortby('hru')

# Write all basins to a single NetCDF file.
outfile = f'{outpath}/CAMELS_Bestsimu1_QPT.nc'
dsall.to_netcdf(outfile)