In [1]:
# section 1 load all the necessary modules and packages
import glob
import time
import geopandas as gpd
import netCDF4 as nc4
import numpy as np
import pandas as pd
import xarray as xr
from shapely.geometry import Polygon
import matplotlib  as mpl
# Global matplotlib text style applied to every figure created afterwards.
font = dict(family='Times New Roman', weight='bold', size=40)
mpl.rc('font', **font)
# not necessary for the functions, only for visualization
import matplotlib.pyplot as plt
import warnings
import sys
import os
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import contextily as ctx


def NSE(s,o):
    """
    Created on Thu Jan 20 15:36:37 2011
    @ author:                  Sat Kumar Tomer
    @ author's webpage:        http://civil.iisc.ernet.in/~satkumar/
    @ author's email id:       satkumartomer@gmail.com
    @ author's website:        www.ambhas.com

    Nash Sutcliffe efficiency coefficient:

        NSE = 1 - sum((s - o)**2) / sum((o - mean(o))**2)

    Sample pairs containing NaN are removed with filter_nan before the
    coefficient is computed.

    input:
        s: simulated [time]
        o: observed [time]
    output:
        ns: Nash Sutcliffe efficient coefficient; NaN when no valid
            (simulated, observed) pair is left after NaN filtering
    """
    s,o = filter_nan(s,o)
    # With no valid pairs the ratio is 0/0; report "undefined" as NaN instead
    # of failing, so looping over many stations is not interrupted by one
    # station without overlapping data.
    if o.size == 0:
        return np.nan
    # np.sum reduces in compiled code; the builtin sum iterates element by
    # element in Python. A constant observed series still yields -inf/NaN via
    # NumPy's floating-point division, as before.
    residual_ss = np.sum((s-o)**2)
    observed_ss = np.sum((o-np.mean(o))**2)
    return 1 - residual_ss/observed_ss

def filter_nan(s,o):
    """
    Created on Thu Jan 20 15:36:37 2011
    @ author:                  Sat Kumar Tomer
    @ author's webpage:        http://civil.iisc.ernet.in/~satkumar/
    @ author's email id:       satkumartomer@gmail.com
    @ author's website:        www.ambhas.com

    Remove every (simulated, observed) sample pair in which either the
    simulated or the observed value is NaN.

    Both inputs are flattened to 1-D and paired element by element. This is
    used by the metric functions, which would otherwise produce NaN as output.

    input:
        s: simulated, array-like (NumPy array, list, pandas Series, ...)
        o: observed, array-like with the same number of elements as s
    output:
        (s_valid, o_valid): two 1-D arrays of equal length holding only the
        pairs in which neither value is NaN
    raises:
        ValueError: if s and o do not hold the same number of elements
    """
    # np.asarray accepts any array-like; calling .flatten() directly on the
    # inputs only works for objects that define that method (ndarrays).
    s_flat = np.asarray(s).ravel()
    o_flat = np.asarray(o).ravel()
    if s_flat.size != o_flat.size:
        raise ValueError(
            "simulated and observed must have the same number of elements, "
            "got %d and %d" % (s_flat.size, o_flat.size)
        )
    # One row per sample pair: column 0 = simulated, column 1 = observed.
    data = np.column_stack((s_flat, o_flat))
    # Keep only the rows in which no column is NaN.
    data = data[~np.isnan(data).any(axis=1)]
    return data[:,0],data[:,1]





In [2]:
# Read all GRDC discharge NetCDF files and merge them into one dataset.

GRDC_FILE_PATTERN = '/Users/shg096/Downloads/GRDC_discharge_Data/*.nc'

# glob returns matches in arbitrary order; sort them so the merge order (and
# the printed list) is the same on every run and on every machine.
file_names = sorted(glob.glob(GRDC_FILE_PATTERN))

print(file_names)

# Fail early with a clear message; merging an empty list would only surface
# later as a missing 'area' variable.
if not file_names:
    raise FileNotFoundError('no NetCDF files match ' + GRDC_FILE_PATTERN)

datasets = [xr.open_dataset(file_name) for file_name in file_names]

merged_dataset = xr.merge(datasets)

# Scale 'area' by 1e6: km2 -> m2 (per the original note; confirm against the
# units attribute of the GRDC 'area' variable).
merged_dataset['area'][:] = merged_dataset['area'][:] * 1000000

['/Users/shg096/Downloads/GRDC_discharge_Data/2023-04-28_14-33.nc', '/Users/shg096/Downloads/GRDC_discharge_Data/2023-04-28_14-42.nc', '/Users/shg096/Downloads/GRDC_discharge_Data/2023-04-28_14-47.nc']


In [3]:
# Open the 1988 output file and show its dimensions, coordinates and
# data variables as a plain-text summary.
ds = xr.open_dataset(
    filename_or_obj='/Users/shg096/Desktop/mizuRoute_HDMA_Global/code/output/WM_1988.nc'
)
print(ds)

<xarray.Dataset>
Dimensions:     (time: 366, ID: 298277)
Coordinates:
  * time        (time) datetime64[ns] 1988-01-01 1988-01-02 ... 1988-12-31
  * ID          (ID) float64 1e+06 1e+06 1e+06 ... 8.427e+06 8.427e+06 8.428e+06
Data variables:
    target_vol  (time, ID) float64 ...
    abs_inj     (time, ID) float64 ...
