In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
def get_data_dir():
    """Return the path of the ``data`` folder next to the working directory.

    The folder is resolved as ``<parent of os.getcwd()>/data``, so the result
    depends on the directory the notebook kernel is running in.  The path is
    only constructed here; nothing checks that it exists.
    """
    parent_dir, _ = os.path.split(os.getcwd())
    return os.path.join(parent_dir, "data")

# root directory that the example input/output paths below are joined onto
data_dir = get_data_dir()
data_dir

In [None]:
# set up the file paths: every entry is a location relative to data_dir, listed
# in the same order as the names it is unpacked into
f_runoff, f_mapping, f_basin_name, f_coord = (
    os.path.join(data_dir, relative_path)
    for relative_path in (
        'example/output/pm_abcd_mrtm_watch_1971_2001/q_km3peryear_pm_abcd_mrtm_watch_1971_2001.csv',
        'example/input/reference/basin.csv',
        'example/input/reference/BasinNames235.txt',
        'example/input/reference/coordinates.csv',
    )
)

In [None]:
# load gridded runoff: parse the CSV at f_runoff into a DataFrame, using the
# file's first line as the column names (pandas' default header handling)
df_runoff = pd.read_csv(f_runoff)
# bare name as the last expression so the notebook renders the frame
df_runoff

In [None]:
# load mapping from grid cells to basin id; the first line of the file is
# consumed as the column header (read_csv default). A later cell attaches these
# values to df_runoff as the basin_id column, pairing rows by index.
df_mapping = pd.read_csv(f_mapping)
df_mapping

In [None]:
# load basin name: the file is read as tab-separated (read_table default) with
# header=None, so every line is data, and its column is labelled 'basin_name'.
# Row order matters: a later cell derives basin_id from the row index (index + 1).
df_basin_name = pd.read_table(f_basin_name, header=None, names=['basin_name'])
df_basin_name

In [None]:
# load coordinates: read with header=None, i.e. every line is treated as data,
# and the five columns are labelled here as id, lon, lat, x, y
df_coord = pd.read_csv(f_coord, header=None, names=['id', 'lon', 'lat', 'x', 'y'])
df_coord

In [None]:
# merge the mapping of basin id and name into the runoff dataframe
# NOTE: this cell rebinds df_runoff to the merged frame (without the 'id'
# column), so it is meant to run once per load of df_runoff.

# the basin ids are attached by aligning on the row index, so both tables must
# have the same number of rows; without this check a mismatch goes unnoticed
# (missing ids become NaN and those rows are dropped by the inner merge below)
assert len(df_mapping) == len(df_runoff), (
    f'basin mapping has {len(df_mapping)} rows but runoff has {len(df_runoff)}'
)

# add basin id to basin names (ids are the 1-based row positions of the names)
df_basin_name['basin_id'] = df_basin_name.index + 1

# attach basin_id, merge basin_name to runoff dataframe by basin_id, then drop
# the 'id' column. Building the result as a single expression leaves the
# existing df_runoff untouched if any step raises. squeeze('columns') hands
# assign the mapping's single column as a Series instead of a whole DataFrame.
# Rows whose basin_id has no entry in df_basin_name are dropped (inner join).
df_runoff = (
    df_runoff
    .assign(basin_id=df_mapping.squeeze('columns'))
    .merge(df_basin_name, on='basin_id')
    .drop('id', axis=1)
)
df_runoff

In [None]:
# total runoff per basin: rows sharing a (basin_id, basin_name) pair are summed
# column by column, and the two keys stay as ordinary columns (as_index=False)
df_runoff_basin = (
    df_runoff
    .groupby(by=['basin_id', 'basin_name'], as_index=False)
    .sum()
)
df_runoff_basin

In [None]:
# plot basin runoff (e.g., basin 229 - Pacific Northwest Basin)
# keep the title below in sync with this id when plotting another basin
plot_basin_id = 229

# select the basin by its id rather than by row position: position 228 is only
# basin 229 when every lower-numbered basin is present in the aggregated frame
basin_rows = df_runoff_basin.loc[df_runoff_basin['basin_id'] == plot_basin_id]
assert len(basin_rows) == 1, (
    f'expected exactly one row for basin {plot_basin_id}, found {len(basin_rows)}'
)

# columns 0-1 are the group keys (basin_id, basin_name); the rest are plotted
basin_rows.iloc[0, 2:].plot(kind='line')
plt.title('Basin 229 - Pacific Northwest Basin')
plt.ylabel('Runoff (km3/year)')