In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from dotenv import load_dotenv
import os

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Pull the data locations from the process environment; load_dotenv() is
# called first so that they can be supplied through a local .env file.
# NOTE(review): environment variable names are case-insensitive on Windows,
# so the key "path" may resolve to the system PATH there -- confirm the name
# is safe on every platform this notebook runs on.
load_dotenv()
path = os.environ.get("path")
raw_station_path = os.environ.get("raw_station_path")

Flow metrics computation helper functions

In [3]:
def RB_Flashiness(Q):
    """Richards-Baker Flashiness Index for a series of daily mean discharges.

    The index is the sum of the absolute day-to-day changes in discharge
    divided by the sum of the discharges.

    Parameters
    ----------
    Q : pandas.Series, pandas.DataFrame or array-like
        Daily mean discharges in chronological order. For two-dimensional
        input (e.g. a one-column DataFrame) only the first column is used.

    Returns
    -------
    float
        The index rounded to two decimals. NaN is returned when the total
        discharge is zero (which includes empty input) and propagates when
        the input itself contains NaN.
    """
    flows = np.asarray(Q, dtype=float)
    if flows.ndim == 2:
        # The metric cells pass a one-column DataFrame; work on that column.
        flows = flows[:, 0]
    flows = flows.reshape(-1)

    total = flows.sum()
    if total == 0:
        # The ratio is undefined without any discharge.
        return np.nan

    # Only differences between consecutive days contribute to the path
    # length; the first observation has no predecessor and adds nothing.
    path_length = np.abs(np.diff(flows)).sum()
    return round(path_length / total, 2)


def max_flow(Q):
    """Return the largest value found in ``Q`` after converting it to a NumPy array."""
    return np.max(Q.to_numpy())


def min_flow(Q):
    """Return the smallest value found in ``Q`` after converting it to a NumPy array."""
    return np.min(Q.to_numpy())


def med_flow(Q):
    """Return the median of all values in ``Q`` after converting it to a NumPy array."""
    values = Q.to_numpy()
    return np.median(values)


def cvQ(Q):
    """Coefficient of variation: the ratio between the standard deviation and the mean.

    Parameters
    ----------
    Q : pandas.Series or pandas.DataFrame
        Flow values; converted with ``to_numpy()`` before the computation.

    Returns
    -------
    float
        Sample standard deviation (``ddof=1``) divided by the mean, rounded
        to four decimals.
    """
    values = Q.to_numpy()
    Q_std = np.std(values, ddof=1)
    Q_mean = np.mean(values)
    return round(Q_std / Q_mean, 4)


def q95(Q):
    """Q 95%: the discharge that is exceeded on only 5% of all days at a site.

    Computed as the 95th percentile of the values in ``Q``.
    """
    return np.percentile(Q, 95)


def q5(Q):
    """Q 5%: the discharge that is exceeded on up to 95% of all days at a site.

    Computed as the 5th percentile of the values in ``Q``.
    """
    return np.percentile(Q, 5)


def time_max(Q):
    """Day of the year on which the maximum flow occurred.

    Parameters
    ----------
    Q : pandas.DataFrame
        Must contain a ``"Flow"`` column and be indexed by dates (or by
        labels that ``pandas.Timestamp`` can parse).

    Returns
    -------
    int or float
        Day of year (1-366) of the first row holding the largest flow.
        Missing flows are ignored; NaN is returned when no flow value is
        available at all.
    """
    flow = Q["Flow"].dropna()
    if flow.empty:
        return np.nan
    # idxmax returns the label of the first occurrence of the maximum.
    return pd.Timestamp(flow.idxmax()).dayofyear


def time_min(Q):
    """Day of the year on which the minimum flow occurred.

    Parameters
    ----------
    Q : pandas.DataFrame
        Must contain a ``"Flow"`` column and be indexed by dates (or by
        labels that ``pandas.Timestamp`` can parse).

    Returns
    -------
    int or float
        Day of year (1-366) of the first row holding the smallest flow.
        Missing flows are ignored; NaN is returned when no flow value is
        available at all.
    """
    flow = Q["Flow"].dropna()
    if flow.empty:
        return np.nan
    # idxmin returns the label of the first occurrence of the minimum.
    return pd.Timestamp(flow.idxmin()).dayofyear

# Unit Testing

In [7]:
# year flow metric test -- Canada
station_path = os.path.join(
    path + raw_station_path, "CA_flow_data/02GA003_Daily_Flow_ts.csv"
)
# The third CSV column is parsed as dates and used as the index.
station_df = pd.read_csv(station_path, parse_dates=[2], index_col=[2], encoding="unicode_escape")

in_study_period = (station_df.index.year >= 2011) & (station_df.index.year <= 2020)
station_filtered_years = (
    station_df.iloc[in_study_period]
    .drop(columns=["PARAM", "SYM", " ID"])
    .rename(columns={"Flow(m³/s)": "Flow"})
)

years = np.arange(2011, 2021, 1)
percent = list()  # not used in this cell; kept so the cell defines the same names
for y in years:
    Q = station_filtered_years.iloc[station_filtered_years.index.year == y]
    # Count the missing days of this year only. The counter must not carry
    # over from earlier years, otherwise one gappy year would disqualify
    # every year that follows it.
    count = int(Q["Flow"].isna().sum())
    # count_percent=round((count*100)/len(station_filtered_years.iloc[station_filtered_years.index.year== y]["Value"]))
    count_percent = round((count * 100) / 365)
    if count_percent < 1:
        maximum = max_flow(Q)
        median = med_flow(Q)
        minimum = min_flow(Q)
        q_95 = q95(Q)
        q_5 = q5(Q)
        bfi = np.nan
        tmax = time_max(Q)
        tmin = time_min(Q)
        rbindex = RB_Flashiness(Q)
        cv_Q = cvQ(Q)
        print(maximum, median, minimum,q_95,q_5,bfi,tmax,tmin,rbindex,cv_Q)

    else:
        # Too many missing days: report every metric as missing.
        maximum = np.nan
        median = np.nan
        minimum = np.nan
        q_95 = np.nan
        q_5 = np.nan
        bfi = np.nan
        tmax = np.nan
        tmin = np.nan
        rbindex = np.nan
        cv_Q = np.nan

369.0 31.7 14.5 172.00000000000006 15.92 nan 78 214 0.24 3.6938
142.0 20.65 11.4 82.325 12.5 nan 74 286 0.18 2.9707
454.0 39.4 12.4 167.8 16.8 nan 101 23 0.24 3.7659
573.0 30.6 17.2 159.4000000000001 18.3 nan 105 80 0.19 3.1565
188.0 21.5 11.6 81.14 11.82 nan 165 56 0.17 2.8718
418.0 17.95 6.45 154.5 10.725 nan 92 350 0.24 3.8454
780.0 36.4 8.84 162.60000000000014 16.5 nan 175 360 0.26 3.9624
481.0 24.7 14.5 134.40000000000003 15.8 nan 52 217 0.21 3.4385
275.0 27.3 13.5 129.8 15.219999999999999 nan 74 20 0.21 3.394
639.0 24.7 12.3 127.25 14.25 nan 12 214 0.22 3.5891


In [8]:
# seasonal flow metrics test:
# non-growing -- Canada
non_growing_days = 213
start_month, end_month = 4, 9

station_path = os.path.join(path + raw_station_path, "CA_flow_data/02GA003_Daily_Flow_ts.csv")
# The third CSV column is parsed as dates and used as the index.
dataset = pd.read_csv(station_path, parse_dates=[2], index_col=[2], encoding='unicode_escape')

# Keep the months up to and including start_month and those after end_month.
in_season = (dataset.index.month <= start_month) | (dataset.index.month > end_month)
station_filtered_months = (
    dataset.iloc[in_season]
    .drop(columns=["PARAM", "SYM", " ID"])
    .rename(columns={"Flow(m³/s)": "Flow"})
)

years = np.arange(2011, 2021, 1)
percent = list()  # not used in this cell; kept so the cell defines the same names
for y in years:
    Q = station_filtered_months.iloc[station_filtered_months.index.year == y]
    # Count the missing days of this year only. The counter must not carry
    # over from earlier years, otherwise one gappy year would disqualify
    # every year that follows it.
    count = int(Q["Flow"].isna().sum())
    # count_percent=round((count*100)/len(station_filtered_months.iloc[station_filtered_months.index.year== y]["Value"]))
    count_percent = round((count * 100) / non_growing_days)
    if count_percent < 1:
        maximum = max_flow(Q)
        median = med_flow(Q)
        minimum = min_flow(Q)
        q_95 = q95(Q)
        q_5 = q5(Q)
        bfi = np.nan
        tmax = time_max(Q)
        tmin = time_min(Q)
        rbindex = RB_Flashiness(Q)
        cv_Q = cvQ(Q)
        print(maximum, minimum, median, q_95, q_5, bfi, tmax, tmin, rbindex, cv_Q)

    else:
        # Too many missing days: report every metric as missing.
        maximum = np.nan
        median = np.nan
        minimum = np.nan
        q_95 = np.nan
        q_5 = np.nan
        bfi = np.nan
        tmax = np.nan
        tmin = np.nan
        rbindex = np.nan
        cv_Q = np.nan

369.0 15.5 47.3 196.0 17.61 nan 78 23 0.26 3.0579
142.0 11.4 32.1 94.07999999999984 13.360000000000001 nan 74 286 0.18 2.293
454.0 12.4 51.6 202.45 15.620000000000001 nan 101 23 0.23 2.8003
573.0 17.2 31.35 201.34999999999997 18.2 nan 105 80 0.18 2.511
136.0 11.6 23.05 78.47999999999998 11.6 nan 100 56 0.15 1.9116
418.0 6.45 26.9 193.79999999999995 10.3 nan 92 350 0.27 3.1796
284.0 8.84 42.7 158.39999999999986 15.855 nan 56 360 0.21 2.5367
481.0 14.6 33.25 182.89999999999998 15.9 nan 52 1 0.24 2.9263
275.0 13.5 34.95 155.45 16.455 nan 74 20 0.25 2.9771
639.0 16.9 32.6 166.5999999999999 19.92 nan 12 286 0.24 2.937


In [None]:
def test_flow_metrics(station_path:str, start_month:int, end_month:int, days:int) -> list:

    calculated_flow_metrics = {}

    dataset = pd.read_csv(station_path, parse_dates=[2], index_col=[2], encoding="unicode_escape")
    daily_streamflow = (dataset.iloc[(dataset.index.month <= start_month) | (dataset.index.month > end_month)]
                               .drop(columns=["PARAM", "SYM", " ID"])
                               .rename(columns={"Flow(m³/s)": "Flow"}))
    count = 0
    years = np.arange(2011, 2021, 1)
    # percent = list()
    for y in years:
        flow_metrics = []
        for i in range(
            0,
            len(daily_streamflow.iloc[daily_streamflow.index.year == y]["Flow"]),):
            if pd.isnull(daily_streamflow.iloc[daily_streamflow.index.year == y]["Flow"][i]):
                count += 1
    #     count_percent = round((count * 100) / days)
    #     if count_percent < 1:
    #         Q = daily_streamflow.iloc[daily_streamflow.index.year == y]
    #         maximum = max_flow(Q)
    #         median = med_flow(Q)
    #         minimum = min_flow(Q)
    #         q_95 = q95(Q)
    #         q_5 = q5(Q)
    #         bfi = None
    #         tmax = time_max(Q)
    #         tmin = time_min(Q)
    #         rbindex = RB_Flashiness(Q)
    #         cv_Q = cvQ(Q)

    #         flow_metrics = [maximum, minimum, median, q_95, q_5, bfi, tmax, tmin, rbindex, cv_Q]

    #     else:
    #         flow_metrics = [None]*10

    #     calculated_flow_metrics[y] = flow_metrics
    # return calculated_flow_metrics
    return daily_streamflow

# year flow metric test -- Canada
station_path = os.path.join(path + raw_station_path, "CA_flow_data/02GA003_Daily_Flow_ts.csv")
# test_flow_metrics keeps the rows whose month is <= start_month or
# > end_month, so 12/12 keeps every month of the year (the former 1/13
# arguments kept January only).
calculated_flow_metrics = test_flow_metrics(station_path, 12, 12, 365)
print(calculated_flow_metrics)

# seasonal flow metrics test:
# non-growing -- Canada
# calculated_flow_metrics = test_flow_metrics(station_path, 4, 9, 213)
# print(calculated_flow_metrics)

# growing -- Canada
# NOTE(review): with the month filter described above, 5/11 would keep
# January-May and December rather than a growing season -- confirm the
# intended arguments (or filter) before enabling this call.
# calculated_flow_metrics = test_flow_metrics(station_path, 5, 11, 152)
# print(calculated_flow_metrics)
