In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
from dotenv import load_dotenv
import os

import warnings
warnings.filterwarnings("ignore")

In [3]:
load_dotenv()
path = os.getenv("path")
raw_station_path = os.getenv("raw_station_path")

Flow metrics computation helper functions

In [4]:
def RB_Flashiness(Q):
    """Richards-Baker Flashiness Index for a series of daily mean discharges."""
    Qsum = np.sum(Q)
    Qpath = 0.0
    for i in range(len(Q)):
        if i == 0:
            Qpath = Q.iloc[i]
        else:
            Qpath += np.abs(Q.iloc[i] - Q.iloc[i - 1])
    RBindex = Qpath / Qsum

    return round(RBindex[0], 2)


def max_flow(Q):
    Q = Q.to_numpy()
    maxFlow = Q.max()
    return maxFlow


def min_flow(Q):
    Q = Q.to_numpy()
    minFlow = Q.min()
    return minFlow


def med_flow(Q):
    Q = Q.to_numpy()
    medFlow = np.median(Q)
    return medFlow


def cvQ(Q):
    Q = Q.to_numpy()
    """ The ratio between the standard deviation and the mean. """
    Q_std = np.std(Q, ddof=1)
    Q_mean = np.mean(Q)
    cv = Q_std / Q_mean
    cv = round(cv, 4)
    return cv


def q95(Q):
    """Q 95%: The specific discharge that is exceeded only 5% of all days at a particular site"""
    q95_percentile = np.percentile(Q, 95)

    return q95_percentile


def q5(Q):
    """Q 5%: The specific discharge that is exceeded up to 95% of all days at a particular site"""
    q5_percentile = np.percentile(Q, 5)

    return q5_percentile


def time_max(Q):
    tmax_time = Q[Q["Flow"] == max_flow(Q)].index
    tmax_value = tmax_time[0]
    tmax_str = str(tmax_value)
    tmax = tmax_str.split(" ")
    tmaxt = datetime.strptime(tmax[0], "%Y-%m-%d")
    day_of_year = tmaxt.timetuple().tm_yday
    return day_of_year


def time_min(Q):
    tmin_time = Q[Q["Flow"] == min_flow(Q)].index
    tmin_value = tmin_time[0]
    tmin_str = str(tmin_value)
    tmin = tmin_str.split(" ")
    tmint = datetime.strptime(tmin[0], "%Y-%m-%d")
    day_of_year = tmint.timetuple().tm_yday
    return day_of_year

Test cases:

In [5]:
station_path = os.path.join(
    path + raw_station_path, "CA_flow_data/02GA003_Daily_Flow_ts.csv"
)
station_df = pd.read_csv(station_path, parse_dates=[2],index_col=[2], encoding="unicode_escape")

station_filtered_years = station_df.iloc[(station_df.index.year >= 2011) & (station_df.index.year <= 2020)
                                        ].drop(columns=["PARAM", "SYM", " ID"] ).rename(columns={"Flow(m³/s)": "Flow"})

count = 0
years = np.arange(2011, 2021, 1)
percent = list()
for y in years:
    for i in range(0, len(station_filtered_years.iloc[station_filtered_years.index.year == y]["Flow"])):
        if pd.isnull(station_filtered_years.iloc[station_filtered_years.index.year == y]["Flow"][i]):
            count += 1
    # count_percent=round((count*100)/len(station_filtered_years.iloc[station_filtered_years.index.year== y]["Value"]))
    count_percent = round((count * 100) / 365)
    if count_percent < 1:
        Q = station_filtered_years.iloc[station_filtered_years.index.year == y]
        # Q = Q.drop(columns=["PARAM", "SYM", " ID"])
        maximum = max_flow(Q)
        median = med_flow(Q)
        minimum = min_flow(Q)
        q_95 = q95(Q)
        q_5 = q5(Q)
        bfi = np.nan
        tmax = time_max(Q)
        tmin = time_min(Q)
        rbindex = RB_Flashiness(Q)
        cv_Q = cvQ(Q)
        print(maximum, median, minimum,q_95,q_95,bfi,tmax,tmin,rbindex,cv_Q)

    else:
        maximum = np.nan
        median = np.nan
        minimum = np.nan
        q_95 = np.nan
        q_5 = np.nan
        bfi = np.nan
        tmax = np.nan
        tmin = np.nan
        rbindex = np.nan
        cv_Q = np.nan

369.0 31.7 14.5 172.00000000000006 172.00000000000006 nan 78 214 0.24 3.6938
142.0 20.65 11.4 82.325 82.325 nan 74 286 0.18 2.9707
454.0 39.4 12.4 167.8 167.8 nan 101 23 0.24 3.7659
573.0 30.6 17.2 159.4000000000001 159.4000000000001 nan 105 80 0.19 3.1565
188.0 21.5 11.6 81.14 81.14 nan 165 56 0.17 2.8718
418.0 17.95 6.45 154.5 154.5 nan 92 350 0.24 3.8454
780.0 36.4 8.84 162.60000000000014 162.60000000000014 nan 175 360 0.26 3.9624
481.0 24.7 14.5 134.40000000000003 134.40000000000003 nan 52 217 0.21 3.4385
275.0 27.3 13.5 129.8 129.8 nan 74 20 0.21 3.394
639.0 24.7 12.3 127.25 127.25 nan 12 214 0.22 3.5891
