In [1]:
import teehr
from pathlib import Path
import pandas as pd
import numpy as np
import shutil

from teehr import TimeseriesAwareCalculatedFields as tcf

### Set-up evaluation 

In [2]:
import pyspark

# Build the Spark configuration one setting at a time (each setter mutates
# `conf` in place), grouped by purpose.
conf = pyspark.SparkConf()
conf.setAppName("TEEHR")
conf.setMaster("local[*]")

# write behaviour
conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")

# anonymous S3 access through the s3a filesystem
conf.set("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
conf.set("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider")

# pandas interchange and timestamp handling
conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
conf.set("spark.sql.session.timeZone", "UTC")

# driver sizing
conf.set("spark.driver.maxResultSize", "32g")
conf.set("spark.driver.memory", "32g")

spark_session = pyspark.sql.SparkSession.builder.config(conf=conf).getOrCreate()

Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
25/08/14 10:41:48 WARN Utils: Your hostname, RTI-504155, resolves to a loopback address: 127.0.1.1; using 10.255.255.254 instead (on interface lo)
25/08/14 10:41:48 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/08/14 10:41:49 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
# Locations of the scratch evaluation directory and of the local copy of the
# s3 evaluation that seeds it.
test_eval_dir = Path(Path.home(), 'temp', 'baseflow_test')
s3_e1_dir = Path(Path.home(), 'temp', 's3_e0_08122025')

# Fail fast, BEFORE anything is deleted, if there is nothing to copy from.
# Otherwise a missing source would leave an empty evaluation directory behind.
if not s3_e1_dir.is_dir():
    raise FileNotFoundError(f"Source evaluation directory not found: {s3_e1_dir}")

# set up the eval directory, clear contents if any exist
if test_eval_dir.exists():
    shutil.rmtree(test_eval_dir)
    print("Removed the existing test eval directory...")
test_eval_dir.mkdir(parents=True, exist_ok=True)
print(f"Created the test eval directory at {test_eval_dir}")

# copy contents of s3 directory to the test directory folder
# (dirs_exist_ok=True because the destination was just created above)
shutil.copytree(s3_e1_dir, test_eval_dir, dirs_exist_ok=True)
print(f"Copied contents from {s3_e1_dir} to {test_eval_dir}")

Removed the existing test eval directory...
Created the test eval directory at /home/slandsteiner/temp/baseflow_test
Copied contents from /home/slandsteiner/temp/s3_e0_08122025 to /home/slandsteiner/temp/baseflow_test


In [4]:
# set up evaluation: open the copied directory as a TEEHR Evaluation,
# passing in the Spark session configured earlier in the notebook
ev = teehr.Evaluation(test_eval_dir, spark=spark_session)

Version file not found in /home/slandsteiner/temp/baseflow_test.


### Examine the dataset

In [5]:
# Preview the primary timeseries table (to_sdf() presumably returns a Spark
# DataFrame; show() prints its leading rows).
sdf = ev.primary_timeseries.to_sdf()
sdf.show()

+--------------+-------------------+---------+---------+-------------+------------------+--------------------+
|reference_time|         value_time|    value|unit_name|  location_id|configuration_name|       variable_name|
+--------------+-------------------+---------+---------+-------------+------------------+--------------------+
|          NULL|2000-10-01 00:00:00|3.3413877|    m^3/s|usgs-14138800| usgs_observations|streamflow_hourly...|
|          NULL|2000-10-01 01:00:00|3.9926753|    m^3/s|usgs-14138800| usgs_observations|streamflow_hourly...|
|          NULL|2000-10-01 02:00:00| 4.445745|    m^3/s|usgs-14138800| usgs_observations|streamflow_hourly...|
|          NULL|2000-10-01 03:00:00| 5.408518|    m^3/s|usgs-14138800| usgs_observations|streamflow_hourly...|
|          NULL|2000-10-01 04:00:00|5.6067357|    m^3/s|usgs-14138800| usgs_observations|streamflow_hourly...|
|          NULL|2000-10-01 05:00:00| 5.153666|    m^3/s|usgs-14138800| usgs_observations|streamflow_hourly...|
|

In [6]:
# Preview the secondary timeseries table
ev.secondary_timeseries.to_sdf().show()

+-------------------+-----+---------+--------------+------+-------------------+--------------------+--------------+
|         value_time|value|unit_name|   location_id|member| configuration_name|       variable_name|reference_time|
+-------------------+-----+---------+--------------+------+-------------------+--------------------+--------------+
|2000-10-01 00:00:00| 0.38|    m^3/s|nwm30-23894572|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|
|2000-10-01 00:00:00| 0.06|    m^3/s|nwm30-23736071|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|
|2000-10-01 01:00:00| 0.38|    m^3/s|nwm30-23894572|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|
|2000-10-01 01:00:00| 0.06|    m^3/s|nwm30-23736071|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|
|2000-10-01 02:00:00| 0.38|    m^3/s|nwm30-23894572|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|
|2000-10-01 02:00:00| 0.06|    m^3/s|nwm30-23736071|  NULL|nwm30_retrosp

In [7]:
# Preview the joined timeseries (JTS) as it exists in the copied evaluation.
# The rewrite below is currently disabled; uncomment it to rebuild the JTS
# without attributes before previewing.
#ev.joined_timeseries.create(add_attrs=False, execute_scripts=False)
sdf = ev.joined_timeseries.to_sdf()
sdf.show()

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|
|2000-10-01 00:00:00|      usgs-14138800|       nwm30-23736071|    3.3413877|           0.06|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL

In [8]:
# Preview the locations table (note: this rebinds `sdf`, which previously
# held the joined timeseries)
sdf = ev.locations.to_sdf()
sdf.show()

+-------------+--------------------+--------------------+
|           id|                name|            geometry|
+-------------+--------------------+--------------------+
|usgs-14316700|STEAMBOAT CREEK N...|[01 01 00 00 00 9...|
|usgs-14138800|BLAZED ALDER CREE...|[01 01 00 00 00 B...|
+-------------+--------------------+--------------------+



### Local Testing

##### Debug Chapman

In [9]:
def clean_streamflow(series, min_points_per_year=120):
    """
    Clean a streamflow time series ahead of baseflow separation.

    Non-finite values (NaN, +/-inf) are dropped, the remaining flows are
    replaced by their absolute value, and only calendar years that still hold
    at least ``min_points_per_year`` values are retained.

    The threshold counts data points, not days: for hourly data, 120 points is
    only five days of record.

    Args:
        series (pandas.Series): Streamflow values indexed by a datetime-like
            index (the index must expose ``.year``).
        min_points_per_year (int, optional): Minimum number of finite values a
            calendar year needs in order to be kept. Default is 120, the
            previously hard-coded threshold.

    Returns:
        tuple: ``(Q, date)`` where ``Q`` is a float ``numpy.ndarray`` of the
            retained flows and ``date`` is the matching slice of the index.
            Both are empty when no year meets the threshold.
    """
    date, Q = series.index, series.values.astype(float)

    # drop NaN / inf first so they do not count towards a year's total
    has_value = np.isfinite(Q)
    date, Q = date[has_value], np.abs(Q[has_value])

    # keep only the years that have enough remaining points
    years = np.asarray(date.year)
    year_unique, counts = np.unique(years, return_counts=True)
    keep = np.isin(years, year_unique[counts >= min_points_per_year])
    return Q[keep], date[keep]

def format_method(method):
    """
    Normalise the ``method`` argument into a collection of method names.

    Args:
        method (str or list): ``'all'`` to select every known method, a single
            method name, or an already-built list of names.

    Returns:
        list: The full method list for ``'all'``, a one-element list for any
            other string, or the input itself when it is not a string.
    """
    if method == 'all':
        return ['UKIH', 'Local', 'Fixed', 'Slide', 'LH', 'Chapman',
                'CM', 'Boughton', 'Furey', 'Eckhardt', 'EWMA', 'Willems']
    # a lone name is wrapped; anything else is handed back untouched
    return [method] if isinstance(method, str) else method

def strict_baseflow(Q, ice=None, quantile=0.9):
    """
    Identify the strict baseflow component of a flow time series.

    Four boolean "wet" masks are built from the centred difference of ``Q`` and
    from its magnitude; a time step is strict baseflow ("dry") only when none
    of the four masks flags it. The first and last time steps are always
    flagged wet, so they are never returned as strict baseflow.

    The masks only line up in length when ``Q`` holds at least three values.

    Parameters:
        Q (numpy.ndarray): The flow time series (1-D).
        ice (numpy.ndarray, optional): A boolean mask (or index array) of time
            steps with ice conditions; those steps are forced to ``False`` in
            the result.
        quantile (float, optional): Quantile of ``Q`` at or above which a local
            peak counts as a major event. Default is 0.9 (90th percentile).

    Returns:
        numpy.ndarray: A boolean mask, same length as ``Q``, that is ``True``
            at the time steps identified as strict baseflow.
    """
    # centred difference; dQ[k] is the slope at Q index k + 1
    dQ = (Q[2:] - Q[:-2]) / 2

    # 1. flow data associated with positive and zero values of dy / dt
    #    (both end points, which have no centred difference, are marked wet)
    wet1 = np.concatenate([[True], dQ >= 0, [True]])

    # 2. previous 2 points before points with dy/dt >= 0, as well as the next 3 points
    # idx_first: first index of each wet1 run (False -> True transition)
    idx_first = np.where(wet1[1:].astype(int) - wet1[:-1].astype(int) == 1)[0] + 1
    # idx_last: last index of each wet1 run (True -> False transition)
    idx_last = np.where(wet1[1:].astype(int) - wet1[:-1].astype(int) == -1)[0]
    # repeat/tile expand every run start into (start - 1, start - 2) ...
    idx_before = np.repeat([idx_first], 2) - np.tile(range(1, 3), idx_first.shape)
    # ... and every run end into (end + 1, end + 2, end + 3)
    idx_next = np.repeat([idx_last], 3) + np.tile(range(1, 4), idx_last.shape)
    idx_remove = np.concatenate([idx_before, idx_next])
    wet2 = np.full(Q.shape, False)
    # clip keeps offsets that run past either end of the series inside bounds
    wet2[idx_remove.clip(min=0, max=Q.shape[0] - 1)] = True

    # 3. five data points after major events (quantile)
    # growing has len(Q) + 1 entries so that growing[:-1] / growing[1:] describe
    # the step into and the step out of each point
    growing = np.concatenate([[True], (Q[1:] - Q[:-1]) >= 0, [True]])
    # a major event: flow at/above the quantile, not falling into the point,
    # and falling out of it (a local peak)
    idx_major = np.where((Q >= np.quantile(Q, quantile)) & growing[:-1] & ~growing[1:])[0]
    idx_after = np.repeat([idx_major], 5) + np.tile(range(1, 6), idx_major.shape)
    wet3 = np.full(Q.shape, False)
    wet3[idx_after.clip(min=0, max=Q.shape[0] - 1)] = True

    # 4. flow data followed by a data point with a larger value of -dy / dt
    #    (the first point and the last two points are always marked wet)
    wet4 = np.concatenate([[True], dQ[1:] - dQ[:-1] < 0, [True, True]])

    # dry points, namely strict baseflow
    # ("+" on boolean arrays is an element-wise logical OR)
    dry = ~(wet1 + wet2 + wet3 + wet4)

    # avoid ice conditions which invalidate the groundwater-baseflow relationship
    if ice is not None:
        dry[ice] = False

    return dry

def recession_coefficient(Q, strict):
    """
    Estimate the recession coefficient from the strict-baseflow points of `Q`.

    Only interior points (everything but the first and last value) selected by
    `strict` take part. For each of them the ratio `-dQ / Q` is formed, where
    `dQ` is the centred finite difference. The ratios are ranked in ascending
    order and the one at position `floor(0.05 * n)` defines the recession
    constant `K = -Q / dQ`. The returned coefficient is `exp(-1 / K)`.

    Args:
        Q (numpy.ndarray): Array of discharge values.
        strict (numpy.ndarray): Boolean mask, same length as `Q`, marking the
            strict-baseflow time steps.

    Returns:
        float: The calculated recession coefficient.
    """
    interior = strict[1:-1]
    mid_flow = Q[1:-1][interior]
    slope = ((Q[2:] - Q[:-2]) / 2)[interior]

    # position of the 5 % rank within the ascending ordering of -dQ / Q
    rank = np.floor(slope.shape[0] * 0.05).astype(int)
    ordering = np.argsort(-slope / mid_flow)
    chosen = ordering[rank]

    K = - mid_flow[chosen] / slope[chosen]
    return np.exp(-1 / K)

def lh(Q, beta=0.925, return_exceed=False):
    """LH digital filter (Lyne & Hollick, 1979)
    Lyne, V. and Hollick, M. (1979) Stochastic Time-Variable Rainfall-Runoff Modeling. Institute of Engineers Australia National Conference, 89-93.

    The filter is run forwards over the streamflow and then backwards over the
    forward result; after each step the estimate is capped at the series it is
    filtering.

    Args:
        Q (np.array): streamflow
        beta (float): filter parameter, 0.925 recommended by (Nathan & McMahon, 1990)
        return_exceed (bool, optional): if True, the output holds one extra
            trailing element counting how many estimates (over both passes)
            had to be capped.

    Returns:
        np.array: baseflow, with the cap count appended when `return_exceed`
            is True.
    """
    n = Q.shape[0]
    b = np.zeros(n + 1) if return_exceed else np.zeros(n)
    half_gain = (1 - beta) / 2

    # forward pass: baseflow may never exceed the observed flow
    b[0] = Q[0]
    for nxt in range(1, n):
        prev = nxt - 1
        b[nxt] = beta * b[prev] + half_gain * (Q[prev] + Q[nxt])
        if b[nxt] > Q[nxt]:
            b[nxt] = Q[nxt]
            if return_exceed:
                b[-1] += 1

    # backward pass: filter the forward result, capped at the forward result
    forward = b.copy()
    for i in reversed(range(n - 1)):
        b[i] = beta * b[i + 1] + half_gain * (forward[i + 1] + forward[i])
        if b[i] > forward[i]:
            b[i] = forward[i]
            if return_exceed:
                b[-1] += 1
    return b

def chapman(Q, a=0.925, initial_method='Q0', return_exceed=False):
    """Chapman filter (Chapman, 1991)
    Chapman, Tom G. "Comment on 'Evaluation of Automated Techniques for Base Flow and Recession Analyses' by R. J. Nathan and T. A. McMahon." Water Resources Research 27, no. 7 (1991): 1783–84. https://doi.org/10.1029/91WR01007.

    Args:
        Q (np.array): streamflow
        a (float): recession coefficient
        initial_method (str or float, optional): how the first baseflow value
            is chosen. Accepted strings:
            - 'Q0': Q[0]
            - 'min': np.min(Q)
            - 'LH': first value of the LH filter applied to Q
            Any non-string is used directly as the initial value.
            Default is 'Q0'.
        return_exceed (bool, optional): if True, the output holds one extra
            trailing element counting how many estimates were capped at the
            streamflow.

    Returns:
        np.array: baseflow, with the cap count appended when `return_exceed`
            is True.

    Raises:
        ValueError: if `initial_method` is a string other than the three
            accepted names.
    """
    n = Q.shape[0]
    b = np.zeros(n + 1) if return_exceed else np.zeros(n)

    # seed the recursion
    if not isinstance(initial_method, str):
        b[0] = initial_method
    elif initial_method == 'Q0':
        b[0] = Q[0]
    elif initial_method == 'min':
        b[0] = np.min(Q)
    elif initial_method == 'LH':
        b[0] = lh(Q)[0]  # Calculate the initial value using the LH method
    else:
        raise ValueError(f"Invalid initial_method: {initial_method}")

    # the two filter weights depend only on `a`
    carry = (3 * a - 1) / (3 - a)
    gain = (1 - a) / (3 - a)

    for cur in range(1, n):
        b[cur] = carry * b[cur - 1] + gain * (Q[cur] + Q[cur - 1])
        # baseflow may never exceed the observed flow
        if b[cur] > Q[cur]:
            b[cur] = Q[cur]
            if return_exceed:
                b[-1] += 1
    return b

def _kge(simulated, observed):
    """Kling-Gupta efficiency of each column of `simulated` against `observed`.

    Args:
        simulated (array-like): shape (n_points, n_methods).
        observed (array-like): shape (n_points,).

    Returns:
        numpy.ndarray: one KGE value per column; NaN where it is undefined
            (e.g. a column of NaN or a constant series).
    """
    sim = np.asarray(simulated, dtype=float)
    obs = np.asarray(observed, dtype=float).reshape(-1, 1)
    sim_dev = sim - sim.mean(axis=0)
    obs_dev = obs - obs.mean()
    # constant series give 0 / 0; let that become NaN without a warning
    with np.errstate(divide='ignore', invalid='ignore'):
        r = (sim_dev * obs_dev).sum(axis=0) / np.sqrt(
            (sim_dev ** 2).sum(axis=0) * (obs_dev ** 2).sum())
        alpha = sim.std(axis=0) / obs.std()
        beta = sim.mean(axis=0) / obs.mean()
    return 1 - np.sqrt((r - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)


def single_chapman_test(series, area=None, ice=None, method='all', return_kge=True):
    """
    Calculates baseflow on a single station using one or more methods.

    Only the 'LH' and 'Chapman' methods are computed here; any other requested
    method name yields a column of NaN in the result.

    Errors are reported rather than raised: on failure the first element of the
    returned tuple is a dict with 'error' and 'error_type' keys.

    Args:
        series (pandas.Series): The streamflow time series to separate,
            indexed by datetime.
        area (float): The area of the basin in km^2. Currently unused.
        ice (numpy.ndarray): Boolean mask (or index array) over the cleaned
            series marking ice-affected time steps; passed to
            ``strict_baseflow``, which excludes them from strict baseflow.
        method (str or list): A method name, a list of method names, or 'all'.
        return_kge (bool): If True, also returns the KGE of each method's
            baseflow against the streamflow at the strict-baseflow points.

    Returns:
        pandas.DataFrame or dict: The baseflow time series for each method, or error dictionary if an error occurred.
        pandas.Series or None: The KGE values for each method, or None if
            `return_kge` is False or an error occurred.
    """
    try:
        Q, date = clean_streamflow(series)
        method = format_method(method)

        # Check if we have sufficient data after cleaning
        if len(Q) == 0:
            error_msg = "No valid data points remaining after cleaning streamflow"
            return {'error': error_msg, 'error_type': 'insufficient_data'}, None

        if len(Q) < 3:
            error_msg = f"Insufficient data points ({len(Q)}) for baseflow analysis. Need at least 3 points."
            return {'error': error_msg, 'error_type': 'insufficient_data'}, None

        strict = strict_baseflow(Q, ice)

        if 'Chapman' in method:
            # the recession coefficient needs at least one interior strict point
            strict_count = np.sum(strict[1:-1])
            if strict_count < 1:
                error_msg = f"Insufficient strict baseflow points ({strict_count}) for Chapman method recession coefficient calculation"
                return {'error': error_msg, 'error_type': 'insufficient_strict_points'}, None
            a = recession_coefficient(Q, strict)

        b = pd.DataFrame(np.nan, index=date, columns=method)
        for m in method:
            if m == 'LH':
                b[m] = lh(Q)
            elif m == 'Chapman':
                b[m] = chapman(Q, a)

        if not return_kge:
            return b, None

        # Check if we have valid strict points for KGE calculation
        strict_values = b[strict].values
        if strict_values.size == 0:
            error_msg = "No strict baseflow points available for KGE calculation"
            return {'error': error_msg, 'error_type': 'no_strict_points'}, None

        # `return_kge` is only a flag; the metric itself comes from _kge()
        KGEs = pd.Series(_kge(strict_values, Q[strict]), index=b.columns)
        return b, KGEs

    except IndexError as e:
        error_msg = f"IndexError in baseflow calculation: {str(e)}"
        return {'error': error_msg, 'error_type': 'index_error'}, None
    except ValueError as e:
        error_msg = f"ValueError in baseflow calculation: {str(e)}"
        return {'error': error_msg, 'error_type': 'value_error'}, None
    except Exception as e:
        # deliberate best-effort: callers inspect the error dict instead of catching
        error_msg = f"Unexpected error in baseflow calculation: {str(e)}"
        return {'error': error_msg, 'error_type': 'unexpected_error'}, None

In [10]:
# Assemble the input series manually from the joined timeseries
jts = ev.joined_timeseries.to_pandas()

# keep a single gage, order chronologically, and renumber the rows
jts_trimmed = (
    jts[jts['primary_location_id'] == 'usgs-14138800']
    .sort_values('value_time')
    .reset_index(drop=True)
)

# observed (primary) streamflow indexed by time, ready for baseflow separation
streamflow = pd.Series(
    jts_trimmed['primary_value'].values,
    index=jts_trimmed['value_time'],
)


In [11]:
# display the single-gage joined timeseries
jts_trimmed

Unnamed: 0,value_time,primary_location_id,secondary_location_id,primary_value,secondary_value,unit_name,member,configuration_name,variable_name,reference_time
0,2000-10-01 00:00:00,usgs-14138800,nwm30-23736071,3.341388,0.06,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,
1,2000-10-01 01:00:00,usgs-14138800,nwm30-23736071,3.992675,0.06,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,
2,2000-10-01 02:00:00,usgs-14138800,nwm30-23736071,4.445745,0.06,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,
3,2000-10-01 03:00:00,usgs-14138800,nwm30-23736071,5.408518,0.06,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,
4,2000-10-01 04:00:00,usgs-14138800,nwm30-23736071,5.606736,0.06,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,
...,...,...,...,...,...,...,...,...,...,...
100273,2012-09-29 20:00:00,usgs-14138800,nwm30-23736071,0.052953,0.07,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,
100274,2012-09-29 21:00:00,usgs-14138800,nwm30-23736071,0.052953,0.07,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,
100275,2012-09-29 22:00:00,usgs-14138800,nwm30-23736071,0.052953,0.07,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,
100276,2012-09-29 23:00:00,usgs-14138800,nwm30-23736071,0.052953,0.07,m^3/s,,nwm30_retrospective,streamflow_hourly_inst,


In [12]:
# display the streamflow series that is passed to the baseflow functions
streamflow

value_time
2000-10-01 00:00:00    3.341388
2000-10-01 01:00:00    3.992675
2000-10-01 02:00:00    4.445745
2000-10-01 03:00:00    5.408518
2000-10-01 04:00:00    5.606736
                         ...   
2012-09-29 20:00:00    0.052953
2012-09-29 21:00:00    0.052953
2012-09-29 22:00:00    0.052953
2012-09-29 23:00:00    0.052953
2012-09-30 00:00:00    0.052953
Length: 100278, dtype: float32

In [13]:
# try lyne-hollick on the full record (KGE disabled)
result = single_chapman_test(
    series=streamflow, area=None, ice=None, method='LH', return_kge=False
)

In [14]:
# first element of the returned tuple: the baseflow DataFrame on success,
# or an error dict if the calculation failed
result_df = result[0]
result_df

Unnamed: 0_level_0,LH
value_time,Unnamed: 1_level_1
2000-10-01 00:00:00,3.341388
2000-10-01 01:00:00,3.365811
2000-10-01 02:00:00,3.429816
2000-10-01 03:00:00,3.542115
2000-10-01 04:00:00,3.689528
...,...
2012-09-29 20:00:00,0.052953
2012-09-29 21:00:00,0.052953
2012-09-29 22:00:00,0.052953
2012-09-29 23:00:00,0.052953


In [15]:
# try chapman on the full record (KGE disabled)
result = single_chapman_test(
    series=streamflow, area=None, ice=None, method='Chapman', return_kge=False
)

In [16]:
# first element of the returned tuple: the baseflow DataFrame on success,
# or an error dict if the calculation failed
result_df = result[0]
result_df

Unnamed: 0_level_0,Chapman
value_time,Unnamed: 1_level_1
2000-10-01 00:00:00,3.341388
2000-10-01 01:00:00,3.308750
2000-10-01 02:00:00,3.282795
2000-10-01 03:00:00,3.265063
2000-10-01 04:00:00,3.253997
...,...
2012-09-29 20:00:00,0.027012
2012-09-29 21:00:00,0.027001
2012-09-29 22:00:00,0.026989
2012-09-29 23:00:00,0.026978


#### Test minimum record length

NOTE: some cases were failing inside `strict_baseflow()` even with >= 120 hourly timesteps. To replicate this, repeat the test below on a baseflow-dominated period.

##### length == 120 timesteps

In [17]:
# Trim streamflow to exactly 120 hourly timesteps: the window ends 119 hours
# after the first timestamp and label slicing includes both end points.
start_POR = streamflow.index.min()
end_POR = start_POR + pd.Timedelta(hours=119)
streamflow = streamflow.loc[start_POR:end_POR]

In [18]:
# chapman on the 120-timestep record
result = single_chapman_test(
    series=streamflow, area=None, ice=None, method='Chapman', return_kge=False
)

In [19]:
# first element of the returned tuple: the baseflow DataFrame on success,
# or an error dict if the calculation failed
result_df = result[0]
result_df

Unnamed: 0_level_0,Chapman
value_time,Unnamed: 1_level_1
2000-10-01 00:00:00,3.341388
2000-10-01 01:00:00,3.255230
2000-10-01 02:00:00,3.189770
2000-10-01 03:00:00,3.148275
2000-10-01 04:00:00,3.125736
...,...
2000-10-05 19:00:00,0.229031
2000-10-05 20:00:00,0.226462
2000-10-05 21:00:00,0.223634
2000-10-05 22:00:00,0.220564


##### length < 120 timesteps

In [20]:
# define start_POR and end_POR to trim streamflow to 119 hourly timesteps
# (hours 0-118, both ends included) - one fewer than the 120-point minimum
# that clean_streamflow() requires per year
start_POR = streamflow.index.min()
end_POR = start_POR + pd.Timedelta(hours=118)
streamflow = streamflow[start_POR:end_POR]

In [21]:
# chapman on the 119-timestep record (expected to be rejected as too short)
result = single_chapman_test(
    series=streamflow, area=None, ice=None, method='Chapman', return_kge=False
)

In [22]:
# first element of the returned tuple; despite the name it is an error dict
# here, because the calculation did not produce a DataFrame
result_df = result[0]
result_df

{'error': 'No valid data points remaining after cleaning streamflow',
 'error_type': 'insufficient_data'}

### Test adding baseflow

#### Lyne-Hollick

In [23]:
# Lyne-Hollick baseflow for both series: the first field relies on the class's
# default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.LyneHollickBaseflow(output_field_name='lyne_hollick_primary'),
        tcf.LyneHollickBaseflow(
            value_field_name='secondary_value',
            output_field_name='lyne_hollick_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

                                                                                

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+--------------------+----------------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|lyne_hollick_primary|lyne_hollick_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+--------------------+----------------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|                1.13|                  0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|     

#### Chapman

In [24]:
# Chapman baseflow for both series: the first field relies on the class's
# default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.ChapmanBaseflow(output_field_name='chapman_primary'),
        tcf.ChapmanBaseflow(
            value_field_name='secondary_value',
            output_field_name='chapman_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

                                                                                

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+---------------+-----------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|chapman_primary|chapman_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+---------------+-----------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|           1.13|             0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|           1.13|             0

#### Chapman-Maxwell

In [25]:
# Chapman-Maxwell baseflow for both series: the first field relies on the
# class's default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.ChapmanMaxwellBaseflow(output_field_name='chapman_maxwell_primary'),
        tcf.ChapmanMaxwellBaseflow(
            value_field_name='secondary_value',
            output_field_name='chapman_maxwell_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

                                                                                

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+-----------------------+-------------------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|chapman_maxwell_primary|chapman_maxwell_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+-----------------------+-------------------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|                   1.13|                     0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|st

#### Boughton

In [26]:
# Boughton baseflow for both series: the first field relies on the class's
# default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.BoughtonBaseflow(output_field_name='boughton_primary'),
        tcf.BoughtonBaseflow(
            value_field_name='secondary_value',
            output_field_name='boughton_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

[Stage 92:>                                                         (0 + 2) / 2]

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+----------------+------------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|boughton_primary|boughton_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+----------------+------------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|            1.13|              0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|            1.13|     

                                                                                

#### Furey

In [27]:
# Furey baseflow for both series: the first field relies on the class's
# default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.FureyBaseflow(output_field_name='furey_primary'),
        tcf.FureyBaseflow(
            value_field_name='secondary_value',
            output_field_name='furey_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

[Stage 114:>                                                        (0 + 2) / 2]

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+-------------+---------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|furey_primary|furey_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+-------------+---------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|         1.13|           0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|         1.13|           0.38|
|2000-10-01 02:

                                                                                

#### Eckhardt

In [28]:
# Eckhardt baseflow for both series: the first field relies on the class's
# default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.EckhardtBaseflow(output_field_name='eckhardt_primary'),
        tcf.EckhardtBaseflow(
            value_field_name='secondary_value',
            output_field_name='eckhardt_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

[Stage 136:>                                                        (0 + 2) / 2]

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+----------------+------------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|eckhardt_primary|eckhardt_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+----------------+------------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|            1.13|              0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|            1.13|     

                                                                                

#### EWMA

In [29]:
# EWMA baseflow for both series: the first field relies on the class's
# default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.EWMABaseflow(output_field_name='ewma_primary'),
        tcf.EWMABaseflow(
            value_field_name='secondary_value',
            output_field_name='ewma_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

[Stage 158:>                                                        (0 + 2) / 2]

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+------------+--------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|ewma_primary|ewma_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+------------+--------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|        1.13|          0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|        1.13|          0.38|
|2000-10-01 02:00:00|    

                                                                                

#### Willems

In [30]:
# Willems baseflow for both series: the first field relies on the class's
# default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.WillemsBaseflow(output_field_name='willems_primary'),
        tcf.WillemsBaseflow(
            value_field_name='secondary_value',
            output_field_name='willems_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

[Stage 180:>                                                        (0 + 2) / 2]

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+---------------+-----------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|willems_primary|willems_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+---------------+-----------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|           1.13|             0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|           1.13|             0

                                                                                

#### UKIH

In [31]:
# UKIH baseflow for both series: the first field relies on the class's
# default value field, the second is pointed at `secondary_value`.
sdf = (
    ev.joined_timeseries
    .add_calculated_fields([
        tcf.UKIHBaseflow(output_field_name='ukih_primary'),
        tcf.UKIHBaseflow(
            value_field_name='secondary_value',
            output_field_name='ukih_secondary',
        ),
    ])
    .to_sdf()
)

sdf.show()

                                                                                

+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+------------+--------------+
|         value_time|primary_location_id|secondary_location_id|primary_value|secondary_value|unit_name|member| configuration_name|       variable_name|reference_time|ukih_primary|ukih_secondary|
+-------------------+-------------------+---------------------+-------------+---------------+---------+------+-------------------+--------------------+--------------+------------+--------------+
|2000-10-01 00:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|        1.13|          0.38|
|2000-10-01 01:00:00|      usgs-14316700|       nwm30-23894572|    1.1326739|           0.38|    m^3/s|  NULL|nwm30_retrospective|streamflow_hourly...|          NULL|        1.13|          0.38|
|2000-10-01 02:00:00|    