In [1]:
import subprocess
import os
import pandas as pd
import numpy as np
from typing import Tuple

In [2]:
def remove_invalid_values(simulated, observed):
    valid_indices = np.where((observed != -9999) & (simulated != -9999))
    return simulated[valid_indices], observed[valid_indices]


In [3]:
def remove_nan_rows(
    array1: np.ndarray, 
    array2: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Removes rows from two arrays where either array has NaN values.
    Retains the first row if it doesn't have any NaN values.
    
    Arguments:
    array1: np.ndarray:
        First input array
    array2: np.ndarray
        Second input array
    
    Returns:
    cleaned_array1: : np.ndarray
        Cleaned array1 without NaN rows
    cleaned_array2: np.ndarray
        Cleaned array2 without NaN rows
    """
    # checks for and removes any rows where either array has a value of NaN at a corresponding row 
    # including the first one
    
    mask = np.logical_and(~np.isnan(array1), ~np.isnan(array2))
    if not np.isnan(array1[0]) and not np.isnan(array2[0]):
        mask[0] = True
    cleaned_array1 = array1[mask]
    cleaned_array2 = array2[mask]
    return cleaned_array1, cleaned_array2

In [4]:
def compute_kge(simulated_array, observed_array):
    """
    Computes KGE (Kling-Gupta Efficiency) between observed and simulated values.

    Parameters:
        observed_array (numpy.ndarray): Array of observed values.
        simulated_array (numpy.ndarray): Array of simulated values.

    Returns:
        float: KGE value.
    """
    
    # Calculate Pearson correlation coefficient
    correlation_coefficient = np.corrcoef(observed_array, simulated_array)[0, 1]
    
    # Calculate standard deviation ratio
    std_observed = np.std(observed_array)
    std_simulated = np.std(simulated_array)
    std_ratio = std_simulated / std_observed
    
    # Calculate bias ratio
    mean_observed = np.mean(observed_array)
    mean_simulated = np.mean(simulated_array)
    bias_ratio = mean_simulated / mean_observed
    
    # Calculate KGE
    kge = 1 - np.sqrt((correlation_coefficient - 1)**2 + (std_ratio - 1)**2 + (bias_ratio - 1)**2)
    
    return kge

In [5]:
# Directory where Hype outputs are saved
file_path= "../../model/seperate_cal/combined_model/results/"

In [6]:
calibration_ranges = [('1980-10-01', '1984-09-30'),
               ('1989-10-01', '1998-09-30'),
               ('2003-10-01', '2007-09-30'),
               ('2012-10-01', '2015-09-30')]

In [7]:
validation_ranges = [('1984-10-01', '1989-09-30'),
               ('1998-10-01', '2003-09-30'),
               ('2007-10-01', '2010-09-30')]

In [8]:
# Initialize an empty DataFrame to store results
results_df = pd.DataFrame()

In [9]:
# Iterate through files in the output directory
for filename in os.listdir(file_path):
    if filename.endswith(".txt") and filename.startswith("00"):  # Process files with prefix '00' and end with '.txt'
        filepath = os.path.join(file_path, filename)
        
        # Read tab-separated file into DataFrame
        flow = pd.read_csv(filepath, sep='\t', index_col=0)
        flow = flow.iloc[1:]  # Drop the first row
        
        calibration_kge= []
        validation_kge= []

        for start_date, end_date in calibration_ranges:
            
            trimmed_df = flow.loc[start_date:end_date]
            
            cal_sim= trimmed_df['cout'].values.astype(float)
            cal_obs= trimmed_df['rout'].values.astype(float)

            simulated_array_cal, observed_array_cal = remove_invalid_values(cal_sim, cal_obs)
            
            cal_kge = compute_kge(simulated_array_cal, observed_array_cal)
            
                    # Save total KGE to the list
            calibration_kge.append(cal_kge)
            
            
                # Process and filter DataFrame based on calibration period
        for start_date, end_date in validation_ranges:
            trimmed_df = flow.loc[start_date:end_date]
            
            val_sim= trimmed_df['cout'].values.astype(float)
            val_obs= trimmed_df['rout'].values.astype(float)

            simulated_array_val, observed_array_val = remove_invalid_values(val_sim, val_obs)
            
            val_kge = compute_kge(simulated_array_val, observed_array_val)
            
                    # Save total KGE to the list
            validation_kge.append(val_kge)
            
                # Create a Series to store KGE values with year ranges as columns
        kge_series = pd.Series(
            data=calibration_kge + validation_kge,
            index=[f'Cal_{start}-{end}' for start, end in calibration_ranges] +
                  [f'Val_{start}-{end}' for start, end in validation_ranges],
            name=filename
        )
        
        # Append the Series to the results DataFrame
        results_df = results_df.append(kge_series)
            
        

  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  results_df = results_df.append(kge_series)
  results_df = results_df.append(kge_series)

In [10]:
# Index values of rows to drop
rows_to_drop = ['0058208.txt', '0058213.txt', '0058223.txt']

# Drop the specified rows
filtered_df = results_df.drop(index=rows_to_drop)

In [11]:
#filtered_df.to_csv('../../model/seperate_cal/combined_model/results/kge_in_depth_sans2011.csv')
filtered_df

Unnamed: 0,Cal_1980-10-01-1984-09-30,Cal_1989-10-01-1998-09-30,Cal_2003-10-01-2007-09-30,Cal_2012-10-01-2015-09-30,Val_1984-10-01-1989-09-30,Val_1998-10-01-2003-09-30,Val_2007-10-01-2010-09-30
0058290.txt,0.228953,0.541122,0.090852,0.509472,0.286807,-0.228912,-2.469857
0058308.txt,0.365071,0.363402,0.393207,0.27686,-0.857605,0.246031,0.361343
0058328.txt,0.440668,0.287814,0.011842,0.690468,0.026836,0.145081,0.314453
0058346.txt,-2.500069,0.15106,0.137638,0.763657,-2.028043,-20.904845,0.570438
0058356.txt,-0.088988,0.552615,-0.998919,-0.352273,0.501467,-1.120895,-4.600024
0058363.txt,0.342737,0.521827,-1.098819,-0.014195,0.617983,-1.162013,-2.168872
0058398.txt,0.299528,0.357164,0.286514,-0.950496,0.002821,0.377334,-1.130584
0058408.txt,0.517356,0.61253,0.390664,0.341216,-0.001337,0.546717,0.238186
0058418.txt,-0.022877,0.343189,0.194809,,0.44995,-0.929792,0.470876
0058425.txt,-0.278831,0.123432,0.099961,0.600823,-0.129967,-2.361609,0.029492


In [12]:
# Compute the mean of each column
column_means = results_df.mean()

# Print the means of each column
print("Mean of each column:")
print(column_means)

Mean of each column:
Cal_1980-10-01-1984-09-30    0.129166
Cal_1989-10-01-1998-09-30    0.488276
Cal_2003-10-01-2007-09-30    0.072436
Cal_2012-10-01-2015-09-30    0.332002
Val_1984-10-01-1989-09-30    0.022672
Val_1998-10-01-2003-09-30   -1.617045
Val_2007-10-01-2010-09-30   -0.395620
dtype: float64


In [13]:
# Example list of filenames to filter
filenames_to_include = ['0058356.txt', '0058363.txt', '0058290.txt']

# Filter results_df to include only the specified filenames
eastern_tribs = results_df.loc[filenames_to_include]

In [14]:
eastern_tribs

Unnamed: 0,Cal_1980-10-01-1984-09-30,Cal_1989-10-01-1998-09-30,Cal_2003-10-01-2007-09-30,Cal_2012-10-01-2015-09-30,Val_1984-10-01-1989-09-30,Val_1998-10-01-2003-09-30,Val_2007-10-01-2010-09-30
0058356.txt,-0.088988,0.552615,-0.998919,-0.352273,0.501467,-1.120895,-4.600024
0058363.txt,0.342737,0.521827,-1.098819,-0.014195,0.617983,-1.162013,-2.168872
0058290.txt,0.228953,0.541122,0.090852,0.509472,0.286807,-0.228912,-2.469857
