In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from math import pi
from sklearn.metrics import mean_squared_error, r2_score
import logging

In [3]:
%run Optimization_RTK_Functions.ipynb

## Loading Parameters 

In [248]:
# Suppress matplotlib INFO messages
logging.getLogger('matplotlib.category').setLevel(logging.WARNING)

# Excel workbooks with the calibrated RTK parameters, one workbook per event.
# E1 is kept commented out so it is excluded from the aggregation.
# NOTE(review): presumably E1 is held out because CCW event 1 is the validation
# event used later in this notebook - confirm.
files = [
    #'RTK_Parameters_all_algorithms_Ro_constraint_E1_CCW.xlsx',
    'RTK_Parameters_all_algorithms_Ro_constraint_E2_CCW.xlsx',
    'RTK_Parameters_all_algorithms_Ro_constraint_E3_CCW.xlsx',
    'RTK_Parameters_all_algorithms_Ro_constraint_E4_CCW.xlsx',
    'RTK_Parameters_all_algorithms_Ro_constraint_E5_CCW.xlsx',
    'RTK_Parameters_all_algorithms_Ro_constraint_E6_CCW.xlsx',
    'RTK_Parameters_all_algorithms_Ro_constraint_E7_CCW.xlsx',
    'RTK_Parameters_all_algorithms_Ro_constraint_E8_CCW.xlsx',
    'RTK_Parameters_all_algorithms_Ro_constraint_E9_CCW.xlsx',
]

# Worksheets to aggregate (one worksheet per optimisation algorithm).
# Sheet names used in this notebook so far: 'CMA-ES', 'DE', 'GA', 'SA', 'PSO'.
# Edit this single list to change the ensemble; it should match the algorithms
# listed in the weights table (`average_final`) that is run afterwards.
sheets = ['DE', 'SA']

parameters = ['R1', 'R2', 'R3', 'T1', 'T2', 'T3', 'K1', 'K2', 'K3']
algorithms = sheets

# Pool every value of every parameter across all event workbooks, per sheet:
# data_by_sheet[sheet][param] -> flat list of values from all files.
data_by_sheet = {sheet: {param: [] for param in parameters} for sheet in sheets}
for file in files:
    # Passing the list of sheet names makes pandas open the workbook once and
    # return a {sheet_name: DataFrame} dict, instead of re-reading the file
    # for every sheet.
    frames_by_sheet = pd.read_excel(file, sheet_name=sheets)
    for sheet in sheets:
        df = frames_by_sheet[sheet]
        for param in parameters:
            data_by_sheet[sheet][param].extend(df[param].tolist())

# Representative value per sheet/parameter. The names say "average" for
# historical reasons, but the statistic actually used is the MEDIAN of the
# pooled values.
sheet_averages = {
    sheet: {param: np.median(data_by_sheet[sheet][param]) for param in parameters}
    for sheet in sheets
}

# Tabulate the result: one row per sheet (algorithm), one column per parameter.
avg_df = pd.DataFrame(sheet_averages).T
print("Average Parameter Values for Each Sheet (across all events):")
print(avg_df)



Average Parameter Values for Each Sheet (across all events):
          R1        R2        R3      T1      T2       T3     K1      K2  \
DE  0.003811  0.013430  0.008374  3000.0  7800.0  12300.0  1.382  2.4345   
SA  0.004393  0.010908  0.008173  3000.0  5700.0  11700.0  1.477  2.5760   

       K3  
DE  5.461  
SA  5.224  


### 1 algorithm

In [12]:
# Weights table for a single-algorithm "ensemble": CMA-ES carries the whole weight.
# Swap the name below to validate another single algorithm.
average_final = pd.DataFrame(
    data=dict(
        Algorithm=['CMA-ES'],
        Rank_Weights=[1],
    )
)

### 5 algorithms

In [230]:
# Weights table for the five-algorithm ensemble (DE, SA, CMA-ES, GA, PSO).
# The values equal (6 - rank) / 15 rounded to six decimals, i.e. they decrease
# linearly with the position in the list and sum to 1.
average_final = pd.DataFrame(
    data=dict(
        Algorithm=['DE', 'SA', 'CMA-ES', 'GA', 'PSO'],
        Rank_Weights=[0.333333, 0.266667, 0.2, 0.133333, 0.066667],
    )
)

### 4 algorithms

In [238]:
# Weights table for the four-algorithm ensemble (DE, SA, CMA-ES, GA).
# The values equal (5 - rank) / 10, i.e. they decrease linearly with the
# position in the list and sum to 1.
average_final = pd.DataFrame(
    data=dict(
        Algorithm=['DE', 'SA', 'CMA-ES', 'GA'],
        Rank_Weights=[0.4, 0.3, 0.2, 0.1],
    )
)

### 3 algorithms

In [244]:
# Weights table for the three-algorithm ensemble (DE, SA, CMA-ES).
# The values equal (4 - rank) / 6 rounded to six decimals, i.e. they decrease
# linearly with the position in the list and sum to 1.
average_final = pd.DataFrame(
    data=dict(
        Algorithm=['DE', 'SA', 'CMA-ES'],
        Rank_Weights=[0.5, 0.333333, 0.166667],
    )
)

### 2 algorithms

In [250]:
# Weights table for the two-algorithm ensemble (DE, SA).
# The values equal (3 - rank) / 3 rounded to six decimals and sum to 1.
average_final = pd.DataFrame(
    data=dict(
        Algorithm=['DE', 'SA'],
        Rank_Weights=[0.666667, 0.333333],
    )
)

## Event to validate 

In [232]:
# Workbook holding the event used for validation.
# Alternative event kept for reference: './Tamucc_event_4.xlsx'
file_path_validation = './CCW_event_1.xlsx'
data = pd.read_excel(file_path_validation, skiprows=0)

# Columns are selected by position: the third column is read as rainfall and
# the second column as observed RDII.
# NOTE(review): NaNs are dropped from rainfall only; obs_rdii keeps any NaN
# cells it may contain - confirm this asymmetry is intended.
rainfall = data.iloc[:, 2].dropna().tolist()
obs_rdii = data.iloc[:, 1].tolist()

delta_t = 600         # time step in seconds (10-minute data)
area_acres = 491.153  # area passed to the validation function as `Area`

## Validation Performance

In [None]:
# NOTE: RDII_all_algorithms_plot_with_weights_validation must already be defined
# in the kernel before this cell runs. Its definition cell sits further down
# this notebook, so execute that cell first.
predicted_flows, weighted_flow, metrics, final_weighted_score = RDII_all_algorithms_plot_with_weights_validation(
    avg_df,          # per-algorithm parameter table built in the loading cell
    average_final,   # table with the 'Algorithm' and 'Rank_Weights' columns
    delta_t,
    rainfall,
    area_acres,
    obs_rdii,
    weight_type='Rank_Weights',
)


## Function definition

In [18]:
def RDII_all_algorithms_plot_with_weights_validation(avg_df, Rank_weight, delta_t, rainfall, Area, obs_rdii=None, weight_type='Linear_Weights', plot_name=None):
    """
    Simulate RDII for every algorithm in ``avg_df``, blend the simulations with
    per-algorithm weights, plot the result and (optionally) score it against
    observations.

    For each row of ``avg_df`` three unit hydrographs are built from the
    (R, T, K) triplets, convolved with the rainfall, converted with the factor
    ``Area * 43560 / 12`` and summed. All per-algorithm flows are zero-padded
    to a common length and combined as ``sum(flow * weight)``.

    Parameters:
      avg_df: DataFrame indexed by algorithm (e.g. 'DE', 'GA', etc.) with the
              columns R1, T1, K1, R2, T2, K2, R3, T3, K3.
      Rank_weight: DataFrame with an 'Algorithm' column and a column named
              ``weight_type`` holding the weight of each algorithm.
      delta_t: Time step in seconds.
      rainfall: Rainfall time series (in inches).
      Area: Catchment area in acres.
      obs_rdii: Observed RDII time series (optional).
      weight_type: Name of the weight column in ``Rank_weight`` to use
              (e.g. 'Linear_Weights', 'Rank_Weights' or 'Softmax_Weights').
              A KeyError is raised by pandas if the column does not exist.
      plot_name: Optional filename; when given the figure is saved as PNG
              (300 dpi) under that name.

    Returns:
      predicted_flows_all_algorithms: Dict {algorithm: flow array}, each padded
              with zeros to the longest simulated flow.
      weighted_flow: Weighted RDII flow. When ``obs_rdii`` is longer than the
              simulation, this array is zero-padded to the observation length.
      metrics: Dict with 'RMSE', 'R2', 'PBIAS' and 'NSE' if ``obs_rdii`` is
              provided, else None. The shorter of simulation/observation is
              zero-padded before the metrics are computed.
      final_weighted_score: Mean of the four transformed metrics (each mapped
              to [0, 1]) if ``obs_rdii`` is provided, else None.

    Raises:
      ValueError: if ``avg_df`` has no rows.

    Warns:
      UserWarning: if an algorithm in ``avg_df`` has no weight (it is then
              ignored in the blend) or if the applied weights do not sum to 1.

    Notes:
      Relies on ``unit_hydrograph_ordinates``, ``hydrograph_convolution`` and
      ``add_flow`` being available in the notebook namespace; they are not
      defined in this cell (presumably provided by the ``%run`` of
      Optimization_RTK_Functions.ipynb - confirm).
      Calls ``sns.set_style("whitegrid")``, which changes the global seaborn
      style, and shows the figure with ``plt.show()``.
    """
    import warnings

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.metrics import mean_squared_error, r2_score
    import seaborn as sns

    sns.set_style("whitegrid")

    # --- Helper functions for metrics transformation ---
    def calculate_metrics(simulated, observed):
        """Return RMSE, R2, PBIAS (%) and NSE of ``simulated`` vs ``observed``."""
        simulated = np.array(simulated)
        observed = np.array(observed)
        rmse = np.sqrt(mean_squared_error(observed, simulated))
        r2 = r2_score(observed, simulated)
        # Positive PBIAS means the simulation under-estimates the observed volume.
        pbias = 100 * np.sum(observed - simulated) / np.sum(observed)
        # NSE = 1 - SS_res / SS_tot
        numerator = np.sum((observed - simulated) ** 2)
        denominator = np.sum((observed - np.mean(observed)) ** 2)
        nse = 1 - (numerator / denominator)
        return {'RMSE': rmse, 'R2': r2, 'PBIAS': pbias, 'NSE': nse}

    def transform_metrics(metrics):
        """Map each raw metric onto [0, 1], where 1 is best."""
        f_rmse = 1 / (1 + metrics['RMSE'])
        f_pbias = max(0, 1 - abs(metrics['PBIAS']) / 100)
        f_r2 = max(0, min(metrics['R2'], 1))
        nse = metrics['NSE']
        # NSE is clipped to [-1, 1] and rescaled linearly to [0, 1].
        if nse < -1:
            f_nse = 0
        elif nse > 1:
            f_nse = 1
        else:
            f_nse = (nse + 1) / 2
        return {'f_RMSE': f_rmse, 'f_R2': f_r2, 'f_PBIAS': f_pbias, 'f_NSE': f_nse}

    def composite_score(metrics):
        """Unweighted mean of the four transformed metrics."""
        transformed = transform_metrics(metrics)
        score = (transformed['f_RMSE'] + transformed['f_R2'] + transformed['f_PBIAS'] + transformed['f_NSE']) / 4
        return score

    # --- Compute predicted flows using the per-algorithm parameters ---
    algorithms = list(avg_df.index)
    if not algorithms:
        # Fail early with a clear message instead of the "max() arg is an
        # empty sequence" error that would otherwise surface further down.
        raise ValueError("avg_df contains no algorithms; nothing to simulate.")

    predicted_flows_all_algorithms = {}
    max_total_flow = 0  # highest peak among the individual algorithm flows

    # One distinct colour per algorithm (tab10 cycles after 10 entries).
    cmap = plt.get_cmap('tab10')
    color_dict = {alg: cmap(i % 10) for i, alg in enumerate(algorithms)}

    # First pass: simulate the flow of every algorithm.
    for algorithm in algorithms:
        row = avg_df.loc[algorithm]
        R1, T1, K1 = row['R1'], row['T1'], row['K1']
        R2, T2, K2 = row['R2'], row['T2'], row['K2']
        R3, T3, K3 = row['R3'], row['T3'], row['K3']

        # Unit hydrograph ordinates (helper defined outside this cell)
        uh1 = unit_hydrograph_ordinates(R1, T1, K1, delta_t)
        uh2 = unit_hydrograph_ordinates(R2, T2, K2, delta_t)
        uh3 = unit_hydrograph_ordinates(R3, T3, K3, delta_t)

        # Convolve with rainfall (helper defined outside this cell)
        Q1_inch_sec = hydrograph_convolution(uh1, rainfall)
        Q2_inch_sec = hydrograph_convolution(uh2, rainfall)
        Q3_inch_sec = hydrograph_convolution(uh3, rainfall)

        # Convert to cfs: acres -> ft^2 (x 43560) and inches -> ft (/ 12)
        Q1_cfs = Q1_inch_sec * Area * 43560 / 12
        Q2_cfs = Q2_inch_sec * Area * 43560 / 12
        Q3_cfs = Q3_inch_sec * Area * 43560 / 12

        total_flow = np.array(add_flow(Q1_cfs, Q2_cfs, Q3_cfs))
        predicted_flows_all_algorithms[algorithm] = total_flow
        max_total_flow = max(max_total_flow, np.max(total_flow))

    # Second pass: zero-pad every flow to the longest one so they can be summed.
    max_flow_length = max(len(flow) for flow in predicted_flows_all_algorithms.values())
    for algorithm, flow in predicted_flows_all_algorithms.items():
        if len(flow) < max_flow_length:
            predicted_flows_all_algorithms[algorithm] = np.pad(
                flow, (0, max_flow_length - len(flow)), 'constant'
            )

    # Build time axis in hours.
    time_values = [i * delta_t / 3600 for i in range(max_flow_length)]

    # --- Compute the weighted flow ---
    weights = Rank_weight.set_index('Algorithm')[weight_type].to_dict()

    # An algorithm without a weight is still ignored (weight 0) as before, but
    # no longer silently: a mismatch between avg_df and the weights table
    # scales the blended flow down without any other visible symptom.
    unweighted = [alg for alg in predicted_flows_all_algorithms if alg not in weights]
    if unweighted:
        warnings.warn(
            f"No '{weight_type}' weight found for {unweighted}; "
            "these algorithms contribute nothing to the weighted flow.",
            stacklevel=2,
        )
    applied_weight_sum = sum(weights.get(alg, 0) for alg in predicted_flows_all_algorithms)
    if not np.isclose(applied_weight_sum, 1.0, atol=1e-3):
        warnings.warn(
            f"The '{weight_type}' weights applied to {algorithms} sum to "
            f"{applied_weight_sum:.6f} instead of 1; the weighted flow is scaled accordingly.",
            stacklevel=2,
        )

    weighted_flow = np.zeros(max_flow_length)
    for algorithm, avg_flow in predicted_flows_all_algorithms.items():
        weighted_flow += avg_flow * weights.get(algorithm, 0)

    # --- Plotting ---
    fig, ax1 = plt.subplots(figsize=(12, 8))

    # Plot the simulated RDII of each algorithm with markers and lines.
    for algorithm, avg_flow in predicted_flows_all_algorithms.items():
        ax1.plot(time_values, avg_flow, label=f"Avg. Sim. RDII ({algorithm})",
                 color=color_dict[algorithm], linestyle='-', marker='o',
                 markersize=4, linewidth=1, markevery=5, alpha=0.7)

    # Plot weighted flow with distinct style and markers.
    ax1.plot(time_values, weighted_flow, label="Weighted Sim. RDII",
             color='black', linestyle='-', marker='D', markersize=5, linewidth=2, markevery=10, alpha=0.9)

    metrics = None
    final_weighted_score = None
    if obs_rdii is not None:
        # Bring simulation and observation to the same length by zero-padding
        # the shorter one; the metrics are computed on the padded series.
        n_sim = len(weighted_flow)
        n_obs = len(obs_rdii)
        n_final = max(n_sim, n_obs)
        if n_sim < n_final:
            weighted_flow = np.pad(weighted_flow, (0, n_final - n_sim), 'constant')
        if n_obs < n_final:
            obs_rdii = np.pad(obs_rdii, (0, n_final - n_obs), 'constant')
        time_obs = [i * delta_t / 3600 for i in range(n_final)]
        ax1.plot(time_obs, obs_rdii, label="Observed RDII", color='green',
                 linestyle='--', marker='^', markersize=6, linewidth=2, markevery=10)
        metrics = calculate_metrics(weighted_flow, obs_rdii)
        final_weighted_score = composite_score(metrics)
        metrics_text = (
            f"RMSE: {metrics['RMSE']:.4f}\n"
            f"R²: {metrics['R2']:.4f}\n"
            f"PBIAS: {metrics['PBIAS']:.4f}%\n"
            f"NSE: {metrics['NSE']:.4f}\n"
            f"Performance Score: {final_weighted_score:.4f}"
        )
        ax1.text(0.93, 0.68, metrics_text, transform=ax1.transAxes, fontsize=10,
                 verticalalignment='top', horizontalalignment='right',
                 bbox=dict(boxstyle='round', facecolor='white', edgecolor='black', alpha=0.8))

    # Plot rainfall as an inverted stem plot on a secondary y-axis.
    ax2 = ax1.twinx()
    rainfall_values = np.asarray(rainfall)
    # np.pad rejects negative widths, so only pad when the rainfall record is
    # shorter than the simulation; a longer record is plotted in full on its
    # own time axis instead of raising.
    n_rain = max(len(rainfall_values), max_flow_length)
    rainfall_padded = np.pad(rainfall_values, (0, n_rain - len(rainfall_values)), 'constant')
    time_rain = [i * delta_t / 3600 for i in range(n_rain)]
    markerline, stemlines, baseline = ax2.stem(time_rain, rainfall_padded,
                                               linefmt='blue', markerfmt=' ', basefmt=' ', label='Rainfall')
    plt.setp(stemlines, 'color', 'blue', 'alpha', 0.5)
    plt.setp(markerline, 'color', 'blue', 'alpha', 0.5)
    # Inverted axis (0 at the top) so the rainfall stems hang from the top edge.
    ax2.set_ylim(0.9, 0)

    # Set y-limits and axis labels. The individual algorithm peaks are included
    # so that none of the plotted curves is clipped.
    y_max = max(max_total_flow, np.max(weighted_flow))
    if obs_rdii is not None:
        y_max = max(y_max, np.max(obs_rdii) * 1.2)
    ax1.set_ylim(0, y_max)
    ax1.set_xlim(left=0)  # ax2 shares this x-axis via twinx()

    ax1.set_xlabel('Time (Hours)', fontsize=12, fontweight='bold')
    ax1.set_ylabel('RDII (cfs)', fontsize=12, fontweight='bold')
    ax2.set_ylabel('Rainfall (in)', fontsize=12, fontweight='bold')
    ax1.set_title(f'Weighted RDII Flow Validation (Weight Type: {weight_type})', fontsize=14, fontweight='bold')

    ax1.legend(loc='upper right', bbox_to_anchor=(0.98, 0.98), fontsize=10, frameon=True)
    ax2.legend(loc='upper right', bbox_to_anchor=(0.98, 0.70), fontsize=10, frameon=True)
    ax1.grid(True)

    plt.tight_layout()

    # Save the figure if a plot name is provided.
    if plot_name is not None:
        fig.savefig(plot_name, format='png', dpi=300)

    plt.show()

    return predicted_flows_all_algorithms, weighted_flow, metrics, final_weighted_score
