# Required Libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from deap import base, creator, tools, algorithms
import random
from openpyxl import load_workbook


# Required Functions 

In [6]:
# Execute the companion notebook so that the names it defines are available in this session.
# NOTE(review): later cells call RDII_calculation, fitness, R_calc, RDII_combined_plot and
# calculate_criteria_multiple_runs, none of which are defined in this notebook — presumably
# they all come from this file; confirm before renaming or moving anything there.
%run Optimization_RTK_Functions.ipynb

# Input

In [9]:
# --- Event constants ---------------------------------------------------------
delta_t = 600            # time step length in seconds (600 s = 10 minutes)
area_acres = 491.153     # catchment area in acres

# --- Event time series read from the Excel workbook --------------------------
file_path = './CCW_event_9.xlsx'
data = pd.read_excel(file_path, skiprows=0)

# Columns are selected by position:
#   index 1 -> observed RDII flow (every row is kept, blanks included)
#   index 2 -> rainfall (blank rows are dropped)
obs_rdii = data.iloc[:, 1].tolist()
rainfall = data.iloc[:, 2].dropna().tolist()

# Time (in seconds) by which the observed RDII series outlasts the rainfall series,
# i.e. the decay period after rainfall stops.
total_rdii_period = delta_t * (len(obs_rdii) - len(rainfall))

### T and K Range 

In [13]:
# Allowed values for T (in seconds, 10-minute steps). The range() stop is one step past the
# last wanted value so that the upper bound itself is included.
allowed_T1_values = [x * 60 for x in range(10, 121, 10)]  # 10 min to 2 hrs
allowed_T2_values = [x * 60 for x in range(20, 241, 10)]  # 20 min to 4 hrs
allowed_T3_values = [x * 60 for x in range(30, 421, 10)]  # 30 min to 7 hrs


def _k_grid(lower, upper, step=0.001):
    """
    Return the evenly spaced values ``lower, lower + step, ..., upper`` (both ends included).

    The grid is generated from an integer step count instead of ``np.arange`` with a padded
    stop value: ``np.arange(1, 2.01, 0.001)`` runs on to 2.009 and therefore leaks values
    above the intended upper bound into the search space.

    Parameters
    ----------
    lower, upper : float
        Inclusive bounds of the grid.
    step : float, optional
        Grid spacing. Values are rounded to 3 decimals, which matches the default 0.001.

    Returns
    -------
    list of float
    """
    n_steps = int(round((upper - lower) / step))
    return [round(lower + i * step, 3) for i in range(n_steps + 1)]


# Allowed values for K (0.001 steps, bounds inclusive)
allowed_K1_values = _k_grid(1, 2)  # 1 to 2
allowed_K2_values = _k_grid(2, 3)  # 2 to 3
allowed_K3_values = _k_grid(3, 7)  # 3 to 7


# Genetic Algorithm Optimization 

## Objective Function Definition

In [17]:
def objective_function(params):
    """
    Score one candidate RTK parameter set for the genetic algorithm (lower is better).

    Parameters
    ----------
    params : sequence of 9 numbers
        ``[R1, T1_index, K1_index, R2, T2_index, K2_index, R3, T3_index, K3_index]``.
        The R values are used directly. Each index is truncated with ``int`` and looked up
        in the matching notebook-level list (``allowed_T1_values`` ... ``allowed_K3_values``).

    Returns
    -------
    tuple of float
        Always a one-element tuple, as DEAP expects for ``fitness.values``:
        ``(fitness_value + penalty,)``. ``(inf,)`` is returned when any R is negative or
        when the simulated series is empty or contains NaN/inf.

    Notes
    -----
    Reads the notebook-level names ``rainfall``, ``obs_rdii``, ``delta_t``, ``area_acres``
    and ``total_rdii_period`` and calls ``RDII_calculation``, ``fitness`` and ``R_calc``,
    which are not defined in this cell.
    """
    # Convert flat array back to parameter tuples
    R1, T1_index, K1_index, R2, T2_index, K2_index, R3, T3_index, K3_index = params

    # Strict non-negative constraint for R values
    if R1 < 0 or R2 < 0 or R3 < 0:
        print("Invalid R values detected (negative), skipping iteration.")
        return (float('inf'),)

    # Map indices to actual T and K values
    T1 = allowed_T1_values[int(T1_index)]
    T2 = allowed_T2_values[int(T2_index)]
    T3 = allowed_T3_values[int(T3_index)]

    K1 = allowed_K1_values[int(K1_index)]
    K2 = allowed_K2_values[int(K2_index)]
    K3 = allowed_K3_values[int(K3_index)]

    # Package parameters
    param_tuples = [(R1, T1, K1), (R2, T2, K2), (R3, T3, K3)]

    # Calculate simulated RDII
    sim_rdii = np.array(RDII_calculation(param_tuples, delta_t, rainfall, area_acres))

    # Check for invalid outputs. The result must be a tuple here as well: DEAP assigns the
    # return value to ``individual.fitness.values``, which fails on a bare float.
    if sim_rdii.size == 0 or np.any(np.isnan(sim_rdii)) or np.any(np.isinf(sim_rdii)):
        print("Invalid sim_rdii generated, returning large penalty.")
        return (float('inf'),)

    # Pad the shorter array with zeros to match the length of the longer one
    max_length = max(len(obs_rdii), len(sim_rdii))
    obs_rdii_padded = np.pad(obs_rdii, (0, max_length - len(obs_rdii)), mode='constant')
    sim_rdii_padded = np.pad(sim_rdii, (0, max_length - len(sim_rdii)), mode='constant')

    # Combined goodness-of-fit score; the four criteria are weighted equally
    fitness_value = fitness(
        obs_rdii_padded,
        sim_rdii_padded,
        delta_t,
        weight_rmse=0.25,
        weight_r2=0.25,
        weight_pbias=0.25,
        weight_nse=0.25
    )

    # Calculate penalties
    penalty = 0
    Ro = R_calc(rainfall, obs_rdii, delta_t, area_acres)

    # Enforce equality R1 + R2 + R3 = Ro through a heavily weighted quadratic penalty
    r_sum = R1 + R2 + R3
    penalty += 1e6 * (r_sum - Ro)**2

    # Temporal ordering constraints: T values must be strictly increasing ...
    if not (T1 < T2 < T3):
        penalty += 1000
    # ... and so must T + T*K, with the last one not exceeding total_rdii_period
    if not (T1 + T1 * K1 < T2 + T2 * K2 < T3 + T3 * K3 <= total_rdii_period):
        penalty += 1000

    return (fitness_value + penalty,)  # Ensure the result is a tuple for GA algorithms

## Multiple Run GA

In [19]:
import random
from deap import base, creator, tools, algorithms

# Define the problem and fitness function (minimization).
# The guards keep a re-run of this cell from creating the DEAP classes a second time.
if "FitnessMin" not in creator.__dict__:
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))

if "Individual" not in creator.__dict__:
    creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Gene layout of an individual:
#   [R1, T1_index, K1_index, R2, T2_index, K2_index, R3, T3_index, K3_index]
R_GENE_POSITIONS = (0, 3, 6)

# Each index gene must be drawn from the list it is later used to index. Drawing every T
# index from len(allowed_T1_values) and every K index from len(allowed_K1_values) would
# keep T2, T3 and K3 confined to the first part of their (longer) lists.
INDEX_GENE_POOLS = {
    1: allowed_T1_values, 2: allowed_K1_values,
    4: allowed_T2_values, 5: allowed_K2_values,
    7: allowed_T3_values, 8: allowed_K3_values,
}

# Per-gene mutation probability used inside mutate()
GENE_MUTATION_PROB = 0.2


# --------- 1. Gene generators ---------
def random_r():
    """Return a random R value drawn uniformly from [0, 1] (never negative)."""
    return random.uniform(0, 1)


def _random_index(values):
    """Return a random valid index into ``values``."""
    return random.randint(0, len(values) - 1)


def random_index_t1():
    """Return a random valid index into ``allowed_T1_values``."""
    return _random_index(allowed_T1_values)


def random_index_k1():
    """Return a random valid index into ``allowed_K1_values``."""
    return _random_index(allowed_K1_values)


# --------- 2. Register the generators in the DEAP toolbox ---------
# Extra arguments passed to register() are bound to the generator, so every gene samples
# from its own list.
toolbox.register("R", random_r)
toolbox.register("T1_index", _random_index, allowed_T1_values)
toolbox.register("T2_index", _random_index, allowed_T2_values)
toolbox.register("T3_index", _random_index, allowed_T3_values)
toolbox.register("K1_index", _random_index, allowed_K1_values)
toolbox.register("K2_index", _random_index, allowed_K2_values)
toolbox.register("K3_index", _random_index, allowed_K3_values)

# --------- 3. Individual and population ---------
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.R, toolbox.T1_index, toolbox.K1_index,
                  toolbox.R, toolbox.T2_index, toolbox.K2_index,
                  toolbox.R, toolbox.T3_index, toolbox.K3_index),
                 n=1)

toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# --------- 4. Evaluation and crossover ---------
toolbox.register("evaluate", objective_function)  # must return a tuple
# Two-point crossover swaps genes position by position, so every index gene stays within
# the range of the list it belongs to.
toolbox.register("mate", tools.cxTwoPoint)


# --------- 5. Mutation that keeps every gene valid ---------
def mutate(individual):
    """
    Mutate an individual in place, gene by gene.

    Each gene is resampled independently with probability ``GENE_MUTATION_PROB``:
    R genes are redrawn uniformly from [0, 1]; T/K index genes are redrawn as a valid index
    into their own ``allowed_*_values`` list.

    Parameters
    ----------
    individual : list
        Nine genes in the layout described by ``R_GENE_POSITIONS`` / ``INDEX_GENE_POOLS``.

    Returns
    -------
    tuple
        ``(individual,)`` — the one-element tuple DEAP expects from a mutation operator.
    """
    for position in R_GENE_POSITIONS:
        if random.random() < GENE_MUTATION_PROB:
            individual[position] = random.uniform(0, 1)

    for position, values in INDEX_GENE_POOLS.items():
        if random.random() < GENE_MUTATION_PROB:
            individual[position] = _random_index(values)

    return individual,


toolbox.register("mutate", mutate)  # Use the custom mutation

toolbox.register("select", tools.selTournament, tournsize=3)

In [None]:
# Number of independent GA runs
n_runs = 20

# Optional base seed for reproducible runs. None leaves the random generator untouched;
# with an integer, run i is seeded with GA_BASE_SEED + i.
GA_BASE_SEED = None

# Best result of every run
all_params = []          # [(R1, T1, K1), (R2, T2, K2), (R3, T3, K3)] per run
all_fitness_scores = []  # matching objective values

for run in range(n_runs):
    print(f"Running Genetic Algorithm: Run {run + 1}/{n_runs}")

    if GA_BASE_SEED is not None:
        random.seed(GA_BASE_SEED + run)

    # Initialize population
    population = toolbox.population(n=100)  # Population size

    # eaSimple has no elitism, so the best individual found can be lost again in a later
    # generation. The hall of fame keeps a copy of the best individual seen over the run.
    hall_of_fame = tools.HallOfFame(1)

    # Run the GA
    result, log = algorithms.eaSimple(
        population, toolbox,
        cxpb=0.5,      # Crossover probability
        mutpb=0.20,    # Mutation probability (per individual)
        ngen=100,      # Number of generations
        halloffame=hall_of_fame,
        verbose=False  # Avoid excessive output
    )

    # Best solution over all generations of this run
    best_individual = hall_of_fame[0]
    best_params = best_individual

    # Extract parameter indices
    R1, T1_index, K1_index, R2, T2_index, K2_index, R3, T3_index, K3_index = best_params

    # Map indices to actual values
    T1 = allowed_T1_values[int(T1_index)]
    T2 = allowed_T2_values[int(T2_index)]
    T3 = allowed_T3_values[int(T3_index)]

    K1 = allowed_K1_values[int(K1_index)]
    K2 = allowed_K2_values[int(K2_index)]
    K3 = allowed_K3_values[int(K3_index)]

    # Objective value of the best individual
    best_fitness_value = best_individual.fitness.values[0]

    # Final best parameter set
    best_params_actual = [(R1, T1, K1), (R2, T2, K2), (R3, T3, K3)]

    # Store results
    all_params.append(best_params_actual)
    all_fitness_scores.append(best_fitness_value)

print("Completed all runs.")




In [None]:
# Create DataFrame with run number, parameters, and fitness score (one row per GA run)
columns = ['Run', 'R1', 'T1', 'K1', 'R2', 'T2', 'K2', 'R3', 'T3', 'K3', 'Fitness']

# The score must not be bound to the name `fitness`: that is the notebook-level function
# called by objective_function, and rebinding it to a number makes any later GA run fail.
# A comprehension is used so that no loop variable leaks into the notebook namespace.
results_data = [
    [run_number, *rtk_1, *rtk_2, *rtk_3, fitness_score]  # Run, (R,T,K) x 3, Fitness
    for run_number, ((rtk_1, rtk_2, rtk_3), fitness_score)
    in enumerate(zip(all_params, all_fitness_scores), start=1)
]

results_df = pd.DataFrame(results_data, columns=columns)

# Display results
print("\nResults from all runs:")
print(results_df)

# Save results to an Excel workbook if needed
#results_df.to_excel("genetic_algorithm_results_event5.xlsx", index=False)

In [None]:
# Column-wise mean and standard deviation of the per-run results table.
std_values = results_df.std()
mean_values = results_df.mean()

# Each report is a heading line followed by the statistics themselves.
print("Mean values of parameters:", mean_values, sep="\n")
print("\nStandard deviation of parameters:", std_values, sep="\n")

## Saving RTK parameters

In [None]:
# Target workbook and sheet for the GA parameter sets
excel_filename = "RTK_Parameters_all_algorithms_Ro_constraint_E9_CCW.xlsx"
sheet_name = "GA"

# Check if "Run" column exists, and add it only if not present
if "Run" not in results_df.columns:
    results_df.insert(0, "Run", range(1, len(results_df) + 1))

# Select the desired columns (the fitness score is not exported here)
export_columns = ["Run", "R1", "T1", "K1", "R2", "T2", "K2", "R3", "T3", "K3"]

try:
    # Append to an existing workbook so sheets written by other cells/notebooks are kept.
    # if_sheet_exists="replace" (pandas >= 1.3) makes this cell safe to re-run: without it,
    # a second run raises ValueError because the sheet is already present.
    with pd.ExcelWriter(excel_filename, engine="openpyxl", mode="a",
                        if_sheet_exists="replace") as writer:
        results_df[export_columns].to_excel(writer, sheet_name=sheet_name, index=False)
except FileNotFoundError:
    # If the file does not exist, create a new one
    with pd.ExcelWriter(excel_filename, engine="openpyxl", mode="w") as writer:
        results_df[export_columns].to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Results successfully saved to {excel_filename} in sheet {sheet_name}.")

In [None]:
# Pass the best parameter sets of all runs, together with the event inputs and the observed
# RDII, to the plotting helper and keep whatever it returns in `plot`.
# NOTE(review): RDII_combined_plot is not defined in this notebook — presumably it comes from
# Optimization_RTK_Functions.ipynb and draws simulated vs. observed RDII; confirm there.
plot=RDII_combined_plot(all_params, delta_t, rainfall, area_acres, obs_rdii)

## Saving Metrics

In [None]:
# Evaluate the best parameter set of every run against the observed RDII.
# NOTE(review): calculate_criteria_multiple_runs is not defined in this notebook. The cell
# below treats its result as a DataFrame with RMSE, R2, PBIAS and NSE columns — confirm
# against the helper's definition.
criteria=calculate_criteria_multiple_runs(all_params, obs_rdii, delta_t, rainfall, area_acres)

In [None]:
# Define the Excel file and sheet name
excel_filename = "Metrices_all_algorithms_Ro_E9_CCW.xlsx"
criteria_sheet_name = "GA"

# Export one row per GA run. The row count is derived from the data instead of being
# hard-coded to 20, so changing n_runs cannot cause a length mismatch below.
n_export_rows = min(len(criteria), len(results_df))
criteria_df = criteria.iloc[:n_export_rows].copy()

# Ensure "Run" column is present in criteria_df
if "Run" not in criteria_df.columns:
    criteria_df.insert(0, "Run", range(1, len(criteria_df) + 1))

# Add the "Fitness" column from results_df (same rows, matched by position)
criteria_df["Fitness"] = results_df["Fitness"].iloc[:n_export_rows].values

# Select the required columns
export_columns_criteria = ["Run", "RMSE", "R2", "PBIAS", "NSE", "Fitness"]

try:
    # Append to an existing workbook so sheets written by other cells/notebooks are kept.
    # if_sheet_exists="replace" (pandas >= 1.3) makes this cell safe to re-run: without it,
    # a second run raises ValueError because the sheet is already present.
    with pd.ExcelWriter(excel_filename, engine="openpyxl", mode="a",
                        if_sheet_exists="replace") as writer:
        criteria_df[export_columns_criteria].to_excel(writer, sheet_name=criteria_sheet_name, index=False)
except FileNotFoundError:
    # If the file does not exist, create a new one
    with pd.ExcelWriter(excel_filename, engine="openpyxl", mode="w") as writer:
        criteria_df[export_columns_criteria].to_excel(writer, sheet_name=criteria_sheet_name, index=False)

print(f"Criteria successfully saved to {excel_filename} in sheet {criteria_sheet_name}.")