# In [None]:  (notebook cell marker left by the export; commented out so the file parses as Python)
import pandas as pd
import numpy as np
from scipy.integrate import solve_ivp
from deap import base, creator, tools, algorithms
import random
import multiprocessing

# Define the ASM1 function
def ASM1(t, y, mu_AOB, mu_NOB, K_O2_NOB, K_NO2_NOB, mu_AOB_HAO=None):
    """Return the time derivatives of the 14-state activated-sludge model.

    The model combines 14 process rates with a 14 x 14 stoichiometric matrix
    (one row per process, one column per state variable); the derivative of
    each state is the rate-weighted sum of its column.

    Args:
        t: Current time. Unused (the system is autonomous) but required by
            the ``solve_ivp`` callback signature.
        y: Sequence of the 14 state values, in this order:
            S_O2, S_s, S_NH4, S_NH2OH, S_NO2, S_NO3, S_NO, S_N2O, S_N2,
            X_s, X_H, X_AOB, X_NOB, X_I.
        mu_AOB: Maximum rate of the first AOB step (NH4 -> NH2OH).
        mu_NOB: Maximum growth rate of NOB.
        K_O2_NOB: Oxygen half-saturation constant for NOB.
        K_NO2_NOB: Nitrite half-saturation constant for NOB.
        mu_AOB_HAO: Maximum rate used by the three NH2OH/NO-dependent AOB
            processes. When omitted it falls back to ``mu_AOB``.
            NOTE(review): the original code referenced this name without
            ever defining it (NameError on every call); reusing ``mu_AOB``
            is an assumption -- confirm or pass the calibrated value.

    Returns:
        list: 14 derivatives, in the same order as ``y``.
    """
    S_O2, S_s, S_NH4, S_NH2OH, S_NO2, S_NO3, S_NO, S_N2O, S_N2, X_s, X_H, X_AOB, X_NOB, X_I = y

    if mu_AOB_HAO is None:
        mu_AOB_HAO = mu_AOB

    # Stoichiometric parameters
    Y_AOB = 0.150  # Yield coefficient for AOB, g COD/g N
    Y_NOB = 0.041  # Yield coefficient for NOB, g COD/g N
    Y_H = 0.6      # Yield coefficient for HB, g COD/g COD
    i_NBM = 0.07   # Nitrogen content of biomass, g N/g COD
    i_NXI = 0.02   # Nitrogen content of XI, g N/g COD
    i_NXS = 0.04   # Nitrogen content of XS, g N/g COD
    f_I = 0.10     # Fraction of XI in biomass decay, g COD/g COD

    # Fixed AOB kinetics
    eta_AOB = 0.285
    b_AOB = 0.0054

    K_S1_O2_AOB = 0.4
    K_S2_O2_AOB = 0.073
    K_NH4_AOB = 2.4
    K_NH2OH_AOB = 2.4
    K_NO_AOB = 0.0084

    # Fixed NOB kinetics (K_O2_NOB and K_NO2_NOB are optimized parameters)
    b_NOB = 0.0025

    # Fixed heterotroph kinetics
    k_H = 0.125
    K_X = 1.0
    mu_H = 0.26
    b_H = 0.017

    eta_H1 = 0.28
    eta_H2 = 0.16
    eta_H3 = 0.35
    eta_H4 = 0.35

    K_OH1 = 0.1
    K_OH2 = 0.1
    K_OH3 = 0.1
    K_OH4 = 0.1
    K_OH5 = 0.1

    K_S1 = 20
    K_S2 = 20
    K_S3 = 20
    K_S4 = 20
    K_S5 = 40

    K_NO3_HB = 0.2
    K_NO2_HB = 0.2
    K_NO_HB = 0.05
    K_N2O_HB = 0.05

    K_I1_NO_HB = 0.5
    K_I2_NO_HB = 0.3
    K_I3_NO_HB = 0.075

    # Stoichiometric matrix: row r belongs to rates[r], column c to y[c].
    stoichiometry = np.array([
        # 0: AOB, NH4 -> NH2OH
        [-1.14, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        # 1: AOB growth, NH2OH -> NO
        [-(1.71 - Y_AOB) / Y_AOB, 0, -i_NBM, -1 / Y_AOB, 0, 0, 1/Y_AOB, 0, 0, 0, 0, 1, 0, 0],
        # 2: AOB, NO -> NO2
        [-(0.57 - Y_AOB) / Y_AOB, 0, -i_NBM, 0, 1/Y_AOB, 0, -1/Y_AOB, 0, 0, 0, 0, 0, 0, 0],
        # 3: AOB, NH2OH + NO -> NO2 + N2O
        [0, 0, 0, -1, 1, 0, -4, 4, 0, 0, 0, 0, 0, 0],
        # 4: AOB decay
        [0, 0, i_NBM - i_NXI*f_I, 0, 0, 0, 0, 0, 0, 1 - f_I, 0, -1, 0, f_I],
        # 5: NOB growth, NO2 -> NO3
        [-(1.14 - Y_NOB)/ Y_NOB, 0, -i_NBM, 0, -1 / Y_NOB, 1 / Y_NOB, 0, 0, 0, 0, 0, 0, 1, 0],
        # 6: NOB decay
        [0, 0, i_NBM - i_NXI*f_I, 0, 0, 0, 0, 0, 0, 1-f_I, 0, 0, -1, f_I],
        # 7: hydrolysis, X_s -> S_s
        [0, 1, i_NXS, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0],
        # 8: heterotroph growth on O2
        [-(1-Y_H)/Y_H, -1/Y_H, -i_NBM, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        # 9: heterotroph growth, NO3 -> NO2
        [0, -1/Y_H, -i_NBM, 0, (1-Y_H)/(1.14*Y_H), -(1-Y_H)/(1.14*Y_H), 0, 0, 0, 0, 1, 0, 0, 0],
        # 10: heterotroph growth, NO2 -> NO
        [0, -1/Y_H, -i_NBM, 0, -(1-Y_H)/(0.57*Y_H), 0, (1-Y_H)/(0.57*Y_H), 0, 0, 0, 1, 0, 0, 0],
        # 11: heterotroph growth, NO -> N2O
        [0, -1/Y_H, -i_NBM, 0, 0, 0, -(1-Y_H)/(0.57*Y_H), (1-Y_H)/(0.57*Y_H), 0, 0, 1, 0, 0, 0],
        # 12: heterotroph growth, N2O -> N2
        [0, -1/Y_H, -i_NBM, 0, 0, 0, 0, -(1-Y_H)/(0.57*Y_H), (1-Y_H)/(0.57*Y_H), 0, 1, 0, 0, 0],
        # 13: heterotroph decay
        [0, 0, i_NBM - i_NXI*f_I, 0, 0, 0, 0, 0, 0, 1-f_I, -1, 0, 0, f_I],
    ], dtype=float)

    # Hydrolysis: k_H * (X_s/X_H) / (K_X + X_s/X_H) * X_H, multiplied through
    # by X_H so that a vanished heterotroph population gives a zero rate
    # instead of a division by zero.
    hydrolysis_denominator = K_X * X_H + X_s
    if hydrolysis_denominator != 0:
        hydrolysis = k_H * X_s * X_H / hydrolysis_denominator
    else:
        hydrolysis = 0.0

    # Process rates, in the same order as the rows of the matrix above.
    rates = np.array([
        mu_AOB * S_O2 / (K_S1_O2_AOB + S_O2) * (S_NH4 / (K_NH4_AOB + S_NH4)) * X_AOB,
        (mu_AOB_HAO * S_O2) / (K_S2_O2_AOB + S_O2) * (S_NH2OH / (K_NH2OH_AOB + S_NH2OH)) * X_AOB,
        (mu_AOB_HAO * S_O2) / (K_S2_O2_AOB + S_O2) * (S_NO / (K_NO_AOB + S_NO)) * X_AOB,
        eta_AOB * (mu_AOB_HAO * S_NO) / (K_NO_AOB + S_NO) * S_NH2OH / (K_NH2OH_AOB + S_NH2OH) * X_AOB,
        b_AOB * X_AOB,
        mu_NOB * S_O2 * S_NO2 * X_NOB / (K_O2_NOB + S_O2) / (K_NO2_NOB + S_NO2),
        b_NOB * X_NOB,
        hydrolysis,
        (mu_H * S_O2) / (K_OH1 + S_O2) * (S_s / (K_S1 + S_s)) * X_H,
        mu_H * eta_H1 * (K_OH2 / (K_OH2 + S_O2)) * (S_NO3 / (K_NO3_HB + S_NO3)) * (S_s / (K_S2 + S_s)) * X_H,
        mu_H * eta_H2 * (K_OH3 / (K_OH3 + S_O2)) * (S_NO2 / (K_NO2_HB + S_NO2)) * (S_s / (K_S3 + S_s)) * (K_I1_NO_HB / (K_I1_NO_HB + S_NO)) * X_H,
        mu_H * eta_H3 * (K_OH4 / (K_OH4 + S_O2)) * (S_NO / (K_NO_HB + S_NO + S_NO * S_NO / K_I2_NO_HB)) * (S_s / (K_S4 + S_s)) * X_H,
        mu_H * eta_H4 * (K_OH5 / (K_OH5 + S_O2)) * (S_N2O / (K_N2O_HB + S_N2O)) * (S_s / (K_S5 + S_s)) * (K_I3_NO_HB / (K_I3_NO_HB + S_NO)) * X_H,
        b_H * X_H,
    ], dtype=float)

    # dy[c] = sum over processes r of rates[r] * stoichiometry[r, c]
    return list(rates @ stoichiometry)

# Load data: one table of influent values (used as ODE initial conditions)
# and one table of observations per sampling location. Row i of every table
# is treated as belonging to the same sample.
influent_data = pd.read_csv("influent.csv")
(
    location1_data,
    location2_data,
    location3_data,
    location4_data,
    location5_data,
    location6_data,
) = (pd.read_csv(f"location{_k}.csv") for _k in range(1, 7))

# Check data consistency. Raised explicitly rather than asserted so the
# check is not stripped when Python runs with -O.
num_samples = len(influent_data)
for _k, _frame in enumerate(
    (
        location1_data,
        location2_data,
        location3_data,
        location4_data,
        location5_data,
        location6_data,
    ),
    start=1,
):
    if len(_frame) != num_samples:
        raise ValueError(f"Location {_k} data row count does not match influent data.")
del _k, _frame

# Reaction times corresponding to locations 1-6
reaction_times = [3, 6, 9, 12, 15, 18]  # in hours

# Define the fitness function
def evaluate_model(individual):
    """Score one candidate parameter set against the observed data.

    For every sample the model is integrated from the influent row over
    ``reaction_times`` and the output at the k-th time is compared with the
    same row of the k-th location table. The fitness is the sum, over all
    samples and locations, of the mean squared difference.

    NOTE(review): the comparison is element-wise, so it assumes every
    location table has the same columns, in the same order, as the model
    state vector -- confirm against the CSV files.

    Args:
        individual: Sequence ``(mu_AOB, mu_NOB, K_O2_NOB, K_NO2_NOB)``.

    Returns:
        tuple: One-element tuple with the total error, or ``(1e6,)`` when
        the parameters are non-positive, the solver fails, or the result is
        not a finite number.
    """
    mu_AOB, mu_NOB, K_O2_NOB, K_NO2_NOB = individual
    penalty = (1e6,)

    # Rates and half-saturation constants must be positive; crossover and
    # mutation can produce values that are not, and a non-positive
    # half-saturation constant can make a Monod denominator vanish.
    if min(mu_AOB, mu_NOB, K_O2_NOB, K_NO2_NOB) <= 0:
        return penalty

    # Loop-invariant inputs, hoisted out of the per-sample loop.
    t_span = (0, max(reaction_times))
    initial_states = influent_data.values
    observations = tuple(
        frame.values
        for frame in (
            location1_data,
            location2_data,
            location3_data,
            location4_data,
            location5_data,
            location6_data,
        )
    )

    total_mse = 0.0
    for index in range(num_samples):
        try:
            sol = solve_ivp(
                ASM1,
                t_span,
                initial_states[index],
                args=(mu_AOB, mu_NOB, K_O2_NOB, K_NO2_NOB),
                t_eval=reaction_times,
                method='LSODA'
            )
        except (ArithmeticError, ValueError):
            # Numerical breakdown for this parameter set. Anything else
            # (NameError, TypeError, ...) is a programming error and is
            # deliberately allowed to propagate instead of being scored.
            return penalty

        if not sol.success:
            # Assign a large error if the solver did not converge
            return penalty

        # sol.y has one column per evaluation time; transposing pairs each
        # time point with the table for the matching location.
        for model_output, observed_table in zip(sol.y.T, observations):
            total_mse += np.mean((model_output - observed_table[index]) ** 2)

    # A NaN/inf fitness would corrupt min/max statistics and selection.
    if not np.isfinite(total_mse):
        return penalty

    return (float(total_mse),)

# Set up the Genetic Algorithm
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))  # Minimizing MSE
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Define parameter ranges based on domain knowledge
mu_AOB_min, mu_AOB_max = 0.05, 1.0        # Adjust as needed
mu_NOB_min, mu_NOB_max = 0.05, 1.0
K_O2_NOB_min, K_O2_NOB_max = 0.1, 10.0   # Adjust as needed
K_NO2_NOB_min, K_NO2_NOB_max = 0.1, 10.0  # Adjust as needed

# Attribute generators: one uniform draw per parameter, within its range
toolbox.register("attr_mu_AOB", random.uniform, mu_AOB_min, mu_AOB_max)
toolbox.register("attr_mu_NOB", random.uniform, mu_NOB_min, mu_NOB_max)
toolbox.register("attr_K_O2_NOB", random.uniform, K_O2_NOB_min, K_O2_NOB_max)
toolbox.register("attr_K_NO2_NOB", random.uniform, K_NO2_NOB_min, K_NO2_NOB_max)

# Structure initializers. An individual is the four attributes in the order
# (mu_AOB, mu_NOB, K_O2_NOB, K_NO2_NOB); a population is a list of them.
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_mu_AOB, toolbox.attr_mu_NOB, toolbox.attr_K_O2_NOB, toolbox.attr_K_NO2_NOB), n=1)
# DEAP provides initRepeat / initIterate / initCycle; there is no
# tools.initPopulation, so the previous registration raised AttributeError.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Register the evaluation function
toolbox.register("evaluate", evaluate_model)

# Register genetic operators
toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.1, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)


def _clip_to_bounds(lower, upper):
    """Return a decorator that clamps every gene of an operator's offspring.

    Blend crossover and Gaussian mutation are unbounded, so without this the
    search can leave the ranges declared above (including into negative
    values). Genes are clamped in place and the operator's return value is
    passed through unchanged.
    """
    def decorator(operator):
        def wrapper(*args, **kwargs):
            offspring = operator(*args, **kwargs)
            for child in offspring:
                for gene, (low, high) in enumerate(zip(lower, upper)):
                    child[gene] = min(max(child[gene], low), high)
            return offspring
        return wrapper
    return decorator


_lower_bounds = (mu_AOB_min, mu_NOB_min, K_O2_NOB_min, K_NO2_NOB_min)
_upper_bounds = (mu_AOB_max, mu_NOB_max, K_O2_NOB_max, K_NO2_NOB_max)
toolbox.decorate("mate", _clip_to_bounds(_lower_bounds, _upper_bounds))
toolbox.decorate("mutate", _clip_to_bounds(_lower_bounds, _upper_bounds))

def main(output_path="C:/Users/Van/Desktop/ga_results.txt"):
    """Run the genetic-algorithm calibration and save the best parameters.

    The initial population is evaluated first so that the first tournament
    selection compares real fitness values. Each generation then applies
    selection, crossover and mutation, re-evaluates the changed individuals
    in a process pool, and replaces the population. The run stops after
    ``num_generations`` or once the generation minimum has failed to improve
    by more than ``improvement_threshold`` for ``patience`` generations.

    The reported individual is the best one seen in any generation, not
    just the best of the final population (there is no elitism, so the
    final population may have lost it).

    Args:
        output_path: File the summary is written to. Defaults to the path
            the script originally used.

    Returns:
        The best individual found.
    """
    population_size = 50
    num_generations = 100  # Upper bound on the number of generations
    cx_prob = 0.7  # Crossover probability
    mut_prob = 0.2  # Mutation probability
    patience = 10  # Number of generations with no improvement to wait before stopping
    improvement_threshold = 1e-6  # Minimum improvement to reset patience

    pop = toolbox.population(n=population_size)
    hall_of_fame = tools.HallOfFame(1)  # keeps a copy of the best-ever individual

    # Every pool.map call below blocks until its results are in, so the
    # context manager never cuts off outstanding work on exit; it also
    # releases the workers if a generation raises.
    with multiprocessing.Pool() as pool:
        for ind, fit in zip(pop, pool.map(toolbox.evaluate, pop)):
            ind.fitness.values = fit
        hall_of_fame.update(pop)

        # Initialize tracking variables
        best_fitness = hall_of_fame[0].fitness.values[0]
        no_improve_count = 0

        for gen in range(num_generations):
            # Select the next generation individuals; clone so the operators
            # below cannot modify members of the current population.
            offspring = [toolbox.clone(ind) for ind in toolbox.select(pop, len(pop))]

            # Apply crossover on the offspring (adjacent pairs)
            for child1, child2 in zip(offspring[::2], offspring[1::2]):
                if random.random() < cx_prob:
                    toolbox.mate(child1, child2)
                    del child1.fitness.values
                    del child2.fitness.values

            # Apply mutation on the offspring
            for mutant in offspring:
                if random.random() < mut_prob:
                    toolbox.mutate(mutant)
                    del mutant.fitness.values

            # Evaluate the individuals with an invalid fitness
            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
            fitnesses = pool.map(toolbox.evaluate, invalid_ind)
            for ind, fit in zip(invalid_ind, fitnesses):
                ind.fitness.values = fit

            # Replace the current population with the offspring
            pop[:] = offspring
            hall_of_fame.update(pop)

            # Gather all the fitnesses in one list and print stats
            fits = [ind.fitness.values[0] for ind in pop]
            min_fit = min(fits)
            avg_fit = np.mean(fits)
            max_fit = max(fits)

            print(f"Generation {gen+1}: Min MSE = {min_fit}, Avg MSE = {avg_fit}, Max MSE = {max_fit}")

            # Check for improvement
            if best_fitness - min_fit > improvement_threshold:
                best_fitness = min_fit
                no_improve_count = 0
            else:
                no_improve_count += 1
                print(f"No significant improvement, patience count: {no_improve_count}/{patience}")

            # Check if patience has been exhausted
            if no_improve_count >= patience:
                print(f"No improvement in the last {patience} generations. Stopping optimization.")
                break

    # Retrieve and print the best individual seen during the whole run
    best_ind = hall_of_fame[0]
    print(f"Best Individual: {best_ind}")
    print(f"Best Fitness (Total MSE): {best_ind.fitness.values[0]}")

    # Save the results to a file
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(f"Best Individual: {best_ind}\n")
        f.write(f"Best Fitness (Total MSE): {best_ind.fitness.values[0]}\n")

    return best_ind

# Script entry point: the guard keeps main() (which starts a multiprocessing
# pool) from running when this module is imported rather than executed.
if __name__ == "__main__":
    main()
