In [None]:
# Import necessary libraries

import numpy as np
import pandas as pd
from pycaret.regression import *
! pip3 install pygad
import pygad

In [None]:
# Read the raw experimental records from the CSV file in the working directory
df = pd.read_csv("Database.csv")

# Keep only the columns handed to the regression experiment:
# six operating-condition columns followed by the water-flux column.
df_optimize = df.loc[:, [
    'MB Contact Angle (°)',
    'DS MW (g mol-1)',
    'DS Concentration (M)',
    'Operating Time (h)',
    'Cross Flow Velocity (cm/s)',
    'Temperature (⁰C)',
    'Water Flux, Jw (LMH)',
]]

In [None]:
# Configure the PyCaret regression experiment on the selected columns.
# The water-flux column is the prediction target; session_id fixes the
# experiment seed so reruns use the same value.
setup(
    data=df_optimize,
    target='Water Flux, Jw (LMH)',
    session_id=123,
)

In [None]:
# Define the fitness function

# Feature columns, listed in the same order as the genes of a GA solution.
FEATURE_COLUMNS = [
    'MB Contact Angle (°)',
    'DS MW (g mol-1)',
    'DS Concentration (M)',
    'Operating Time (h)',
    'Cross Flow Velocity (cm/s)',
    'Temperature (⁰C)',
]

# Cache for the trained regressor; filled on the first fitness evaluation.
_flux_model = None


def _get_flux_model():
    """Return the gradient-boosting regressor, training it only once."""
    global _flux_model
    if _flux_model is None:
        # The model does not depend on the candidate solution, so training it
        # inside every fitness evaluation would only repeat identical work.
        _flux_model = create_model('gbr')
    return _flux_model


def fitness_function(ga_instance, variables, solution_idx):
    """Score one candidate set of operating conditions by its predicted water flux.

    Parameters
    ----------
    ga_instance : pygad.GA
        The running GA instance (unused; required by the PyGAD callback signature).
    variables : sequence of float
        Candidate values, ordered as in ``FEATURE_COLUMNS``.
    solution_idx : int
        Index of the candidate in the population (unused).

    Returns
    -------
    float
        Predicted water flux for the candidate. PyGAD maximises the fitness
        value, so the prediction is returned without a sign flip.

    Raises
    ------
    ValueError
        If ``variables`` does not hold exactly one value per feature column.
    """
    if len(variables) != len(FEATURE_COLUMNS):
        raise ValueError(
            f"Expected {len(FEATURE_COLUMNS)} variables, got {len(variables)}"
        )

    # Build a one-row frame for the candidate instead of overwriting the
    # loaded dataset, so the original data stays intact between evaluations.
    candidate = pd.DataFrame(
        [{name: float(value) for name, value in zip(FEATURE_COLUMNS, variables)}],
        columns=FEATURE_COLUMNS,
    )

    # Make the prediction with the cached model
    predictions = predict_model(_get_flux_model(), data=candidate)

    # The model output lives in a dedicated prediction column, not in the
    # target column: 'prediction_label' in PyCaret 3.x, 'Label' in PyCaret 2.x.
    if 'prediction_label' in predictions.columns:
        prediction_column = 'prediction_label'
    else:
        prediction_column = 'Label'

    return float(predictions[prediction_column].iloc[0])

In [None]:
# Search range for every operating condition: one [lower, upper] row per gene
bounds = np.array(
    [
        [0.0, 110.0],   # MB Contact Angle (°)
        [40.0, 400.0],  # DS MW (g mol-1)
        [0.1, 5.0],     # DS Concentration (M)
        [0.5, 48.0],    # Operating Time (h)
        [5.0, 100.0],   # Cross Flow Velocity (cm/s)
        [10.0, 50.0],   # Temperature (⁰C)
    ],
    dtype=float,
)

# Genetic-algorithm settings: run length and population size ...
num_generations, population_size = 10, 50
# ... and the mutation / crossover rates
mutation_prob, crossover_prob = 0.1, 0.5

In [None]:
# Create an instance of the pygad.GA class
ga_instance = pygad.GA(num_generations=num_generations,
                       num_parents_mating=10,
                       sol_per_pop=population_size,
                       num_genes=len(bounds),
                       fitness_func=fitness_function,
                       gene_type=np.float32,
                       # A [low, high] pair would be read by PyGAD as two discrete
                       # allowed values; a {'low', 'high'} dict declares a
                       # continuous range for the gene instead.
                       gene_space=[{'low': float(low), 'high': float(high)}
                                   for low, high in bounds],
                       # mutation_prob is a probability in [0, 1];
                       # mutation_percent_genes expects a percentage, so 0.1
                       # there would mean 0.1 % of the genes.
                       mutation_probability=mutation_prob,
                       crossover_probability=crossover_prob,
                       random_mutation_min_val=0,
                       random_mutation_max_val=1,
                       # Fixed seed so that reruns of the notebook give the same search
                       random_seed=123)

# Run the genetic algorithm optimization
ga_instance.run()

In [None]:
# Get the best solution and its fitness value.
# best_solution() returns a (solution, fitness, index) triple; passing the
# fitness values of the last generation avoids re-evaluating the population.
best_solution, best_fitness, best_solution_idx = ga_instance.best_solution(
    pop_fitness=ga_instance.last_generation_fitness
)

# Print the best solution and its fitness value
print("Best Solution:", best_solution)
print("Best Fitness:", best_fitness)