In [20]:
import pandas as pd
import random
import numpy as np
from joblib import load

# Number of individuals in each generation
POPULATION_SIZE = 100

# Single seed for every source of randomness configured in this cell.
SEED = 42
# The genetic algorithm draws from the global `random` generator; seeding it
# here makes a fresh "Restart & Run All" reproduce the same search.
random.seed(SEED)

# Load model
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model files that come from a trusted source.
model = load("xgboost_model_90_10.pkl")

# Columns
# Inputs that are taken from the current row and never changed by the search.
fixed_columns = [
    'ni_in', 'fe_in', 'cao_in', 'al2o3_in', 's_m', 'bc', 'mc_kilnfeed',
    'fc_coal', 'gcv_coal', 'kg_tco', 'charge_kiln', 'tdo'
]
# Every model input (fixed + optimized).
input_columns = [
    'ni_in', 'fe_in', 'cao_in', 'al2o3_in', 's_m', 'bc', 'mc_kilnfeed',
    'fc_coal', 'gcv_coal', 'current_pry', 'current_sec1', 'current_sec2',
    'current_sec3', 'load', 'power_factor', 'realisasi_beban', 'rpm',
    'kg_tco', 'charge_kiln', 'tdo', 'pry_p', 'sec_p', 'pry_v', 'sec_v',
    'total_coal', 'total_fuel', 'a_f_ratio', 'reductor_consume', 't_tic162',
    't_tic163'
]
# Columns whose values in the current row serve as the prediction target.
output_columns = [
    'metal_temp', 'ni_met', 'c_met', 'si_met', 'fe_met', 's_met',
    'ni_slag', 'fe_slag', 't_kalsin', 'pic_161', 'loi_kalsin'
]

# Optimized columns (input_columns minus fixed_columns), in input_columns order
optimized_columns = [col for col in input_columns if col not in fixed_columns]

# Load dataframe; drop "time" without mutating in place so the cell is a
# plain expression of "file -> frame".
df = pd.read_csv("filtered_intersection_df_interpolated.csv").drop(columns=["time"])

# Per-column statistics of the optimized inputs; min/max bound the gene values.
col_stds = df[optimized_columns].std()
col_mins = df[optimized_columns].min()
col_maxs = df[optimized_columns].max()

# Pick one row as the current situation
current_row = df.sample(1, random_state=SEED).iloc[0]
actual_outputs = current_row[output_columns].values
current_total_coal = current_row['total_coal']
current_total_fuel = current_row['total_fuel']

In [21]:
# Sanity check: number of missing values in each column of the loaded frame.
print(df.isna().sum())

ni_in               0
fe_in               0
cao_in              0
al2o3_in            0
s_m                 0
bc                  0
mc_kilnfeed         0
fc_coal             0
gcv_coal            0
current_pry         0
current_sec1        0
current_sec2        0
current_sec3        0
load                0
power_factor        0
realisasi_beban     0
rpm                 0
kg_tco              0
charge_kiln         0
tdo                 0
pry_p               0
sec_p               0
pry_v               0
sec_v               0
total_coal          0
total_fuel          0
a_f_ratio           0
reductor_consume    0
t_tic162            0
t_tic163            0
metal_temp          0
ni_met              0
c_met               0
si_met              0
fe_met              0
s_met               0
ni_slag             0
fe_slag             0
t_kalsin            0
pic_161             0
loi_kalsin          0
dtype: int64


In [22]:
# Gene creation: random real values between min and max of each optimized column
def create_chromosome():
    """Build a random chromosome with one gene per optimized column.

    Each gene is drawn uniformly between that column's entries in
    ``col_mins`` and ``col_maxs``.

    Returns:
        list: gene values, ordered like ``optimized_columns``.
    """
    genes = []
    for name in optimized_columns:
        lower, upper = col_mins[name], col_maxs[name]
        genes.append(random.uniform(lower, upper))
    return genes

# Calculate fitness: difference between predicted and actual outputs, penalizing high fuel/coal usage
def calc_fitness(chromosome):
    """Score a candidate setting of the optimized columns.

    The chromosome is paired positionally with ``optimized_columns`` and laid
    over the ``fixed_columns`` values of ``current_row`` to form one model
    input row. The score is the summed absolute difference between the model's
    prediction for that row and ``actual_outputs``, plus a penalty of 10 per
    unit by which ``total_coal`` / ``total_fuel`` exceed the current row's
    values. ``main`` sorts ascending on this value, so smaller is better.

    Args:
        chromosome: sequence of values, one per entry of ``optimized_columns``.

    Returns:
        Prediction mismatch plus the coal/fuel penalty.
    """
    candidate = dict(zip(optimized_columns, chromosome))

    # Start from the fixed inputs of the current row, then overlay the candidate.
    features = current_row[fixed_columns].copy()
    for name in optimized_columns:
        features[name] = candidate[name]

    # Reorder to the booster's own feature-name order before predicting.
    feature_order = model.get_booster().feature_names
    model_input = pd.DataFrame([features[feature_order]])

    predicted = model.predict(model_input)[0]
    mismatch = abs(predicted - actual_outputs).sum()

    # Only usage above the current row's level is penalized; using less is free.
    consumption = (
        (candidate["total_coal"], current_total_coal),
        (candidate["total_fuel"], current_total_fuel),
    )
    penalty = 0
    for used, baseline in consumption:
        if used > baseline:
            penalty += (used - baseline) * 10  # Adjust penalty weight

    return mismatch + penalty

class Individual:
    """One candidate solution: a chromosome and its fitness."""

    def __init__(self, chromosome):
        """Store ``chromosome`` and evaluate it once with ``calc_fitness``."""
        self.chromosome = chromosome
        self.fitness = calc_fitness(chromosome)

    def mate(self, partner):
        """Create a child from this individual and ``partner``.

        For every gene position a random draw decides the source: below 0.40
        the gene comes from ``self``, from 0.40 up to 0.80 it comes from
        ``partner``, and otherwise it is replaced by a fresh uniform draw
        within that column's min/max range (mutation).

        Args:
            partner: the other parent ``Individual``.

        Returns:
            Individual: the child, with its fitness already evaluated.
        """
        offspring = []
        gene_pairs = zip(self.chromosome, partner.chromosome)
        for position, (own_gene, partner_gene) in enumerate(gene_pairs):
            roll = random.random()
            if roll >= 0.80:
                # mutation: resample this gene from its column's range
                column = optimized_columns[position]
                gene = random.uniform(col_mins[column], col_maxs[column])
            elif roll >= 0.40:
                gene = partner_gene
            else:
                gene = own_gene
            offspring.append(gene)
        return Individual(offspring)

def main(max_gen=200, seed=None):
    """Run the genetic algorithm and print the best parameter set found.

    Each generation the population is sorted by ascending fitness, the top 10%
    are carried over unchanged, and the remaining slots are filled with
    children of parents picked at random from the fitter half.

    Args:
        max_gen: number of generations to run.
        seed: if given, seeds the global ``random`` generator first so the
            run can be repeated; ``None`` leaves the generator state alone.

    Returns:
        None. Progress and results are printed.
    """
    if seed is not None:
        random.seed(seed)

    # Initialize population
    population = [Individual(create_chromosome()) for _ in range(POPULATION_SIZE)]

    elite_count = int(0.1 * POPULATION_SIZE)
    offspring_count = POPULATION_SIZE - elite_count
    # Parents come from the fitter half. This scales with POPULATION_SIZE
    # instead of a fixed 50; max(1, ...) keeps a one-individual population usable.
    parent_pool_size = max(1, POPULATION_SIZE // 2)

    print("Starting genetic algorithm...")

    for generation in range(1, max_gen + 1):
        population.sort(key=lambda ind: ind.fitness)
        best_ind = population[0]
        print(f"Gen {generation} | Best fitness: {best_ind.fitness:.4f}")

        # Generate new population: elites first, then offspring
        parents = population[:parent_pool_size]
        new_generation = population[:elite_count]
        for _ in range(offspring_count):
            parent1 = random.choice(parents)
            parent2 = random.choice(parents)
            new_generation.append(parent1.mate(parent2))

        population = new_generation

    # Final result: the last generation has not been sorted yet
    final_best = min(population, key=lambda ind: ind.fitness)

    # Compare current vs optimized values
    optimized_values = dict(zip(optimized_columns, final_best.chromosome))
    optimized_total_coal = optimized_values["total_coal"]
    optimized_total_fuel = optimized_values["total_fuel"]

    # The value after the arrow is current minus optimized: positive means a reduction.
    print("\n===== Comparison of Total Coal and Total Fuel =====")
    print(f"Total Coal (Current)   : {current_total_coal:.4f}")
    print(f"Total Coal (Optimized) : {optimized_total_coal:.4f} (↓ {current_total_coal - optimized_total_coal:.4f})")
    print(f"Total Fuel (Current)   : {current_total_fuel:.4f}")
    print(f"Total Fuel (Optimized) : {optimized_total_fuel:.4f} (↓ {current_total_fuel - optimized_total_fuel:.4f})")

    print("\n===== Optimized Parameters =====")
    for col, val in optimized_values.items():
        print(f"{col}: {val:.4f}")

# Entry point: __name__ is "__main__" when this code is executed directly
# (the captured output below shows the run starting from this cell).
if __name__ == "__main__":
    main()

Starting genetic algorithm...
Gen 1 | Best fitness: 2367.0101
Gen 2 | Best fitness: 2367.0101
Gen 3 | Best fitness: 2366.6642
Gen 4 | Best fitness: 2366.6642
Gen 5 | Best fitness: 2366.0612
Gen 6 | Best fitness: 2366.0612
Gen 7 | Best fitness: 2366.0612
Gen 8 | Best fitness: 2366.0612
Gen 9 | Best fitness: 2366.0612
Gen 10 | Best fitness: 2365.8973
Gen 11 | Best fitness: 2365.8973
Gen 12 | Best fitness: 2365.8973
Gen 13 | Best fitness: 2365.8973
Gen 14 | Best fitness: 2365.8973
Gen 15 | Best fitness: 2365.8973
Gen 16 | Best fitness: 2365.8973
Gen 17 | Best fitness: 2365.8973
Gen 18 | Best fitness: 2365.8287
Gen 19 | Best fitness: 2365.8287
Gen 20 | Best fitness: 2365.8287
Gen 21 | Best fitness: 2365.8287
Gen 22 | Best fitness: 2365.6470
Gen 23 | Best fitness: 2365.5700
Gen 24 | Best fitness: 2365.5700
Gen 25 | Best fitness: 2365.5700
Gen 26 | Best fitness: 2365.5700
Gen 27 | Best fitness: 2365.5700
Gen 28 | Best fitness: 2365.5700
Gen 29 | Best fitness: 2365.5700
Gen 30 | Best fitness: