# Capacity allocation

## 0. Load libraries

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import shutil

from functools import reduce
from mealpy import IntegerVar
from mealpy.evolutionary_based import DE, ES, GA
from mealpy.physics_based import SA
from mealpy.swarm_based import ABC, ACOR, GWO, PSO, WOA
from operator import mul
from pathlib import Path

from benchmarks.generator import get_revenue_behaviour_deprecated
from benchmarks.robin_railway import RevenueMaximization
from benchmarks.utils import sns_box_plot, sns_line_plot, int_input, get_schedule_from_supply, infer_line_stations, get_services_by_tsp_df, plot_marey_chart

from robin.scraping.entities import SupplySaver
from robin.services_generator.entities import ServiceGenerator
from robin.supply.entities import Supply
from src.entities import Solution
from src.timetabling_problem import MPTT

In [None]:
# Input configuration files
supply_config_path = Path("../../configs/generator/supply_config.yml")
demand_config_path = Path("../../configs/demand/demand.yml")
generator_config_path = Path("../../configs/generator/generator_config.yml")

# Output locations
generator_save_path = Path('../../data/generator/supply_dummy.yml')
supply_save_path = '../../configs/mealpy/'
robin_save_path = '../../data/output/robin/'
figures = '../figures/'

# Start from empty output directories: remove each one if it already exists,
# then recreate it (together with any missing parent directories).
for _output_dir in (generator_save_path.parent, Path(supply_save_path)):
    if _output_dir.exists():
        shutil.rmtree(_output_dir)
    _output_dir.mkdir(parents=True)

# The equivalent clean-up of the figures directory is currently disabled:
#if Path(figures).exists():
#    shutil.rmtree(figures)
#Path(figures).mkdir(parents=True)

In [None]:
# Seed passed to the service generator so the generated requests are repeatable
seed = 21

# NOTE(review): a Path object is always truthy, so this guard never skips the
# generation step; it is kept so the cell's structure stays unchanged.
if generator_config_path:
    n_services = int_input("Number of services to generate: ")
    generator = ServiceGenerator(supply_config_path=supply_config_path)
    # Bind the result to a descriptive name instead of `_`: in IPython `_`
    # holds the last displayed output, and assigning to it shadows that.
    generated_services = generator.generate(file_name=generator_save_path,
                                            path_config=generator_config_path,
                                            n_services=n_services,
                                            seed=seed)
    print(f'Number of service requests generated: {len(generated_services)}')

In [None]:
# Load the supply that the generator wrote to disk and build the per-TSP
# service table (later indexed by TSP name, column 'Number of Services').
supply = Supply.from_yaml(generator_save_path)
tsp_df = get_services_by_tsp_df(supply.services)

print(tsp_df)

In [None]:
# Derive the inputs of the optimisation problem from the generated supply:
# the requested schedule, the revenue behaviour and the line's stations.
print("Services: ", len(supply.services))
requested_schedule = get_schedule_from_supply(generator_save_path)
revenue_behaviour = get_revenue_behaviour_deprecated(supply)
lines = supply.lines
line = infer_line_stations(lines)

In [None]:
# Marey chart of the requested (not yet optimised) services.
# NOTE(review): the title and file name hard-code "25" although the number of
# services is entered interactively above - confirm they match the run.
plot_marey_chart(requested_supply=supply,
                 colors_by_tsp=True,
                 main_title="Marey chart - 25 requests",
                 plot_security_gaps=True,
                 security_gap=10,
                 save_path=Path('../../reports/mealpy/marey_chart_requests_25.pdf'))

## 1. Mealpy

In [None]:
# Timetabling problem instance optimised below; it provides the objective
# function and the variable boundaries used to build the mealpy problem.
# safe_headway matches the security_gap used in the Marey charts
# (presumably minutes - TODO confirm the unit).
mptt = MPTT(requested_schedule=requested_schedule,
            revenue_behaviour=revenue_behaviour,
            line=line,
            safe_headway=10)

In [None]:
# Display name -> mealpy optimizer class. The display names are also the keys
# of opti_params and the values stored in the 'Algo' column of the result CSVs.
mp_algos = {"Genetic Algorithm": GA.BaseGA,
            "Particle Swarm Optimization": PSO.OriginalPSO,
            "Simulated Annealing": SA.OriginalSA,
            "Differential Evolution": DE.OriginalDE,
            "Ant Colony Optimization Continuous (ACOR)": ACOR.OriginalACOR,
            "Covariance Matrix Adaptation Evolution Strategy": ES.CMA_ES,
            "Artificial Bee Colony": ABC.OriginalABC,
            "Grey Wolf Optimizer": GWO.OriginalGWO,
            "Whale Optimization Algorithm": WOA.OriginalWOA,
            "Hybrid Grey Wolf - Whale Optimization Algorithm": GWO.GWO_WOA}

In [None]:
# Per-algorithm constructor arguments (number of epochs and population size),
# keyed by the same display names as mp_algos. They are unpacked as keyword
# arguments for every algorithm except Simulated Annealing, which receives
# the product epoch * pop_size as its single positional argument.
opti_params = {"Genetic Algorithm": {"epoch": 500, "pop_size": 70},
               "Particle Swarm Optimization": {"epoch": 500, "pop_size": 80},
               "Simulated Annealing": {"epoch": 500, "pop_size": 90},
               "Differential Evolution": {"epoch": 300, "pop_size": 100},
               "Ant Colony Optimization Continuous (ACOR)": {"epoch": 500, "pop_size": 10},
               "Covariance Matrix Adaptation Evolution Strategy": {"epoch": 250, "pop_size": 80},
               "Artificial Bee Colony": {"epoch": 450, "pop_size": 60},
               "Grey Wolf Optimizer": {"epoch": 500, "pop_size": 100},
               "Whale Optimization Algorithm": {"epoch": 400, "pop_size": 80},
               "Hybrid Grey Wolf - Whale Optimization Algorithm": {"epoch": 450, "pop_size": 50}
               }

In [None]:
import os

# Largest population among all algorithms; it fixes the number of
# per-individual columns so every algorithm shares one population CSV schema.
max_pop_size = max(params["pop_size"] for params in opti_params.values())

runs = 5
seed_initializer = 29

# 0. Problem formulation
lb, ub = zip(*mptt.boundaries.real)

problem = {"obj_func": mptt.objective_function,
           "bounds": IntegerVar(lb=lb,
                                ub=ub),
           "minmax": "max",
           "save_population": True}

# CSV output files, written relative to the current working directory.
# NOTE(review): the analysis cells read '../history_results.csv' and
# '../population_results.csv' - confirm the working directory matches.
history_csv = 'history_results.csv'
population_csv = 'population_results.csv'

# Column schemas, defined once so the header row and the appended rows
# cannot drift apart.
history_columns = ['Run', 'Algo', 'Iteration', 'Fitness', 'ExecutionTime', 'Discrete',
                   'Real', 'Diversity', 'Exploitation', 'Exploration']
population_columns = (['Run', 'Algo', 'Iteration']
                      + [f'real_{i}' for i in range(max_pop_size)]
                      + [f'discrete_{i}' for i in range(max_pop_size)])

# Create each CSV file with its header row only if it does not exist yet;
# existing files are appended to, not truncated.
for csv_path, csv_columns in ((history_csv, history_columns),
                              (population_csv, population_columns)):
    if not os.path.exists(csv_path):
        pd.DataFrame(columns=csv_columns).to_csv(csv_path, sep=';', index=False)

# Main execution: every algorithm is run `runs` times
for algo_name, algo in mp_algos.items():

    # Reset the global NumPy seed so each algorithm starts its sequence of
    # run seeds from the same generator state
    np.random.seed(seed_initializer)

    print(f"Running {algo_name}")
    is_simulated_annealing = algo_name == "Simulated Annealing"
    for r in range(1, runs+1):
        print("\tRun: ", r)

        run_seed = np.random.randint(0, 1000)

        # Build and run the optimizer. Simulated Annealing is constructed with
        # one positional argument: the product of its configured values
        # (epoch * pop_size); the other algorithms take keyword arguments.
        if is_simulated_annealing:
            optimizer = algo(reduce(mul, opti_params[algo_name].values()))
        else:
            optimizer = algo(**opti_params[algo_name])
        optimizer.solve(problem, seed=run_seed)

        # Collect the global-best history, one row per recorded epoch
        history = optimizer.history
        run_history = []
        for i, data in enumerate(history.list_global_best):
            # The heuristic (discrete) schedule is derived from the solution
            # just loaded into mptt
            mptt.update_schedule(data.solution)
            run_history.append([
                r, algo_name, i, data.target.fitness, history.list_epoch_time[i],
                mptt.get_heuristic_schedule(), data.solution,
                history.list_diversity[i], history.list_exploitation[i],
                history.list_exploration[i]
            ])

        # Append this run's history to the CSV
        pd.DataFrame(run_history, columns=history_columns
                     ).to_csv(history_csv, sep=';', mode='a', header=False, index=False)

        # No population is stored for Simulated Annealing
        if is_simulated_annealing:
            continue

        # Collect the whole population of every epoch
        run_population = []
        for i, pop_epoch in enumerate(history.list_population):
            real = []
            discrete = []
            for individual in pop_epoch:
                mptt.update_schedule(individual.solution)
                real.append(individual.solution)
                discrete.append(mptt.get_heuristic_schedule())

            # Pad smaller populations with NaN so every row has the same width
            missing = max_pop_size - len(real)
            if missing > 0:
                real += [np.nan] * missing
                discrete += [np.nan] * missing

            run_population.append([r, algo_name, i, *real, *discrete])

        # Append this run's populations to the CSV
        pd.DataFrame(run_population, columns=population_columns
                     ).to_csv(population_csv, sep=';', mode='a', header=False, index=False)

## 2. Results analysis

In [None]:
df_history_old = pd.read_csv('../../reports/mealpy_untracked/df_history_5_seed.csv', sep=";")



In [None]:
# Load the per-epoch global-best history written by the optimisation cell.
# NOTE(review): that cell writes 'history_results.csv' to the working
# directory while this reads from the parent directory - confirm the path.
df_history = pd.read_csv('../history_results.csv', sep=";")

In [None]:
# Load the per-epoch populations written by the optimisation cell.
# NOTE(review): that cell writes 'population_results.csv' to the working
# directory while this reads from the parent directory - confirm the path.
df_population = pd.read_csv('../population_results.csv', sep=";", low_memory=False)

In [None]:

def transform_columns(df):
    """Parse the stringified array columns of a history frame, in place.

    Every column whose name contains "Real" is parsed from a bracketed,
    whitespace-separated string into an int32 array; every column whose name
    contains "Discrete" is parsed the same way into a boolean array (only the
    token "True" maps to True). The first rows of the result are printed.

    :param df: DataFrame whose matching columns hold strings like "[1 2 3]".
    :return: the same DataFrame object, with the matching columns replaced.
    """
    def _parse_real(text):
        # Drop the surrounding brackets, then read each token as a number
        return np.array([float(token) for token in text[1:-1].split()], dtype=np.int32)

    def _parse_discrete(text):
        return np.array([token == "True" for token in text[1:-1].split()], dtype=bool)

    for name_fragment, parser in (("Real", _parse_real), ("Discrete", _parse_discrete)):
        matching = df.filter(like=name_fragment).columns
        df[matching] = df[matching].apply(lambda col, parse=parser: col.map(parse))

    print(df.head())
    return df

df_history_old = transform_columns(df_history_old)

In [None]:
df_history = transform_columns(df_history)

In [None]:
df_population.head()

In [None]:
columns_to_transform = df_population.filter(like="real_").columns

def converter(x):
    """Convert one population cell into a numeric solution.

    :param x: a bracketed, whitespace-separated string such as "[1 2 3]",
        a float (the padding value of a missing individual) or an iterable
        of numbers.
    :return: an int32 array for strings, NaN for floats, and a list of
        floats for any other iterable.
    """
    if isinstance(x, float):
        return np.nan
    if not isinstance(x, str):
        return [float(item) for item in x]
    # Strip the brackets and parse the whitespace-separated tokens
    tokens = x[1:-1].split()
    return np.array([float(token) for token in tokens], dtype=np.int32)
    
df_population[columns_to_transform] = df_population[columns_to_transform].apply(
    lambda col: col.map(converter)
)

df_population.head()

In [None]:
# NOTE: MPTT is already imported at the top of the notebook; this re-import
# only matters when the cell is run on its own.
from src.timetabling_problem import MPTT

# Fresh timetabling problem instance used to re-evaluate stored solutions
mptt = MPTT(requested_schedule=requested_schedule,
            revenue_behaviour=revenue_behaviour,
            line=line,
            safe_headway=10)

# Algorithms kept for the population analysis
top_3_algos = ("Genetic Algorithm",
               "Ant Colony Optimization Continuous (ACOR)",
               "Differential Evolution")

# Keep only the population rows of the selected algorithms
df_pop = df_population[df_population['Algo'].isin(top_3_algos)]

set_filtered_algos = set(df_pop["Algo"])
print(f"Filtered algos: {set_filtered_algos}")

# Replace newline characters with spaces; replace() returns a new frame, so
# the in-place drop below does not touch df_population
df_pop = df_pop.replace(r'\n', ' ', regex=True)

# Drop the discrete_* columns; only the real_* solutions are re-evaluated
df_pop.drop(columns=[col for col in df_pop.columns if 'discrete' in col], inplace=True)

# Function to compute fitness
def fitness_from_real(x: "np.ndarray | float") -> float:
    """Return the fitness of one stored individual.

    :param x: the individual's solution vector, or a float for a padded
        (missing) individual.
    :return: NaN when ``x`` is a float, otherwise the value of
        ``mptt.objective_function(x)`` for the module-level ``mptt``.
    """
    # Padded cells were converted to float NaN and carry no solution
    if isinstance(x, float):
        return np.nan
    return mptt.objective_function(x)

# Iterate through the dataframe and apply the function with progress feedback
# Replace every stored individual by its fitness, one row at a time, and
# report progress whenever a new (Run, Algo) combination starts
real_columns = df_pop.columns[df_pop.columns.str.contains('real_')]
previous_run, previous_algo = None, None

for index, row in df_pop.iterrows():
    current_run, current_algo = row['Run'], row['Algo']

    same_combination = current_run == previous_run and current_algo == previous_algo
    if not same_combination:
        print(f"Processing new combination: Run = {current_run}, Algo = {current_algo}")
        previous_run, previous_algo = current_run, current_algo

    # Evaluate each individual of this row and write the fitness values back
    row_fitness = row[real_columns].map(fitness_from_real)
    df_pop.loc[index, real_columns] = row_fitness

print("Processing completed.")
df_pop

In [None]:
# Save df_pop
df_pop.to_csv('df_pop_5_opti_fit_top3_GA_ACOR_DE.csv', sep=";", index=False)

In [None]:
# Prepare dataframe for boxplot
# Reshape df_pop from wide (one column per individual) to long (one row per
# individual with a fitness value) for the boxplot
individual_columns = [column for column in df_pop.columns if 'real_' in column]

rows = []
for _, record in df_pop.iterrows():
    for column_name in individual_columns:
        fitness = record[column_name]
        # Padded individuals have no fitness and are left out
        if np.isnan(fitness):
            continue
        rows.append({'Run': record['Run'],
                     'Iteration': record['Iteration'],
                     'Fitness': fitness,
                     'Algorithm': record['Algo'],
                     'Individual': f'ID_{column_name}'})

df_boxplot = pd.DataFrame.from_dict(rows, orient='columns')
df_boxplot

In [None]:
# Keep only a few snapshot epochs (0-based iteration indices)
snapshot_iterations = [0, 99, 199, 299, 399, 499]

# Take an explicit copy so the column assignment below modifies an independent
# frame instead of a slice of df_boxplot (avoids SettingWithCopyWarning)
df_boxplot_filtered = df_boxplot[df_boxplot['Iteration'].isin(snapshot_iterations)].copy()

# Show epochs 1-based on the x axis
df_boxplot_filtered['Iteration'] = df_boxplot_filtered['Iteration'] + 1

sns_box_plot(df=df_boxplot_filtered,
             x_data='Iteration',
             y_data='Fitness',
             hue='Algorithm',
             title="Scattered Boxplot of Population for Top 3 Algorithms",
             x_label="Epoch",
             y_label="Fitness (Revenue)",
             save_path=Path('../../reports/mealpy/scattered_boxplot_top_3_25_opti_new.pdf'))

In [None]:
# Table with results by run: one row per (algorithm, run) built from the last
# recorded epoch of that run

sm = RevenueMaximization(requested_schedule=requested_schedule,
                         revenue_behaviour=revenue_behaviour,
                         line=line,
                         safe_headway=10)

service_tsps = {service.id: service.tsp.name for service in supply.services}

# Distinct TSP names in first-seen order. A set must not be used here: its
# iteration order is arbitrary, while the per-TSP values of each row follow
# dict insertion order, so headers and values could be misaligned.
tsp_names = list(dict.fromkeys(service_tsps.values()))

columns = ['Algorithm', 'Run', 'Revenue', 'Execution Time (s.)', 'Scheduled Trains', 'Delta DT (min.)', 'Delta TT (min.)']
columns += tsp_names

summary_df = pd.DataFrame(columns=columns)

algo_grouped_df = df_history.groupby('Algo')

for algo_name, algo_history in algo_grouped_df:
    for _, run_history in algo_history.groupby('Run'):
        run = run_history['Run'].iloc[-1]
        revenue = np.round(run_history['Fitness'].iloc[-1], 2)
        # Per-epoch times add up to the run's execution time
        execution_time = np.round(run_history['ExecutionTime'].sum(), 2)
        scheduled_trains_array = run_history['Discrete'].iloc[-1]
        scheduled_trains = int(sum(scheduled_trains_array))
        real_solution = run_history['Real'].iloc[-1]

        # Pass the boolean scheduling array as the discrete part (the same
        # shape used when the best solution is rebuilt later), not the count
        sm.update_schedule(Solution(real=real_solution, discrete=scheduled_trains_array))

        delta_dt = 0.0
        delta_tt = 0.0
        services_by_tsp = dict.fromkeys(tsp_names, 0)
        for i, service in enumerate(sm.requested_schedule):
            # Services that were not scheduled do not contribute
            if not scheduled_trains_array[i]:
                continue
            requested_stops = sm.requested_schedule[service]
            updated_stops = sm.updated_schedule[service]
            stops = list(requested_stops.keys())

            # Deviation at the first station (element [1] of its entry)
            departure_station = stops[0]
            delta_dt += abs(updated_stops[departure_station][1] -
                            requested_stops[departure_station][1])

            # Deviation accumulated over the intermediate stops only
            for stop in stops[1:-1]:
                delta_tt += abs(updated_stops[stop][1] - requested_stops[stop][1])

            services_by_tsp[service_tsps[service]] += 1

        # Share of each TSP's requested services that were scheduled
        percentages_by_tsp = {
            tsp: f"{np.round(count / tsp_df.loc[tsp, 'Number of Services'] * 100, 2)} %"
            for tsp, count in services_by_tsp.items()
        }
        row_data = [algo_name, run, revenue, execution_time,
                    scheduled_trains, np.round(delta_dt, 2), np.round(delta_tt, 2), *percentages_by_tsp.values()]
        summary_df.loc[len(summary_df)] = row_data

summary_df = summary_df.sort_values('Revenue', ascending=False)
display(summary_df)
print()
print()

In [None]:
summary_df.to_latex()

In [None]:
# Global status

def get_global_status(df: pd.DataFrame):
    """Print, for every algorithm, aggregate statistics over its runs.

    For each value of 'Algo' the function prints the total and per-run
    execution time, the final revenue and the number of scheduled trains
    (mean and standard deviation over runs), followed by the sum of the
    'canon' revenue of all services taken from the module-level ``sm``.

    :param df: history frame with columns 'Algo', 'Run', 'ExecutionTime',
        'Fitness' and 'Discrete' (boolean arrays).
    :return: None; the report is printed.
    """
    # Independent of the algorithm, so computed once
    max_revenue = sum(sm.revenue[service]['canon'] for service in sm.revenue)

    for algo_name, algo_history in df.groupby('Algo'):
        run_grouped_df = algo_history.groupby('Run')

        print(f"Global {algo_name} status:")

        # Execution time (mean and std.). 'ExecutionTime' is stored per epoch,
        # so a run's time is the sum over its epochs, as in the summary table.
        run_times = run_grouped_df['ExecutionTime'].sum()
        print(f'\tTotal execution time: {round(run_times.sum(), 4)} s.')
        print(f'\tExecution Time (by run) - Mean: {round(run_times.mean(), 4)} s. - Std: {round(run_times.std(), 4)} s.')

        # Revenue (mean and std.) of the last recorded epoch of each run
        run_revenues = run_grouped_df['Fitness'].last()
        print(f'\tRevenue - Mean: {round(run_revenues.mean(), 4)} - Std: {round(run_revenues.std(), 4)}')

        # Scheduled trains (mean and std.) in the last recorded epoch
        run_trains = run_grouped_df['Discrete'].last().apply(sum)
        print(f'\tScheduled Trains - Mean: {np.round(run_trains.mean())} - Std: {np.round(run_trains.std())}')

        print(f"\tMax Revenue: {max_revenue} - WARNING!: Scheduling all services could not be feasible")
        print()
        
get_global_status(df_history)

In [None]:

def get_global_status_as_dataframe(df: pd.DataFrame):
    """Summarise every algorithm's runs as one table row.

    :param df: history frame with columns 'Algo', 'Run', 'ExecutionTime',
        'Fitness' and 'Discrete'.
    :return: DataFrame with one row per algorithm (mean and standard deviation
        over runs of execution time, final fitness and scheduled trains),
        sorted by mean fitness in descending order with a fresh index.
    """
    def _summarise(algo_name, algo_rows):
        by_run = algo_rows.groupby('Run')
        # Run time is the sum of its per-epoch times; fitness and schedule
        # are taken from the last recorded epoch of each run
        times = by_run['ExecutionTime'].sum()
        revenues = by_run['Fitness'].last()
        trains = by_run['Discrete'].last().apply(sum)
        return {
            'Algorithm': algo_name,
            'Mean Execution Time (seconds)': round(times.mean(), 2),
            'Std Execution Time (seconds)': round(times.std(), 2),
            'Mean Fitness (revenue)': round(revenues.mean(), 2),
            'Std Fitness (revenue)': round(revenues.std(), 2),
            'Mean Scheduled Trains': int(round(trains.mean())),
            'Std Scheduled Trains': int(round(trains.std())),
        }

    summaries = [_summarise(algo_name, algo_rows) for algo_name, algo_rows in df.groupby('Algo')]

    # Sort by column 'Mean Fitness', best algorithm first
    ranked = pd.DataFrame(summaries).sort_values(by='Mean Fitness (revenue)', ascending=False)
    return ranked.reset_index(drop=True)

results_df = get_global_status_as_dataframe(df_history)
results_df

In [None]:
results_df.to_latex()

In [None]:
df_history_copy = df_history.copy()

# Plot against 1-based epochs using a shifted copy. df_history itself keeps
# its original 'Iteration' values: shifting it in place would silently change
# every later cell that filters or bins on 'Iteration', and would shift again
# each time this cell is re-run.
df_history_plot = df_history.assign(Iteration=df_history['Iteration'] + 1)

sns_line_plot(df=df_history_plot,
              x_data="Iteration",
              y_data="Fitness",
              hue="Algo",
              title="Convergence curves - Optimized hiperparameters",  # Title: convergence curves - optimized hyperparameters
              x_label="Epoch",
              y_label="Fitness (Revenue)",
              x_limit=(-1, 500),
              y_limit=(1500,4600),
              save_path=Path('../../reports/mealpy/mealpy_convergence_5_opti.pdf'),
              fig_size=(10, 9))

In [None]:
top_3_algos = ("Genetic Algorithm",
               "Ant Colony Optimization Continuous (ACOR)",
               "Differential Evolution")

# Global-best history of the selected algorithms only. The explicit copy makes
# the column assignment below act on an independent frame instead of a slice
# of df_history (avoids SettingWithCopyWarning).
df_history_top_3 = df_history[df_history['Algo'].isin(top_3_algos)].copy()

# Show epochs 1-based on the x axis.
# NOTE(review): if df_history['Iteration'] was already shifted to 1-based by
# an earlier cell, this adds a second offset - confirm.
df_history_top_3['Iteration'] = df_history_top_3['Iteration'] + 1

sns_line_plot(df=df_history_top_3,
              x_data="Iteration",
              y_data="Fitness",
              hue="Algo",
              title="Convergence curves - Top 3 Algorithms, Optimized hiperparameters",
              x_label="Epoch",
              y_label="Fitness (Revenue)",
              x_limit=(0, 500),
              y_limit=(2100,4600),
              legend_type="",
              save_path=Path('../../reports/mealpy/mealpy_convergence_5_opti_top_3.pdf'),
              fig_size=(10, 7))

In [None]:
df_history.head()

In [None]:
# Exploratory: for Simulated Annealing, keep every N-th fitness value of each
# run and concatenate the per-run lists (summing a Series of lists joins them).
N = 50  # Stride: number of recorded values to skip between kept samples

algo_1_vals = df_history[df_history['Algo'] == 'Simulated Annealing'] \
    .groupby(['Run', 'Iteration'])['Fitness'].last() \
    .groupby(level='Run').apply(lambda x: x.iloc[::N].tolist()).sum()

algo_1_vals

In [None]:
# Exploratory: best fitness over all runs at each iteration of the
# Genetic Algorithm
algo_df = df_history[df_history['Algo'] == 'Genetic Algorithm']

algo_vals = algo_df.groupby(['Iteration'])['Fitness'].max().tolist()

algo_vals

In [None]:
print(len(algo_vals))

In [None]:
# Exploratory: best Simulated Annealing fitness over all runs within each
# bin of N consecutive iteration indices (bin = Iteration // N)
algo_df = df_history[df_history['Algo'] == 'Simulated Annealing']

N = 50
algo_vals = algo_df.groupby(algo_df['Iteration'] // N)['Fitness'].max().tolist()

algo_vals

In [None]:
print(len(algo_vals))

In [None]:
def recortar_listas(lista1, lista2):
    """Trim two numeric sequences to their common NaN-free prefix.

    Both sequences are cut at the same position: the earliest index at which
    either of them holds NaN, or the end of the shorter one when that comes
    first.

    :param lista1: first sequence of numbers.
    :param lista2: second sequence of numbers.
    :return: tuple with both trimmed sequences as plain lists.
    """
    first, second = np.array(lista1), np.array(lista2)

    def _valid_prefix_length(values):
        # Index of the first NaN, or the full length when there is none
        nan_positions = np.nonzero(np.isnan(values))[0]
        return nan_positions[0] if len(nan_positions) > 0 else len(values)

    cut = min(_valid_prefix_length(first), _valid_prefix_length(second))
    return first[:cut].tolist(), second[:cut].tolist()

# Usage example
lista1 = [1.0, 2, 3, 4]
lista2 = [5, 6, np.nan, np.nan]

recortada1, recortada2 = recortar_listas(lista1, lista2)
print(recortada1, recortada2)

In [None]:
# Confusion matrix of statistical differences

N = 50
from scipy import stats

def get_epoch_last_fitness(df, name):
    """Return one algorithm's fitness values per epoch, runs concatenated.

    The last fitness recorded for every (Run, Iteration) pair is collected
    into one list per run and the per-run lists are joined. For
    'Simulated Annealing' only every N-th value of each run is kept, with N
    read from the module-level variable.

    :param df: history frame with columns 'Algo', 'Run', 'Iteration', 'Fitness'.
    :param name: algorithm name as stored in the 'Algo' column.
    :return: flat list of fitness values.
    """
    per_epoch = df.loc[df['Algo'] == name].groupby(['Run', 'Iteration'])['Fitness'].last()
    by_run = per_epoch.groupby(level='Run')
    if name == 'Simulated Annealing':
        per_run_lists = by_run.apply(lambda x: x.iloc[::N].tolist())
    else:
        per_run_lists = by_run.apply(list)
    # Summing a Series of lists concatenates them
    return per_run_lists.sum()

def get_epoch_best_fitness(df, name):
    """Return one algorithm's best fitness over all runs, per epoch.

    For 'Simulated Annealing' the iterations are first grouped into bins of
    N consecutive indices (Iteration // N, with the module-level N) and the
    best value of each bin is returned.

    :param df: history frame with columns 'Algo', 'Iteration', 'Fitness'.
    :param name: algorithm name as stored in the 'Algo' column.
    :return: list with the maximum fitness of every iteration (or bin).
    """
    selected = df.loc[df['Algo'] == name]
    if name == 'Simulated Annealing':
        grouping_key = selected['Iteration'] // N
    else:
        grouping_key = ['Iteration']
    return selected.groupby(grouping_key)['Fitness'].max().tolist()

def get_run_best_fitness(df, name):
    """Return the best fitness that one algorithm reached in each run.

    :param df: history frame with columns 'Algo', 'Run', 'Fitness'.
    :param name: algorithm name as stored in the 'Algo' column.
    :return: list with the maximum fitness of every run, ordered by run.
    """
    best_per_run = df.loc[df['Algo'] == name].groupby(['Run'])['Fitness'].max()
    return best_per_run.tolist()

# Best fitness of every run, computed once per algorithm instead of once per
# pair of algorithms
best_by_algo = {name: get_run_best_fitness(df_history, name) for name in mp_algos}

# Pairwise comparison of the algorithms' per-run best fitness samples.
# NOTE: despite the "wilcoxon" names, the values come from stats.kstest
# called with two samples.
matrix_wilcoxon = []
matrix_p_values = []
for algo_name in mp_algos:
    row_w = []
    row_p = []
    for algo_name_2 in mp_algos:
        # An algorithm is not compared with itself
        if algo_name == algo_name_2:
            row_w.append(np.nan)
            row_p.append(np.nan)
            continue

        stat, p_value = stats.kstest(best_by_algo[algo_name], best_by_algo[algo_name_2])
        # The statistic lies in [0, 1]; rounding to an integer would collapse
        # it to 0 or 1, so keep three decimals like the p-value
        row_w.append(round(stat, 3))
        row_p.append(round(p_value, 3))
    matrix_wilcoxon.append(row_w)
    matrix_p_values.append(row_p)

In [None]:
# Label the statistic matrix with the algorithm names on both axes.
# NOTE(review): the printed label says "Wilcoxon", but the matrix is filled
# from stats.kstest in the cell above - confirm which test is intended.
print("Wilcoxon Matrix")
df_wilcoxon = pd.DataFrame(matrix_wilcoxon, index=mp_algos.keys(), columns=mp_algos.keys())
print(df_wilcoxon)

In [None]:
print("P-Values Matrix")
df_p_values = pd.DataFrame(matrix_p_values, index=mp_algos.keys(), columns=mp_algos.keys())
print(df_p_values)

In [None]:
import matplotlib.pyplot as plt

# Heatmap of the pairwise test statistic stored in df_wilcoxon.
# NOTE(review): title and file name say "Wilcoxon", but the matrix is filled
# from stats.kstest - confirm which test is intended.
n_rows, n_cols = df_wilcoxon.shape

# Midpoint of the plotted values: cells above it are drawn on the bright half
# of the viridis scale and get dark text
stat_values = df_wilcoxon.to_numpy(dtype=float)
finite_values = stat_values[~np.isnan(stat_values)]
text_threshold = (finite_values.min() + finite_values.max()) / 2 if finite_values.size else 0.0

plt.figure(figsize=(10, 8))
plt.imshow(df_wilcoxon, cmap='viridis', interpolation='nearest')
plt.colorbar(label='Statistical difference')
# Tick positions follow the frame's shape instead of a hard-coded 10
plt.xticks(range(n_cols), df_wilcoxon.columns, rotation=30, ha='right')
plt.yticks(range(n_rows), df_wilcoxon.index)
plt.title('Confusion Matrix - Wilcoxon Statistic', fontweight='bold', fontsize=18)

for i in range(n_rows):
    for j in range(n_cols):
        value = df_wilcoxon.iloc[i, j]
        if np.isnan(value):
            # Special handling for NaN (self-comparison): dark grey cell
            # with white text
            display_value = "NaN"
            text_color = 'white'
            rect_color = (0.2, 0.2, 0.2)
            plt.gca().add_patch(plt.Rectangle((j - 0.5, i - 0.5), 1, 1, color=rect_color))
        else:
            # Three decimals: an integer format would truncate a [0, 1] value
            display_value = f"{value:.3f}"
            text_color = 'black' if value > text_threshold else 'white'

        plt.text(j, i, display_value, ha='center', va='center', color=text_color)

# Thin grid lines on the cell borders
ax = plt.gca()
ax.set_xticks(np.arange(-0.5, n_cols, 1), minor=True)
ax.set_yticks(np.arange(-0.5, n_rows, 1), minor=True)
ax.grid(which="minor", color='gray', linestyle='-', linewidth=0.5)
ax.tick_params(which="minor", size=0)

plt.tight_layout()
plt.savefig("../figures/wilcoxon_confusion_matrix_5_seed_opti_df_history.pdf", format='pdf', dpi=300, bbox_inches='tight', transparent=True)

plt.show()

In [None]:
# Heatmap of the pairwise p-values, on a fixed 0-1 colour scale
n_rows, n_cols = df_p_values.shape

plt.figure(figsize=(10, 8))
plt.imshow(df_p_values, cmap='viridis', vmin=0, vmax=1, interpolation='nearest')
plt.colorbar(label='Statistical difference')
# Tick positions follow the frame's shape instead of a hard-coded 10
plt.xticks(range(n_cols), df_p_values.columns, rotation=30, ha='right')
plt.yticks(range(n_rows), df_p_values.index)
plt.title('Confusion Matrix - P-values', fontweight='bold', fontsize=18)

for i in range(n_rows):
    for j in range(n_cols):
        value = df_p_values.iloc[i, j]
        if np.isnan(value):
            # Special handling for NaN (self-comparison): dark grey cell
            # with white text
            display_value = "NaN"
            text_color = 'white'
            rect_color = (0.2, 0.2, 0.2)
            plt.gca().add_patch(plt.Rectangle((j - 0.5, i - 0.5), 1, 1, color=rect_color))
        else:
            display_value = f"{value:.3f}"
            # Dark text on the brighter part of the colour scale
            text_color = 'black' if value > 0.4 else 'white'

        plt.text(j, i, display_value, ha='center', va='center', color=text_color)

# Thin grid lines on the cell borders
ax = plt.gca()
ax.set_xticks(np.arange(-0.5, n_cols, 1), minor=True)
ax.set_yticks(np.arange(-0.5, n_rows, 1), minor=True)
ax.grid(which="minor", color='gray', linestyle='-', linewidth=0.5)
ax.tick_params(which="minor", size=0)

plt.tight_layout()
plt.savefig("../figures/p_values_confusion_matrix_5_seed_opti_df_history.pdf", format='pdf', dpi=300, bbox_inches='tight', transparent=True)

plt.show()

In [None]:
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import pandas as pd

# Two-colour map split at 0.05: values in [0, 0.05) take the first colour,
# values in [0.05, 1] the second
cmap = colors.ListedColormap(['#b3e5b3', '#ffb3b3'])
bounds = [0, 0.05, 1]
norm = colors.BoundaryNorm(bounds, cmap.N)

# Short tick labels.
# NOTE(review): this list is hard-coded and must stay in the same order and
# length as the rows/columns of df_p_values - confirm when algorithms change.
names = ["GA", "PSO", "SA", "DE", "ACOR", "CMA-ES", "ABC", "GWO", "WOA", "GWO-WOA"]
plt.figure(figsize=(10, 10))
plt.imshow(df_p_values, cmap=cmap, norm=norm, interpolation='nearest')
# plt.colorbar(label='P-value', shrink=0.8)
plt.xticks(range(df_p_values.shape[1]), names, rotation=35, ha='right', fontsize=14)
plt.yticks(range(df_p_values.shape[0]), names, fontsize=14)
plt.title('Algorithm Pairwise Comparison: p-Values', fontweight='bold', fontsize=22)

# Write each p-value in its cell; NaN cells (self-comparisons) are covered
# with a grey rectangle and labelled "NaN"
for i in range(df_p_values.shape[0]):
    for j in range(df_p_values.shape[1]):
        value = df_p_values.iloc[i, j]
        if np.isnan(value):
            display_value = "NaN"
            text_color = 'white'
            rect_color = (0.5, 0.5, 0.5)
            plt.gca().add_patch(plt.Rectangle((j - 0.5, i - 0.5), 1, 1, color=rect_color))
        else:
            display_value = f"{value:.3f}"
            text_color = 'black'

        plt.text(j, i, display_value, ha='center', va='center', color=text_color, fontsize=14)

# Thin grid lines on the cell borders
ax = plt.gca()
ax.set_xticks(np.arange(-0.5, df_p_values.shape[1], 1), minor=True)
ax.set_yticks(np.arange(-0.5, df_p_values.shape[0], 1), minor=True)
ax.grid(which="minor", color='gray', linestyle='-', linewidth=0.5)
ax.tick_params(which="minor", size=0)

plt.tight_layout()
plt.savefig("../figures/p_values_confusion_matrix_improved_opti_df_history.pdf", format='pdf', dpi=300, bbox_inches='tight', transparent=True)
plt.show()

In [None]:
# Confusion matrix of statistical differences

N = 50
from scipy import stats

def get_run_best_fitness(df, name):
    """Return and print the best fitness one algorithm reached in each run.

    :param df: history frame with columns 'Algo', 'Run', 'Fitness'.
    :param name: algorithm name as stored in the 'Algo' column.
    :return: list with the maximum fitness of every run, ordered by run.
    """
    best_per_run = df.loc[df['Algo'] == name].groupby(['Run'])['Fitness'].max()
    algo_vals = best_per_run.tolist()
    # Echo the sample so the two compared data sets can be inspected
    print(algo_vals)
    return algo_vals

# Compare, algorithm by algorithm, the per-run best fitness of the previous
# results (df_history_old) with the current ones (df_history)
row_ks = {}
row_p_values = {}
for algo_name in mp_algos:
    algo_1_vals = get_run_best_fitness(df_history_old, algo_name)
    algo_2_vals = get_run_best_fitness(df_history, algo_name)
    stat, p_value = stats.kstest(algo_1_vals, algo_2_vals)
    # The statistic lies in [0, 1]; rounding to an integer would collapse it
    # to 0 or 1, so keep three decimals like the p-value
    row_ks[algo_name] = round(stat, 3)
    row_p_values[algo_name] = round(p_value, 3)

print(row_p_values)

In [None]:
# Pick one stored solution from df_history by algorithm, run and iteration.
# NOTE(review): 'Iteration' is matched against the values currently held in
# df_history; if an earlier cell shifted that column, 499 may not be the last
# epoch - confirm.
algo_name = 'Genetic Algorithm'
run = 4
iteration = 499

selection_mask = (
    (df_history["Algo"] == algo_name)
    & (df_history["Run"] == run)
    & (df_history["Iteration"] == iteration)
)
filtered_row = df_history[selection_mask]

# Build the Solution from the "Real" and "Discrete" values of the first match
if filtered_row.empty:
    print("Row not found matching the provided parameters.")
else:
    real_value = filtered_row["Real"].iloc[0]
    discrete_value = filtered_row["Discrete"].iloc[0]
    ga_best_sol = Solution(real=real_value, discrete=discrete_value)
    print(f"Real: {real_value}, Discrete: {discrete_value}")

In [None]:
# Rebuild the problem object and apply the selected solution to it
sm = RevenueMaximization(requested_schedule=requested_schedule,
                         revenue_behaviour=revenue_behaviour,
                         line=line,
                         safe_headway=10)

# Services of the generated supply updated with the selected solution
services = sm.update_supply(path=generator_save_path,
                            solution=ga_best_sol)

sm.update_schedule(solution=ga_best_sol)

# Keep only the services the solution marks as scheduled
# (filtered_services is only used by the disabled plotter below)
filtered_services = {}
for i, service in enumerate(sm.updated_schedule):
    if ga_best_sol.discrete[i]:
        filtered_services[service] = sm.updated_schedule[service]

#plotter = TrainSchedulePlotter(filtered_services, line)
#plotter.plot(plot_security_gaps=True, save_path=Path('../figures/updated.pdf'))

# Save the updated services as a supply YAML file named after the input
# supply configuration
tt_file_name = f'{supply_config_path.stem}_mealpy'
print(supply_save_path, tt_file_name)
SupplySaver(services).to_yaml(filename=f'{tt_file_name}.yml', save_path=supply_save_path)

# Copy previous file in reports
reports_path = Path('../../reports/mealpy/')
shutil.copyfile(f"{supply_save_path}{tt_file_name}.yml", reports_path / f'{tt_file_name}_25_mealpy_opti_hiper.yml')

# Path of the saved supply file, loaded again in the next cell
supply_config_file = Path(f'{supply_save_path}{tt_file_name}.yml')

In [None]:
final_supply = Supply.from_yaml(supply_config_file)

In [None]:
plot_marey_chart(requested_supply=final_supply,
                 colors_by_tsp=True,
                 main_title="Marey chart - GA solution, 4th run",
                 plot_security_gaps=True,
                 security_gap=10,
                 save_path=Path('../../reports/mealpy/marey_chart_ga_25_opti_hiper.pdf'))

In [None]:
# reports_path already points at '../../reports/mealpy/', so the file name is
# appended directly; an extra 'mealpy/' prefix would target the nested
# directory '../../reports/mealpy/mealpy/'.
shutil.copyfile(f"{supply_save_path}{tt_file_name}.yml", reports_path / f'{tt_file_name}_25.yml')

In [None]:
#!snakeviz profile.pstat