# CFLP Benchmark Analysis

In [None]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys
import os
import re

# Get the directory of the current notebook
notebook_dir = os.path.dirname(os.path.abspath("__file__"))
# Set the working directory to the project root (one level up from the notebook directory)
analysis_dir = os.path.join(notebook_dir, os.pardir)
project_root = os.path.join(analysis_dir, os.pardir)
os.chdir(project_root)

# You can verify the new working directory
print(f"Current Working Directory: {os.getcwd()}")

from notebooks.analysis.common_analysis_functions import (
    load_benchmark_data, parse_json_columns, filter_optimal_solutions,
    plot_objective_distribution, plot_solve_time_distribution,
    get_baseline_objective, read_baseline_log
)

# Configuration
CFLP_BENCHMARK_FILE = 'benchmark/CFLP/cflp_benchmark_25cust_25fac.parquet'
CFLP_MODEL_FILE_PATH = 'models/CFLP/capfacloc_model.py'
CFLP_MODEL_DATA_PATH = 'models/CFLP/data/capfacloc_data_25cust_25fac.json'

# Extract data configuration string (e.g., '10cust_10fac')
data_config_match = re.search(r'capfacloc_data_([\w_]+)\.json', CFLP_MODEL_DATA_PATH)
data_config_str = data_config_match.group(1) if data_config_match else 'default'

OUTPUT_PLOTS_DIR = Path(f'results/cflp_analysis_plots/{data_config_str}')
OUTPUT_PLOTS_DIR.mkdir(parents=True, exist_ok=True)

BASELINE_LOG_FILEPATH = Path(f'models/CFLP/baselines/baseline_cflp_log_{data_config_str}.csv')

print(f"Plots will be saved to: {OUTPUT_PLOTS_DIR}")
print(f"Baseline log will be saved to: {BASELINE_LOG_FILEPATH}")

Current Working Directory: /home/timpi/Projects/thesis/multi_agent_supply_chain_optimization
Current Working Directory: /home/timpi/Projects/thesis/multi_agent_supply_chain_optimization
Plots will be saved to: results/cflp_analysis_plots/25cust_25fac
Baseline log will be saved to: notebooks/baseline_cflp_log_25cust_25fac.csv


## 1. Load and Preprocess Data

In [2]:
cflp_df = load_benchmark_data(CFLP_BENCHMARK_FILE)
cflp_df = parse_json_columns(cflp_df, ['parameters', 'constraints', 'variables', 'modification', 'placeholder_values'])

print("\nDataFrame Info:")
cflp_df.info()
print("\nFirst 5 rows:")
print(cflp_df.head())

Successfully loaded data from benchmark/CFLP/cflp_benchmark_25cust_25fac.parquet. Shape: (39082, 14)

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39082 entries, 0 to 39081
Data columns (total 14 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   run_id                     39082 non-null  object 
 1   timestamp                  39082 non-null  object 
 2   status                     39082 non-null  object 
 3   objective_value            39082 non-null  float64
 4   model_file_path            39082 non-null  object 
 5   model_data_path            39082 non-null  object 
 6   parameters                 39082 non-null  object 
 7   constraints                39082 non-null  object 
 8   variables                  39082 non-null  object 
 9   pulp_model_execution_time  39082 non-null  float64
 10  modification               39082 non-null  object 
 11  scenario_text              39082 non-nul

## 2. Determine Baseline and Calculate Delta Objective

In [3]:
# Get baseline objective
baseline_obj_value = get_baseline_objective(CFLP_MODEL_FILE_PATH, CFLP_MODEL_DATA_PATH, baseline_log_filepath=str(BASELINE_LOG_FILEPATH))

if baseline_obj_value is not None:
    # Calculate delta_obj for optimal solutions
    cflp_df['delta_obj'] = cflp_df.apply(
        lambda row: row['objective_value'] - baseline_obj_value if row['status'] == 'Optimal' else None,
        axis=1
    )
    print(f"Baseline objective value: {baseline_obj_value}")
    print("Delta objective calculated.")
else:
    print("Warning: Baseline objective not available. Delta objectives not calculated.")
    cflp_df['delta_obj'] = None

Running baseline model from models/CFLP/capfacloc_model.py with data models/CFLP/data/capfacloc_data_25cust_25fac.json...

log - Running optimization model...

log - Executing model source code...

log - Model execution completed.

log - Extracting optimization results...

log - Optimization results extracted.
Logging run 'baseline_run' to 'notebooks/baseline_cflp_log_25cust_25fac.csv'...
Successfully logged 1 runs to notebooks/baseline_cflp_log_25cust_25fac.csv

log - Optimization Completed.
Baseline objective value: 1093.601814554
Baseline objective value: 1093.601814554
Delta objective calculated.


## 3. Overall Performance Analysis

In [4]:
# Filter for optimal solutions for objective/solve time analysis
optimal_cflp_df = filter_optimal_solutions(cflp_df)

if not optimal_cflp_df.empty:
    print("\nOptimal Solutions Summary:")
    print(optimal_cflp_df[['objective_value', 'pulp_model_execution_time', 'delta_obj']].describe())

    plot_objective_distribution(optimal_cflp_df, title='CFLP Objective Value Distribution (Optimal Solutions)',
                                save_path=OUTPUT_PLOTS_DIR / 'cflp_objective_distribution.png')
    plot_solve_time_distribution(optimal_cflp_df, title='CFLP Solve Time Distribution (Optimal Solutions)',
                                 save_path=OUTPUT_PLOTS_DIR / 'cflp_solve_time_distribution.png')
    
    # Plotting Delta Objective Value Distribution
    if 'delta_obj' in optimal_cflp_df.columns and optimal_cflp_df['delta_obj'].notna().any():
        plt.figure(figsize=(10, 6))
        sns.histplot(optimal_cflp_df['delta_obj'].dropna(), kde=True, bins=30)
        plt.title('Distribution of CFLP Delta Objective Values (Optimal Solutions)')
        plt.xlabel('Delta Objective Value')
        plt.ylabel('Frequency')
        plt.grid(True)
        plt.savefig(OUTPUT_PLOTS_DIR / 'cflp_delta_objective_distribution.png')
        plt.close()
    else:
        print("No valid delta objective values to plot.")

else:
    print("No optimal solutions found in the benchmark data for overall performance analysis.")


Optimal Solutions Summary:
       objective_value  pulp_model_execution_time     delta_obj
count     39082.000000               39082.000000  39082.000000
mean       1094.167224                   0.019181      0.565410
std          10.993677                   0.023122     10.993677
min         932.801815                   0.008178   -160.800000
25%        1093.601815                   0.011221      0.000000
50%        1093.601815                   0.014958      0.000000
75%        1093.601815                   0.020902      0.000000
max        1558.869823                   0.331887    465.268009


## 4. Scenario-Specific Analysis: Effects of Modifications

In [5]:
print("\nUnique Scenario Types:")
unique_scenario_types = cflp_df['scenario_type'].unique()
print(unique_scenario_types)

def plot_scenario_impact(df, scenario_type, x_param_key, x_label, title, plot_filename, baseline_value=None):
    """
    Generates a plot for a specific scenario type showing impact on objective value.
    """
    scenario_df = df[df['scenario_type'] == scenario_type].copy()
    scenario_df['varying_param'] = scenario_df['placeholder_values'].apply(lambda x: x.get(x_param_key) if isinstance(x, dict) else None)
    
    scenario_df_filtered = scenario_df.dropna(subset=['varying_param', 'objective_value']).sort_values(by='varying_param')

    if not scenario_df_filtered.empty:
        plt.figure(figsize=(12, 7))
        sns.lineplot(data=scenario_df_filtered, x='varying_param', y='objective_value', marker='o')
        
        if baseline_value is not None:
            plt.axhline(baseline_value, color='red', linestyle='--', label=f'Baseline Objective: {baseline_value:.2f}')
            plt.legend(loc='upper right')

        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel('Objective Value')
        plt.grid(True)
        plot_path = OUTPUT_PLOTS_DIR / plot_filename
        plt.savefig(plot_path)
        plt.close()
        print(f"Plot saved: {plot_path}")
    else:
        print(f"No valid data to plot for {scenario_type}.")

# --- Individual Scenario Plots ---

# 4.1 Demand Change - Customer Percentage
plot_scenario_impact(
    cflp_df,
    scenario_type='demand-change-customer-pct',
    x_param_key='P',
    x_label='Percentage Change of a Specific Customer Demand (in %)',
    title='Impact of Customer Demand Percentage Change on Objective Value',
    plot_filename='cflp_demand_change_customer_pct_impact.png',
    baseline_value=baseline_obj_value
)

# 4.2 Demand Increase - Customer Integer
plot_scenario_impact(
    cflp_df,
    scenario_type='demand-increase-customer-int',
    x_param_key='V',
    x_label='Customer Demand Value (in units)',
    title='Impact of Specific Customer Demand Value on Objective Value',
    plot_filename='cflp_demand_increase_customer_int_impact.png',
    baseline_value=baseline_obj_value
)

# 4.3 Demand Change - All Customers
plot_scenario_impact(
    cflp_df,
    scenario_type='demand-change-all',
    x_param_key='P',
    x_label='Percentage Change in All Demands (in %)',
    title='Impact of Overall Demand Change on Objective Value',
    plot_filename='cflp_demand_change_all_impact.png',
    baseline_value=baseline_obj_value
)

# 4.4 Capacity Change - Facility Percentage
plot_scenario_impact(
    cflp_df,
    scenario_type='capacity-change-facility-pct',
    x_param_key='P',
    x_label='Percentage Change in Facility Capacity (in %)',
    title='Impact of Facility Capacity Percentage Change on Objective Value',
    plot_filename='cflp_capacity_change_facility_pct_impact.png',
    baseline_value=baseline_obj_value
)

# 4.5 Capacity Change - Facility Integer
plot_scenario_impact(
    cflp_df,
    scenario_type='capacity-change-facility-int',
    x_param_key='V',
    x_label='New Facility Capacity Value (in units)',
    title='Impact of Specific Facility Capacity Value on Objective Value',
    plot_filename='cflp_capacity_change_facility_int_impact.png',
    baseline_value=baseline_obj_value
)

# 4.6 Capacity Percentage Change - All Facilities
plot_scenario_impact(
    cflp_df,
    scenario_type='capacity-pct-change-all',
    x_param_key='P',
    x_label='Percentage Change in All Capacities (in %)',
    title='Impact of Overall Capacity Change on Objective Value',
    plot_filename='cflp_capacity_pct_change_all_impact.png',
    baseline_value=baseline_obj_value
)

# 4.7 Fixed Cost Change - Facility Percentage
plot_scenario_impact(
    cflp_df,
    scenario_type='fixed-cost-change-facility-pct',
    x_param_key='P',
    x_label='Percentage Change in Facility Fixed Cost (in %)',
    title='Impact of Facility Fixed Cost Percentage Change on Objective Value',
    plot_filename='cflp_fixed_cost_change_facility_pct_impact.png',
    baseline_value=baseline_obj_value
)

# 4.8 Fixed Cost Change - Facility Integer
plot_scenario_impact(
    cflp_df,
    scenario_type='fixed-cost-change-facility-int',
    x_param_key='V',
    x_label='New Facility Fixed Cost Value (in cost)',
    title='Impact of Specific Facility Fixed Cost Value on Objective Value',
    plot_filename='cflp_fixed_cost_change_facility_int_impact.png',
    baseline_value=baseline_obj_value
)

# 4.9 Fixed Cost Percentage Change - All Facilities
plot_scenario_impact(
    cflp_df,
    scenario_type='fixed-cost-pct-change-all',
    x_param_key='P',
    x_label='Percentage Change in All Fixed Costs (in %)',
    title='Impact of Overall Fixed Cost Change on Objective Value',
    plot_filename='cflp_fixed_cost_pct_change_all_impact.png',
    baseline_value=baseline_obj_value
)

# 4.10 Transportation Cost Change - Percentage
plot_scenario_impact(
    cflp_df,
    scenario_type='transportation-cost-change-pct',
    x_param_key='P',
    x_label='Percentage Change in Transportation Cost (in %)',
    title='Impact of Transportation Cost Percentage Change on Objective Value',
    plot_filename='cflp_transportation_cost_change_pct_impact.png',
    baseline_value=baseline_obj_value
)

# 4.11 Transportation Cost Change - Integer
plot_scenario_impact(
    cflp_df,
    scenario_type='transportation-cost-change-int',
    x_param_key='V',
    x_label='New Transportation Cost Value (in cost)',
    title='Impact of Specific Transportation Cost Value on Objective Value',
    plot_filename='cflp_transportation_cost_change_int_impact.png',
    baseline_value=baseline_obj_value
)

# 4.12 Force Open Facility
plot_scenario_impact(
    cflp_df,
    scenario_type='force-open-facility',
    x_param_key='F',
    x_label='Facility Index',
    title='Impact of Forcing Facility Open on Objective Value',
    plot_filename='cflp_force_open_facility_impact.png',
    baseline_value=baseline_obj_value
)

# 4.13 Force Close Facility
plot_scenario_impact(
    cflp_df,
    scenario_type='force-close-facility',
    x_param_key='F',
    x_label='Facility Index',
    title='Impact of Forcing Facility Closed on Objective Value',
    plot_filename='cflp_force_close_facility_impact.png',
    baseline_value=baseline_obj_value
)


Unique Scenario Types:
['demand-change-customer-pct' 'demand-increase-customer-int'
 'demand-change-all' 'capacity-change-facility-pct'
 'capacity-change-facility-int' 'capacity-pct-change-all'
 'fixed-cost-change-facility-pct' 'fixed-cost-change-facility-int'
 'fixed-cost-pct-change-all' 'transportation-cost-change-pct'
 'transportation-cost-change-int' 'force-open-facility'
 'force-close-facility']
Plot saved: results/cflp_analysis_plots/25cust_25fac/cflp_demand_change_customer_pct_impact.png
Plot saved: results/cflp_analysis_plots/25cust_25fac/cflp_demand_increase_customer_int_impact.png
Plot saved: results/cflp_analysis_plots/25cust_25fac/cflp_demand_change_all_impact.png
Plot saved: results/cflp_analysis_plots/25cust_25fac/cflp_capacity_change_facility_pct_impact.png
Plot saved: results/cflp_analysis_plots/25cust_25fac/cflp_capacity_change_facility_int_impact.png
Plot saved: results/cflp_analysis_plots/25cust_25fac/cflp_capacity_pct_change_all_impact.png
Plot saved: results/cflp_

## 5. Model-Specific Analysis: Facility Selection Stability

In [6]:
def analyze_facility_selection(df: pd.DataFrame, num_facilities: int = 10):
    """
    Analyzes how frequently each facility is selected (opened) across scenarios.
    Assumes 'variables' column contains JSON with 'Open_X' variables.
    """
    facility_open_counts = {f: 0 for f in range(num_facilities)}
    total_scenarios = 0

    for _, row in df.iterrows():
        if row['status'] == 'Optimal' and isinstance(row['variables'], dict):
            total_scenarios += 1
            for i in range(num_facilities):
                open_var_name = f'Open_{i}'
                # Check if the variable exists and its value is close to 1 (for binary variables)
                if open_var_name in row['variables'] and row['variables'][open_var_name] > 0.5:
                    facility_open_counts[i] += 1
    
    if total_scenarios == 0:
        print("No optimal scenarios to analyze facility selection.")
        return

    facility_selection_freq = {f: (count / total_scenarios) * 100 for f, count in facility_open_counts.items()}

    # Plotting
    facilities = list(facility_selection_freq.keys())
    frequencies = list(facility_selection_freq.values())

    plt.figure(figsize=(12, 7))
    sns.barplot(x=facilities, y=frequencies, palette='viridis')
    plt.axhline(80, color='green', linestyle='--', label='Robust Threshold (80%)')
    plt.axhline(20, color='red', linestyle='--', label='Uncertain Threshold (20%)')
    plt.title('Facility Selection Frequency Across Optimal Scenarios')
    plt.xlabel('Facility Index')
    plt.ylabel('Selection Frequency (%)')
    plt.legend()
    plt.grid(axis='y', linestyle='--')
    plt.savefig(OUTPUT_PLOTS_DIR / 'cflp_facility_selection_frequency.png')
    plt.close()

    print("Facility Selection Frequencies:")
    for f, freq in facility_selection_freq.items():
        print(f"  Facility {f}: {freq:.2f}%")

# Assuming 10 facilities based on default data, adjust if needed
analyze_facility_selection(optimal_cflp_df, num_facilities=10)

Facility Selection Frequencies:
  Facility 0: 0.00%
  Facility 1: 0.10%
  Facility 2: 0.05%
  Facility 3: 0.30%
  Facility 4: 0.47%
  Facility 5: 0.04%
  Facility 6: 0.00%
  Facility 7: 98.94%
  Facility 8: 3.14%
  Facility 9: 99.90%



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=facilities, y=frequencies, palette='viridis')


## 6. Conclusion

In [7]:
print("CFLP benchmark analysis complete. Plots saved to:", OUTPUT_PLOTS_DIR)

CFLP benchmark analysis complete. Plots saved to: results/cflp_analysis_plots/25cust_25fac
