In [1]:
import pandas as pd
import json
from pathlib import Path
import sys
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns

# Set current working directory
def find_project_root(start_dir, markers=('models', 'notebooks')):
    """Locate the project root by walking upwards from ``start_dir``.

    The root is the nearest directory (``start_dir`` itself included) that
    contains every directory named in ``markers``. The defaults match the
    relative paths this notebook relies on: ``models/...`` data/model files
    and the ``notebooks.analysis`` package import.

    Args:
        start_dir: Directory to start searching from.
        markers: Directory names that must all exist inside the root.

    Returns:
        Absolute path of the detected root. If no ancestor qualifies, the
        parent of ``start_dir`` is returned, which is the "one level up"
        behaviour this cell had before.
    """
    origin = os.path.abspath(start_dir)
    candidate = origin
    while True:
        if all(os.path.isdir(os.path.join(candidate, marker)) for marker in markers):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # Hit the filesystem root without a match: keep the old behaviour.
            return os.path.dirname(origin)
        candidate = parent


# A notebook kernel does not define __file__, so the kernel's working
# directory stands in for the notebook's directory.
notebook_dir = os.getcwd()

# Searching for the root (instead of blindly going one level up) keeps this
# cell idempotent: re-running it when already at the root changes nothing.
project_root = find_project_root(notebook_dir)
os.chdir(project_root)

# You can verify the new working directory
print(f"Current Working Directory: {os.getcwd()}")

from notebooks.analysis.common_analysis_functions import get_baseline_objective, read_baseline_log

Current Working Directory: /home/timpi/Projects/thesis/multi_agent_supply_chain_optimization


In [2]:
# --- Configuration for CFLP ---
CFLP_MODEL_FILE_PATH = 'models/CFLP/capfacloc_model.py'

# Every benchmark instance is square: N customers and N facilities.
CFLP_DATA_CONFIGS = [
    f'models/CFLP/data/capfacloc_data_{n}cust_{n}fac.json'
    for n in (5, 10, 25, 50, 75, 100, 150, 200)
]

# --- Configuration for VRP ---
VRP_MODEL_FILE_PATH = 'models/VRP/vrp_model.py'

# Each instance is described by (customers, vehicles, vehicle capacity).
# The 15-customer / 2-vehicle / 50-capacity instance
# ('models/VRP/data/vrp_data_15cust_2veh_50cap.json') is left out on purpose:
# it takes too long to solve.
VRP_DATA_CONFIGS = [
    f'models/VRP/data/vrp_data_{customers}cust_{vehicles}veh_{capacity}cap.json'
    for customers, vehicles, capacity in (
        (5, 1, 50),
        (5, 2, 50),
        (10, 1, 50),
        (10, 1, 100),
        (10, 2, 50),
        (10, 2, 100),
        (10, 5, 50),
        (15, 1, 100),
    )
]

In [3]:
# Store data as a list of dictionaries for easier plotting and labeling
cflp_benchmark_data = []
vrp_benchmark_data = []


def generate_baselines(model_file_path, data_configs, model_type):
    """
    Generate a baseline log per data configuration and record benchmark metrics.

    For every path in ``data_configs`` the function derives a size label and a
    parameter count from the file name, calls ``get_baseline_objective`` so the
    baseline CSV is produced, then reads the CSV back for the solver time.

    Parameter counts:
        * CFLP: ``C + 2F + C*F`` (C demands, F fixed costs, F capacities,
          C*F transportation costs). ``F`` comes from the ``<n>fac`` token and
          falls back to ``C`` when the token is absent, so square instances
          give ``3N + N^2``.
        * VRP: ``2C + 2V + (C+1)^2`` (C customers, V vehicles, (C+1)^2
          distances, C demands, V capacities).

    Args:
        model_file_path: Path to the model source; logs are written to a
            ``baselines`` directory next to it.
        data_configs: Iterable of data file paths to benchmark.
        model_type: ``'CFLP'`` or ``'VRP'``. Any other value still runs the
            baseline but is not stored in a module-level list.

    Returns:
        The list of record dicts (``size``, ``execution_time``,
        ``parameter_count``, ``config_str``) collected during this call.
        ``execution_time`` is ``None`` when no time could be read from the log.

    Side effects:
        Creates the log directory, prints progress, and appends the records to
        ``cflp_benchmark_data`` or ``vrp_benchmark_data``. These lists are NOT
        cleared here, so repeated calls accumulate rows.
    """
    print(f"\n--- Generating Baselines for {model_type} ---")

    # Pick the module-level sink once; unknown model types have none.
    if model_type == 'CFLP':
        sink = cflp_benchmark_data
    elif model_type == 'VRP':
        sink = vrp_benchmark_data
    else:
        sink = None

    collected = []
    for data_path in data_configs:
        num_customers = 0
        param_count = 0
        size_str = 'default'

        if model_type == 'CFLP':
            log_filename_prefix = 'baseline_cflp_log'
            data_config_match = re.search(r'capfacloc_data_([\w_]+)\.json', data_path)
            if data_config_match:
                size_str = data_config_match.group(1)

            cust_match = re.search(r'(\d+)cust', size_str)
            fac_match = re.search(r'(\d+)fac', size_str)
            if cust_match:
                num_customers = int(cust_match.group(1))
                # Without a facility token, assume a square instance.
                num_facilities = int(fac_match.group(1)) if fac_match else num_customers
                param_count = (
                    num_customers                      # demands
                    + 2 * num_facilities               # fixed costs + capacities
                    + num_customers * num_facilities   # transportation costs
                )

        elif model_type == 'VRP':
            log_filename_prefix = 'baseline_vrp_log'
            data_config_match = re.search(r'vrp_data_([\w_]+)\.json', data_path)
            if data_config_match:
                size_str = data_config_match.group(1)

            cust_match = re.search(r'(\d+)cust', size_str)
            veh_match = re.search(r'(\d+)veh', size_str)
            if cust_match and veh_match:
                num_customers = int(cust_match.group(1))
                num_vehicles = int(veh_match.group(1))
                # C + V + (C+1)^2 distances + C demands + V capacities
                param_count = 2 * (num_customers + num_vehicles) + (num_customers + 1) ** 2

        else:
            log_filename_prefix = 'baseline_log'

        # Define the baseline log directory within the model's folder
        baseline_log_dir = Path(model_file_path).parent / 'baselines'
        baseline_log_dir.mkdir(parents=True, exist_ok=True)
        baseline_log_filepath = baseline_log_dir / f'{log_filename_prefix}_{size_str}.csv'

        print(f"Generating baseline for {data_path} -> {baseline_log_filepath}")
        # Call get_baseline_objective to ensure the CSV is generated
        get_baseline_objective(model_file_path, data_path, str(baseline_log_filepath))

        # Read the generated CSV to get the execution time
        execution_time = None
        df_log = read_baseline_log(str(baseline_log_filepath))
        if (
            df_log is not None
            and not df_log.empty
            and 'pulp_model_execution_time' in df_log.columns
        ):
            # Assuming only one entry per baseline log for simplicity
            execution_time = df_log['pulp_model_execution_time'].iloc[0]

        record = {
            'size': num_customers,  # Using num_customers as the primary size metric
            'execution_time': execution_time,
            'parameter_count': param_count,
            'config_str': size_str,
        }
        collected.append(record)
        if sink is not None:
            sink.append(record)

    return collected

In [4]:
# Run for CFLP
# generate_baselines appends to the module-level list, so empty it first;
# otherwise re-running this cell would duplicate every row in the plots.
cflp_benchmark_data.clear()
generate_baselines(CFLP_MODEL_FILE_PATH, CFLP_DATA_CONFIGS, 'CFLP')

# Convert collected data to DataFrame for easier plotting.
# Explicit columns keep the frame well-formed (and dropna from raising
# KeyError) even when no records were collected.
df_cflp_benchmark = pd.DataFrame(
    cflp_benchmark_data,
    columns=['size', 'execution_time', 'parameter_count', 'config_str'],
).dropna(subset=['execution_time', 'parameter_count'])  # Remove rows with missing data


--- Generating Baselines for CFLP ---
Generating baseline for models/CFLP/data/capfacloc_data_5cust_5fac.json -> models/CFLP/baselines/baseline_cflp_log_5cust_5fac.csv
Running baseline model from models/CFLP/capfacloc_model.py with data models/CFLP/data/capfacloc_data_5cust_5fac.json...
Baseline objective value: 223.0
Baseline log file loaded from models/CFLP/baselines/baseline_cflp_log_5cust_5fac.csv.
Generating baseline for models/CFLP/data/capfacloc_data_10cust_10fac.json -> models/CFLP/baselines/baseline_cflp_log_10cust_10fac.csv
Running baseline model from models/CFLP/capfacloc_model.py with data models/CFLP/data/capfacloc_data_10cust_10fac.json...
Baseline objective value: 436.0625
Baseline log file loaded from models/CFLP/baselines/baseline_cflp_log_10cust_10fac.csv.
Generating baseline for models/CFLP/data/capfacloc_data_25cust_25fac.json -> models/CFLP/baselines/baseline_cflp_log_25cust_25fac.csv
Running baseline model from models/CFLP/capfacloc_model.py with data models/CFLP

In [5]:
# Run for VRP
# generate_baselines appends to the module-level list, so empty it first;
# otherwise re-running this cell would duplicate every row in the plots.
vrp_benchmark_data.clear()
generate_baselines(VRP_MODEL_FILE_PATH, VRP_DATA_CONFIGS, 'VRP')

# Explicit columns keep the frame well-formed (and dropna from raising
# KeyError) even when no records were collected.
df_vrp_benchmark = pd.DataFrame(
    vrp_benchmark_data,
    columns=['size', 'execution_time', 'parameter_count', 'config_str'],
).dropna(subset=['execution_time', 'parameter_count'])  # Remove rows with missing data


--- Generating Baselines for VRP ---
Generating baseline for models/VRP/data/vrp_data_5cust_1veh_50cap.json -> models/VRP/baselines/baseline_vrp_log_5cust_1veh_50cap.csv
Running baseline model from models/VRP/vrp_model.py with data models/VRP/data/vrp_data_5cust_1veh_50cap.json...
Baseline objective value: 241.07
Baseline log file loaded from models/VRP/baselines/baseline_vrp_log_5cust_1veh_50cap.csv.
Generating baseline for models/VRP/data/vrp_data_5cust_2veh_50cap.json -> models/VRP/baselines/baseline_vrp_log_5cust_2veh_50cap.csv
Running baseline model from models/VRP/vrp_model.py with data models/VRP/data/vrp_data_5cust_2veh_50cap.json...
Baseline objective value: 241.07
Baseline log file loaded from models/VRP/baselines/baseline_vrp_log_5cust_2veh_50cap.csv.
Generating baseline for models/VRP/data/vrp_data_10cust_1veh_50cap.json -> models/VRP/baselines/baseline_vrp_log_10cust_1veh_50cap.csv
Running baseline model from models/VRP/vrp_model.py with data models/VRP/data/vrp_data_10cu

ERROR:root:Execution Error:
Traceback (most recent call last):
  File "/home/timpi/Projects/thesis/multi_agent_supply_chain_optimization/utils.py", line 94, in _run_with_exec
    exec(src_code, locals_dict, locals_dict)
  File "<string>", line 21, in <module>
KeyError: 'coords'


Baseline objective value: 296.26000000000005
Baseline log file loaded from models/VRP/baselines/baseline_vrp_log_10cust_2veh_100cap.csv.
Generating baseline for models/VRP/data/vrp_data_10cust_5veh_50cap.json -> models/VRP/baselines/baseline_vrp_log_10cust_5veh_50cap.csv
Running baseline model from models/VRP/vrp_model.py with data models/VRP/data/vrp_data_10cust_5veh_50cap.json...
Failed to get optimal baseline objective: {'status': 'Error', 'message': "Model execution failed or 'model' not found in locals_dict."}
Baseline log file not found at models/VRP/baselines/baseline_vrp_log_10cust_5veh_50cap.csv. Please run baseline model first.
Generating baseline for models/VRP/data/vrp_data_15cust_1veh_100cap.json -> models/VRP/baselines/baseline_vrp_log_15cust_1veh_100cap.csv
Running baseline model from models/VRP/vrp_model.py with data models/VRP/data/vrp_data_15cust_1veh_100cap.json...
Baseline objective value: 321.65999999999997
Baseline log file loaded from models/VRP/baselines/baselin

## CFLP

In [6]:
# All CFLP figures are written to this directory.
plot_output_dir = Path('results/baseline_analysis_plots')


def save_cflp_metric_plot(benchmark_df, metric_col, title, ylabel, filename, output_dir=plot_output_dir):
    """Plot one CFLP metric against instance size and save it as a PNG.

    A blue line connects all points; each point is additionally drawn as its
    own scatter so that every data configuration gets a legend entry.

    Args:
        benchmark_df: Frame with ``size``, ``config_str`` and ``metric_col``.
        metric_col: Name of the column plotted on the y-axis.
        title: Figure title.
        ylabel: Y-axis label.
        filename: File name of the PNG inside ``output_dir``.
        output_dir: Target directory; created if it does not exist.

    Returns:
        Path of the saved image.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Line connecting all points
    ax.plot(benchmark_df['size'], benchmark_df[metric_col], color='blue', linestyle='-')

    # One scatter per row so each configuration shows up in the legend
    point_handles = [
        ax.scatter(row['size'], row[metric_col], marker='o', label=row['config_str'], color='blue')
        for _, row in benchmark_df.iterrows()
    ]

    ax.set_title(title)
    ax.set_xlabel('Number of Customers/Facilities (N)')
    ax.set_ylabel(ylabel)
    ax.grid(True)
    # Place legend outside the axes
    ax.legend(handles=point_handles, title='Data Configuration', bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()

    output_dir.mkdir(parents=True, exist_ok=True)
    saved_path = output_dir / filename
    fig.savefig(saved_path)
    plt.close(fig)
    return saved_path


# --- Plotting CFLP Execution Times ---
print("\n--- Plotting CFLP Execution Times ---")
if not df_cflp_benchmark.empty:
    plot_path = save_cflp_metric_plot(
        df_cflp_benchmark,
        'execution_time',
        'CFLP Model Execution Time vs. Data Size',
        'PULP Model Execution Time (seconds)',
        'cflp_execution_time_vs_data_size.png',
    )
    print(f"Plot saved to: {plot_path}")
else:
    print("No valid CFLP execution time data to plot.")

# --- Plotting CFLP Parameter Counts ---
print("\n--- Plotting CFLP Parameter Counts ---")
if not df_cflp_benchmark.empty:
    plot_path_params = save_cflp_metric_plot(
        df_cflp_benchmark,
        'parameter_count',
        'CFLP Model Parameter Count vs. Data Size',
        'Total Parameter Count (3N + N^2)',
        'cflp_parameter_count_vs_data_size.png',
    )
    print(f"Parameter count plot saved to: {plot_path_params}")
else:
    print("No valid CFLP parameter count data to plot.")

# --- Overlay Plot: Execution Time and Parameter Count (CFLP) ---
print("\n--- Plotting Overlay of Execution Time and Parameter Count (CFLP) ---")
if not df_cflp_benchmark.empty:
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Execution time on the left axis. legend=False stops seaborn from adding
    # its own per-axis legend; a single combined legend is built below.
    sns.lineplot(data=df_cflp_benchmark, x='size', y='execution_time', marker='o', ax=ax1,
                 color='blue', label='Execution Time', legend=False)
    ax1.set_xlabel('Number of Customers/Facilities (N)')
    ax1.set_ylabel('PULP Model Execution Time (seconds)', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.grid(True, which="both", ls="-", alpha=0.7)

    # Parameter count on a second y-axis sharing the same x-axis
    ax2 = ax1.twinx()
    sns.lineplot(data=df_cflp_benchmark, x='size', y='parameter_count', marker='x', ax=ax2,
                 color='red', label='Parameter Count', legend=False)
    ax2.set_ylabel('Total Parameter Count (3N + N^2)', color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    # Combine the handles of both axes into one legend
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc='upper left')

    ax1.set_title('CFLP Model Performance: Execution Time & Parameter Count vs. Data Size')
    fig.tight_layout()

    # Save the plot
    plot_output_dir.mkdir(parents=True, exist_ok=True)
    plot_path_combined = plot_output_dir / 'cflp_execution_time_and_parameter_count_overlay.png'
    fig.savefig(plot_path_combined)
    plt.close(fig)
    print(f"Overlay plot saved to: {plot_path_combined}")
else:
    print("Not enough CFLP data collected for overlay plotting.")




--- Plotting CFLP Execution Times ---
Plot saved to: results/baseline_analysis_plots/cflp_execution_time_vs_data_size.png

--- Plotting CFLP Parameter Counts ---
Parameter count plot saved to: results/baseline_analysis_plots/cflp_parameter_count_vs_data_size.png

--- Plotting Overlay of Execution Time and Parameter Count (CFLP) ---
Overlay plot saved to: results/baseline_analysis_plots/cflp_execution_time_and_parameter_count_overlay.png


## VRP

In [7]:
# All VRP figures are written to this directory.
plot_output_dir = Path('results/baseline_analysis_plots')


def save_vrp_metric_plot(benchmark_df, metric_col, title, ylabel, filename, output_dir=plot_output_dir):
    """Plot one VRP metric against the number of customers and save it as a PNG.

    A blue line connects all points; each point is additionally drawn as its
    own scatter so that every data configuration gets a legend entry. The
    x-axis is logarithmic with a tick at every customer count in the frame.

    Args:
        benchmark_df: Frame with ``size``, ``config_str`` and ``metric_col``.
        metric_col: Name of the column plotted on the y-axis.
        title: Figure title.
        ylabel: Y-axis label.
        filename: File name of the PNG inside ``output_dir``.
        output_dir: Target directory; created if it does not exist.

    Returns:
        Path of the saved image.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Line connecting all points
    ax.plot(benchmark_df['size'], benchmark_df[metric_col], color='blue', linestyle='-')

    # One scatter per row so each configuration shows up in the legend
    point_handles = [
        ax.scatter(row['size'], row[metric_col], marker='o', label=row['config_str'], color='blue')
        for _, row in benchmark_df.iterrows()
    ]

    ax.set_title(title)
    ax.set_xlabel('Number of Customers (C)')
    ax.set_ylabel(ylabel)
    ax.grid(True)
    ax.set_xscale('log')
    # Ensure all sizes are shown as ticks
    ax.set_xticks(benchmark_df['size'])
    ax.set_xticklabels([str(s) for s in benchmark_df['size']])
    ax.legend(handles=point_handles, title='Data Configuration', bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()

    output_dir.mkdir(parents=True, exist_ok=True)
    saved_path = output_dir / filename
    fig.savefig(saved_path)
    plt.close(fig)
    return saved_path


# --- Plotting VRP Execution Times ---
print("\n--- Plotting VRP Execution Times ---")
if not df_vrp_benchmark.empty:
    plot_path = save_vrp_metric_plot(
        df_vrp_benchmark,
        'execution_time',
        'VRP Model Execution Time vs. Number of Customers',
        'PULP Model Execution Time (seconds)',
        'vrp_execution_time_vs_data_size.png',
    )
    print(f"Plot saved to: {plot_path}")
else:
    print("No valid VRP execution time data to plot.")

# --- Plotting VRP Parameter Counts ---
print("\n--- Plotting VRP Parameter Counts ---")
if not df_vrp_benchmark.empty:
    plot_path_params = save_vrp_metric_plot(
        df_vrp_benchmark,
        'parameter_count',
        'VRP Model Parameter Count vs. Number of Customers',
        'Total Parameter Count (2C + 2V + (C+1)^2)',
        'vrp_parameter_count_vs_data_size.png',
    )
    print(f"Parameter count plot saved to: {plot_path_params}")
else:
    print("No valid VRP parameter count data to plot.")

# --- Overlay Plot: Execution Time and Parameter Count (VRP) ---
print("\n--- Plotting Overlay of Execution Time and Parameter Count (VRP) ---")
if not df_vrp_benchmark.empty:
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Execution time on the left axis. legend=False stops seaborn from adding
    # its own per-axis legend; a single combined legend is built below.
    sns.lineplot(data=df_vrp_benchmark, x='size', y='execution_time', marker='o', ax=ax1,
                 color='blue', label='Execution Time', legend=False)
    ax1.set_xlabel('Number of Customers (C)')
    ax1.set_ylabel('PULP Model Execution Time (seconds)', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.grid(True, which="both", ls="-", alpha=0.7)
    ax1.set_xscale('log')
    ax1.set_xticks(df_vrp_benchmark['size'])
    ax1.set_xticklabels([str(s) for s in df_vrp_benchmark['size']])

    # Parameter count on a second, logarithmic y-axis sharing the same x-axis
    ax2 = ax1.twinx()
    sns.lineplot(data=df_vrp_benchmark, x='size', y='parameter_count', marker='x', ax=ax2,
                 color='red', label='Parameter Count', legend=False)
    ax2.set_ylabel('Total Parameter Count (2C + 2V + (C+1)^2)', color='red')
    ax2.tick_params(axis='y', labelcolor='red')
    ax2.set_yscale('log')

    # Combine the handles of both axes into one legend
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc='upper left')

    ax1.set_title('VRP Model Performance: Execution Time & Parameter Count vs. Data Size')
    fig.tight_layout()

    # Save the plot
    plot_output_dir.mkdir(parents=True, exist_ok=True)
    plot_path_combined = plot_output_dir / 'vrp_execution_time_and_parameter_count_overlay.png'
    fig.savefig(plot_path_combined)
    plt.close(fig)
    print(f"Overlay plot saved to: {plot_path_combined}")
else:
    print("Not enough VRP data collected for overlay plotting.")



--- Plotting VRP Execution Times ---
Plot saved to: results/baseline_analysis_plots/vrp_execution_time_vs_data_size.png

--- Plotting VRP Parameter Counts ---
Parameter count plot saved to: results/baseline_analysis_plots/vrp_parameter_count_vs_data_size.png

--- Plotting Overlay of Execution Time and Parameter Count (VRP) ---
Overlay plot saved to: results/baseline_analysis_plots/vrp_execution_time_and_parameter_count_overlay.png
