# VRP Benchmark Analysis

In [None]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add the project root to sys.path so that `notebooks.analysis.common_analysis_functions`
# can be imported.
#
# `__file__` is only defined when this code runs as a script/module; Jupyter/IPython
# kernels do not define it, so fall back to the current working directory there.
try:
    _anchor_dir = Path(__file__).resolve().parent
except NameError:
    _anchor_dir = Path.cwd().resolve()

# Walk upwards from the anchor until a directory containing `notebooks/analysis` is found
# (that is the package path the import below relies on). This replaces a hard-coded
# `parents[3]`, which breaks when the notebook is moved or the path is too shallow.
# If nothing matches, keep the anchor itself so the later import fails with a clear
# ImportError instead of an IndexError here.
root_dir = next(
    (
        candidate
        for candidate in (_anchor_dir, *_anchor_dir.parents)
        if (candidate / 'notebooks' / 'analysis').is_dir()
    ),
    _anchor_dir,
)
sys.path.insert(0, str(root_dir))

from notebooks.analysis.common_analysis_functions import load_benchmark_data, parse_json_columns, filter_optimal_solutions, plot_objective_distribution, plot_solve_time_distribution

# Configuration: benchmark CSV to analyse and the directory that receives the plots.
OUTPUT_PLOTS_DIR = 'results/vrp_analysis_plots'
VRP_BENCHMARK_FILE = 'benchmark/VRP/vrp_benchmark_10cust_2veh.csv'

# Create the plot directory up front. Missing parent directories are created too
# (parents=True) and an already existing directory is not an error (exist_ok=True).
plots_output_path = Path(OUTPUT_PLOTS_DIR)
plots_output_path.mkdir(parents=True, exist_ok=True)

## 1. Load and Preprocess Data

In [None]:
# Columns handed to parse_json_columns for decoding.
# NOTE(review): presumably these hold JSON-encoded strings in the CSV; confirm against the helper.
json_columns = ['parameters', 'constraints', 'variables', 'modification']

# Read the benchmark file, then decode the columns listed above.
vrp_df = load_benchmark_data(VRP_BENCHMARK_FILE)
vrp_df = parse_json_columns(vrp_df, json_columns)

# Quick sanity check of what was loaded: schema/dtypes first, then a preview of the rows.
print("\nDataFrame Info:")
vrp_df.info()

print("\nFirst 5 rows:")
print(vrp_df.head())

## 2. Overall Performance Analysis

In [None]:
# Objective-value and solve-time statistics are computed only on the rows kept by
# filter_optimal_solutions.
optimal_vrp_df = filter_optimal_solutions(vrp_df)

if optimal_vrp_df.empty:
    print("No optimal solutions found in the benchmark data for overall performance analysis.")
else:
    plots_dir = Path(OUTPUT_PLOTS_DIR)
    summary_columns = ['objective_value', 'pulp_model_execution_time']

    # Descriptive statistics for the two headline metrics.
    print("\nOptimal Solutions Summary:")
    print(optimal_vrp_df[summary_columns].describe())

    # One distribution plot per metric, each written into the plots directory.
    plot_objective_distribution(
        optimal_vrp_df,
        title='VRP Objective Value Distribution (Optimal Solutions)',
        save_path=plots_dir / 'vrp_objective_distribution.png',
    )
    plot_solve_time_distribution(
        optimal_vrp_df,
        title='VRP Solve Time Distribution (Optimal Solutions)',
        save_path=plots_dir / 'vrp_solve_time_distribution.png',
    )

## 3. Scenario-Specific Analysis

In [None]:
# Example: analyze the impact of 'demand-increase-all' scenarios on the objective value.


def _demand_pct_token(modification):
    """Return the raw percentage text of a parsed ``modification`` entry, or None.

    The token is whatever follows the last '/' in the entry's 'ADD DATA' value,
    with any ')' characters removed. Entries that are not dicts, or that have no
    'ADD DATA' key, yield None.
    """
    if not isinstance(modification, dict) or 'ADD DATA' not in modification:
        return None
    # str() keeps a non-string 'ADD DATA' value from raising AttributeError on .split().
    return str(modification['ADD DATA']).split('/')[-1].replace(')', '')


demand_increase_df = vrp_df[vrp_df['scenario_type'] == 'demand-increase-all'].copy()

# errors='coerce' turns unparseable tokens (and None) into NaN instead of aborting the
# whole cell on the first malformed row; astype(float) keeps the column dtype float.
demand_increase_df['demand_pct_increase'] = pd.to_numeric(
    demand_increase_df['modification'].apply(_demand_pct_token),
    errors='coerce',
).astype(float)

if not demand_increase_df.empty:
    # Surface rows whose percentage could not be extracted rather than hiding them silently.
    unparsed_count = int(demand_increase_df['demand_pct_increase'].isna().sum())
    if unparsed_count:
        print(f"Warning: {unparsed_count} 'demand-increase-all' row(s) have no numeric demand percentage.")

    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        sns.lineplot(data=demand_increase_df, x='demand_pct_increase', y='objective_value', marker='o', ax=ax)
        ax.set_title('Impact of Overall Demand Increase on Objective Value')
        ax.set_xlabel('Percentage Increase in All Demands')
        ax.set_ylabel('Objective Value')
        ax.grid(True)
        fig.savefig(Path(OUTPUT_PLOTS_DIR) / 'vrp_demand_increase_impact.png')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
else:
    print("No 'demand-increase-all' scenarios found.")

## 4. Model-Specific Analysis: Route Analysis (Example)

In [None]:
def extract_routes(variables_dict, num_nodes):
    """Return the (i, j) arcs whose ``x_i_j[_k]`` variable is active.

    A variable counts as active when its name starts with ``'x_'`` and its value
    is greater than 0.5. The first two underscore-separated fields after the
    ``x`` prefix are read as the integer node indices ``i`` and ``j``; a third
    field (for example a vehicle index) is ignored.

    This is a simplified arc listing, not a full route reconstruction, and it
    assumes plain ``x_i_j_k`` variable names.

    Args:
        variables_dict: Mapping of variable name to solution value. An empty
            mapping or ``None`` yields an empty list.
        num_nodes: Currently unused; kept so existing callers keep working.

    Returns:
        List of ``(i, j)`` integer tuples in the mapping's iteration order.
    """
    active_arcs = []
    if not variables_dict:
        return active_arcs

    for var_name, var_value in variables_dict.items():
        if not var_name.startswith('x_'):
            continue

        # Values that cannot be compared with a number (e.g. None from a JSON null,
        # or a string) are treated as "not active" instead of raising TypeError.
        try:
            is_active = var_value > 0.5
        except TypeError:
            continue
        if not is_active:
            continue

        # Names that do not carry two integer indices are skipped.
        parts = var_name.split('_')
        try:
            arc = (int(parts[1]), int(parts[2]))
        except (ValueError, IndexError):
            continue
        active_arcs.append(arc)

    return active_arcs

# Demonstration only: checks that the first optimal run exposes its 'variables' entry
# as a dictionary. No routes are computed here.
print("\nExample: Accessing variables for route analysis (requires specific VRP model variable structure):")
if optimal_vrp_df.empty:
    print("No optimal VRP solutions to analyze routes.")
else:
    sample_run = optimal_vrp_df.iloc[0]
    variables_are_dict = 'variables' in sample_run and isinstance(sample_run['variables'], dict)
    if not variables_are_dict:
        print("Variables column is not in expected dictionary format or not present.")
    else:
        # Placeholder node count; replace with a value derived from the run's
        # parameters or from the original instance data.
        num_nodes_example = 10
        # A real analysis would call, for example:
        #   extract_routes(sample_run['variables'], num_nodes_example)
        print("Variables column contains dictionary. Further VRP-specific route analysis can be implemented here.")

## 5. Conclusion

In [None]:
# Final status line: tell the reader where the generated plots were written.
completion_message = "VRP benchmark analysis complete. Plots saved to:"
print(completion_message, OUTPUT_PLOTS_DIR)