# VRP Raw Data Exploration

In [None]:
import json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration
# Location of the raw VRP instance (a JSON file) that the loading cell below
# opens. The path is relative, so it is resolved against the kernel's current
# working directory.
# NOTE(review): the file name suggests a 10-customer / 2-vehicle instance —
# confirm against the data itself.
VRP_DATA_FILE = 'models/VRP/data/vrp_data_10cust_2veh.json'

## 1. Load Raw Data

In [None]:
# Load the raw VRP instance from VRP_DATA_FILE into `vrp_raw_data`.
# On every handled failure `vrp_raw_data` falls back to an empty dict, so the
# later cells guarded by `if vrp_raw_data:` are simply skipped.
try:
    # Pin the encoding: without it, open() uses the platform's locale default,
    # which can mis-decode a UTF-8 JSON file on some systems.
    with open(VRP_DATA_FILE, 'r', encoding='utf-8') as f:
        vrp_raw_data = json.load(f)
except FileNotFoundError:
    print(f"Error: Data file not found at {VRP_DATA_FILE}")
    vrp_raw_data = {}
except json.JSONDecodeError as e:
    print(f"Error decoding JSON from {VRP_DATA_FILE}: {e}")
    vrp_raw_data = {}
else:
    # Only the file access / parsing belongs in the `try`; reporting lives here.
    if isinstance(vrp_raw_data, dict):
        print(f"Successfully loaded raw VRP data from {VRP_DATA_FILE}")
        print("Keys available:", vrp_raw_data.keys())
    else:
        # Valid JSON whose top level is not an object (e.g. a list) has no
        # .keys()/.get(), which this notebook relies on; treat it like any
        # other load failure instead of crashing with AttributeError.
        print(
            f"Error: Expected a JSON object in {VRP_DATA_FILE}, "
            f"got {type(vrp_raw_data).__name__}"
        )
        vrp_raw_data = {}

## 2. Explore Data Structure and Contents

In [None]:
# Textual summary of the loaded instance; skipped entirely when nothing loaded.
if vrp_raw_data:
    print("\nDistance Matrix (first 3x3):")
    if 'distance' in vrp_raw_data:
        # Pairing with range(3) caps the preview at the first three rows.
        for _, matrix_row in zip(range(3), vrp_raw_data['distance']):
            print(matrix_row[:3])

    # Remaining fields, printed in a fixed order; a missing key prints as None.
    summary_fields = (
        ("\nDemands:", 'demand'),
        ("\nVehicle Capacity:", 'vehicle_capacity'),
        ("\nNumber of Vehicles:", 'num_vehicles'),
        ("\nDepot Index:", 'depot'),
    )
    for label, key in summary_fields:
        print(label, vrp_raw_data.get(key))

    # One row of the distance matrix per node; 0 when the matrix is absent.
    node_count = len(vrp_raw_data.get('distance', []))
    print(f"\nNumber of Nodes (Customers + Depot): {node_count}")

## 3. Basic Data Visualizations

In [None]:
# Two quick plots of the raw instance; skipped entirely when nothing loaded.
if vrp_raw_data:
    # --- Histogram of customer demands ---------------------------------------
    all_demands = vrp_raw_data.get('demand')
    if all_demands:
        # Drop the entry at the depot index. With no 'depot' key the sentinel
        # -1 never equals an enumerate index, so every entry is kept.
        depot_idx = vrp_raw_data.get('depot', -1)
        customer_demands = [
            amount
            for node, amount in enumerate(all_demands)
            if node != depot_idx
        ]
        if customer_demands:
            plt.figure(figsize=(8, 5))
            sns.histplot(customer_demands, kde=True)
            plt.title('Distribution of Customer Demands')
            plt.xlabel('Demand Value')
            plt.ylabel('Frequency')
            plt.show()

    # --- Heatmap of the distance matrix --------------------------------------
    raw_matrix = vrp_raw_data.get('distance')
    if raw_matrix:
        dist_df = pd.DataFrame(raw_matrix)
        # Annotated cells become unreadable beyond 20 rows or 20 columns.
        if max(dist_df.shape) > 20:
            print("Distance matrix too large for heatmap visualization.")
        else:
            plt.figure(figsize=(10, 8))
            sns.heatmap(dist_df, annot=True, cmap='viridis', fmt=".1f")
            plt.title('Distance Matrix Heatmap')
            plt.xlabel('Node Index')
            plt.ylabel('Node Index')
            plt.show()