In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
def generate_vrptw_dataset(
    n_customers=40,
    n_vehicles=5,
    vehicle_capacity=150,
    base_area=100,
    avg_speed=1.0,
    target_utilization=0.65,
    seed=42
):
    """Generate a random VRPTW instance and print summary statistics.

    The depot (id 0) sits at the centre of a square of side
    ``base_area + n_customers``. Each customer gets a uniform random
    position, an integer demand in [5, 25], an integer service time in
    [10, 20] and a time window that opens no earlier than the straight-line
    travel time from the depot and closes no later than the depot itself.

    Parameters
    ----------
    n_customers : int
        Number of customer rows to generate (ids 1..n_customers).
    n_vehicles : int
        Requested fleet size. It is increased (never decreased) when the
        total demand would push utilization above ``target_utilization``.
    vehicle_capacity : int or float
        Capacity of a single vehicle; must be positive.
    base_area : int or float
        Side length of the square before the per-customer growth is added.
    avg_speed : float
        Distance units travelled per time unit; must be positive.
    target_utilization : float
        Desired ratio of total demand to fleet capacity; must be positive.
    seed : int or None
        Seed for a private random generator. The same seed and arguments
        yield the same dataset; ``None`` draws fresh entropy.

    Returns
    -------
    (pandas.DataFrame, dict)
        The frame has one row per node (depot first) with the columns
        ``id, x, y, demand, ready_time, due_time, service_time``. The dict
        holds ``n_vehicles`` (after any adjustment) and ``vehicle_capacity``.

    Raises
    ------
    ValueError
        If ``avg_speed``, ``vehicle_capacity`` or ``target_utilization`` is
        not positive, or if a customer is so far away that no ready time
        fits between its travel time and the latest allowed opening.
    """
    if avg_speed <= 0:
        raise ValueError("avg_speed must be positive")
    if vehicle_capacity <= 0:
        raise ValueError("vehicle_capacity must be positive")
    if target_utilization <= 0:
        raise ValueError("target_utilization must be positive")

    # A private generator makes `seed` effective without touching the
    # global numpy random state shared by the rest of the notebook.
    rng = np.random.default_rng(seed)

    area_size = base_area + n_customers
    depot_due = 480 + (n_customers / 50) * 120

    depot = {
        'id': 0,
        'x': area_size / 2,
        'y': area_size / 2,
        'demand': 0,
        'ready_time': 0,
        'due_time': depot_due,
        'service_time': 0
    }

    # Every window gets the same size-dependent bonus, so compute it once.
    window_bonus = int(n_customers / 50) * 30

    customers = []
    for i in range(1, n_customers + 1):
        x = rng.uniform(5, area_size - 5)
        y = rng.uniform(5, area_size - 5)
        demand = int(rng.integers(5, 26))
        service_time = int(rng.integers(10, 21))

        dist = np.sqrt((x - depot['x'])**2 + (y - depot['y'])**2)
        travel_time = dist / avg_speed

        # Integer bounds are made explicit instead of relying on implicit
        # float truncation; the lower bound is rounded up so a window never
        # opens before a vehicle leaving the depot at t=0 can arrive.
        earliest_ready = int(np.ceil(travel_time))
        latest_ready = int(min(depot_due - 120, travel_time + 180))
        if earliest_ready >= latest_ready:
            raise ValueError(
                f"Customer {i} cannot be scheduled: travel time {travel_time:.1f} "
                f"leaves no room before the latest ready time {latest_ready}"
            )

        ready_time = int(rng.integers(earliest_ready, latest_ready))
        window_width = int(rng.integers(180, 300)) + window_bonus
        due_time = min(ready_time + window_width, depot_due)

        customers.append({
            'id': i,
            'x': x,
            'y': y,
            'demand': demand,
            'ready_time': ready_time,
            'due_time': due_time,
            'service_time': service_time
        })

    df = pd.DataFrame([depot] + customers)

    total_demand = df['demand'].sum()

    required_fleet_capacity = total_demand / target_utilization
    required_vehicles = np.ceil(required_fleet_capacity / vehicle_capacity)

    if required_vehicles > n_vehicles:
        print(f"\n⚙️ Adjusting vehicles from {n_vehicles} → {int(required_vehicles)} to achieve ~{target_utilization*100:.0f}% utilization.")
        n_vehicles = int(required_vehicles)

    vehicle_info = {
        'n_vehicles': int(n_vehicles),
        'vehicle_capacity': vehicle_capacity
    }

    fleet_capacity = n_vehicles * vehicle_capacity
    utilization = total_demand / fleet_capacity * 100

    # Per-customer statistics exclude the depot row: its all-day window
    # would otherwise inflate the reported average window width.
    customer_rows = df[df['id'] > 0]
    ready_times = customer_rows['ready_time']
    window_widths = customer_rows['due_time'] - ready_times

    avg_demand = customer_rows['demand'].mean()
    avg_window = window_widths.mean()

    morning_count = int((ready_times < 120).sum())
    midday_count = int(((ready_times >= 120) & (ready_times < 240)).sum())
    afternoon_count = int((ready_times >= 240).sum())

    tight_windows = int((window_widths < 120).sum())
    moderate_windows = int(((window_widths >= 120) & (window_widths < 180)).sum())
    flexible_windows = int((window_widths >= 180).sum())

    print("\n Dataset Statistics:")
    print(f"  Customers: {n_customers}")
    print(f"  Vehicles: {n_vehicles}")
    print(f"  Vehicle capacity: {vehicle_capacity}")
    print(f"  Total demand: {total_demand:.1f}")
    print(f"  Fleet capacity: {fleet_capacity}")
    print(f"  Utilization: {utilization:.1f}% (target: {target_utilization*100:.0f}%)")
    print(f"  Average demand per customer: {avg_demand:.1f}")
    print(f"  Average time window width: {avg_window:.1f} minutes")
    # Hours are formatted from the float so fractional hours are not truncated.
    print(f"  Depot operating window: 0–{int(depot_due)} min ({depot_due / 60:.1f} hours)")

    print(f"\n  Time of day distribution:")
    print(f"    Morning (0–2h): {morning_count}")
    print(f"    Midday (2–4h): {midday_count}")
    print(f"    Afternoon (4h+): {afternoon_count}")

    print(f"\n  Window flexibility:")
    print(f"    Tight (<2h): {tight_windows}")
    print(f"    Moderate (2–3h): {moderate_windows}")
    print(f"    Flexible (≥3h): {flexible_windows}")

    return df, vehicle_info


In [8]:
def visualize_dataset(df, filename='dataset_visualization.png'):
    """Plot customer locations and time windows, save the figure and show it.

    Parameters
    ----------
    df : pandas.DataFrame
        Node table with the columns ``id, x, y, demand, ready_time,
        due_time``. The row with ``id == 0`` is treated as the depot and
        every row with ``id > 0`` as a customer.
    filename : str
        Path the figure is written to (300 dpi).

    The left panel is a map of the depot and the customers coloured by
    demand. The right panel shows one horizontal bar per customer spanning
    its time window, ordered by ready time, with a red dot at the mid-point
    and a dashed line at the depot's due time.
    """
    depot = df[df['id'] == 0].iloc[0]
    customers = df[df['id'] > 0]

    fig, (map_ax, window_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # --- Left panel: spatial layout -------------------------------------
    map_ax.scatter(depot['x'], depot['y'], c='red', s=300, marker='s',
                   label='Depot', zorder=5, edgecolors='black', linewidths=2)
    demand_scatter = map_ax.scatter(customers['x'], customers['y'],
                                    c=customers['demand'], cmap='viridis',
                                    s=100, alpha=0.7, edgecolors='black', linewidths=0.5)
    # Iterate over the columns rather than iterrows(): iterrows upcasts the
    # mixed int/float row to float, which would label customers "1.0", "2.0", ...
    for cust_id, cx, cy in zip(customers['id'], customers['x'], customers['y']):
        map_ax.annotate(str(int(cust_id)), (cx, cy),
                        fontsize=7, ha='center', va='center')
    map_ax.set_xlabel('X Coordinate')
    map_ax.set_ylabel('Y Coordinate')
    map_ax.set_title('Customer Locations (colored by demand)')
    map_ax.legend()
    map_ax.grid(True, alpha=0.3)
    fig.colorbar(demand_scatter, ax=map_ax, label='Demand')

    # --- Right panel: time windows ordered by opening time --------------
    by_ready = customers.sort_values('ready_time')
    y_pos = np.arange(len(by_ready))
    ready = by_ready['ready_time'].to_numpy()
    widths = by_ready['due_time'].to_numpy() - ready

    # One vectorised call per artist type instead of one call per customer.
    window_ax.barh(y_pos, widths, left=ready, height=0.8,
                   alpha=0.6, color='skyblue', edgecolor='black', linewidth=0.5)
    window_ax.scatter(ready + widths / 2, y_pos, c='red', s=30, zorder=5)
    window_ax.set_yticks(y_pos)
    window_ax.set_yticklabels(by_ready['id'].tolist())
    window_ax.set_xlabel('Time (minutes)')
    window_ax.set_ylabel('Customer ID')
    window_ax.set_title('Time Windows (red dot = mid-point)')
    window_ax.grid(True, alpha=0.3, axis='x')
    # The end of the day is the depot's own due time, not a fixed 480: the
    # depot row carries the actual closing time of this dataset.
    window_ax.axvline(x=depot['due_time'], color='red', linestyle='--',
                      alpha=0.5, label='End of day')
    window_ax.legend()

    fig.tight_layout()
    fig.savefig(filename, dpi=300, bbox_inches='tight')
    print(f"\n Visualization saved to {filename}")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [9]:
def generate_multiple_datasets(basic_customers=40,
      basic_vehicles=5,
      intermediate_customers=100,
      intermediate_vehicles=10,
      advanced_customers=250,
      advanced_vehicles=25):
    """Build the easy, medium and hard VRPTW datasets and persist each one.

    For every tier, in the order easy -> medium -> hard, this prints a
    heading, calls ``generate_vrptw_dataset`` with the tier's customer count,
    vehicle count, vehicle capacity and seed, writes the node table to
    ``vrptw_<tier>.csv`` and the fleet info to ``vrptw_<tier>_info.csv``, and
    renders ``vrptw_<tier>_visualization.png`` via ``visualize_dataset``.

    Returns a dict mapping ``'easy'``, ``'medium'`` and ``'hard'`` to the
    ``(DataFrame, vehicle_info)`` pair produced for that tier.
    """
    # (tier key, printed heading, customers, vehicles, vehicle capacity, seed)
    tiers = (
        ('easy', "Basic dataset: \n", basic_customers, basic_vehicles, 200, 42),
        ('medium', "Intermediate dataset: \n", intermediate_customers, intermediate_vehicles, 150, 123),
        ('hard', "Advanced dataset: \n", advanced_customers, advanced_vehicles, 150, 456),
    )

    datasets = {}
    for tier, heading, customer_count, vehicle_count, capacity, tier_seed in tiers:
        print(heading)
        nodes, fleet = generate_vrptw_dataset(
            n_customers=customer_count,
            n_vehicles=vehicle_count,
            vehicle_capacity=capacity,
            seed=tier_seed
        )
        nodes.to_csv(f'vrptw_{tier}.csv', index=False)
        pd.DataFrame([fleet]).to_csv(f'vrptw_{tier}_info.csv', index=False)
        visualize_dataset(nodes, f'vrptw_{tier}_visualization.png')
        datasets[tier] = (nodes, fleet)

    return datasets

