In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import os

# Set random seeds so the stochastic steps in this notebook are repeatable.
# NumPy's global RNG, Python's `random` module and PyTorch each keep their own
# state, so each one is seeded separately with the same value.
np.random.seed(42)
random.seed(42)
# Last expression of the cell: `torch.manual_seed` returns the torch Generator,
# which is why the notebook echoes a `<torch._C.Generator ...>` object below.
torch.manual_seed(42)

<torch._C.Generator at 0x21f1759bcd0>

In [2]:
# Number of synthetic appointment records to generate.
NUM_RECORDS = 10000

# Service-center catalogue. Each tuple below is
# (id, name, latitude, longitude, capacity, avg_technicians) and is expanded
# into a dict keyed by 'id', 'name', 'lat', 'lon', 'capacity', 'avg_technicians'.
service_centers = [
    {
        'id': center_id,
        'name': center_name,
        'lat': latitude,
        'lon': longitude,
        'capacity': capacity,
        'avg_technicians': technicians,
    }
    for center_id, center_name, latitude, longitude, capacity, technicians in (
        ('SC001', 'Downtown Center', 22.7196, 75.8577, 15, 8),
        ('SC002', 'North Hub', 22.7532, 75.8937, 20, 12),
        ('SC003', 'South Station', 22.6843, 75.8734, 12, 6),
        ('SC004', 'East Point', 22.7289, 75.9125, 18, 10),
        ('SC005', 'West Zone', 22.7156, 75.8234, 10, 5),
    )
]

# Service catalogue. Each tuple below is (type, duration, urgency_weight) and is
# expanded into a dict keyed by 'type', 'duration', 'urgency_weight'.
# 'duration' is reported downstream as 'estimated_service_duration_mins';
# 'urgency_weight' is multiplied by 100 to form the base urgency score.
service_types = [
    {'type': label, 'duration': duration, 'urgency_weight': weight}
    for label, duration, weight in (
        ('Oil Change', 30, 0.3),
        ('Brake Service', 60, 0.7),
        ('Tire Replacement', 45, 0.5),
        ('Engine Diagnostics', 90, 0.8),
        ('Transmission Repair', 180, 0.9),
        ('Battery Replacement', 20, 0.6),
        ('AC Repair', 75, 0.4),
        ('Suspension Work', 120, 0.7),
        ('Electrical Issues', 90, 0.75),
        ('General Inspection', 30, 0.2),
    )
]

def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points via the haversine formula.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance on a sphere of radius 6371 (Earth's mean radius in km),
        i.e. a value in kilometres.
    """
    earth_radius = 6371

    # Latitudes in radians, plus half the angular separation along each axis.
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    half_dlat = np.radians(lat2 - lat1) / 2
    half_dlon = np.radians(lon2 - lon1) / 2

    # Haversine of the central angle between the two points.
    hav = np.sin(half_dlat)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(half_dlon)**2

    # Central angle recovered with arctan2 rather than arcsin.
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
    return earth_radius * central_angle

def calculate_urgency_score(service_type, vehicle_age, last_service_days, issue_severity):
    """Combine service, vehicle and issue attributes into a noisy urgency score.

    Args:
        service_type: Mapping with an 'urgency_weight' entry; the weight is
            scaled by 100 to form the base score.
        vehicle_age: Contributes ``vehicle_age / 15 * 20``, capped at 20.
        last_service_days: Contributes ``last_service_days / 365 * 25``,
            capped at 25.
        issue_severity: Added to the score unchanged.

    Returns:
        The sum of the components plus one draw from
        ``np.random.uniform(-5, 5)``, clamped to the range [0, 100].
    """
    score = service_type['urgency_weight'] * 100
    score += min(vehicle_age / 15 * 20, 20)
    score += min(last_service_days / 365 * 25, 25)
    score += issue_severity

    # Random jitter so identical inputs do not always yield identical scores.
    score += np.random.uniform(-5, 5)

    # Clamp to the [0, 100] scale.
    if score < 0:
        return 0
    if score > 100:
        return 100
    return score

def calculate_traffic_factor(hour, day_of_week):
    """Return a travel-time multiplier for the given hour and day of the week.

    Args:
        hour: Hour of the day. Hours 8-10 and 17-19 (inclusive) get a peak
            factor of 1.5, hours 11-16 get 1.2, and any other value gets 1.0.
        day_of_week: Day index; values below 5 get a day factor of 1.3,
            everything else 1.0. The caller passes ``datetime.weekday()``,
            so values below 5 are Monday through Friday.

    Returns:
        The product of the peak factor and the day factor.
    """
    rush_hour = 8 <= hour <= 10 or 17 <= hour <= 19
    midday = 11 <= hour <= 16
    peak = 1.5 if rush_hour else (1.2 if midday else 1.0)

    if day_of_week < 5:
        return peak * 1.3
    return peak * 1.0

def generate_appointment_data(num_records):
    """Generate a synthetic appointment-scheduling dataset.

    Each record simulates one service request: a random timestamp (a day
    offset of 0-364 from 2024-01-01, hour 6-19, minute in {0, 15, 30, 45}),
    a random customer location, random vehicle attributes, a randomly chosen
    service type and a derived urgency score. The service center closest to
    the customer (by haversine distance) is used as the primary center, and
    random load figures are drawn for it.

    Relies on the module-level ``service_centers`` and ``service_types`` lists
    and on ``haversine_distance``, ``calculate_urgency_score`` and
    ``calculate_traffic_factor``. Draws from both NumPy's global RNG and
    Python's ``random`` module, so both must be seeded for repeatable output.

    Args:
        num_records: Number of rows to generate.

    Returns:
        pandas.DataFrame with one row per appointment and the columns
        appointment_id, vehicle_urgency_score, distance_to_primary_center_km,
        estimated_travel_time_mins, estimated_service_duration_mins,
        center_current_utilization_pct, available_technicians, queue_length,
        estimated_wait_time_mins, hour_of_day, day_of_week, service_center_id
        and optimal_slot_offset_mins. ``service_center_id`` is the positional
        index of the primary center in ``service_centers``, not its 'SC...'
        id string.
    """
    # Speed used to turn distance into travel time; loop-invariant, so it is
    # set once here. With distances in km and the *60 below this acts as km/h.
    avg_speed = 30
    start_date = datetime(2024, 1, 1)
    data = []

    for i in range(num_records):
        # NOTE: the order of the random draws in this loop is deliberately
        # unchanged so that a given seed keeps producing the same dataset.
        days_offset = int(np.random.randint(0, 365))
        hour = int(np.random.randint(6, 20))
        minute = int(np.random.choice([0, 15, 30, 45]))
        timestamp = start_date + timedelta(days=days_offset, hours=hour, minutes=minute)

        customer_lat = np.random.uniform(22.65, 22.80)
        customer_lon = np.random.uniform(75.80, 75.95)
        vehicle_age = np.random.randint(0, 20)
        last_service_days = np.random.randint(30, 730)
        issue_severity = np.random.uniform(0, 35)
        service = random.choice(service_types)
        urgency_score = calculate_urgency_score(service, vehicle_age, last_service_days, issue_severity)

        traffic_factor = calculate_traffic_factor(timestamp.hour, timestamp.weekday())

        # Only the nearest center is ever used, so a single argmin replaces
        # the full sort, and the travel time is computed for that center only.
        distances = [
            haversine_distance(customer_lat, customer_lon, sc['lat'], sc['lon'])
            for sc in service_centers
        ]
        primary_idx = np.argmin(distances)
        primary_sc = service_centers[primary_idx]
        primary_distance = distances[primary_idx]
        primary_travel_time = (primary_distance / avg_speed) * 60 * traffic_factor

        current_utilization = np.random.uniform(0.3, 0.95)
        base_available_techs = primary_sc['avg_technicians']
        # Utilization reduces technician availability at 70% strength, and at
        # least one technician is always reported.
        available_technicians = max(1, int(base_available_techs * (1 - current_utilization * 0.7)))
        # Queue is drawn from [0, capacity * 0.5). The exclusive upper bound
        # is kept >= 1 so that a center with capacity < 2 yields an empty
        # queue instead of making randint raise ValueError.
        queue_upper = max(1, int(primary_sc['capacity'] * 0.5))
        queue_length = np.random.randint(0, queue_upper)
        estimated_wait_time = queue_length * 15

        # Target slot offset: the more urgent the request, the sooner the slot.
        if urgency_score > 80:
            optimal_slot_offset = np.random.randint(0, 120)
        elif urgency_score > 60:
            optimal_slot_offset = np.random.randint(120, 480)
        else:
            optimal_slot_offset = np.random.randint(480, 2880)

        record = {
            'appointment_id': f'APT{i+1:06d}',
            'vehicle_urgency_score': round(urgency_score, 2),
            'distance_to_primary_center_km': round(primary_distance, 2),
            'estimated_travel_time_mins': round(primary_travel_time, 2),
            'estimated_service_duration_mins': service['duration'],
            'center_current_utilization_pct': round(current_utilization * 100, 2),
            'available_technicians': available_technicians,
            'queue_length': queue_length,
            'estimated_wait_time_mins': estimated_wait_time,
            'hour_of_day': timestamp.hour,
            'day_of_week': timestamp.weekday(),
            'service_center_id': primary_idx,
            'optimal_slot_offset_mins': optimal_slot_offset
        }
        data.append(record)

    return pd.DataFrame(data)

# Three-line banner: rule, title, rule.
print(
    "="*70,
    "APPOINTMENT SCHEDULING OPTIMIZATION WITH DEEP Q-NETWORK (DQN)",
    "="*70,
    sep="\n",
)


# Step 1 of 5: build the synthetic dataset and write it to CSV under ./data.
print("\n[1/5] Generating appointment scheduling dataset...")
df = generate_appointment_data(NUM_RECORDS)

# Create the output directory if it does not exist yet, then save without the index column.
os.makedirs("data", exist_ok=True)
df.to_csv("data/appointment_scheduling_data.csv", index=False)

print(f"✓ Generated {len(df)} records")



APPOINTMENT SCHEDULING OPTIMIZATION WITH DEEP Q-NETWORK (DQN)

[1/5] Generating appointment scheduling dataset...
✓ Generated 10000 records
