# In [None]:
import osmnx as ox
import networkx as nx
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
from shapely.geometry import Point, LineString

# Load the road graph for Denver, Colorado, restricted to the "drive" network
# type. It is built once at module level and passed to every routing call
# made by the simulation below.
G = ox.graph_from_place("Denver, Colorado, USA", network_type="drive")

# Constants
# Number of trucks simulated by the driver loop at the bottom of the file.
NUM_TRUCKS = 100
# Exact number of ping records generated for each truck.
PINGS_PER_TRUCK = 100

def haversine_distance(a, b):
    """Return the great-circle distance in meters between two points.

    Uses the haversine formula on a sphere with the mean Earth radius.
    Compared with an ellipsoidal geodesic the result can differ by a
    fraction of a percent, which is negligible for spacing pings along a
    route.

    Args:
        a: First point as a ``(latitude, longitude)`` pair in decimal degrees.
        b: Second point, same layout as ``a``.

    Returns:
        The distance in meters as a ``float`` (``0.0`` for identical points).
    """
    earth_radius_m = 6371008.8  # mean Earth radius

    lat1, lon1 = np.radians(a[0]), np.radians(a[1])
    lat2, lon2 = np.radians(b[0]), np.radians(b[1])

    half_dlat = (lat2 - lat1) / 2.0
    half_dlon = (lon2 - lon1) / 2.0
    h = np.sin(half_dlat) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(half_dlon) ** 2

    # Rounding can push h marginally above 1 for antipodal points; clamp it
    # so the square root / arcsine stay in their domain.
    h = min(1.0, float(h))
    return float(2.0 * earth_radius_m * np.arcsin(np.sqrt(h)))

def interpolate_path(coords, num_points):
    """Resample a polyline into ``num_points`` positions evenly spaced by distance.

    Args:
        coords: Sequence of points whose first two items are latitude and
            longitude, in travel order.
        num_points: Number of positions to return.

    Returns:
        A list of ``(latitude, longitude)`` tuples. The list is empty when
        ``coords`` is empty. When the path has a single point, or has zero
        total length, the first point is repeated ``num_points`` times.
    """
    # Nothing to interpolate along; avoid indexing into an empty sequence.
    if not coords:
        return []
    if len(coords) < 2:
        return [coords[0]] * num_points

    # Cumulative distance travelled at each vertex, starting from 0.
    cumulative = [0.0]
    for prev_pt, next_pt in zip(coords, coords[1:]):
        cumulative.append(cumulative[-1] + haversine_distance(prev_pt, next_pt))

    total_dist = cumulative[-1]
    if total_dist == 0:
        # All vertices coincide, so every sample is the same location.
        return [coords[0]] * num_points

    # Sample positions at equal distance steps from the start to the end of
    # the path, interpolating latitude and longitude independently.
    targets = np.linspace(0, total_dist, num_points)
    lats = [pt[0] for pt in coords]
    lons = [pt[1] for pt in coords]
    interp_lats = np.interp(targets, cumulative, lats)
    interp_lons = np.interp(targets, cumulative, lons)
    return list(zip(interp_lats, interp_lons))

def generate_route(G, start, end):
    """Return the shortest driving route between two points.

    Each endpoint is snapped to its nearest graph node and the two nodes are
    connected with the shortest path weighted by edge ``length``.

    Args:
        G: Road graph whose nodes carry ``x`` (longitude) and ``y``
            (latitude) attributes.
        start: Origin as a ``(latitude, longitude)`` pair.
        end: Destination as a ``(latitude, longitude)`` pair.

    Returns:
        The route as a list of ``(latitude, longitude)`` tuples, one per
        node, or an empty list when the two nodes are not connected.
        Any other failure (for example a broken graph or a missing optional
        dependency of the nearest-node search) is allowed to propagate so
        that callers do not retry forever on an error that cannot resolve.
    """
    # nearest_nodes takes X (longitude) before Y (latitude).
    orig = ox.nearest_nodes(G, start[1], start[0])
    dest = ox.nearest_nodes(G, end[1], end[0])
    try:
        route = nx.shortest_path(G, orig, dest, weight="length")
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # An unroutable pair is an expected outcome, not an error.
        return []
    return [(G.nodes[n]["y"], G.nodes[n]["x"]) for n in route]

def _choose_endpoints(truck_type):
    """Pick the (start, end) pair of (lat, lon) points for one trip of ``truck_type``."""
    if truck_type == "fixed-route":
        # Always the same origin and destination (approximate Denver locations).
        return (39.75, -105.00), (39.78, -104.95)
    if truck_type == "multi-route":
        start = random.choice([(39.75, -105.00), (39.74, -104.98)])
        end = random.choice([(39.78, -104.95), (39.79, -104.96)])
        return start, end
    # Every other type is treated as "variable-schedule": random endpoints
    # inside a fixed bounding box.
    start = (random.uniform(39.70, 39.85), random.uniform(-105.05, -104.90))
    end = (random.uniform(39.70, 39.85), random.uniform(-105.05, -104.90))
    return start, end


def simulate_truck(vin, truck_type):
    """Generate exactly ``PINGS_PER_TRUCK`` telematics records for one truck.

    The truck drives a sequence of routes. Each route contributes between 20
    and 40 pings (fewer for the last route, so the total is exact), spaced
    20 seconds apart, with a 5 minute pause between routes. The clock starts
    at 2024-01-01 08:00:00.

    Anomalies are injected only at interior pings of a route (never the first
    or last). At least one anomaly is injected per route whenever an interior
    ping exists, so the effective share is higher than ``anomaly_ratio`` for
    short routes. Anomaly kinds:

    * ``route-deviation``: position is offset by 0.03-0.05 degrees on each axis.
    * ``stop-anomaly``: the ping is delayed by 5-10 minutes.
    * ``speed-anomaly``: the ping arrives 10-19 seconds early, i.e. the gap
      to the previous ping shrinks but the clock never moves backwards.

    Args:
        vin: Identifier stored in every record and used in ``route_id``.
        truck_type: ``"fixed-route"``, ``"multi-route"``; any other value is
            treated as ``"variable-schedule"``.

    Returns:
        A list of dicts with keys ``vin``, ``timestamp`` (ISO 8601 string),
        ``latitude``, ``longitude``, ``truck_type``, ``route_id``,
        ``is_anomaly`` and ``deviation_type``.

    Raises:
        RuntimeError: If no usable route can be produced in 50 consecutive
            attempts (prevents an endless retry loop, e.g. for a fixed-route
            truck whose endpoints are not connected).
    """
    anomaly_ratio = 0.02  # target share of anomalous pings per route
    ping_interval_s = 20  # nominal seconds between consecutive pings
    max_failed_routes = 50  # consecutive routing failures tolerated

    data = []
    total_pings = 0
    route_num = 1
    failed_routes = 0
    timestamp = datetime(2024, 1, 1, 8, 0, 0)

    while total_pings < PINGS_PER_TRUCK:
        start, end = _choose_endpoints(truck_type)

        full_route = generate_route(G, start, end)
        interp_coords = []
        if full_route:
            remaining_pings = PINGS_PER_TRUCK - total_pings
            num_points = min(random.randint(20, 40), remaining_pings)
            interp_coords = interpolate_path(full_route, num_points)

        # A single-ping batch is valid (it happens when exactly one ping is
        # left to emit); only a batch with no pings at all is retried.
        if not interp_coords:
            failed_routes += 1
            if failed_routes >= max_failed_routes:
                raise RuntimeError(
                    f"No usable route for {vin} after "
                    f"{max_failed_routes} consecutive attempts"
                )
            continue
        failed_routes = 0

        # Anomalies are restricted to interior pings so that every route
        # starts and ends on a normal record.
        batch_size = len(interp_coords)
        interior = range(1, batch_size - 1)
        num_anomalies = min(len(interior), max(1, int(batch_size * anomaly_ratio)))
        anomaly_indices = set(random.sample(interior, k=num_anomalies))

        for i, (lat, lon) in enumerate(interp_coords):
            is_anomaly = i in anomaly_indices
            deviation_type = "none"
            if is_anomaly:
                deviation_type = random.choice(
                    ["route-deviation", "stop-anomaly", "speed-anomaly"]
                )
                if deviation_type == "route-deviation":
                    # Offset location by ~0.03-0.05 deg (~3-5km)
                    lat += random.uniform(0.03, 0.05) * random.choice([-1, 1])
                    lon += random.uniform(0.03, 0.05) * random.choice([-1, 1])
                elif deviation_type == "stop-anomaly":
                    timestamp += timedelta(minutes=random.randint(5, 10))
                elif deviation_type == "speed-anomaly":
                    # Shorten the gap to the previous ping, but by less than
                    # the ping interval so timestamps stay strictly increasing.
                    timestamp -= timedelta(
                        seconds=random.randint(10, ping_interval_s - 1)
                    )

            data.append(
                {
                    "vin": vin,
                    "timestamp": timestamp.isoformat(),
                    "latitude": lat,
                    "longitude": lon,
                    "truck_type": truck_type,
                    "route_id": f"R_{vin}_{route_num}",
                    "is_anomaly": is_anomaly,
                    "deviation_type": deviation_type,
                }
            )
            timestamp += timedelta(seconds=ping_interval_s)

        total_pings += batch_size
        route_num += 1
        # Idle time between the end of one route and the start of the next.
        timestamp += timedelta(minutes=5)

    return data

# Run the simulation for the whole fleet: each truck gets a sequential
# identifier and a randomly assigned behaviour profile, and its pings are
# appended to one flat list of records.
all_records = []
for truck_number in range(NUM_TRUCKS):
    vin = f"TRUCK_{truck_number + 1:03d}"
    truck_type = random.choice(["fixed-route", "multi-route", "variable-schedule"])
    all_records += simulate_truck(vin, truck_type)

# Persist every record as a single CSV file (no index column).
df = pd.DataFrame(all_records)
df.to_csv("telematics_simulation_denver.csv", index=False)
print("Simulation complete and saved to telematics_simulation_denver.csv")
