In [None]:
# Synthetic Telemetry Data Generator
#
# Purpose:
# --------
# This notebook generates realistic, fully synthetic vehicle telemetry data
# for use in downstream analytics pipelines.
#
# Key characteristics:
# - Generic and vendor-neutral
# - No proprietary formats or schemas
# - Physically plausible signal behavior
# - Multiple vehicles and trips
#
# IMPORTANT:
# ----------
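# This notebook is intended to be run ONCE: re-running it overwrites the
# existing CSV files, and trip timestamps start at the wall-clock time of
# the run, so regenerated data will not match the previous output.
# The generated CSV outputs are consumed by the analytics pipeline.
# This notebook itself is NOT referenced by downstream processing.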

In [None]:
# Cell 1: Imports, configuration, and project paths
# --------------------------------------------------
# Purpose:
# - Define global constants
# - Set project directory structure
# - Configure telemetry signal ranges
# - Ensure reproducibility

import numpy as np
import pandas as pd
from pathlib import Path
from datetime import timedelta

# Fix the global NumPy seed so every run draws the same random samples
np.random.seed(42)

# ------------------------------------------------------------------
# Signal limits as (min, max) pairs, unpacked into np.clip downstream
# ------------------------------------------------------------------
RPM_RANGE = (800, 6000)            # engine speed
TORQUE_RANGE = (-180, 200)         # load / torque (Nm)
GRADE_RANGE = (-20, 30)            # environmental grade (%)

# ------------------------------------------------------------------
# Output locations (replace the placeholder with a real folder)
# ------------------------------------------------------------------
PROJECT_ROOT = Path(r"YOUR FOLDER PATH")
DATA_DIR = PROJECT_ROOT.joinpath("data")
INCOMING_DIR = DATA_DIR.joinpath("incoming_telemetry")

# Create the whole directory chain up front; a no-op if it already exists
INCOMING_DIR.mkdir(parents=True, exist_ok=True)

print("Project root:", PROJECT_ROOT)
print("Telemetry output directory:", INCOMING_DIR)


In [None]:
# Cell 2: Define vehicle fleet and operating scenarios
# ----------------------------------------------------
# Purpose:
# - Define multiple vehicles
# - Define operating scenarios with distinct behavior
# - Keep naming generic and reusable

# Fleet of five vehicles: Vehicle_01 ... Vehicle_05
VEHICLES = [f"Vehicle_{i:02d}" for i in range(1, 6)]

# One profile per operating scenario. generate_trip() uses rpm_mean/rpm_std
# as the mean and standard deviation of the engine-speed draw, torque_mean
# as the mean of the torque draw, and grade_bias as the mean of the grade draw.
SCENARIOS = {
    "highway": dict(rpm_mean=1800, rpm_std=300, torque_mean=60, grade_bias=2),
    "load_carrier": dict(rpm_mean=2200, rpm_std=400, torque_mean=140, grade_bias=8),
    "metal_carrier": dict(rpm_mean=2000, rpm_std=350, torque_mean=120, grade_bias=12),
    "wood_carrier": dict(rpm_mean=1600, rpm_std=250, torque_mean=80, grade_bias=5),
}

print("Vehicles:", VEHICLES)
print("Scenarios:", list(SCENARIOS))


In [None]:
# Cell 3: Generate a single synthetic telemetry trip
# --------------------------------------------------
# Purpose:
# - Create realistic time-series telemetry
# - Maintain signal correlations
# - Produce analytics-ready data

def generate_trip(
    vehicle_id,
    scenario,
    trip_id,
    duration_minutes=25,
    sample_rate_hz=1,
    start_time=None
):
    """Generate one synthetic telemetry trip as a DataFrame.

    All random draws come from NumPy's global generator, so output is
    repeatable only if the caller seeds it (``np.random.seed``) beforehand.

    Parameters
    ----------
    vehicle_id : str
        Identifier copied into every row and used as the ``trip_id`` prefix.
    scenario : str
        Key into the module-level ``SCENARIOS`` dict; selects the RPM,
        torque and grade distribution parameters.
    trip_id : int
        Trip number, rendered zero-padded to three digits inside the
        ``trip_id`` column (``"<vehicle_id>_trip_NNN"``).
    duration_minutes : int or float, default 25
        Trip length in minutes.
    sample_rate_hz : int or float, default 1
        Samples per second.
    start_time : timestamp-like, optional
        Timestamp of the first sample (anything ``pd.Timestamp`` accepts).
        Defaults to the wall-clock time at the moment of the call.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``timestamp``, ``vehicle_id``,
        ``scenario``, ``trip_id``, ``engine_rpm``, ``torque_nm``,
        ``speed_kmph``, ``grade_pct``, ``distance_m``, ``oil_temp_c``,
        ``coolant_temp_c``, ``current_gear``, ``selected_gear``,
        ``clutch_state`` and ``vibration_ax_g``.

    Raises
    ------
    KeyError
        If ``scenario`` is not a key of ``SCENARIOS``.
    ValueError
        If ``sample_rate_hz`` is not positive or ``duration_minutes`` is
        negative.
    """
    if sample_rate_hz <= 0:
        raise ValueError(f"sample_rate_hz must be positive, got {sample_rate_hz!r}")
    if duration_minutes < 0:
        raise ValueError(f"duration_minutes must be non-negative, got {duration_minutes!r}")

    profile = SCENARIOS[scenario]

    # Sample period in seconds; reused for timestamps, distance and vibration
    dt = 1 / sample_rate_hz

    # Force an integer count so fractional durations/rates are accepted
    n_samples = int(round(duration_minutes * 60 * sample_rate_hz))

    # A Timedelta step keeps the exact sample period; a truncated whole-
    # millisecond string would shorten trips at rates such as 3 Hz and
    # collapse to "0ms" above 1 kHz.
    timestamps = pd.date_range(
        start=pd.Timestamp.now() if start_time is None else pd.Timestamp(start_time),
        periods=n_samples,
        freq=pd.Timedelta(seconds=dt)
    )

    # Engine speed
    engine_rpm = np.clip(
        np.random.normal(profile["rpm_mean"], profile["rpm_std"], n_samples),
        *RPM_RANGE
    )

    # Torque/load (correlated with RPM through the linear RPM-deviation term)
    torque_nm = np.clip(
        np.random.normal(profile["torque_mean"], 40, n_samples)
        + 0.015 * (engine_rpm - profile["rpm_mean"]),
        *TORQUE_RANGE
    )

    # Vehicle speed (derived from RPM plus noise, limited to 0..120 km/h)
    speed_kmph = np.clip(engine_rpm / 40 + np.random.normal(0, 5, n_samples), 0, 120)

    # Environmental grade
    grade_pct = np.clip(
        np.random.normal(profile["grade_bias"], 6, n_samples),
        *GRADE_RANGE
    )

    # Distance integration: km/h -> m/s, times the sample period, accumulated
    distance_m = np.cumsum(speed_kmph * 1000 / 3600 * dt)

    # Thermal behavior (linear in RPM plus measurement noise)
    oil_temp_c = 75 + 0.015 * engine_rpm + np.random.normal(0, 2, n_samples)
    coolant_temp_c = 65 + 0.01 * engine_rpm + np.random.normal(0, 1.5, n_samples)

    # Actuation & state signals
    # randint's upper bound is exclusive, so use n_gears + 1 to make the top
    # gear reachable and keep current/selected gear on the same 1..n_gears range.
    n_gears = 12
    current_state = np.random.randint(1, n_gears + 1, n_samples)
    target_state = np.clip(
        current_state + np.random.choice([-1, 0, 1], n_samples, p=[0.1, 0.8, 0.1]),
        1,
        n_gears
    )

    actuator_state = np.random.choice([0, 1], n_samples, p=[0.15, 0.85])

    # Vibration (low + high frequency mix)
    # NOTE: the 5 Hz and 18 Hz components are only resolved when
    # sample_rate_hz is more than twice their frequency. At the default 1 Hz,
    # t is integer-valued and both sine terms evaluate to ~0, leaving only
    # the noise term.
    t = np.arange(n_samples) * dt
    vibration_ax_g = (
        0.05 * np.sin(2 * np.pi * 5 * t)
        + 0.02 * np.sin(2 * np.pi * 18 * t)
        + np.random.normal(0, 0.02, n_samples)
    )

    df = pd.DataFrame({
        "timestamp": timestamps,
        "vehicle_id": vehicle_id,
        "scenario": scenario,
        "trip_id": f"{vehicle_id}_trip_{trip_id:03d}",

        "engine_rpm": engine_rpm,
        "torque_nm": torque_nm,
        "speed_kmph": speed_kmph,
        "grade_pct": grade_pct,
        "distance_m": distance_m,

        "oil_temp_c": oil_temp_c,
        "coolant_temp_c": coolant_temp_c,

        "current_gear": current_state,
        "selected_gear": target_state,
        "clutch_state": actuator_state,

        "vibration_ax_g": vibration_ax_g
    })

    return df


In [None]:
# Cell 4: Batch generation for all vehicles and scenarios
# ------------------------------------------------------
# Purpose:
# - Generate multiple trips per vehicle
# - Cover multiple operating scenarios
# - Write CSVs to incoming_telemetry directory

TRIPS_PER_SCENARIO = 8

file_count = 0

for vehicle in VEHICLES:
    for scenario_idx, scenario in enumerate(SCENARIOS):
        for trip in range(TRIPS_PER_SCENARIO):
            # Number trips continuously per vehicle. generate_trip() builds
            # the trip_id column from vehicle + trip number only, so restarting
            # the counter for each scenario would give the same trip_id to
            # trips from different scenarios of one vehicle.
            trip_number = scenario_idx * TRIPS_PER_SCENARIO + trip

            df_trip = generate_trip(
                vehicle_id=vehicle,
                scenario=scenario,
                trip_id=trip_number,
                duration_minutes=25,
                sample_rate_hz=1
            )

            # Same trip number in the file name, so name and trip_id column agree
            out_file = INCOMING_DIR / f"{vehicle}_{scenario}_trip_{trip_number:03d}.csv"
            df_trip.to_csv(out_file, index=False)
            file_count += 1

            print("Generated:", out_file.name)

print("\nSynthetic telemetry generation complete.")
print(f"Total files written: {file_count}")


In [None]:
# Cell 5: Sanity checks and preview
# --------------------------------
# Purpose:
# - Verify file creation
# - Inspect signal ranges
# - Ensure downstream compatibility

# glob() yields files in an unspecified order; sort so the same three files
# are previewed on every run and platform.
sample_files = sorted(INCOMING_DIR.glob("*.csv"))[:3]

# Fail loudly if nothing was written, instead of silently previewing nothing
assert sample_files, f"No CSV files found in {INCOMING_DIR}"

for f in sample_files:
    df = pd.read_csv(f)
    print("\nFile:", f.name)
    # Summary statistics for the continuous signals only
    print(df[[
        "engine_rpm",
        "torque_nm",
        "speed_kmph",
        "grade_pct",
        "oil_temp_c",
        "coolant_temp_c"
    ]].describe())
