In [1]:
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

# -------------------------------
# CONFIGURATION
# -------------------------------
start_date = datetime(2025, 10, 27)
end_date = datetime(2025, 10, 28)

time_slots = ["Morning", "Afternoon", "Evening", "Night"]

# Base congestion levels inspired by TomTom Bengaluru data
base_congestion = {
    "Morning": 50,      # heavy
    "Afternoon": 60,    # moderate-heavy
    "Evening": 75,      # peak
    "Night": 20         # low
}

# Usual congestion levels (reference average)
usual_congestion = {
    "Morning": 45,
    "Afternoon": 50,
    "Evening": 65,
    "Night": 25
}

# Base speeds (in km/h)
base_speed = {
    "Morning": 18,
    "Afternoon": 15,
    "Evening": 12,
    "Night": 30
}

# -------------------------------
# DATA GENERATION
# -------------------------------
rows = []
current_date = start_date

while current_date <= end_date:
    for slot in time_slots:
        # Add realistic fluctuations
        live_congestion = np.clip(
            np.random.normal(base_congestion[slot], 5), 5, 90
        )
        usual = usual_congestion[slot]
        live_speed = np.clip(
            np.random.normal(base_speed[slot], 2), 5, 40
        )
        delay = round(((100 / live_speed) * live_congestion / 100) * 2, 1)

        # Accident risk based on congestion + speed
        if live_congestion > 70 or live_speed < 12:
            risk = "High"
        elif live_congestion > 40:
            risk = "Medium"
        else:
            risk = "Low"

        rows.append({
            "Date": current_date.strftime("%Y-%m-%d"),
            "Time_Slot": slot,
            "Live_Congestion_Level(%)": round(live_congestion, 1),
            "Usual_Congestion_Level(%)": usual,
            "Live_Speed_kmph": round(live_speed, 1),
            "Delay_Minutes_per_10km": delay,
            "Accident_Risk_Level": risk
        })

    current_date += timedelta(days=1)

# -------------------------------
# SAVE TO CSV
# -------------------------------
# Build the frame once from the accumulated row dicts.
df = pd.DataFrame(rows)

# to_csv does not create missing directories, so make sure the target
# folder exists; otherwise a fresh checkout without ../data fails here.
output_path = Path("../data/traffic_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

print("✅ traffic_data.csv created successfully!")
print(f"Rows: {len(df)}")
print(df.head())


✅ traffic_data.csv created successfully!
Rows: 8
         Date  Time_Slot  Live_Congestion_Level(%)  Usual_Congestion_Level(%)  \
0  2025-10-27    Morning                      57.7                         45   
1  2025-10-27  Afternoon                      54.3                         50   
2  2025-10-27    Evening                      73.1                         65   
3  2025-10-27      Night                      28.2                         25   
4  2025-10-28    Morning                      52.2                         45   

   Live_Speed_kmph  Delay_Minutes_per_10km Accident_Risk_Level  
0             15.3                     7.5              Medium  
1             13.0                     8.4              Medium  
2             12.0                    12.2                High  
3             31.3                     1.8                 Low  
4             14.9                     7.0              Medium  
