In [117]:
import pandas as pd
import numpy as np 
import random

In [119]:
def generate_synthetic_data(num_rows = 10000 , seed = 42):
    """Build a synthetic road-accident dataset as a pandas DataFrame.

    Every feature column is drawn independently at random; the target
    column ``accident_risk`` is a weighted sum of a few of those features
    plus Gaussian noise, clipped to [0, 1] and rounded to two decimals.

    Parameters
    ----------
    num_rows : int, default 10000
        Number of rows (samples) to generate.
    seed : int, default 42
        Seed applied to both ``np.random`` and ``random``. Note that this
        reseeds the *global* generators, so the call affects any later
        random draws made elsewhere in the same process.

    Returns
    -------
    pandas.DataFrame
        ``num_rows`` rows with the columns ``road_type``, ``num_lanes``,
        ``curvature``, ``speed_limit``, ``lighting``, ``weather``,
        ``road_signs_present``, ``public_road``, ``time_of_day``,
        ``holiday``, ``school_season``, ``num_reported_accidents`` and
        ``accident_risk``.
    """
    # Reseed the global generators so the same (num_rows, seed) pair always
    # yields the same frame.
    np.random.seed(seed)
    random.seed(seed)

    # The dict literal is evaluated top to bottom, so the order of the keys is
    # also the order of the random draws. Reordering entries changes the data
    # produced for a given seed.
    data = {
        "road_type": np.random.choice(["highway", "urban", "rural"], num_rows),
        # randint's upper bound is exclusive: lanes are 1..4.
        "num_lanes": np.random.randint(1, 5, num_rows),
        "curvature": np.round(np.random.uniform(0.0, 1.0, num_rows), 2),
        "speed_limit": np.random.choice([25, 35, 45, 60, 70], num_rows),
        "lighting": np.random.choice(["daylight", "night", "dim"], num_rows),
        "weather": np.random.choice(["clear", "rainy", "foggy"], num_rows),
        "road_signs_present": np.random.choice([True, False], num_rows),
        "public_road": np.random.choice([True, False], num_rows),
        # Fixed label typo: was "aftenoon".
        "time_of_day": np.random.choice(["morning", "evening", "afternoon"], num_rows),
        "holiday": np.random.choice([True, False], num_rows),
        "school_season": np.random.choice([True, False], num_rows),
        "num_reported_accidents": np.random.poisson(lam=1.5, size=num_rows),
    }

    # Deterministic part of the target. The weights sum to 0.9, so the
    # noise-free risk already lies inside [0, 0.9].
    base_risk = (
        0.3 * data["curvature"]
        + 0.2 * (data["lighting"] == "night").astype(int)
        + 0.1 * (data["weather"] != "clear").astype(int)
        + 0.2 * (data["speed_limit"] >= 60).astype(int)
        + 0.1 * (data["num_reported_accidents"] > 2).astype(int)
    )

    # Gaussian noise (mean 0, std 0.05) can push values slightly outside
    # [0, 1], hence the clip before rounding.
    noise = np.random.normal(0, 0.05, num_rows)
    risk_score = np.clip(base_risk + noise, 0, 1)
    data["accident_risk"] = np.round(risk_score, 2)

    return pd.DataFrame(data)



In [121]:
# Small 2,000-row sample: generate it with the default seed and write it to
# CSV without the positional index column.
df = generate_synthetic_data(num_rows=2_000)
df.to_csv("synthetic_road_accidents_2k.csv", index=False)
print("Data generated and Saved 1 ")

Data generated and Saved 1 


In [123]:
# Large 100,000-row dataset: generate it with the default seed and write it to
# CSV without the positional index column.
df = generate_synthetic_data(num_rows=100_000)
df.to_csv("synthetic_road_accidents_100k.csv", index=False)
print("Data generated and Saved 2 ")

Data generated and Saved 2 


In [125]:
# Medium 10,000-row dataset: generate it with the default seed and write it to
# CSV without the positional index column.
df = generate_synthetic_data(num_rows=10_000)
df.to_csv("synthetic_road_accidents_10k.csv", index=False)
# This is the third file written by the notebook; the message previously
# repeated "Saved 2" from the cell it was copied from.
print("Data generated and Saved 3 ")

Data generated and Saved 2 
