<a href="https://colab.research.google.com/github/Sharo-aiml/Sharo/blob/main/Untitled19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# Synthetic data generator
def generate_data(samples=500, seed=None):
    """Generate synthetic driving records labelled with an accident outcome.

    Each record has the shape ``[[weather, road, light, speed, traffic], accident]``
    where ``weather``, ``road`` and ``light`` are category strings, ``speed`` is an
    integer in ``[30, 120]``, ``traffic`` is a float in ``[0.0, 1.0)`` and
    ``accident`` is ``1`` or ``0``.

    Args:
        samples: Number of records to generate.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is used,
            so the output is reproducible and the global ``random`` state is left
            untouched. When ``None`` (the default) the module-level ``random``
            generator is used.

    Returns:
        A list with ``samples`` records in generation order.
    """
    # Both the module and a Random instance expose choice/randint/random, so the
    # loop below is identical for the seeded and unseeded paths.
    rng = random if seed is None else random.Random(seed)

    data = []
    for _ in range(samples):
        weather = rng.choice(['sunny', 'rainy', 'foggy'])
        road = rng.choice(['highway', 'city', 'rural'])
        light = rng.choice(['day', 'night-lit', 'night-dark'])
        speed = rng.randint(30, 120)
        traffic = rng.random()

        # Simple risk logic: start from a 5% base rate and multiply it for each
        # aggravating condition. With every multiplier applied the risk is about
        # 0.9, so it always stays a valid probability. Speed does not affect it.
        risk = 0.05
        if weather != 'sunny':
            risk *= 2
        if road == 'city':
            risk *= 1.5
        if light == 'night-dark':
            risk *= 3
        if traffic > 0.7:
            risk *= 2

        # Bernoulli draw: an accident happens with probability `risk`.
        accident = 1 if rng.random() < risk else 0
        data.append([[weather, road, light, speed, traffic], accident])
    return data

# Convert text categories to numbers
def prepare_features(data):
    """Encode categorical record fields as integer codes.

    Args:
        data: Iterable of ``[features, label]`` pairs, where ``features`` holds
            weather, road, light, speed and traffic at positions 0-4.

    Returns:
        A tuple ``(X, y)``: ``X`` is a list of
        ``[weather_code, road_code, light_code, speed, traffic]`` rows and ``y``
        is the list of labels, both in input order.

    Raises:
        ValueError: If a category string is not one of the known values.
    """
    # A category's code is its position in the corresponding list.
    weather_levels = ['sunny', 'rainy', 'foggy']
    road_levels = ['highway', 'city', 'rural']
    light_levels = ['day', 'night-lit', 'night-dark']

    # Single pass over `data`, so one-shot iterables are consumed only once.
    encoded_pairs = [
        (
            [
                weather_levels.index(raw[0]),
                road_levels.index(raw[1]),
                light_levels.index(raw[2]),
                raw[3],
                raw[4],
            ],
            target,
        )
        for raw, target in data
    ]

    rows = [row for row, _ in encoded_pairs]
    targets = [target for _, target in encoded_pairs]
    return rows, targets

# Generate and prepare data
data = generate_data(1000) # Larger dataset for better stats
X, y = prepare_features(data)

# Split data into train and test sets (70% train / 30% test, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train model
# An unconstrained tree keeps splitting until its leaves are pure; because
# `traffic` is a continuous value it can, in practice, always separate samples.
# predict_proba would then only ever return 0 or 1, which makes the "accident
# risk" percentage meaningless. Requiring a minimum leaf size keeps several
# samples per leaf so the leaf's class fraction is a usable risk estimate.
model = DecisionTreeClassifier(min_samples_leaf=20, random_state=42)
model.fit(X_train, y_train)

# Prediction function
def predict_accident(weather, road, light, speed, traffic):
    """Return the model's accident probability for one scenario as text.

    Args:
        weather: One of ``'sunny'``, ``'rainy'``, ``'foggy'``.
        road: One of ``'highway'``, ``'city'``, ``'rural'``.
        light: One of ``'day'``, ``'night-lit'``, ``'night-dark'``.
        speed: Vehicle speed, passed to the model unchanged.
        traffic: Traffic density, passed to the model unchanged.

    Returns:
        A string such as ``"25% accident risk"``.

    Raises:
        ValueError: If a category string is not one of the known values.
    """
    # Same position-based encoding that prepare_features applies to training rows.
    weather_code = ['sunny', 'rainy', 'foggy'].index(weather)
    road_code = ['highway', 'city', 'rural'].index(road)
    light_code = ['day', 'night-lit', 'night-dark'].index(light)

    class_probs = model.predict_proba(
        [[weather_code, road_code, light_code, speed, traffic]]
    )[0]

    # predict_proba columns follow model.classes_. Look the accident class (1)
    # up there instead of assuming it is column 1: a model fitted on data with
    # no accidents has a single column, and indexing [1] would raise IndexError.
    known_classes = list(model.classes_)
    prob = class_probs[known_classes.index(1)] if 1 in known_classes else 0.0
    return f"{prob:.0%} accident risk"

# Example usage
print("\nExample Predictions:")
print(predict_accident('rainy', 'city', 'night-dark', 80, 0.8)) # High risk
print(predict_accident('sunny', 'highway', 'day', 80, 0.8))

# Statistical Analysis and Visualizations
print("\n=== Data Statistics ===")
# Labels are 0/1, so their mean is the fraction of records with an accident.
accident_rate = sum(y) / len(y)
print(f"Overall accident rate in dataset: {accident_rate:.2%}")

# Feature distributions
features = np.array(X)
plt.figure(figsize=(15, 10))

# Weather distribution
plt.subplot(2, 3, 1)
weather_labels = ['sunny', 'rainy', 'foggy']
# Column 0 holds the weather code, i.e. the index into weather_labels.
# count_nonzero tallies the boolean mask inside NumPy instead of iterating it
# element by element with the builtin sum().
weather_counts = [np.count_nonzero(features[:, 0] == i) for i in range(3)]
plt.bar(weather_labels, weather_counts)
plt.title('count')
plt.show()
