<a href="https://colab.research.google.com/github/Nimchumba/DATA-SCIENCE-ASSIGNMENT-2/blob/main/Data_Science_Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# ==========================
# LINEAR REGRESSION MODEL FOR ROAD ACCIDENT ANALYSIS
# ==========================


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib


# --- Synthetic accident data ------------------------------------------------
# Seed NumPy's global RNG so the same 100 rows are generated on every run.
np.random.seed(42)
n = 100

# Draw the predictors one at a time. Keep this order: each call consumes the
# seeded global RNG stream, so reordering the draws changes the dataset.
vehicle_speed = np.random.randint(30, 140, n)        # integers 30..139 (upper bound exclusive)
weather_condition = np.random.choice([1, 2, 3], n)   # 1=Clear, 2=Rainy, 3=Foggy
road_surface = np.random.choice([1, 2, 3], n)        # 1=Dry, 2=Wet, 3=Icy
light_condition = np.random.choice([1, 2, 3], n)     # 1=Daylight, 2=Night with lights, 3=Night no lights
driver_age = np.random.randint(18, 65, n)            # integers 18..64
vehicle_age = np.random.randint(1, 15, n)            # integers 1..14

data = pd.DataFrame({
    'Vehicle_Speed': vehicle_speed,
    'Weather_Condition': weather_condition,
    'Road_Surface': road_surface,
    'Light_Condition': light_condition,
    'Driver_Age': driver_age,
    'Vehicle_Age': vehicle_age,
})

# Simulated target: a continuous severity score (higher = more severe), not a
# discrete class label. It is a weighted sum of all six predictors plus
# Gaussian noise (mean 0, std 0.2). Every weight is positive, so the score
# grows with speed, worse weather/road/light codes, driver age and vehicle age.
severity_weights = {
    'Vehicle_Speed': 0.01,
    'Weather_Condition': 0.3,
    'Road_Surface': 0.25,
    'Light_Condition': 0.2,
    'Driver_Age': 0.005,
    'Vehicle_Age': 0.02,
}
severity_noise = np.random.normal(0, 0.2, n)

# Terms are accumulated in the column order above, then the noise is added.
raw_severity = sum(
    weight * data[column] for column, weight in severity_weights.items()
) + severity_noise

# Cap the score to the interval [0, 3]. np.clip does not rescale: anything
# outside the interval is set to the nearest bound. The noiseless sum ranges
# from about 1.16 to about 4.24, so high scores are flattened to exactly 3.
# NOTE(review): this saturation makes the target non-linear in the predictors;
# confirm that capping (rather than min-max scaling) is what is intended.
data['Accident_Severity'] = np.clip(raw_severity, 0, 3)


# Write the generated rows to a CSV file in the working directory (the row
# index is left out), then confirm on stdout.
data.to_csv(path_or_buf='road_accidents.csv', index=False)
print("✅ Dataset 'road_accidents.csv' created successfully!")


# Predictors: the six simulated input columns; target: the severity score.
feature_columns = [
    'Vehicle_Speed',
    'Weather_Condition',
    'Road_Surface',
    'Light_Condition',
    'Driver_Age',
    'Vehicle_Age',
]
X = data[feature_columns]
y = data['Accident_Severity']

# Hold out 20% of the rows for evaluation; random_state pins the shuffle so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a linear regression on the training rows (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Score the model on the held-out rows only.
y_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)

print("\nModel Evaluation:")
print("Mean Squared Error:", round(test_mse, 3))
print("R² Score:", round(test_r2, 3))


# Serialize the fitted estimator to disk so it can be restored later with
# joblib.load. Only load such files from sources you trust.
joblib.dump(value=model, filename='accident_severity_model.pkl')
print("\n✅ Model saved as 'accident_severity_model.pkl'")


# Score one hand-written scenario with the fitted model.
# Each value is keyed by its feature name, and the column order is taken from
# the training frame X rather than from a second hand-typed list. The sample
# therefore cannot drift from the schema the model was fitted on: a renamed or
# missing feature shows up as a NaN column (which predict() rejects) instead
# of a value silently landing in the wrong column.
sample_values = {
    'Vehicle_Speed': 100,
    'Weather_Condition': 2,   # 2=Rainy
    'Road_Surface': 2,        # 2=Wet
    'Light_Condition': 3,     # 3=Night no lights
    'Driver_Age': 25,
    'Vehicle_Age': 8,
}
sample = pd.DataFrame([sample_values], columns=X.columns)

# predict() returns one value per input row; the sample has a single row.
prediction = model.predict(sample)
print("\nExample Prediction for sample:")
print(sample)
print("Predicted Accident Severity:", round(prediction[0], 2))


✅ Dataset 'road_accidents.csv' created successfully!

Model Evaluation:
Mean Squared Error: 0.035
R² Score: 0.623

✅ Model saved as 'accident_severity_model.pkl'

Example Prediction for sample:
   Vehicle_Speed  Weather_Condition  Road_Surface  Light_Condition  \
0            100                  2             2                3   

   Driver_Age  Vehicle_Age  
0          25            8  
Predicted Accident Severity: 2.86
