# Modeling Notebook
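This notebook trains and evaluates several classifiers (random forest, logistic regression, XGBoost, a neural network, and a soft-voting ensemble) that predict the accident type from the hour, latitude, and longitude of each record, and reports the test-set accuracy of each model.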

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder

# Columns used for modelling: three predictors and the label to be predicted.
feature_cols = ["Hour", "Latitude", "Longitude"]
target_col = "Accident Type"

# Read the processed accident records and discard any row that is missing a
# value in one of the modelling columns.
df = pd.read_csv("../data/processed_accidents.csv").dropna(
    subset=[*feature_cols, target_col]
)

# Feature matrix and raw (un-encoded) target labels.
X = df[feature_cols]
y_raw = df[target_col]

# Convert the target labels to integer class codes; the fitted encoder is
# kept in `le` so the codes can be mapped back to the original labels.
le = LabelEncoder()
y = le.fit_transform(y_raw)

# Train/test split
# Hold out the test set BEFORE oversampling. Resampling the full dataset first
# lets synthetic points interpolated from test rows leak into the training data
# (and puts synthetic rows in the test set), which inflates every accuracy
# reported further down. Stratifying keeps the class proportions of `y` the
# same in both splits.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# SMOTE resampling (training split only)
# SMOTE interpolates between a sample and its k nearest same-class neighbours,
# so a class needs more than `k_neighbors` samples. The split can shrink rare
# classes, so cap the neighbourhood size by the rarest training class.
smallest_class = int(pd.Series(y_train_raw).value_counts().min())
if smallest_class < 2:
    raise ValueError(
        "SMOTE needs at least 2 training samples per class; "
        f"the rarest class has {smallest_class}."
    )
smote = SMOTE(random_state=42, k_neighbors=min(2, smallest_class - 1))
X_resampled, y_resampled = smote.fit_resample(X_train_raw, y_train_raw)

# Models are fitted on the balanced training data and scored on the untouched
# (real, non-synthetic) test rows.
X_train, y_train = X_resampled, y_resampled

# Models
def _holdout_accuracy(model):
    """Fit *model* on the training split and return its test-split accuracy.

    Reads the module-level X_train / y_train / X_test / y_test produced by the
    train/test split; the estimator is fitted in place.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return accuracy_score(y_test, predictions)


# Random forest: 200 trees, each limited to depth 10.
rf = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
rf_acc = _holdout_accuracy(rf)

# Logistic regression with a raised iteration cap.
log_reg = LogisticRegression(max_iter=1000)
log_acc = _holdout_accuracy(log_reg)

# Gradient-boosted trees.
xgb = XGBClassifier(
    learning_rate=0.1,
    n_estimators=150,
    max_depth=6,
    random_state=42,
)
xgb_acc = _holdout_accuracy(xgb)

# Multi-layer perceptron with two hidden layers (128 and 64 units).
nn = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=300, random_state=42)
nn_acc = _holdout_accuracy(nn)

# Soft-voting ensemble over the forest, the boosted trees and the logistic
# regression (the neural network is not part of the vote).
voting_members = [("rf", rf), ("xgb", xgb), ("log_reg", log_reg)]
ensemble = VotingClassifier(estimators=voting_members, voting="soft")
ensemble_acc = _holdout_accuracy(ensemble)

# Output results
# Each row pairs a pre-padded label with the accuracy it reports, so the
# numbers line up in a single column when printed.
summary_rows = (
    ("Random Forest Accuracy:        ", rf_acc),
    ("Logistic Regression Accuracy:  ", log_acc),
    ("XGBoost Accuracy:              ", xgb_acc),
    ("Neural Network Accuracy:       ", nn_acc),
    ("Ensemble Voting Accuracy:      ", ensemble_acc),
)

print("Model Performance Summary:")
for row_label, row_accuracy in summary_rows:
    print(f"{row_label}{row_accuracy:.4f}")