In [4]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, RocCurveDisplay
import numpy as np

# 1. Load data
# Colab-only step: files.upload() opens a browser file picker and returns a
# dict mapping each uploaded file's name to its raw bytes.
import io

from google.colab import files

uploaded = files.upload()
if uploaded:
    # Parse the bytes that were just uploaded rather than a hard-coded path.
    # Colab saves a re-uploaded file under a new name (e.g.
    # "traffic_accidents (1).csv"), so reading "traffic_accidents.csv" from
    # disk could silently load a stale copy from an earlier run.
    # If several files are picked, only the first one is used.
    uploaded_bytes = next(iter(uploaded.values()))
    df = pd.read_csv(io.BytesIO(uploaded_bytes))
else:
    # Nothing was uploaded (dialog cancelled): fall back to a copy that is
    # already present in the working directory.
    df = pd.read_csv("traffic_accidents.csv")
print(df.head())

# 2. Keep only the predictors of interest plus the outcome column
cols = [
    "crash_hour",
    "weather_condition",
    "lighting_condition",
    "trafficway_type",
    "roadway_surface_cond",
    "most_severe_injury",
]

# Any row with a missing value in one of the selected columns is discarded.
df = df[cols].dropna()

# 3. Encode the outcome: replace the text column "most_severe_injury" with
#    integer class ids stored in "target". `label` is kept so the ids can be
#    mapped back to the original names later.
label = LabelEncoder()
df = df.assign(
    target=label.fit_transform(df["most_severe_injury"]),
).drop(columns="most_severe_injury")

# 4. One-hot-encode the categorical columns; drop_first=True omits the first
#    level of each one so the indicator columns are not redundant.
df = pd.get_dummies(df, drop_first=True)

# 5. Train / Test split
# The split is done BEFORE scaling so the scaler is never fitted on rows that
# end up in the test set (fitting on the full data leaks test information
# into preprocessing).
X = df.drop(columns=["target"])
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 6. Scale crash_hour
# Fit the min/max on the training split only, then apply that same mapping to
# both splits. Test values can fall slightly outside [0, 1] if the training
# split happens not to contain the extreme hours.
# NOTE: `df` and `X` keep the raw, unscaled crash_hour; only X_train / X_test
# are scaled.
scaler = MinMaxScaler()
# Explicit copies so the column assignments below write to independent frames
# (guards against pandas' SettingWithCopyWarning on the split results).
X_train = X_train.copy()
X_test = X_test.copy()
X_train[["crash_hour"]] = scaler.fit_transform(X_train[["crash_hour"]])
X_test[["crash_hour"]] = scaler.transform(X_test[["crash_hour"]])

# 7. Model training (Random Forest)
# A 100-tree forest fitted on the training split; random_state is pinned so
# reruns build the same forest. fit() returns the estimator itself, so the
# construction and fitting can be chained.
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
).fit(X_train, y_train)

# 8. Predictions & evaluation
# Predict the held-out rows, then tabulate actual classes (rows) against
# predicted classes (columns).
y_pred = rf.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# 9. Visualisations
## Confusion matrix
# Heat-map of `cm` with one annotated cell per (actual, predicted) pair,
# written to confusion_matrix.png.
fig, ax = plt.subplots()
ax.imshow(cm)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")

# confusion_matrix orders its rows/columns by the sorted class ids that occur
# in y_test or y_pred. Map those ids back to the original injury names so the
# axes show readable classes instead of bare indices.
class_ids = np.union1d(y_test, y_pred)
class_names = label.inverse_transform(class_ids)
tick_positions = range(len(class_names))
ax.set_xticks(tick_positions)
ax.set_xticklabels(class_names, rotation=45, ha="right")
ax.set_yticks(tick_positions)
ax.set_yticklabels(class_names)

# Annotate every cell with its count. With the default colormap low counts
# are dark and high counts are bright, so pick a contrasting text colour.
midpoint = cm.max() / 2
for (i, j), count in np.ndenumerate(cm):
    ax.text(
        j, i, count, ha="center", va="center",
        color="white" if count < midpoint else "black",
    )

# Operate on this figure explicitly rather than on pyplot's "current" figure.
fig.tight_layout()
fig.savefig("confusion_matrix.png")
plt.close(fig)

# ... (rest of the code) ...

Saving traffic_accidents.csv to traffic_accidents.csv
               crash_date traffic_control_device weather_condition  \
0  07/29/2023 01:00:00 PM         TRAFFIC SIGNAL             CLEAR   
1  08/13/2023 12:11:00 AM         TRAFFIC SIGNAL             CLEAR   
2  12/09/2021 10:30:00 AM         TRAFFIC SIGNAL             CLEAR   
3  08/09/2023 07:55:00 PM         TRAFFIC SIGNAL             CLEAR   
4  08/19/2023 02:55:00 PM         TRAFFIC SIGNAL             CLEAR   

       lighting_condition first_crash_type trafficway_type  \
0                DAYLIGHT          TURNING     NOT DIVIDED   
1  DARKNESS, LIGHTED ROAD          TURNING        FOUR WAY   
2                DAYLIGHT         REAR END  T-INTERSECTION   
3                DAYLIGHT            ANGLE        FOUR WAY   
4                DAYLIGHT         REAR END  T-INTERSECTION   

            alignment roadway_surface_cond road_defect  \
0  STRAIGHT AND LEVEL              UNKNOWN     UNKNOWN   
1  STRAIGHT AND LEVEL               