# Model evaluation

### Import required libraries

In [1]:
from sklearn.metrics import accuracy_score, classification_report
import joblib
import pandas as pd

### Load trained models, encoders, vectorizer, and test data

In [2]:
# NOTE: joblib.load unpickles arbitrary Python objects, so these artifacts
# must come from a trusted source.

# Trained classifiers, one per prediction target.
model_primary, model_secondary, model_risk = (
    joblib.load(artifact)
    for artifact in (
        'accident_model_primary.pkl',
        'accident_model_secondary.pkl',
        'accident_model_risk.pkl',
    )
)

# Label encoders; their `classes_` supply the row names in the reports below.
label_encoder_primary, label_encoder_secondary, label_encoder_risk = (
    joblib.load(artifact)
    for artifact in (
        'label_encoder_primary.pkl',
        'label_encoder_secondary.pkl',
        'label_encoder_risk.pkl',
    )
)

# Fitted vectorizer. It is not referenced by the evaluation cells visible in
# this notebook; X_test.csv is read as an already-numeric feature table.
vectorizer = joblib.load('vectorizer.pkl')

# Test features: keep the DataFrame and also expose a plain NumPy array,
# which is what gets passed to the models' predict().
X_test_file = pd.read_csv('X_test.csv')
X_test = X_test_file.to_numpy()

# Ground-truth targets, one CSV per prediction target.
y_test_primary, y_test_secondary, y_test_risk = (
    pd.read_csv(target_csv)
    for target_csv in (
        'y_test_primary.csv',
        'y_test_secondary.csv',
        'y_test_risk.csv',
    )
)

### Evaluate Primary Cause Model

In [3]:
# Predict the primary cause for every row of the test feature matrix.
y_pred_primary = model_primary.predict(X_test)

print("Primary Cause Model Evaluation:")

# Overall fraction of correctly classified rows.
primary_accuracy = accuracy_score(y_test_primary, y_pred_primary)
print("Accuracy:", primary_accuracy)

# Per-class precision / recall / F1, labelled with the encoder's class names.
primary_report = classification_report(
    y_test_primary,
    y_pred_primary,
    target_names=label_encoder_primary.classes_,
)
print(primary_report)


Primary Cause Model Evaluation:
Accuracy: 1.0
                        precision    recall  f1-score   support

    Distracted driving       1.00      1.00      1.00       428
         Drunk driving       1.00      1.00      1.00       442
         Over-speeding       1.00      1.00      1.00       451
     Red light jumping       1.00      1.00      1.00       450
       Road conditions       1.00      1.00      1.00       461
Traffic rule violation       1.00      1.00      1.00       459
   Vehicle malfunction       1.00      1.00      1.00       414
    Weather conditions       1.00      1.00      1.00       436
    Wrong side driving       1.00      1.00      1.00       459

              accuracy                           1.00      4000
             macro avg       1.00      1.00      1.00      4000
          weighted avg       1.00      1.00      1.00      4000



### Evaluate Secondary Cause Model

In [4]:
# Predict the secondary cause for every row of the test feature matrix.
y_pred_secondary = model_secondary.predict(X_test)

print("Secondary Cause Model Evaluation:")
print("Accuracy:", accuracy_score(y_test_secondary, y_pred_secondary))

# The recorded run shows recall 1.00 for "No Violation" and 0.00 for the other
# two classes, i.e. the model predicts only the majority class. Precision is
# therefore undefined (0/0) for the never-predicted classes. With the default
# zero_division="warn", scikit-learn reports 0.0 for those cells and emits an
# UndefinedMetricWarning for each; zero_division=0 reports the same 0.0 values
# without the warning noise.
# Read the macro-average row rather than accuracy when judging this model:
# accuracy here only reflects the share of the majority class.
print(
    classification_report(
        y_test_secondary,
        y_pred_secondary,
        target_names=label_encoder_secondary.classes_,
        zero_division=0,
    )
)

Secondary Cause Model Evaluation:
Accuracy: 0.72025
                    precision    recall  f1-score   support

  Helmet Violation       0.00      0.00      0.00       287
      No Violation       0.72      1.00      0.84      2881
Seatbelt Violation       0.00      0.00      0.00       832

          accuracy                           0.72      4000
         macro avg       0.24      0.33      0.28      4000
      weighted avg       0.52      0.72      0.60      4000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Evaluate Risk Factor Model

In [5]:
# Predict the risk level for every row of the test feature matrix.
y_pred_risk = model_risk.predict(X_test)

print("\n Risk Factor Model Evaluation:")

# Overall fraction of correctly classified rows.
risk_accuracy = accuracy_score(y_test_risk, y_pred_risk)
print("Accuracy:", risk_accuracy)

# Per-class precision / recall / F1, labelled with the encoder's class names.
risk_report = classification_report(
    y_test_risk,
    y_pred_risk,
    target_names=label_encoder_risk.classes_,
)
print(risk_report)



 Risk Factor Model Evaluation:
Accuracy: 0.79075
              precision    recall  f1-score   support

        High       0.75      0.98      0.85      2410
      Medium       0.94      0.51      0.66      1590

    accuracy                           0.79      4000
   macro avg       0.84      0.74      0.75      4000
weighted avg       0.82      0.79      0.77      4000

