In [11]:
import pandas as pd  
import kagglehub
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [9]:
path = kagglehub.dataset_download("himelsarder/road-accident-survival-dataset")
print("Path to dataset files:", path)
file_path = f"{path}/accident.csv" 

Path to dataset files: C:\Users\gsocc\.cache\kagglehub\datasets\himelsarder\road-accident-survival-dataset\versions\1


In [17]:
df = pd.read_csv(file_path)

df_cleaned = df.dropna()
df_cleaned.to_csv(f"{path}/cleaned_dataset.csv", index=False)
print(df_cleaned.head())

   Age  Gender  Speed_of_Impact Helmet_Used Seatbelt_Used  Survived
0   56  Female             27.0          No            No         1
1   69  Female             46.0          No           Yes         1
2   46    Male             46.0         Yes           Yes         0
3   32    Male            117.0          No           Yes         0
4   60  Female             40.0         Yes           Yes         0


In [21]:
label_encoders = {}
categorical_columns = ["Gender", "Helmet_Used", "Seatbelt_Used"]

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  

X = df[["Age", "Gender", "Speed_of_Impact", "Helmet_Used", "Seatbelt_Used"]]
y = df["Survived"] 

X = X.dropna()
y = y.loc[X.index]

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [23]:
svm_model = SVC(kernel='rbf', C=1, gamma='scale')
svm_model.fit(X_train_scaled, y_train)

svm_train_pred = svm_model.predict(X_train_scaled)
svm_test_pred = svm_model.predict(X_test_scaled)

svm_train_acc = accuracy_score(y_train, svm_train_pred)
svm_test_acc = accuracy_score(y_test, svm_test_pred)

In [24]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

rf_train_pred = rf_model.predict(X_train)
rf_test_pred = rf_model.predict(X_test)

rf_train_acc = accuracy_score(y_train, rf_train_pred)
rf_test_acc = accuracy_score(y_test, rf_test_pred)

In [25]:
print("\n--- Model Performance ---")
print(f"SVM Training Accuracy: {svm_train_acc:.4f}")
print(f"SVM Test Accuracy: {svm_test_acc:.4f}")
print(f"Random Forest Training Accuracy: {rf_train_acc:.4f}")
print(f"Random Forest Test Accuracy: {rf_test_acc:.4f}")


--- Model Performance ---
SVM Training Accuracy: 0.6752
SVM Test Accuracy: 0.5250
Random Forest Training Accuracy: 1.0000
Random Forest Test Accuracy: 0.5250


In [26]:
feature_importance = pd.Series(rf_model.feature_importances_, index=X.columns).sort_values(ascending=False)
print("\n--- Feature Importance (Random Forest) ---")
print(feature_importance)


--- Feature Importance (Random Forest) ---
Age                0.422309
Speed_of_Impact    0.420726
Seatbelt_Used      0.057226
Gender             0.055250
Helmet_Used        0.044488
dtype: float64


In [27]:
print("\n--- SVM Classification Report ---")
print(classification_report(y_test, svm_test_pred))

print("\n--- Random Forest Classification Report ---")
print(classification_report(y_test, rf_test_pred))


--- SVM Classification Report ---
              precision    recall  f1-score   support

           0       0.67      0.35      0.46        23
           1       0.46      0.76      0.58        17

    accuracy                           0.53        40
   macro avg       0.57      0.56      0.52        40
weighted avg       0.58      0.53      0.51        40


--- Random Forest Classification Report ---
              precision    recall  f1-score   support

           0       0.58      0.61      0.60        23
           1       0.44      0.41      0.42        17

    accuracy                           0.53        40
   macro avg       0.51      0.51      0.51        40
weighted avg       0.52      0.53      0.52        40

