In [6]:
import pandas as pd

# Re-parse "Time" as datetimes; errors="coerce" turns unparseable values into NaT
# instead of raising.
parsed_time = pd.to_datetime(df["Time"], errors="coerce")
df["Time"] = parsed_time

# Pull the hour of day out of the parsed timestamps into its own column.
# NOTE(review): the float64 dtype in the recorded output suggests some rows were
# coerced to NaT (hour -> NaN) — confirm whether those rows need handling.
df["Hour"] = parsed_time.dt.hour

df["Hour"].head()


0    15.0
1    10.0
2    14.0
3     8.0
4    17.0
Name: Hour, dtype: float64

In [2]:
# Converts text labels into numbers: each distinct severity string is replaced
# by the integer code of its category.
severity_categories = df["Accident_Severity"].astype("category")
df["Severity_Label"] = severity_categories.cat.codes

# Show the original label next to its code
df[["Accident_Severity", "Severity_Label"]].head()


Unnamed: 0,Accident_Severity,Severity_Label
0,Serious,2
1,Serious,2
2,Slight,3
3,Serious,2
4,Serious,2


In [3]:
# Create accident intensity feature: casualties per vehicle involved.
# A zero vehicle count is masked to NaN before dividing, because pandas returns
# +/-inf (not NaN) for nonzero / 0 and fillna() alone would leave those infs
# in the column.
vehicle_count = df["Number_of_Vehicles"]
nonzero_vehicle_count = vehicle_count.where(vehicle_count != 0)
df["Casualty_per_Vehicle"] = df["Number_of_Casualties"] / nonzero_vehicle_count

# Rows whose ratio is undefined (zero or missing inputs) fall back to 0
df["Casualty_per_Vehicle"] = df["Casualty_per_Vehicle"].fillna(0)

df[["Number_of_Casualties", "Number_of_Vehicles", "Casualty_per_Vehicle"]].head()


Unnamed: 0,Number_of_Casualties,Number_of_Vehicles,Casualty_per_Vehicle
0,1,2,0.5
1,11,2,5.5
2,1,2,0.5
3,1,2,0.5
4,1,2,0.5


In [7]:
# Columns kept for the processed dataset: location, speed limit, time parts,
# vehicle count, casualty ratio, and the encoded severity label.
final_features = [
    "Latitude", "Longitude",
    "Speed_limit",
    "Hour", "Month", "Day",
    "Number_of_Vehicles", "Casualty_per_Vehicle",
    "Severity_Label",
]

# Select just those columns, in the order listed above
final_df = df[final_features]

final_df.head()


Unnamed: 0,Latitude,Longitude,Speed_limit,Hour,Month,Day,Number_of_Vehicles,Casualty_per_Vehicle,Severity_Label
0,51.512273,-0.201349,30,15.0,1,1,2,0.5,2
1,51.514399,-0.199248,30,10.0,1,5,2,5.5,2
2,51.486668,-0.179599,30,14.0,1,4,2,0.5,3
3,51.507804,-0.20311,30,8.0,1,5,2,0.5,2
4,51.482076,-0.173445,30,17.0,1,6,2,0.5,2


In [8]:
# Count how many rows carry each encoded severity label
df["Severity_Label"].value_counts()


Severity_Label
3    263280
2     40740
0      3904
1        49
Name: count, dtype: int64

In [9]:
# Count rows per raw severity string; the recorded output lists a "Fetal"
# spelling alongside "Fatal"
df["Accident_Severity"].value_counts()


Accident_Severity
Slight     263280
Serious     40740
Fatal        3904
Fetal          49
Name: count, dtype: int64

In [10]:
# Normalise the misspelled severity label "Fetal" to "Fatal"
df["Accident_Severity"] = df["Accident_Severity"].replace({"Fetal": "Fatal"})


In [11]:
# Re-encode severity after the "Fetal" -> "Fatal" correction.
# The category list is pinned explicitly so each label keeps a fixed code
# (Fatal=0, Serious=1, Slight=2) no matter which labels happen to be present;
# with inferred categories the codes shifted when the label set changed
# (Serious was 2 before the typo fix and 1 after it).
# Values outside this list, including missing ones, get code -1.
severity_levels = ["Fatal", "Serious", "Slight"]
df["Severity_Label"] = pd.Categorical(
    df["Accident_Severity"], categories=severity_levels
).codes


In [12]:
# Re-count the raw severity strings to check the result of the typo correction
df["Accident_Severity"].value_counts()


Accident_Severity
Slight     263280
Serious     40740
Fatal        3953
Name: count, dtype: int64

In [13]:
# Re-count the encoded labels after re-encoding
df["Severity_Label"].value_counts()


Severity_Label
2    263280
1     40740
0      3953
Name: count, dtype: int64

In [14]:
# Rebuild final_df from df so it picks up the Severity_Label values that were
# recomputed after final_df was first created.
final_df = df[final_features]
final_df.head()

Unnamed: 0,Latitude,Longitude,Speed_limit,Hour,Month,Day,Number_of_Vehicles,Casualty_per_Vehicle,Severity_Label
0,51.512273,-0.201349,30,15.0,1,1,2,0.5,1
1,51.514399,-0.199248,30,10.0,1,5,2,5.5,1
2,51.486668,-0.179599,30,14.0,1,4,2,0.5,2
3,51.507804,-0.20311,30,8.0,1,5,2,0.5,1
4,51.482076,-0.173445,30,17.0,1,6,2,0.5,1


In [15]:
# Destination of the processed dataset (relative path, resolved against the
# current working directory)
processed_path = "../data/processed_accident_data.csv"

# Write the selected feature columns as CSV, leaving out the DataFrame index
final_df.to_csv(path_or_buf=processed_path, index=False)

print("✅ Final processed dataset saved successfully!")


✅ Final processed dataset saved successfully!
