In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

file_path = r'C:\Users\mathi\Desktop\accidents_cleaned.csv'
df = pd.read_csv(file_path)

target_column = 'Accident_severity' 
X = df.drop(columns=[target_column]) 
y = df[target_column]  

num_cols = X.select_dtypes(include=['float64', 'int64']).columns
cat_cols = X.select_dtypes(exclude=['float64', 'int64']).columns  

if not X[num_cols].isnull().values.any():
    print("No missing values in numerical columns.")
else:
    imputer_num = SimpleImputer(strategy='mean')
    X[num_cols] = imputer_num.fit_transform(X[num_cols])

if not X[cat_cols].isnull().values.any():
    print("No missing values in categorical columns.")
else:
    imputer_cat = SimpleImputer(strategy='most_frequent')
    X[cat_cols] = imputer_cat.fit_transform(X[cat_cols])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

train_scaled = pd.concat([pd.DataFrame(X_train, columns=X.columns), y_train.reset_index(drop=True)], axis=1)
test_scaled = pd.concat([pd.DataFrame(X_test, columns=X.columns), y_test.reset_index(drop=True)], axis=1)

train_scaled.to_csv('train_scaled.csv', index=False)
test_scaled.to_csv('test_scaled.csv', index=False)

print("Scaling complete! Scaled data saved as 'train_scaled.csv' and 'test_scaled.csv'.")
print("Train scaled head:\n", train_scaled.head())
print("Test scaled head:\n", test_scaled.head())

No missing values in numerical columns.
Scaling complete! Scaled data saved as 'train_scaled.csv' and 'test_scaled.csv'.
Train scaled head:
          Time Day_of_week Age_band_of_driver Drivers_gender  \
192   7:30:00     Tuesday              31-50           Male   
305  16:20:00   Wednesday              31-50           Male   
734  17:30:00     Tuesday              31-50           Male   
706  20:15:00      Monday              31-50           Male   
551   5:46:00      Friday              18-30           Male   

      Educational_level Vehicle_driver_relation Driving_experience  \
192  Junior high school                Employee              1-2yr   
305   Elementary school                   Owner              2-5yr   
734  Junior high school                Employee              2-5yr   
706   Elementary school                Employee              2-5yr   
551   Elementary school                Employee             5-10yr   

         Type_of_vehicle Owner_of_vehicle Service_year_of_v

In [6]:
%pwd

'C:\\Users\\mathi'