## Import libraries

In [1]:
import pandas as pd
import numpy as np
# import scipy
from scipy.sparse import csr_matrix  # For reconstructing the sparse matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import joblib

## Load data

In [2]:
# Rebuild the sparse feature matrix from the three CSR component arrays
# ('data', 'indices', 'indptr') plus the stored 'shape'.
# np.load on an .npz archive returns a lazily-read NpzFile that keeps the
# underlying file open, so it is used as a context manager to make sure the
# handle is released as soon as the arrays have been read into memory.
# NOTE(review): assumes the archive holds a CSR-format matrix — confirm against
# the notebook that wrote X_sparse.npz.
with np.load("X_sparse.npz") as X_loaded:
    X = csr_matrix(
        (X_loaded['data'], X_loaded['indices'], X_loaded['indptr']),
        shape=X_loaded['shape'],
    )

# One label file per prediction target; each is loaded as a DataFrame.
y_primary = pd.read_csv("y_primary.csv")
y_secondary = pd.read_csv("y_secondary.csv")
y_risk = pd.read_csv("y_risk.csv")


## Train supervised learning models

In [3]:
# Shared settings for the split and for all three classifiers.
TEST_SIZE = 0.2
SEED = 42
RF_PARAMS = {"n_estimators": 100, "max_depth": 10, "random_state": SEED}


def _train_forest(features, labels):
    """Fit and return a RandomForestClassifier built from RF_PARAMS.

    Parameters
    ----------
    features : feature matrix passed straight to ``fit`` (here the sparse X_train).
    labels : 1-D array of target labels aligned row-for-row with ``features``.

    Returns
    -------
    The fitted RandomForestClassifier.
    """
    forest = RandomForestClassifier(**RF_PARAMS)
    forest.fit(features, labels)
    return forest


# Split the features and all three targets in ONE call: train_test_split applies
# the same row partition to every array it is given, so the targets are
# guaranteed to stay aligned with X without re-splitting X once per target.
(
    X_train, X_test,
    y_train_primary, y_test_primary,
    y_train_secondary, y_test_secondary,
    y_train_risk, y_test_risk,
) = train_test_split(
    X, y_primary, y_secondary, y_risk,
    test_size=TEST_SIZE, random_state=SEED,
)

# Persist the held-out features. to_csv needs a dense table, so the CSR matrix
# is expanded with toarray() first.
# NOTE(review): this materializes the full dense test matrix in memory — fine
# for small feature spaces, worth revisiting if X is very wide.
X_test_df = pd.DataFrame(X_test.toarray())
X_test_df.to_csv('X_test.csv', index=False)

# Persist the held-out targets alongside the features.
y_test_primary.to_csv('y_test_primary.csv', index=False)
y_test_secondary.to_csv('y_test_secondary.csv', index=False)
y_test_risk.to_csv('y_test_risk.csv', index=False)

# The targets were read as DataFrames; flatten each to a 1-D NumPy array
# before handing it to fit().
y_train_primary = y_train_primary.values.ravel()
y_train_secondary = y_train_secondary.values.ravel()
y_train_risk = y_train_risk.values.ravel()

# Train one classifier per target, all with identical hyperparameters.
model_primary = _train_forest(X_train, y_train_primary)
model_secondary = _train_forest(X_train, y_train_secondary)
model_risk = _train_forest(X_train, y_train_risk)

# Save the fitted models to disk.
joblib.dump(model_primary, 'accident_model_primary.pkl')
joblib.dump(model_secondary, 'accident_model_secondary.pkl')
joblib.dump(model_risk, 'accident_model_risk.pkl')

print("All models trained and saved successfully!")


All models trained and saved successfully!
