In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
import numpy as np
import pickle 

In [2]:
# ------------------------------
# 1. Load Data
# ------------------------------
data = pd.read_csv("simulation_security_labels_n-1.csv")

# Label encode the status: "secure" -> 1, "insecure" -> 0.
status_encoding = {"secure": 1, "insecure": 0}
data["status_binary"] = data["status"].map(status_encoding)

# Series.map() converts every value that is not a key of the mapping (typos,
# different casing, blanks) into NaN. Stop here with a clear message instead
# of letting a NaN target reach the stratified split or the classifier.
unmapped_rows = data["status_binary"].isna()
if unmapped_rows.any():
    raise ValueError(
        "Unexpected or missing values in 'status' column: "
        f"{data.loc[unmapped_rows, 'status'].unique().tolist()}"
    )

# Drop columns not used as features or explicitly specified.
# "status" / "status_binary" are the label itself and must not be features.
columns_to_drop = [
    "timestamp", "status", "status_binary",
    "max_line_loading_percent_basecase",
    "min_bus_voltage_pu_basecase",
    "max_bus_voltage_pu_basecase",
    "max_line_loading_percent_contingency",
    "min_bus_voltage_pu_contingency",
    "max_bus_voltage_pu_contingency"
]
# errors="ignore" skips any listed column that is absent from the CSV.
features_df = data.drop(columns=columns_to_drop, errors="ignore")
target = data["status_binary"]

In [3]:
# ------------------------------
# 2. Train/Test Split
# ------------------------------
# Hold out 20% of the rows for evaluation. Stratifying on the target keeps the
# class proportions approximately equal in both partitions, and the fixed seed
# makes the partition repeatable.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": target}
X_train, X_test, y_train, y_test = train_test_split(
    features_df, target, **split_options
)

In [5]:
# ------------------------------
# 3. Train the Classifier
# ------------------------------
# Random forest of 100 trees; the fixed seed keeps training repeatable.
forest_params = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [7]:
# ------------------------------
# 4. Evaluate
# ------------------------------
# Predict on the held-out test rows, compute every metric first, then print.
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", acc)
print("Confusion Matrix:", cm, sep="\n")
print("Classification Report:", report, sep="\n")

Accuracy: 0.9378563283922463
Confusion Matrix:
[[800  54]
 [ 55 845]]
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94       854
           1       0.94      0.94      0.94       900

    accuracy                           0.94      1754
   macro avg       0.94      0.94      0.94      1754
weighted avg       0.94      0.94      0.94      1754



In [8]:
# ------------------------------
# 5. Save Model to Pickle
# ------------------------------
# Serialize the fitted classifier to disk. Only the model is written here;
# no imputer is created or saved in this notebook.
model_path = "random_forest_model.pkl"
with open(model_path, "wb") as pkl_out:
    pickle.dump(clf, pkl_out)