In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Train and evaluate a random-forest classifier that predicts the breach
# "method" from the integer-encoded "sector" and the "year" column, then
# print a per-class report and plot the confusion matrix.

# Load cleaned data
df = pd.read_csv("cleaned_breaches.csv")

# Encode features
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# values; used on a feature it gives "sector" an arbitrary integer order.
# Left unchanged here so `sector_enc` / `le_sector` keep their meaning in case
# other code relies on them -- consider one-hot encoding if this model is
# developed further.
le_sector = LabelEncoder()
df["sector_enc"] = le_sector.fit_transform(df["sector"])
X = df[["sector_enc", "year"]]
y = df["method"]
# The target is encoded to integers for training and decoded again below so
# the report and the plot show the original method names.
le_method = LabelEncoder()
y_enc = le_method.fit_transform(y)

# Train-test split
# Stratify on the target so every method keeps the same share in the train and
# test sets; without it a rare method can land entirely on one side of the
# split and vanish from the evaluation.
split_kwargs = {"test_size": 0.2, "random_state": 42}
try:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_enc, stratify=y_enc, **split_kwargs
    )
except ValueError:
    # train_test_split raises ValueError when stratification is impossible
    # (e.g. a class with a single row); fall back to a plain random split.
    X_train, X_test, y_train, y_test = train_test_split(X, y_enc, **split_kwargs)

# Train classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Decode predictions
y_pred_original = le_method.inverse_transform(y_pred)
y_test_original = le_method.inverse_transform(y_test)

# Classification report
# zero_division=0 reports 0.0 for classes that are never predicted (the same
# value the default yields) without emitting UndefinedMetricWarning.
print(classification_report(y_test_original, y_pred_original, zero_division=0))

# Confusion matrix
# Passing labels=le_method.classes_ fixes the row/column order so it matches
# the tick labels given to the heatmap.
conf_matrix = confusion_matrix(y_test_original, y_pred_original, labels=le_method.classes_)
plt.figure(figsize=(6, 5))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    xticklabels=le_method.classes_,
    yticklabels=le_method.classes_,
    cmap="Blues",
)
plt.title("Confusion Matrix - Breach Method Classifier")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.tight_layout()
plt.show()