In [None]:
import sys
from pathlib import Path

# The notebook is expected to run from FYP2/notebooks, so the parent of the
# current working directory is the project root (FYP2).
project_root = Path().resolve().parent  # FYP2

# Make the project's src/ folder importable. The membership check keeps
# sys.path free of duplicate entries when this cell is executed again.
src_dir = str(project_root / "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix
)

from behav_model import predict_behav  # now it should import cleanly

# 1. Load the behavioral dataset
df_behav = pd.read_parquet(str(project_root / "data_processed/behav_baseline.parquet"))

# 2. Ground truth label
#    "Prediction" holds string class codes (the inspection cell in this
#    notebook shows 'SS', 'A' and 'S'), not 0/1 integers, so it cannot be cast
#    to int directly. Compare against the positive class code instead:
#    1 = WannaCry ("A"), 0 = every other code.
#    NOTE(review): the ValueError recorded under this cell (int() on 'SS') is
#    not raised by this mapping -- presumably stale output from an earlier
#    version of the cell, or raised inside predict_behav. Re-run to confirm.
#    Vectorised comparison; missing values compare unequal and so map to 0.
y_true_behav = df_behav["Prediction"].eq("A").astype(int)


# 3. Run model inference
#    The result is unpacked as (scores, hard predictions, threshold); the
#    scores feed ROC AUC and the threshold is only reported further down.
proba_behav, pred_behav, thr = predict_behav(df_behav)

# 4. Metrics
# Class 1 is the positive class for every metric; zero_division=0 makes the
# score 0 (instead of emitting a warning) when a denominator is empty.
acc  = accuracy_score(y_true_behav, pred_behav)
prec = precision_score(y_true_behav, pred_behav, zero_division=0)
rec  = recall_score(y_true_behav, pred_behav, zero_division=0)
f1   = f1_score(y_true_behav, pred_behav, zero_division=0)
# ROC AUC is computed from proba_behav, not from the thresholded pred_behav.
auc  = roc_auc_score(y_true_behav, proba_behav)

# labels=[0, 1] pins the matrix to 2x2, so the four-way unpack cannot fail
# when a class is absent from both the ground truth and the predictions.
tn, fp, fn, tp = confusion_matrix(y_true_behav, pred_behav, labels=[0, 1]).ravel()
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0  # False Positive Rate

print("=== Behavioral Model (CatBoost) ===")
print(f"Threshold used : {thr:.3f}")
print(f"Accuracy        : {acc:.4f}")
print(f"Precision       : {prec:.4f}")
print(f"Recall (TPR)    : {rec:.4f}")
print(f"F1-score        : {f1:.4f}")
print(f"ROC AUC         : {auc:.4f}")
print(f"False Pos Rate  : {fpr:.4f}")
print("Confusion Matrix [tn fp; fn tp]:")
print([[tn, fp],[fn, tp]])


ValueError: invalid literal for int() with base 10: 'SS'

In [2]:
import pandas as pd
from pathlib import Path

# Locate the processed behavioural dataset relative to the project root
# (the parent of the current working directory) and load it.
project_root = Path().resolve().parent
behav_parquet = project_root / "data_processed/behav_baseline.parquet"
df_behav = pd.read_parquet(str(behav_parquet))

# Inspect the raw label column: first the distinct values (capped at 50),
# then how many rows carry each value.
prediction_col = df_behav["Prediction"]

print("Unique values in Prediction column:")
print(prediction_col.unique()[:50])

print("\nSample counts:")
print(prediction_col.value_counts())


Unique values in Prediction column:
['SS' 'A' 'S']

Sample counts:
Prediction
S     66380
A     42561
SS    40102
Name: count, dtype: int64


In [1]:
import sys
from pathlib import Path

# Project root is the parent of the current working directory.
project_root = Path().resolve().parent  # FYP2

# Expose src/ to the import system; skip the append when the entry is already
# present so that re-running the cell does not pile up duplicates.
src_dir = str(project_root / "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix
)

from behav_model import predict_behav

# 1. Load behavior dataset
df_behav = pd.read_parquet(str(project_root / "data_processed/behav_baseline.parquet"))

# 2. Make true binary label: 1 = WannaCry ('A'), 0 = others
#    Vectorised comparison instead of a per-row lambda; missing values compare
#    unequal and therefore map to 0.
#    NOTE(review): the recorded run of this cell reports ROC AUC 0.4251, i.e.
#    below 0.5 -- confirm that 'A' really is the class the model scores as
#    positive before relying on these metrics.
y_true_behav = df_behav["Prediction"].eq("A").astype(int)

# 3. Run inference
#    NOTE(review): df_behav is passed with the "Prediction" label column still
#    present; confirm predict_behav does not use it as a feature.
proba_behav, pred_behav, thr = predict_behav(df_behav)

# 4. Metrics
# Threshold-based scores use pred_behav with class 1 as the positive class;
# zero_division=0 yields 0 rather than a warning on an empty denominator.
acc  = accuracy_score(y_true_behav, pred_behav)
prec = precision_score(y_true_behav, pred_behav, zero_division=0)
rec  = recall_score(y_true_behav, pred_behav, zero_division=0)
f1   = f1_score(y_true_behav, pred_behav, zero_division=0)
# Ranking quality is measured on proba_behav rather than the hard labels.
auc  = roc_auc_score(y_true_behav, proba_behav)

# Force a 2x2 matrix (labels=[0, 1]) so unpacking into tn/fp/fn/tp is safe
# even if one class never appears in the truth or the predictions.
tn, fp, fn, tp = confusion_matrix(y_true_behav, pred_behav, labels=[0, 1]).ravel()
# False positive rate; falls back to 0.0 when there are no negative samples.
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

print("=== Behavioral Model (CatBoost) ===")
print(f"Threshold used : {thr:.3f}")
print(f"Accuracy        : {acc:.4f}")
print(f"Precision       : {prec:.4f}")
print(f"Recall (TPR)    : {rec:.4f}")
print(f"F1-score        : {f1:.4f}")
print(f"ROC AUC         : {auc:.4f}")
print(f"False Pos Rate  : {fpr:.4f}")
print("Confusion Matrix [tn fp; fn tp]:")
print([[tn, fp],[fn, tp]])


=== Behavioral Model (CatBoost) ===
Threshold used : 0.671
Accuracy        : 0.5593
Precision       : 0.2376
Recall (TPR)    : 0.2458
F1-score        : 0.2416
ROC AUC         : 0.4251
False Pos Rate  : 0.3153
Confusion Matrix [tn fp; fn tp]:
[[np.int64(72903), np.int64(33579)], [np.int64(32098), np.int64(10463)]]


In [1]:
import sys
from pathlib import Path

# Project root is the parent of the current working directory.
project_root = Path().resolve().parent  # FYP2

# Add src/ to the import path only once, so repeated executions of this cell
# leave sys.path unchanged.
src_dir = str(project_root / "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix
)

from static_model import predict_static, load_static_artifacts

# 1. Load static dataset
df_static = pd.read_parquet(str(project_root / "data_processed/static_baseline.parquet"))

# 2. Build ground truth labels
# The dataset stores Benign = 1 for safe samples. The evaluation target uses
# the opposite polarity: WannaCry (malicious) = 1, Benign = 0.
# NOTE(review): the recorded run of this cell reports ROC AUC 0.0673 (about
# 0.93 once flipped) and a 0.92 false positive rate. That pattern suggests the
# score returned by predict_static ranks benign samples highest -- possibly
# P(Benign = 1). If so, evaluate 1 - proba_static here (or score against
# Benign == 1). Confirm against static_model before trusting these metrics.
y_true_static = (df_static["Benign"] == 0).astype(int)

# 3. Run model inference
# Decision cut-off applied to the model scores, named so it can be tuned in
# one place.
STATIC_THRESHOLD = 0.5
proba_static = predict_static(df_static)
pred_static = (proba_static >= STATIC_THRESHOLD).astype(int)

# 4. Metrics
# Class 1 (malicious) is the positive class; zero_division=0 returns 0 rather
# than warning when precision/recall/F1 have an empty denominator.
acc  = accuracy_score(y_true_static, pred_static)
prec = precision_score(y_true_static, pred_static, zero_division=0)
rec  = recall_score(y_true_static, pred_static, zero_division=0)
f1   = f1_score(y_true_static, pred_static, zero_division=0)
# NOTE(review): a ROC AUC far below 0.5 (the recorded run shows 0.0673) means
# the scores rank the classes in reverse, which usually points to a label or
# score polarity mismatch rather than a weak model -- verify before reporting.
auc  = roc_auc_score(y_true_static, proba_static)

# labels=[0, 1] guarantees a 2x2 matrix, so the four-way unpack stays valid
# even when only one class is present.
tn, fp, fn, tp = confusion_matrix(y_true_static, pred_static, labels=[0, 1]).ravel()
# False positive rate; 0.0 when the data contains no negative samples.
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

print("=== Static Model (Random Forest) ===")
print(f"Accuracy        : {acc:.4f}")
print(f"Precision       : {prec:.4f}")
print(f"Recall (TPR)    : {rec:.4f}")
print(f"F1-score        : {f1:.4f}")
print(f"ROC AUC         : {auc:.4f}")
print(f"False Pos Rate  : {fpr:.4f}")
print("Confusion Matrix [tn fp; fn tp]:")
print([[tn, fp],[fn, tp]])


=== Static Model (Random Forest) ===
Accuracy        : 0.1573
Precision       : 0.2351
Recall (TPR)    : 0.2170
F1-score        : 0.2257
ROC AUC         : 0.0673
False Pos Rate  : 0.9205
Confusion Matrix [tn fp; fn tp]:
[[np.int64(2155), np.int64(24963)], [np.int64(27694), np.int64(7673)]]


