In [1]:
import pandas as pd
import pickle
import os
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [2]:
import json

# Path to your IF results
res_path = "/kaggle/input/isolation-forest-results-and-testing-datasets/blockchain_anomaly_results_encoded.json"

# Try reading as a single JSON array
try:
    with open(res_path, "r") as f:
        data = json.load(f)
    res_df = pd.DataFrame(data)
    print(f"Loaded {len(res_df)} records from JSON array.")
except ValueError:
    # Fallback to line-by-line (NDJSON)
    print("JSON array load failed; falling back to line-by-line parsing.")
    records = []
    with open(res_path, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            records.append(json.loads(line))
    res_df = pd.DataFrame(records)
    print(f"Loaded {len(res_df)} records from NDJSON.")

# Now do the same for your mock transactions if needed:
test_path = "/kaggle/input/isolation-forest-results-and-testing-datasets/mock_blockchain_transactions_large.json"

# Assuming mock file *is* an array of objects, you can just do:
with open(test_path, "r") as f:
    mock_data = json.load(f)
test_df = pd.DataFrame(mock_data)
print(f"Loaded {len(test_df)} mock transactions.")

# …and then proceed with the merge, filter, feature extraction, and predictions as before.


Loaded 945 records from JSON array.
Loaded 945 mock transactions.


In [3]:
# 2. Merge on tx_hash to align IF prediction with ground truth
merged = pd.merge(
    res_df, test_df,
    left_on="tx_hash", right_on="tx_hash",
    suffixes=("_if", "")
)

In [4]:
# 3. Merge and filter for all IF-“authentic” predictions
merged = pd.merge(res_df, test_df, on="tx_hash", suffixes=("_if", ""))
auth_df = merged[merged["anomaly"] == "authentic"].copy()
print(f"Running on {len(auth_df)} IF-authentic transactions")

Running on 850 IF-authentic transactions


In [5]:
# 4. Build the feature DataFrame
auth_df["TimeStamp"] = pd.to_datetime(auth_df["timestamp"], unit="s")
for fld in ["hour","day","month","year"]:
    auth_df[fld] = getattr(auth_df["TimeStamp"].dt, fld)

X_auth = auth_df.rename(columns={
    "block_number": "BlockHeight",
    "value":        "Value",
    "from":         "From",
    "to":           "To"
})[[
    "BlockHeight","Value",
    "hour","day","month","year",
    "From","To"
]]

In [6]:
# 5. Load your saved pipelines from the Kaggle input dir
model_dir = "/kaggle/input/comp517_models/scikitlearn/default/1"
pipelines = {}
for fname in os.listdir(model_dir):
    if fname.startswith("pipeline_") and fname.endswith(".pkl"):
        name = fname.replace("pipeline_","").replace(".pkl","").replace("_","+")
        with open(os.path.join(model_dir, fname),"rb") as f:
            pipelines[name] = pickle.load(f)

print("Models to test:", list(pipelines.keys()))

Models to test: ['RF+XGB', 'MLP', 'RF+MLP', 'XGB', 'RF+MLP+XGB', 'MLP+XGB', 'RF']


In [7]:
# 6. Predict, evaluate, and summarize with correctness
# Ground‐truth: 0 if type=="authentic", else 1
ground_truth = (auth_df["type"] != "authentic").astype(int).values

summary = []
for name, pipe in pipelines.items():
    preds = pipe.predict(X_auth)
    
    # counts
    valid_count = int((preds == 0).sum())
    error_count = int((preds == 1).sum())
    
    # correctness
    correct = int((preds == ground_truth).sum())
    accuracy = correct / len(preds)
    
    # print full classification report
    print(f"\n=== {name} ===")
    print(classification_report(ground_truth, preds, target_names=["Valid(0)","Error(1)"]))
    print(f"Accuracy: {accuracy:.2%} ({correct}/{len(preds)})")
    
    summary.append({
        "Model":     name,
        "Valid (0)": valid_count,
        "Error (1)": error_count,
        "Correct":   correct,
        "Total":     len(preds),
        "Accuracy":  accuracy
    })


=== RF+XGB ===
              precision    recall  f1-score   support

    Valid(0)       0.70      1.00      0.83       599
    Error(1)       0.00      0.00      0.00       251

    accuracy                           0.70       850
   macro avg       0.35      0.50      0.41       850
weighted avg       0.50      0.70      0.58       850

Accuracy: 70.47% (599/850)

=== MLP ===
              precision    recall  f1-score   support

    Valid(0)       0.00      0.00      0.00       599
    Error(1)       0.30      1.00      0.46       251

    accuracy                           0.30       850
   macro avg       0.15      0.50      0.23       850
weighted avg       0.09      0.30      0.13       850

Accuracy: 29.53% (251/850)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== RF+MLP ===
              precision    recall  f1-score   support

    Valid(0)       0.00      0.00      0.00       599
    Error(1)       0.30      1.00      0.46       251

    accuracy                           0.30       850
   macro avg       0.15      0.50      0.23       850
weighted avg       0.09      0.30      0.13       850

Accuracy: 29.53% (251/850)

=== XGB ===
              precision    recall  f1-score   support

    Valid(0)       0.70      1.00      0.83       599
    Error(1)       0.00      0.00      0.00       251

    accuracy                           0.70       850
   macro avg       0.35      0.50      0.41       850
weighted avg       0.50      0.70      0.58       850

Accuracy: 70.47% (599/850)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== RF+MLP+XGB ===
              precision    recall  f1-score   support

    Valid(0)       0.70      1.00      0.83       599
    Error(1)       0.00      0.00      0.00       251

    accuracy                           0.70       850
   macro avg       0.35      0.50      0.41       850
weighted avg       0.50      0.70      0.58       850

Accuracy: 70.47% (599/850)

=== MLP+XGB ===
              precision    recall  f1-score   support

    Valid(0)       0.69      0.92      0.79       599
    Error(1)       0.02      0.00      0.01       251

    accuracy                           0.65       850
   macro avg       0.35      0.46      0.40       850
weighted avg       0.49      0.65      0.56       850

Accuracy: 65.06% (553/850)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== RF ===
              precision    recall  f1-score   support

    Valid(0)       0.70      1.00      0.83       599
    Error(1)       0.00      0.00      0.00       251

    accuracy                           0.70       850
   macro avg       0.35      0.50      0.41       850
weighted avg       0.50      0.70      0.58       850

Accuracy: 70.47% (599/850)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# 7. Tabulate & save summary
summary_df = pd.DataFrame(summary)
print("\nOverall Summary:")
print(summary_df)

summary_df.to_csv("authentic_prediction_summary_with_accuracy.csv", index=False)
print("\nSaved detailed summary to authentic_prediction_summary_with_accuracy.csv")


Overall Summary:
        Model  Valid (0)  Error (1)  Correct  Total  Accuracy
0      RF+XGB        850          0      599    850  0.704706
1         MLP          0        850      251    850  0.295294
2      RF+MLP          0        850      251    850  0.295294
3         XGB        850          0      599    850  0.704706
4  RF+MLP+XGB        850          0      599    850  0.704706
5     MLP+XGB        802         48      553    850  0.650588
6          RF        850          0      599    850  0.704706

Saved detailed summary to authentic_prediction_summary_with_accuracy.csv


In [9]:
# — After merging res_df and test_df into `merged` —

# 1. Build binary labels:
#    0 = authentic; 1 = anomaly
y_true = (merged["type"]    != "authentic").astype(int).values
y_if   = (merged["anomaly"] != "authentic").astype(int).values

# 2. Print overall accuracy
total   = len(y_true)
correct = (y_true == y_if).sum()
print(f"Isolation Forest Accuracy: {correct}/{total} = {correct/total:.2%}\n")

# 3. Confusion matrix
cm = confusion_matrix(y_true, y_if)
print("Confusion Matrix (rows=truth, cols=IF prediction):")
print(pd.DataFrame(
    cm,
    index=["True Auth (0)", "True Anom (1)"],
    columns=["Pred Auth (0)", "Pred Anom (1)"]
), "\n")

# 4. Detailed classification report
print("Classification Report for Isolation Forest:")
print(classification_report(y_true, y_if, target_names=["Authentic (0)", "Anomaly (1)"]))

Isolation Forest Accuracy: 694/945 = 73.44%

Confusion Matrix (rows=truth, cols=IF prediction):
               Pred Auth (0)  Pred Anom (1)
True Auth (0)            599              0
True Anom (1)            251             95 

Classification Report for Isolation Forest:
               precision    recall  f1-score   support

Authentic (0)       0.70      1.00      0.83       599
  Anomaly (1)       1.00      0.27      0.43       346

     accuracy                           0.73       945
    macro avg       0.85      0.64      0.63       945
 weighted avg       0.81      0.73      0.68       945



In [10]:

# — assume you have already merged res_df and test_df into `merged` —

# 1. Filter to the IF-“authentic” subset
auth_if = merged[merged["anomaly"] == "authentic"].copy()
print(f"Total IF-authentic predictions: {len(auth_if)}")

# 2. Build binary labels:
#    y_true: 0 if truly authentic, 1 if actually an anomaly
y_true_sub = (auth_if["type"] != "authentic").astype(int).values
#    y_pred: 0 for IF-authentic (since anomaly=="authentic"), 1 otherwise
y_pred_sub = (auth_if["anomaly"] != "authentic").astype(int).values  # will be all zeros

# 3. Confusion matrix (rows=true, cols=pred)
cm = confusion_matrix(y_true_sub, y_pred_sub, labels=[0,1])
cm_df = pd.DataFrame(cm,
                     index=["True Authentic (0)","True Anomaly (1)"],
                     columns=["Pred Authentic (0)","Pred Anomaly (1)"])
print("\nConfusion Matrix on IF-authentic subset:")
print(cm_df)

# 4. Accuracy on that subset
accuracy = (y_true_sub == y_pred_sub).mean()
print(f"\nAccuracy on IF-authentic subset: {accuracy:.2%} ({(y_true_sub==y_pred_sub).sum()}/{len(y_true_sub)})")

# 5. Classification report
print("\nClassification Report (treating IF-authentic as pred=0):")
print(classification_report(y_true_sub, y_pred_sub,
      target_names=["Authentic (true=0)","Anomaly (true=1)"]))

Total IF-authentic predictions: 850

Confusion Matrix on IF-authentic subset:
                    Pred Authentic (0)  Pred Anomaly (1)
True Authentic (0)                 599                 0
True Anomaly (1)                   251                 0

Accuracy on IF-authentic subset: 70.47% (599/850)

Classification Report (treating IF-authentic as pred=0):
                    precision    recall  f1-score   support

Authentic (true=0)       0.70      1.00      0.83       599
  Anomaly (true=1)       0.00      0.00      0.00       251

          accuracy                           0.70       850
         macro avg       0.35      0.50      0.41       850
      weighted avg       0.50      0.70      0.58       850



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
