### Imports

In [None]:
import pandas as pd
import json

In [13]:
import sys, os
# Make the project's `src` directory importable. The relative path is resolved
# against the current working directory at the time this cell runs, so the
# notebook has to be started from its own folder for this to work.
sys.path.append(os.path.abspath('../../src'))

from helper_functions.path_resolver import DynamicPathResolver
# NOTE(review): the wildcard imports below hide where the names used later in
# this notebook come from. `torch`, `load_model`, `read_dataset`,
# `prepare_bert_data`, `predict_label` and `explain_prediction` are never
# imported explicitly in the visible cells, so they presumably arrive through
# these two modules — confirm, and consider importing them by name.
from helper_functions.preparation import *
from XAI.explain_bert import *

### Paths

In [14]:
# Resolver for project paths, constructed with README.md as its marker
# (presumably the file that identifies the project root — confirm in
# DynamicPathResolver).
dpr = DynamicPathResolver(marker="README.md")

# Folder that the BERT checkpoint is loaded from further down.
models_folder = dpr.path.models.bert._path
# CSV file with the emails that get explained.
emails_path = dpr.path.app.static.csv.emails_csv
# Folder that the explanations JSON is written to in the Save section.
static_dir = dpr.path.app.static.json._path

Project Root: c:\Users\ilian\Documents\Projects\git_projects\university\phishing_detection


### Load data & model

In [15]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

##### Load Model

In [16]:
# Name of the checkpoint sub-folder (joined onto `models_folder` when loading).
model_name = "checkpoint-2500"

In [17]:
# Build the checkpoint path first, then load tokenizer and model from it,
# passing the selected device through to the loader.
checkpoint_dir = os.path.join(models_folder, model_name)
tokenizer, model = load_model(checkpoint_dir, device)

##### Load Data

In [18]:
# Read the raw emails CSV and preview its first five rows.
emails_df = read_dataset(emails_path)
display(emails_df.iloc[:5])

Unnamed: 0,subject,body,label
0,Your PayPal account needs verification,"Dear customer, we noticed unusual activity on ...",1
1,Project Update - Deadline Reminder,"Hi Team, just a friendly reminder that the dea...",0
2,Amazon Order Confirmation,Thank you for your purchase! Your order #12345...,0
3,Unusual Login Attempt Detected,We detected a login attempt from an unrecogniz...,1
4,Summer Vacation Plans,"Hey Alex, I hope this email finds you well! Ju...",0


In [19]:
# Build the model-input frame from a fresh read of the raw CSV instead of from
# `emails_df` itself. The previous `emails_df = prepare_bert_data(emails_df)`
# overwrote its own input, so running this cell a second time fed the
# already-prepared frame (which, per the preview below, no longer has the
# `subject`/`body` columns) back into `prepare_bert_data`. Reading the source
# again makes the cell idempotent regardless of execution order.
emails_df = prepare_bert_data(read_dataset(emails_path))
display(emails_df.head())

Unnamed: 0,text,label
0,Your PayPal account needs verification [SEP] D...,1
1,Project Update - Deadline Reminder [SEP] Hi Te...,0
2,Amazon Order Confirmation [SEP] Thank you for ...,0
3,Unusual Login Attempt Detected [SEP] We detect...,1
4,"Summer Vacation Plans [SEP] Hey Alex, I hope t...",0


### Explanations

In [20]:
def _split_subject_body(text):
    """Recover ``(subject, body)`` from a ``"<subject> [SEP] <body>"`` string.

    The split happens at the first ``[SEP]`` marker and both parts are
    stripped of surrounding whitespace. When the marker is absent, the whole
    text is returned as the body with an empty subject.
    """
    subject, marker, body = text.partition("[SEP]")
    if not marker:
        return "", text.strip()
    return subject.strip(), body.strip()


def _to_builtin(value):
    """Unwrap objects exposing ``.item()`` (e.g. NumPy scalars) to plain Python.

    ``json.dump`` rejects NumPy integer scalars, so anything with an ``item``
    method is converted; every other value is returned unchanged.
    """
    return value.item() if hasattr(value, "item") else value


# One record per email: prediction, confidence and the LIME explanation.
output_data = []

for idx, row in emails_df.iterrows():
    text = row["text"]

    # Predict label & confidence
    pred_label, confidence = predict_label(text, tokenizer, model)

    # Get LIME explanation
    lime_html = explain_prediction(text, tokenizer, model)

    # The prepared frame previewed above only carries `text` and `label`, so
    # the former `row.get("subject", "")` / `row.get("body", "")` lookups
    # always produced empty strings. Fall back to splitting the combined text
    # when those columns are absent.
    # NOTE(review): assumes the text is "<subject> [SEP] <body>", as the
    # preview of the prepared frame suggests — confirm in prepare_bert_data.
    fallback_subject, fallback_body = _split_subject_body(text)

    output_data.append({
        "index": idx,
        "original_subject": row.get("subject", fallback_subject),
        "original_body": row.get("body", fallback_body),
        "label": int(row["label"]) if "label" in row else None,
        # Presumably a plain int/str already; unwrapped defensively so the
        # JSON dump in the Save section cannot fail on a NumPy/tensor scalar.
        "pred_label": _to_builtin(pred_label),
        "confidence": float(confidence),
        "lime_html": lime_html
    })

KeyboardInterrupt: 

### Save

In [None]:
# Target file for the explanations inside the app's static JSON folder.
output_path = os.path.join(static_dir, 'lime_explanations.json')

# The explanation loop is slow and can be interrupted; when that happens
# `output_data` holds only some of the emails. Say so loudly instead of
# silently overwriting the JSON with a partial result.
if len(output_data) != len(emails_df):
    print(
        f"Warning: only {len(output_data)} of {len(emails_df)} emails have "
        "explanations; the saved file will be incomplete."
    )

# Make sure the destination folder exists before opening the file for writing.
os.makedirs(static_dir, exist_ok=True)
with open(output_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    json.dump(output_data, f, ensure_ascii=False, indent=2)

print(f"Saved in {output_path}")