### Imports

In [1]:
import os
import pandas as pd
import torch
import json
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import lime.lime_text

### Paths

In [2]:
import sys, os
sys.path.append(os.path.abspath('../../src'))
from helper_functions.path_resolver import DynamicPathResolver

dpr = DynamicPathResolver(marker="README.md")
paths = dpr.structure

Project Root: c:\Users\ilian\Documents\Projects\git_projects\university\phishing_detection


In [6]:
models_folder = dpr.get_folder_path_from_namespace(paths.models.bert)
emails_path = paths.app.static.csv.emails_csv

### Functions

In [7]:
def process_text_columns(df):
    df['subject'] = df['subject'].fillna('[NO_SUBJECT]').astype(str)
    df['body'] = df['body'].fillna('[NO_BODY]').astype(str)
    return df

def clean_text(text):
    text = re.sub(r'\s+', ' ', text).strip()
    substitutions = [
        (r'https?://\S+|www\.\S+', '[URL]'),
        (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]'),
        (r'-{2,}', ' '),
        (r'!{2,}', '!'),
        (r'\?{2,}', '?'),
        (r'[_+*]{2,}', ' '),
        (r'[=+]{3,}', ' '),
        (r'[~]{3,}', ' '),
        (r'[#]{3,}', ' '),
        (r'[<]{3,}', ' '),
        (r'[>]{3,}', ' ')
    ]
    for pattern, repl in substitutions:
        text = re.sub(pattern, repl, text)
    return text.strip()

def combine_text_fields(df):
    df['subject'] = df['subject'].apply(clean_text)
    df['body'] = df['body'].apply(clean_text)
    df['text'] = df['subject'] + " [SEP] " + df['body']
    return df

def prepare_bert_data(df):
    df = process_text_columns(df)
    df = combine_text_fields(df)
    return df  # Keep full columns or filter as needed

### Load data

In [8]:
emails_df = pd.read_csv(emails_path)
emails_df = prepare_bert_data(emails_df) 

### Load model

In [9]:
models_checkpoint = os.path.join(models_folder, 'checkpoint-175')

In [10]:
model = AutoModelForSequenceClassification.from_pretrained(models_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(models_checkpoint)
model.eval()

def lime_predict(texts):
    inputs = tokenizer(
        texts,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512
    )
    with torch.no_grad():
        probs = torch.nn.functional.softmax(model(**inputs).logits, dim=-1)
    return probs.numpy()

explainer = lime.lime_text.LimeTextExplainer(class_names=["Legit", "Phishing"])

### Explanations

In [11]:
output_data = []

for idx, row in emails_df.iterrows():
    text = row["text"]
    
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512
    )
    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.nn.functional.softmax(logits, dim=-1)
        confidence, predicted_idx = torch.max(probs, dim=-1)
    
    pred_label = "Phishing" if predicted_idx.item() == 1 else "Legit"
    
    exp = explainer.explain_instance(text, lime_predict, num_features=10)
    lime_html = exp.as_html()
    
    # Append 
    output_data.append({
        "index": idx,
        "original_subject": row.get("subject", ""),
        "original_body": row.get("body", ""),
        "label": int(row["label"]) if "label" in row else None,
        "pred_label": pred_label,
        "confidence": float(confidence.item()),
        "lime_html": lime_html
    })

### Save

In [12]:
static_dir = dpr.get_folder_path_from_namespace(paths.app.static.json)

In [13]:
output_path = os.path.join('lime_explanations.json')
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(output_data, f, ensure_ascii=False, indent=2)

print(f"Saved in {output_path}")

Saved in lime_explanations.json
