In [122]:
from phisher.xai import GradCAM
from phisher.module import PhisherhModule
from phisher.model import PhisherEmbeddingModel
from phisher.dataset import PhishingEmbeddingDataset

In [123]:
import torch

#### Load model

In [124]:
model_ckpt_path: str = "/Users/bkosinski/Desktop/STUDIA/SEMI/GSN/projekt/checkpoints/embeddings_model.ckpt"

In [125]:
model: PhisherEmbeddingModel = PhisherEmbeddingModel(vocab_size=84, embedding_dim=100, out_features=1)

In [None]:
PhisherhModule.load_from_checkpoint(model_ckpt_path, model=model)

#### Visualize Grad-CAM

In [127]:
def visualize_url_with_cam(url: str, cam: torch.Tensor) -> str:
    cam = cam.squeeze()
    if len(cam.shape) != 1:
        cam = cam.mean(dim=1)
        cam = (cam - cam.min()) / (cam.max() - cam.min())
    colored_url = ""
    for char, intensity in zip(url, cam):
        color_intensity = int(
            255 * (1 - intensity)
        )  # Scale intensity (higher = darker)
        hex_color = f"{color_intensity:02x}ff{color_intensity:02x}"
        colored_url += f'<span style="background-color: #{hex_color};">{char}</span>'

    return colored_url

In [128]:
def parse_url_to_input_tensor(url: str, placeholder_dataset: PhishingEmbeddingDataset, device: str) -> torch.Tensor:
    input_tensor = placeholder_dataset.parse_url(url)
    input_tensor = placeholder_dataset.pad_or_trim(input_tensor)
    input_tensor = torch.tensor(input_tensor, dtype=torch.long)

    input_tensor = input_tensor.to(device)
    input_tensor = input_tensor.reshape(1, -1)
    return input_tensor

In [129]:
device = torch.device("mps")
model = model.to(device)

In [130]:
grad_cam = GradCAM(model, target_layer=model.conv2)

In [131]:
placeholder_csv_file_path: str = "/Users/bkosinski/Desktop/STUDIA/SEMI/GSN/projekt/phish_dataset.csv"
placeholder_dataset = PhishingEmbeddingDataset(csv_file_path=placeholder_csv_file_path)

In [132]:
target_url: str = "apple-search.world" # this is a phishing URL, see -> https://hole.cert.pl/domains/v2/domains.txt

In [134]:
input_tensor = parse_url_to_input_tensor(target_url, placeholder_dataset, device)
cam = grad_cam.generate_cam(input_tensor, target_class=0)

In [135]:
visualized_url: str = visualize_url_with_cam(url=target_url, cam=cam)

In [136]:
from IPython.core.display import display, HTML
display(HTML(f"<div style='color: black'>{visualized_url}</div>"))

  from IPython.core.display import display, HTML
