# XSS Cookie Lab – API Experiments with Full Dataset

This notebook sends all payloads from:

`notebooks/data/data_processed/xss_full_clean_with_families.csv`

to the Flask endpoint:

`/api/test_payload`

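and stores the response for each payload, once per dataset and security mode, in:

`notebooks/results/<dataset>/<mode>/xss_<dataset>_with_api_results.csv`

(for the full dataset in `block` mode: `notebooks/results/full/block/xss_full_with_api_results.csv`).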

The goal is to evaluate how the simple context-based mitigation behaves across:
- benign vs XSS payloads,
- different payload families,
- different security modes (`off`, `log`, `block`).


In [1]:
from __future__ import annotations

import json
import sys
from pathlib import Path
from typing import Any, Dict, Optional

import pandas as pd
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Resolve project paths from the current working directory. The parent of the
# working directory is taken as the project root, so this assumes the kernel
# was started inside <project>/notebooks.
NOTEBOOK_DIR = Path.cwd().resolve()
BASE_DIR = NOTEBOOK_DIR.parent  # .../xss-cookie
print(f"[OK] Using BASE_DIR={BASE_DIR}")

# Fail fast if the expected application entry point is not found under src/.
SRC_DIR = BASE_DIR / "src"
if not (SRC_DIR / "app" / "main.py").exists():
    raise RuntimeError(f"SRC_DIR seems wrong: {SRC_DIR}")
print(f"[OK] Using SRC_DIR={SRC_DIR}")

# Make the `app` package importable from this notebook (added only once).
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

# This import has to come after the sys.path change above.
from app import create_app, security  # type: ignore

# Folder holding the input CSVs and root folder for the per-run result files.
DATA_PROCESSED_DIR = BASE_DIR / "notebooks" / "data" / "data_processed"
RESULTS_DIR = BASE_DIR / "notebooks" / "results"

# Columns every input CSV must contain, and an optional cap on rows per dataset.
TEXT_COLUMN = "Sentence_clean"
LABEL_COLUMN = "Label"
ROW_LIMIT: Optional[int] = None  # e.g. 1000 for quick debugging

[OK] Using BASE_DIR=D:\Archivos de Usuario\Documents\xss-cookie
[OK] Using SRC_DIR=D:\Archivos de Usuario\Documents\xss-cookie\src


In [3]:
# Registry of input datasets: key -> human-readable description and CSV path.
# All files share the naming scheme xss_<key>_clean_with_families.csv.
DATASETS: Dict[str, Dict[str, Any]] = {
    key: {
        "description": description,
        "path": DATA_PROCESSED_DIR / f"xss_{key}_clean_with_families.csv",
    }
    for key, description in (
        ("full", "Full combined dataset (GitHub + Kaggle)"),
        ("github", "GitHub-only dataset"),
        ("kaggle", "Kaggle-only dataset"),
    )
}

## Load the processed XSS dataset

We load `xss_full_clean_with_families.csv` and check that it contains:

- `Sentence_clean` → the payload we will send to the API
- `Label` → ground truth (1 = XSS, 0 = benign)

We will keep all other columns (families, features, source, etc.) for analysis.


In [4]:
def load_dataset(path: Path, row_limit: Optional[int] = None) -> pd.DataFrame:
    """Read a dataset CSV and verify that the required columns are present.

    Args:
        path: Location of the CSV file.
        row_limit: When given, only the first ``row_limit`` rows are kept and
            an informational line is printed.

    Returns:
        A copy of the loaded (and possibly truncated) DataFrame.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        ValueError: the text column (``TEXT_COLUMN``) or the label column
            (``LABEL_COLUMN``) is missing from the file.
    """
    if not path.exists():
        raise FileNotFoundError(f"Dataset not found: {path}")

    frame = pd.read_csv(path)

    # The text column is checked first, then the label column.
    for kind, column in (("Text", TEXT_COLUMN), ("Label", LABEL_COLUMN)):
        if column not in frame.columns:
            raise ValueError(f"{kind} column '{column}' not found in dataset {path}.")

    if row_limit is None:
        return frame.copy()

    truncated = frame.head(row_limit).copy()
    print(f"[INFO] Using only first {row_limit} rows from {path.name}.")
    return truncated


def evaluate_blocking(df_mode: pd.DataFrame, label_col: str = LABEL_COLUMN) -> None:
    """Print the blocking confusion matrix and the rates derived from it.

    The ``api_blocked`` column (cast to bool) is the prediction and
    ``label_col`` (cast to int, 1 = XSS, 0 = benign) is the ground truth.
    Nothing is returned and ``df_mode`` is not modified.

    Args:
        df_mode: Result frame containing ``api_blocked`` and ``label_col``.
        label_col: Name of the ground-truth column.
    """
    # Only derived Series are used, so the caller's frame stays untouched.
    predicted = df_mode["api_blocked"].astype(bool).astype(int)
    actual = df_mode[label_col].astype(int)

    is_xss, is_benign = actual == 1, actual == 0
    was_blocked, was_passed = predicted == 1, predicted == 0

    tp = (is_xss & was_blocked).sum()
    fn = (is_xss & was_passed).sum()
    fp = (is_benign & was_blocked).sum()
    tn = (is_benign & was_passed).sum()

    counts = (
        ("TP (XSS blocked)       :", tp),
        ("FN (XSS not blocked)   :", fn),
        ("FP (benign blocked)    :", fp),
        ("TN (benign not blocked):", tn),
    )
    for caption, count in counts:
        print(caption, count)

    def _ratio(part, whole):
        # A class with no samples yields a rate of 0.0 instead of dividing by zero.
        return part / whole if whole > 0 else 0.0

    n_xss = tp + fn
    n_benign = fp + tn
    rates = (
        ("TPR (Recall for XSS)         :", _ratio(tp, n_xss)),
        ("FNR (Miss rate for XSS)      :", _ratio(fn, n_xss)),
        ("FPR (False positive rate)    :", _ratio(fp, n_benign)),
        ("TNR (Specificity for benign) :", _ratio(tn, n_benign)),
    )
    for caption, value in rates:
        print(caption, f"{value:.4f}")

## Run experiment: send all payloads to `/api/test_payload`

For each row:

1. Take `Sentence_clean` as the payload.
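2. Run it through the mitigation layer in-process (`security.analyze_input` / `security.secure_output`); no HTTP request is made.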
3. Store the API response in new columns:
   - `api_original`
   - `api_sanitized`
   - `api_blocked`
   - `api_category`
   - `api_mode`
   - `api_error`
   - `api_error_message`
   - `api_raw_json`

At the end, we save everything into `notebooks/results/<dataset>/<mode>/xss_<dataset>_with_api_results.csv`, one file per dataset and mode.


In [5]:
# 4. In-process experiment using security.analyze_input / secure_output

from flask import Flask  # only needed for the type hint below

# One application instance, created once and reused by every experiment run.
flask_app: Flask = create_app()


def _evaluate_payload(payload: str, mode: str) -> Dict[str, Any]:
    """Analyse one payload and return the values for its result columns.

    Expected to be called while a Flask application context is active.
    Any exception raised while analysing, sanitising or serialising the payload
    is captured in the ``error`` / ``error_message`` entries instead of being
    propagated, so a single bad payload cannot abort a whole run.

    Args:
        payload: The payload text.
        mode: Security mode of the current run.

    Returns:
        Dict with keys ``sanitized``, ``blocked``, ``category``, ``error``,
        ``error_message`` and ``raw_json``. On failure the first three keep
        their defaults (``""``, ``False``, ``""``) and ``raw_json`` is ``None``.
    """
    record: Dict[str, Any] = {
        "sanitized": "",
        "blocked": False,
        "category": "",
        "error": None,
        "error_message": None,
        "raw_json": None,
    }

    try:
        analysis: Dict[str, Any] = security.analyze_input(payload, context="html")
        sanitized = security.secure_output(payload, context="html")
        is_suspicious = bool(analysis.get("is_suspicious"))

        categories = analysis.get("categories", [])
        main_category = analysis.get("main_category")
        if not main_category:
            # Simple fallback when the analysis does not name a main category.
            main_category = "benign" if not is_suspicious else "unknown"

        # Only the "block" mode turns a suspicious verdict into a block decision.
        blocked = bool(mode == "block" and is_suspicious)

        raw_json = json.dumps(
            {
                "original": payload,
                "sanitized": sanitized,
                "blocked": blocked,
                "category": main_category,
                "categories": categories,
                "mode": mode,
                "is_suspicious": is_suspicious,
                "context": "html",
                "reasons": analysis.get("reasons", []),
                "matches": analysis.get("matches", []),
            },
            ensure_ascii=False,
        )
    except Exception as exc:  # recorded per row, see docstring
        record["error"] = True
        record["error_message"] = f"{type(exc).__name__}: {exc}"
        return record

    record.update(
        sanitized=sanitized,
        blocked=blocked,
        category=main_category,
        raw_json=raw_json,
    )
    return record


def run_experiment_inprocess(df: pd.DataFrame, mode: str) -> pd.DataFrame:
    """
    Run the simple mitigation layer over every row of ``df`` in the given mode.

    The input frame is not modified; a copy is returned with these columns
    added (or overwritten):
    - api_original: the payload as a string
    - api_sanitized
    - api_blocked: True only when mode is "block" and the payload is suspicious
    - api_category
    - api_mode
    - api_error: None on success, True when processing the payload raised
    - api_error_message: None on success, otherwise "<ExceptionType>: <message>"
    - api_raw_json: JSON dump of the full per-payload result (None on error)

    Side effect: sets ``flask_app.config["SECURITY_MODE"]`` to ``mode`` and
    leaves it set. NOTE(review): presumably the ``security`` helpers read this
    setting - confirm against the app package.
    """
    df = df.copy()

    flask_app.config["SECURITY_MODE"] = mode
    print(f"[INFO] Running in-process experiment with mode={mode}")

    # Analyse exactly the string stored in api_original, so a NaN or other
    # non-string cell is never handed to the security helpers or to json.dumps.
    payloads = df[TEXT_COLUMN].astype(str)

    records = []
    with flask_app.app_context():
        for payload in tqdm(
            payloads,
            desc=f"Testing payloads (mode={mode})",
            total=len(payloads),
        ):
            records.append(_evaluate_payload(payload, mode))

    # Column-wise list assignment is positional, so it does not depend on the
    # index labels being unique. Assignment order fixes the output column order.
    df["api_original"] = payloads
    df["api_sanitized"] = [rec["sanitized"] for rec in records]
    df["api_blocked"] = [rec["blocked"] for rec in records]
    df["api_category"] = [rec["category"] for rec in records]
    df["api_mode"] = mode
    df["api_error"] = [rec["error"] for rec in records]
    df["api_error_message"] = [rec["error_message"] for rec in records]
    df["api_raw_json"] = [rec["raw_json"] for rec in records]

    n_errors = sum(1 for rec in records if rec["error"])
    if n_errors:
        print(
            f"[WARN] {n_errors} payload(s) raised an exception; "
            "see api_error / api_error_message."
        )

    return df

In [6]:
# 5. Main loop: datasets x modes

MODES = ["off", "log", "block"]

# Trailing part of the input file stem that is dropped when naming the outputs.
INPUT_STEM_SUFFIX = "_clean_with_families"

# Structure:
# all_results[dataset_key][mode] = resulting DataFrame
all_results: Dict[str, Dict[str, pd.DataFrame]] = {}

for dataset_key, cfg in DATASETS.items():
    path: Path = cfg["path"]
    desc: str = cfg.get("description", dataset_key)

    print("\n" + "=" * 70)
    print(f"[DATASET] {dataset_key} :: {desc}")
    print(f"[INFO] Loading dataset from: {path}")

    df_base = load_dataset(path, ROW_LIMIT)
    print(f"[INFO] Dataset shape: {df_base.shape}")

    all_results[dataset_key] = {}

    # Base output name: xss_full / xss_github / xss_kaggle.
    # Only the trailing suffix is cut off; str.replace would also remove the
    # same text if it appeared earlier in the stem.
    base_name = path.stem
    if base_name.endswith(INPUT_STEM_SUFFIX):
        base_name = base_name[: -len(INPUT_STEM_SUFFIX)]

    for mode in MODES:
        print(f"\n[RUN] Dataset={dataset_key} | Mode={mode}")
        df_mode = run_experiment_inprocess(df_base, mode)
        all_results[dataset_key][mode] = df_mode

        # Results folder organised by dataset and mode
        dataset_results_dir = RESULTS_DIR / dataset_key / mode
        dataset_results_dir.mkdir(parents=True, exist_ok=True)

        output_filename = f"{base_name}_with_api_results.csv"
        output_path = dataset_results_dir / output_filename

        df_mode.to_csv(output_path, index=False, encoding="utf-8")
        print(f"[INFO] Saved results for dataset={dataset_key} mode={mode} to:")
        print(f"       {output_path}")
        print(f"[INFO] Result shape: {df_mode.shape}")

print("\n[INFO] Finished all datasets and modes.")


[DATASET] full :: Full combined dataset (GitHub + Kaggle)
[INFO] Loading dataset from: D:\Archivos de Usuario\Documents\xss-cookie\notebooks\data\data_processed\xss_full_clean_with_families.csv
[INFO] Dataset shape: (15351, 10)

[RUN] Dataset=full | Mode=off
[INFO] Running in-process experiment with mode=off


Testing payloads (mode=off): 100%|██████████| 15351/15351 [00:02<00:00, 5346.50it/s]


[INFO] Saved results for dataset=full mode=off to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\full\off\xss_full_with_api_results.csv
[INFO] Result shape: (15351, 18)

[RUN] Dataset=full | Mode=log
[INFO] Running in-process experiment with mode=log


Testing payloads (mode=log): 100%|██████████| 15351/15351 [00:31<00:00, 485.96it/s]


[INFO] Saved results for dataset=full mode=log to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\full\log\xss_full_with_api_results.csv
[INFO] Result shape: (15351, 18)

[RUN] Dataset=full | Mode=block
[INFO] Running in-process experiment with mode=block


Testing payloads (mode=block): 100%|██████████| 15351/15351 [00:04<00:00, 3378.74it/s]


[INFO] Saved results for dataset=full mode=block to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\full\block\xss_full_with_api_results.csv
[INFO] Result shape: (15351, 18)

[DATASET] github :: GitHub-only dataset
[INFO] Loading dataset from: D:\Archivos de Usuario\Documents\xss-cookie\notebooks\data\data_processed\xss_github_clean_with_families.csv
[INFO] Dataset shape: (4516, 10)

[RUN] Dataset=github | Mode=off
[INFO] Running in-process experiment with mode=off


Testing payloads (mode=off): 100%|██████████| 4516/4516 [00:01<00:00, 4052.02it/s]


[INFO] Saved results for dataset=github mode=off to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\github\off\xss_github_with_api_results.csv
[INFO] Result shape: (4516, 18)

[RUN] Dataset=github | Mode=log
[INFO] Running in-process experiment with mode=log


Testing payloads (mode=log): 100%|██████████| 4516/4516 [00:11<00:00, 385.81it/s]


[INFO] Saved results for dataset=github mode=log to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\github\log\xss_github_with_api_results.csv
[INFO] Result shape: (4516, 18)

[RUN] Dataset=github | Mode=block
[INFO] Running in-process experiment with mode=block


Testing payloads (mode=block): 100%|██████████| 4516/4516 [00:01<00:00, 2632.65it/s]


[INFO] Saved results for dataset=github mode=block to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\github\block\xss_github_with_api_results.csv
[INFO] Result shape: (4516, 18)

[DATASET] kaggle :: Kaggle-only dataset
[INFO] Loading dataset from: D:\Archivos de Usuario\Documents\xss-cookie\notebooks\data\data_processed\xss_kaggle_clean_with_families.csv
[INFO] Dataset shape: (10835, 10)

[RUN] Dataset=kaggle | Mode=off
[INFO] Running in-process experiment with mode=off


Testing payloads (mode=off): 100%|██████████| 10835/10835 [00:03<00:00, 3429.07it/s]


[INFO] Saved results for dataset=kaggle mode=off to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\kaggle\off\xss_kaggle_with_api_results.csv
[INFO] Result shape: (10835, 18)

[RUN] Dataset=kaggle | Mode=log
[INFO] Running in-process experiment with mode=log


Testing payloads (mode=log): 100%|██████████| 10835/10835 [00:37<00:00, 287.04it/s]


[INFO] Saved results for dataset=kaggle mode=log to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\kaggle\log\xss_kaggle_with_api_results.csv
[INFO] Result shape: (10835, 18)

[RUN] Dataset=kaggle | Mode=block
[INFO] Running in-process experiment with mode=block


Testing payloads (mode=block): 100%|██████████| 10835/10835 [00:03<00:00, 3192.75it/s]


[INFO] Saved results for dataset=kaggle mode=block to:
       D:\Archivos de Usuario\Documents\xss-cookie\notebooks\results\kaggle\block\xss_kaggle_with_api_results.csv
[INFO] Result shape: (10835, 18)

[INFO] Finished all datasets and modes.


## Quick evaluation: how does the filter behave?

We can use:

- `Label` as ground truth (1 = XSS, 0 = benign),
- `api_blocked` as the decision of the mitigation layer.

This gives us a simple confusion matrix:

- TP: XSS correctly blocked
- FN: XSS not blocked
- FP: Benign blocked (false positive)
- TN: Benign not blocked


In [7]:
# 6. Quantitative evaluation (confusion matrix) per dataset and mode

for dataset_key, modes_dict in all_results.items():
    print("\n" + "=" * 70)
    print(f"=== Evaluation for dataset: {dataset_key} ===")
    for mode, df_mode in modes_dict.items():
        print("\n--- Mode:", mode, "---")
        evaluate_blocking(df_mode)


=== Evaluation for dataset: full ===

--- Mode: off ---
TP (XSS blocked)       : 0
FN (XSS not blocked)   : 11703
FP (benign blocked)    : 0
TN (benign not blocked): 3648
TPR (Recall for XSS)         : 0.0000
FNR (Miss rate for XSS)      : 1.0000
FPR (False positive rate)    : 0.0000
TNR (Specificity for benign) : 1.0000

--- Mode: log ---
TP (XSS blocked)       : 0
FN (XSS not blocked)   : 11703
FP (benign blocked)    : 0
TN (benign not blocked): 3648
TPR (Recall for XSS)         : 0.0000
FNR (Miss rate for XSS)      : 1.0000
FPR (False positive rate)    : 0.0000
TNR (Specificity for benign) : 1.0000

--- Mode: block ---
TP (XSS blocked)       : 11404
FN (XSS not blocked)   : 299
FP (benign blocked)    : 148
TN (benign not blocked): 3500
TPR (Recall for XSS)         : 0.9745
FNR (Miss rate for XSS)      : 0.0255
FPR (False positive rate)    : 0.0406
TNR (Specificity for benign) : 0.9594

=== Evaluation for dataset: github ===

--- Mode: off ---
TP (XSS blocked)       : 0
FN (XSS not 

In [8]:
# 7. Quick look at a few example rows

for dataset_key, modes_dict in all_results.items():
    # Only the "block" run is previewed for each dataset.
    if "block" in modes_dict:
        print("\n" + "-" * 70)
        print(f"[HEAD] Dataset={dataset_key} | Mode=block")
        display(modes_dict["block"].head())


----------------------------------------------------------------------
[HEAD] Dataset=full | Mode=block


Unnamed: 0,Sentence_clean,Label,families_str,len_after_clean,source,family_main,has_script_tag,has_event,has_js_uri,has_iframe,api_original,api_sanitized,api_blocked,api_category,api_mode,api_error,api_error_message,api_raw_json
0,"<li><a href=""/wiki/File:Socrates.png"" class=""i...",0,image_tag,557,kaggle,benign,False,False,False,False,"<li><a href=""/wiki/File:Socrates.png"" class=""i...",&lt;li&gt;&lt;a href=&quot;/wiki/File:Socrates...,False,benign,block,,,"{""original"": ""<li><a href=\""/wiki/File:Socrate..."
1,"<tt onmouseover=""alert(1)"">test</tt>",1,event_handler,36,kaggle,event,False,True,False,False,"<tt onmouseover=""alert(1)"">test</tt>",[blocked by simple context-based filter],True,event,block,,,"{""original"": ""<tt onmouseover=\""alert(1)\"">tes..."
2,"</span> <span class=""reference-text"">Steering ...",0,other,230,kaggle,benign,False,False,False,False,"</span> <span class=""reference-text"">Steering ...",&lt;/span&gt; &lt;span class=&quot;reference-t...,False,benign,block,,,"{""original"": ""</span> <span class=\""reference-..."
3,"</span> <span class=""reference-text""><cite cla...",0,maybe_polyglot,392,kaggle,benign,False,False,False,False,"</span> <span class=""reference-text""><cite cla...",&lt;/span&gt; &lt;span class=&quot;reference-t...,False,benign,block,,,"{""original"": ""</span> <span class=\""reference-..."
4,"</span>. <a href=""/wiki/Digital_object_identif...",0,other,419,kaggle,benign,False,False,False,False,"</span>. <a href=""/wiki/Digital_object_identif...",&lt;/span&gt;. &lt;a href=&quot;/wiki/Digital_...,False,benign,block,,,"{""original"": ""</span>. <a href=\""/wiki/Digital..."



----------------------------------------------------------------------
[HEAD] Dataset=github | Mode=block


Unnamed: 0,Sentence_clean,Label,families_str,len_after_clean,source,family_main,has_script_tag,has_event,has_js_uri,has_iframe,api_original,api_sanitized,api_blocked,api_category,api_mode,api_error,api_error_message,api_raw_json
0,<script>alert(document.cookie);</script>,1,script_tag,40,github,script,True,False,False,False,<script>alert(document.cookie);</script>,[blocked by simple context-based filter],True,script_tag,block,,,"{""original"": ""<script>alert(document.cookie);<..."
1,<script>alert(document.cookie);</script>&btng=...,1,script_tag,98,github,script,True,False,False,False,<script>alert(document.cookie);</script>&btng=...,[blocked by simple context-based filter],True,script_tag,block,,,"{""original"": ""<script>alert(document.cookie);<..."
2,<marquee>pappy</marquee>&missionary_id=69,1,marquee_tag,41,github,benign,False,False,False,False,<marquee>pappy</marquee>&missionary_id=69,&lt;marquee&gt;pappy&lt;/marquee&gt;&amp;missi...,False,benign,block,,,"{""original"": ""<marquee>pappy</marquee>&mission..."
3,<script>alert(document.cookie);</script>&subdw...,1,script_tag,212,github,script,True,False,False,False,<script>alert(document.cookie);</script>&subdw...,[blocked by simple context-based filter],True,script_tag,block,,,"{""original"": ""<script>alert(document.cookie);<..."
4,<iframe src=http://google.com>,1,iframe_tag,30,github,iframe,False,False,False,True,<iframe src=http://google.com>,[blocked by simple context-based filter],True,active_tag,block,,,"{""original"": ""<iframe src=http://google.com>"",..."



----------------------------------------------------------------------
[HEAD] Dataset=kaggle | Mode=block


Unnamed: 0,Sentence_clean,Label,families_str,len_after_clean,source,family_main,has_script_tag,has_event,has_js_uri,has_iframe,api_original,api_sanitized,api_blocked,api_category,api_mode,api_error,api_error_message,api_raw_json
0,"<li><a href=""/wiki/File:Socrates.png"" class=""i...",0,image_tag,557,kaggle,benign,False,False,False,False,"<li><a href=""/wiki/File:Socrates.png"" class=""i...",&lt;li&gt;&lt;a href=&quot;/wiki/File:Socrates...,False,benign,block,,,"{""original"": ""<li><a href=\""/wiki/File:Socrate..."
1,"<tt onmouseover=""alert(1)"">test</tt>",1,event_handler,36,kaggle,event,False,True,False,False,"<tt onmouseover=""alert(1)"">test</tt>",[blocked by simple context-based filter],True,event,block,,,"{""original"": ""<tt onmouseover=\""alert(1)\"">tes..."
2,"</span> <span class=""reference-text"">Steering ...",0,other,230,kaggle,benign,False,False,False,False,"</span> <span class=""reference-text"">Steering ...",&lt;/span&gt; &lt;span class=&quot;reference-t...,False,benign,block,,,"{""original"": ""</span> <span class=\""reference-..."
3,"</span> <span class=""reference-text""><cite cla...",0,maybe_polyglot,392,kaggle,benign,False,False,False,False,"</span> <span class=""reference-text""><cite cla...",&lt;/span&gt; &lt;span class=&quot;reference-t...,False,benign,block,,,"{""original"": ""</span> <span class=\""reference-..."
4,"</span>. <a href=""/wiki/Digital_object_identif...",0,other,419,kaggle,benign,False,False,False,False,"</span>. <a href=""/wiki/Digital_object_identif...",&lt;/span&gt;. &lt;a href=&quot;/wiki/Digital_...,False,benign,block,,,"{""original"": ""</span>. <a href=\""/wiki/Digital..."


In [9]:
# Example (all_results is keyed by dataset first, then mode): all_results["full"]["block"].head()
