# LLM Query Injection Experiment Results Analysis

This notebook analyzes the results from the LLM query injection experiments, comparing performance across different prompt types, attack types, and mitigation strategies.

**Baseline**: BASIC prompt, no attack, no mitigation


In [None]:
import pandas as pd
from IPython.display import display
import os
from dotenv import load_dotenv

load_dotenv()

# Set display options
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)

# Load the results

MODEL_NAME = os.getenv("MODEL_NAME")

CSV_FILENAME = f"{MODEL_NAME}_results.csv"
df = pd.read_csv(CSV_FILENAME)

print(f"Loaded {len(df)} experiment results")
print(f"Columns: {list(df.columns)}")
df.head()

Loaded 48 experiment results
Columns: ['filename', 'prompt_type', 'attack_type', 'mitigation_type', 'overall_mae', 'mae_gt_zero', 'mae_gt_positive', 'overall_jer', 'jer_gt_zero', 'jer_gt_positive', 'delta_overall_mae', 'delta_mae_gt_zero', 'delta_mae_gt_positive', 'delta_overall_jer', 'delta_jer_gt_zero', 'delta_jer_gt_positive', 'valid_experiments', 'gt_zero_count', 'gt_positive_count', 'failed_experiments', 'total_experiments']


Unnamed: 0,filename,prompt_type,attack_type,mitigation_type,overall_mae,mae_gt_zero,mae_gt_positive,overall_jer,jer_gt_zero,jer_gt_positive,delta_overall_mae,delta_mae_gt_zero,delta_mae_gt_positive,delta_overall_jer,delta_jer_gt_zero,delta_jer_gt_positive,valid_experiments,gt_zero_count,gt_positive_count,failed_experiments,total_experiments
0,results_BASIC_none_none.json,BASIC,none,none,1.7711,2.2946,0.9247,89.46,98.55,74.77,,,,,,,7021,4338,2683,0,7021
1,results_BASIC_append_few_shot.json,BASIC,append,few_shot,1.7977,2.2985,0.9881,88.16,95.5,76.3,0.0266,0.0039,0.0634,-1.3,-3.04,1.53,7021,4338,2683,0,7021
2,results_BASIC_append_none.json,BASIC,append,none,1.9412,2.5007,1.0365,89.96,99.08,75.21,0.1701,0.2061,0.1118,0.5,0.53,0.45,7021,4338,2683,0,7021
3,results_BASIC_append_system_prompt_hardening.json,BASIC,append,system_prompt_hardening,1.7928,2.308,0.9597,89.65,97.65,76.71,0.0216,0.0134,0.035,0.19,-0.9,1.94,7021,4338,2683,0,7021
4,results_BASIC_append_user_prompt_hardening.json,BASIC,append,user_prompt_hardening,2.0705,2.6579,1.1208,88.39,97.26,74.06,0.2994,0.3633,0.196,-1.07,-1.29,-0.71,7021,4338,2683,0,7021


## 1. Overall MAE Results


In [37]:
# Find highest metric and highest delta for Overall MAE
highest_mae = df.loc[df["overall_mae"].idxmax()]
highest_delta_mae = df[df["delta_overall_mae"] != "NaN"].loc[
    df[df["delta_overall_mae"] != "NaN"]["delta_overall_mae"].idxmax()
]
print(f"Highest MAE: {highest_mae['overall_mae']:.4f}")
print(
    f"  Experiment: {highest_mae['prompt_type']}, {highest_mae['attack_type']}, {highest_mae['mitigation_type']}"
)
print(f"\nHighest Δ from Baseline: {highest_delta_mae['delta_overall_mae']:.4f}")
print(
    f"  Experiment: {highest_delta_mae['prompt_type']}, {highest_delta_mae['attack_type']}, {highest_delta_mae['mitigation_type']}"
)

Highest MAE: 2.2931
  Experiment: UTILITY, prepend, user_prompt_hardening

Highest Δ from Baseline: 0.5220
  Experiment: UTILITY, prepend, user_prompt_hardening


In [38]:
# Create overall MAE table
mae_table = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "overall_mae",
        "delta_overall_mae",
    ]
].copy()
mae_table = mae_table.sort_values(["prompt_type", "attack_type", "mitigation_type"])

# Format the table
mae_table["overall_mae"] = mae_table["overall_mae"].apply(
    lambda x: f"{x:.4f}" if pd.notna(x) else "N/A"
)
mae_table["delta_overall_mae"] = mae_table["delta_overall_mae"].apply(
    lambda x: f"{x:+.4f}" if pd.notna(x) and x != "NaN" else "N/A"
)

# Rename columns for display
mae_table_display = mae_table.rename(
    columns={
        "prompt_type": "Prompt",
        "attack_type": "Attack",
        "mitigation_type": "Mitigation",
        "overall_mae": "MAE",
        "delta_overall_mae": "Δ from Baseline",
    }
)

print("Overall MAE Results (Mean Absolute Error)")
print("=" * 60)
display(mae_table_display)

Overall MAE Results (Mean Absolute Error)


Unnamed: 0,Prompt,Attack,Mitigation,MAE,Δ from Baseline
1,BASIC,append,few_shot,1.7977,0.0266
2,BASIC,append,none,1.9412,0.1701
3,BASIC,append,system_prompt_hardening,1.7928,0.0216
4,BASIC,append,user_prompt_hardening,2.0705,0.2994
5,BASIC,none,few_shot,1.6189,-0.1523
0,BASIC,none,none,1.7711,
6,BASIC,none,system_prompt_hardening,1.6642,-0.107
7,BASIC,none,user_prompt_hardening,1.9605,0.1894
8,BASIC,prepend,few_shot,1.8096,0.0385
9,BASIC,prepend,none,1.9543,0.1832


## 2. Overall JER Results


In [39]:
# Find highest metric and highest delta for Overall JER
highest_jer = df.loc[df["overall_jer"].idxmax()]
highest_delta_jer = df[df["delta_overall_jer"] != "NaN"].loc[
    df[df["delta_overall_jer"] != "NaN"]["delta_overall_jer"].idxmax()
]
print(f"Highest JER: {highest_jer['overall_jer']:.2f}%")
print(
    f"  Experiment: {highest_jer['prompt_type']}, {highest_jer['attack_type']}, {highest_jer['mitigation_type']}"
)
print(f"\nHighest Δ from Baseline: {highest_delta_jer['delta_overall_jer']:.2f}%")
print(
    f"  Experiment: {highest_delta_jer['prompt_type']}, {highest_delta_jer['attack_type']}, {highest_delta_jer['mitigation_type']}"
)

Highest JER: 90.30%
  Experiment: RATIONALE, scatter, system_prompt_hardening

Highest Δ from Baseline: 0.84%
  Experiment: RATIONALE, scatter, system_prompt_hardening


In [40]:
# Create overall JER table
jer_table = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "overall_jer",
        "delta_overall_jer",
    ]
].copy()
jer_table = jer_table.sort_values(["prompt_type", "attack_type", "mitigation_type"])

# Format the table
jer_table["overall_jer"] = jer_table["overall_jer"].apply(
    lambda x: f"{x:.2f}%" if pd.notna(x) else "N/A"
)
jer_table["delta_overall_jer"] = jer_table["delta_overall_jer"].apply(
    lambda x: f"{x:+.2f}%" if pd.notna(x) and x != "NaN" else "N/A"
)

# Rename columns for display
jer_table_display = jer_table.rename(
    columns={
        "prompt_type": "Prompt",
        "attack_type": "Attack",
        "mitigation_type": "Mitigation",
        "overall_jer": "JER",
        "delta_overall_jer": "Δ from Baseline",
    }
)

print("Overall JER Results (Judgment Error Rate)")
print("=" * 60)
display(jer_table_display)

Overall JER Results (Judgment Error Rate)


Unnamed: 0,Prompt,Attack,Mitigation,JER,Δ from Baseline
1,BASIC,append,few_shot,88.16%,-1.30%
2,BASIC,append,none,89.96%,+0.50%
3,BASIC,append,system_prompt_hardening,89.65%,+0.19%
4,BASIC,append,user_prompt_hardening,88.39%,-1.07%
5,BASIC,none,few_shot,87.88%,-1.58%
0,BASIC,none,none,89.46%,
6,BASIC,none,system_prompt_hardening,89.26%,-0.20%
7,BASIC,none,user_prompt_hardening,88.65%,-0.81%
8,BASIC,prepend,few_shot,89.65%,+0.19%
9,BASIC,prepend,none,89.75%,+0.28%


## 3. MAE for Ground Truth = 0 (Irrelevant Documents)


In [41]:
# Find highest metric and highest delta for MAE (GT=0)
highest_mae_gt0 = df.loc[df["mae_gt_zero"].idxmax()]
highest_delta_mae_gt0 = df[df["delta_mae_gt_zero"] != "NaN"].loc[
    df[df["delta_mae_gt_zero"] != "NaN"]["delta_mae_gt_zero"].idxmax()
]
print(f"Highest MAE (GT=0): {highest_mae_gt0['mae_gt_zero']:.4f}")
print(
    f"  Experiment: {highest_mae_gt0['prompt_type']}, {highest_mae_gt0['attack_type']}, {highest_mae_gt0['mitigation_type']}"
)
print(f"\nHighest Δ from Baseline: {highest_delta_mae_gt0['delta_mae_gt_zero']:.4f}")
print(
    f"  Experiment: {highest_delta_mae_gt0['prompt_type']}, {highest_delta_mae_gt0['attack_type']}, {highest_delta_mae_gt0['mitigation_type']}"
)

Highest MAE (GT=0): 2.9373
  Experiment: UTILITY, prepend, user_prompt_hardening

Highest Δ from Baseline: 0.6427
  Experiment: UTILITY, prepend, user_prompt_hardening


In [42]:
# Create GT=0 MAE table
mae_gt0_table = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "mae_gt_zero",
        "delta_mae_gt_zero",
    ]
].copy()
mae_gt0_table = mae_gt0_table.sort_values(
    ["prompt_type", "attack_type", "mitigation_type"]
)

# Format the table
mae_gt0_table["mae_gt_zero"] = mae_gt0_table["mae_gt_zero"].apply(
    lambda x: f"{x:.4f}" if pd.notna(x) else "N/A"
)
mae_gt0_table["delta_mae_gt_zero"] = mae_gt0_table["delta_mae_gt_zero"].apply(
    lambda x: f"{x:+.4f}" if pd.notna(x) and x != "NaN" else "N/A"
)

# Rename columns for display
mae_gt0_table_display = mae_gt0_table.rename(
    columns={
        "prompt_type": "Prompt",
        "attack_type": "Attack",
        "mitigation_type": "Mitigation",
        "mae_gt_zero": "MAE (GT=0)",
        "delta_mae_gt_zero": "Δ from Baseline",
    }
)

print("MAE for Ground Truth = 0 (Irrelevant Documents)")
print("=" * 60)
display(mae_gt0_table_display)

MAE for Ground Truth = 0 (Irrelevant Documents)


Unnamed: 0,Prompt,Attack,Mitigation,MAE (GT=0),Δ from Baseline
1,BASIC,append,few_shot,2.2985,0.0039
2,BASIC,append,none,2.5007,0.2061
3,BASIC,append,system_prompt_hardening,2.308,0.0134
4,BASIC,append,user_prompt_hardening,2.6579,0.3633
5,BASIC,none,few_shot,2.092,-0.2026
0,BASIC,none,none,2.2946,
6,BASIC,none,system_prompt_hardening,2.1515,-0.1432
7,BASIC,none,user_prompt_hardening,2.5166,0.222
8,BASIC,prepend,few_shot,2.3686,0.074
9,BASIC,prepend,none,2.5461,0.2515


## 4. JER for Ground Truth = 0 (Irrelevant Documents)


In [43]:
# Find highest metric and highest delta for JER (GT=0)
highest_jer_gt0 = df.loc[df["jer_gt_zero"].idxmax()]
highest_delta_jer_gt0 = df[df["delta_jer_gt_zero"] != "NaN"].loc[
    df[df["delta_jer_gt_zero"] != "NaN"]["delta_jer_gt_zero"].idxmax()
]
print(f"Highest JER (GT=0): {highest_jer_gt0['jer_gt_zero']:.2f}%")
print(
    f"  Experiment: {highest_jer_gt0['prompt_type']}, {highest_jer_gt0['attack_type']}, {highest_jer_gt0['mitigation_type']}"
)
print(f"\nHighest Δ from Baseline: {highest_delta_jer_gt0['delta_jer_gt_zero']:.2f}%")
print(
    f"  Experiment: {highest_delta_jer_gt0['prompt_type']}, {highest_delta_jer_gt0['attack_type']}, {highest_delta_jer_gt0['mitigation_type']}"
)

Highest JER (GT=0): 100.00%
  Experiment: RATIONALE, prepend, none

Highest Δ from Baseline: 1.45%
  Experiment: RATIONALE, prepend, none


In [44]:
# Create GT=0 JER table
jer_gt0_table = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "jer_gt_zero",
        "delta_jer_gt_zero",
    ]
].copy()
jer_gt0_table = jer_gt0_table.sort_values(
    ["prompt_type", "attack_type", "mitigation_type"]
)

# Format the table
jer_gt0_table["jer_gt_zero"] = jer_gt0_table["jer_gt_zero"].apply(
    lambda x: f"{x:.2f}%" if pd.notna(x) else "N/A"
)
jer_gt0_table["delta_jer_gt_zero"] = jer_gt0_table["delta_jer_gt_zero"].apply(
    lambda x: f"{x:+.2f}%" if pd.notna(x) and x != "NaN" else "N/A"
)

# Rename columns for display
jer_gt0_table_display = jer_gt0_table.rename(
    columns={
        "prompt_type": "Prompt",
        "attack_type": "Attack",
        "mitigation_type": "Mitigation",
        "jer_gt_zero": "JER (GT=0)",
        "delta_jer_gt_zero": "Δ from Baseline",
    }
)

print("JER for Ground Truth = 0 (Irrelevant Documents)")
print("=" * 60)
display(jer_gt0_table_display)

JER for Ground Truth = 0 (Irrelevant Documents)


Unnamed: 0,Prompt,Attack,Mitigation,JER (GT=0),Δ from Baseline
1,BASIC,append,few_shot,95.50%,-3.04%
2,BASIC,append,none,99.08%,+0.53%
3,BASIC,append,system_prompt_hardening,97.65%,-0.90%
4,BASIC,append,user_prompt_hardening,97.26%,-1.29%
5,BASIC,none,few_shot,95.27%,-3.27%
0,BASIC,none,none,98.55%,
6,BASIC,none,system_prompt_hardening,97.21%,-1.34%
7,BASIC,none,user_prompt_hardening,97.42%,-1.13%
8,BASIC,prepend,few_shot,99.26%,+0.71%
9,BASIC,prepend,none,99.82%,+1.27%


## 5. MAE for Ground Truth > 0 (Relevant Documents)


In [45]:
# Find highest metric and highest delta for MAE (GT>0)
highest_mae_gtpos = df.loc[df["mae_gt_positive"].idxmax()]
highest_delta_mae_gtpos = df[df["delta_mae_gt_positive"] != "NaN"].loc[
    df[df["delta_mae_gt_positive"] != "NaN"]["delta_mae_gt_positive"].idxmax()
]
print(f"Highest MAE (GT>0): {highest_mae_gtpos['mae_gt_positive']:.4f}")
print(
    f"  Experiment: {highest_mae_gtpos['prompt_type']}, {highest_mae_gtpos['attack_type']}, {highest_mae_gtpos['mitigation_type']}"
)
print(
    f"\nHighest Δ from Baseline: {highest_delta_mae_gtpos['delta_mae_gt_positive']:.4f}"
)
print(
    f"  Experiment: {highest_delta_mae_gtpos['prompt_type']}, {highest_delta_mae_gtpos['attack_type']}, {highest_delta_mae_gtpos['mitigation_type']}"
)

Highest MAE (GT>0): 1.2516
  Experiment: UTILITY, prepend, user_prompt_hardening

Highest Δ from Baseline: 0.3269
  Experiment: UTILITY, prepend, user_prompt_hardening


In [46]:
# Create GT>0 MAE table
mae_gtpos_table = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "mae_gt_positive",
        "delta_mae_gt_positive",
    ]
].copy()
mae_gtpos_table = mae_gtpos_table.sort_values(
    ["prompt_type", "attack_type", "mitigation_type"]
)

# Format the table
mae_gtpos_table["mae_gt_positive"] = mae_gtpos_table["mae_gt_positive"].apply(
    lambda x: f"{x:.4f}" if pd.notna(x) else "N/A"
)
mae_gtpos_table["delta_mae_gt_positive"] = mae_gtpos_table[
    "delta_mae_gt_positive"
].apply(lambda x: f"{x:+.4f}" if pd.notna(x) and x != "NaN" else "N/A")

# Rename columns for display
mae_gtpos_table_display = mae_gtpos_table.rename(
    columns={
        "prompt_type": "Prompt",
        "attack_type": "Attack",
        "mitigation_type": "Mitigation",
        "mae_gt_positive": "MAE (GT>0)",
        "delta_mae_gt_positive": "Δ from Baseline",
    }
)

print("MAE for Ground Truth > 0 (Relevant Documents)")
print("=" * 60)
display(mae_gtpos_table_display)

MAE for Ground Truth > 0 (Relevant Documents)


Unnamed: 0,Prompt,Attack,Mitigation,MAE (GT>0),Δ from Baseline
1,BASIC,append,few_shot,0.9881,0.0634
2,BASIC,append,none,1.0365,0.1118
3,BASIC,append,system_prompt_hardening,0.9597,0.035
4,BASIC,append,user_prompt_hardening,1.1208,0.196
5,BASIC,none,few_shot,0.8539,-0.0708
0,BASIC,none,none,0.9247,
6,BASIC,none,system_prompt_hardening,0.8763,-0.0485
7,BASIC,none,user_prompt_hardening,1.0615,0.1368
8,BASIC,prepend,few_shot,0.9057,-0.019
9,BASIC,prepend,none,0.9974,0.0727


## 6. JER for Ground Truth > 0 (Relevant Documents)


In [47]:
# Find highest metric and highest delta for JER (GT>0)
highest_jer_gtpos = df.loc[df["jer_gt_positive"].idxmax()]
highest_delta_jer_gtpos = df[df["delta_jer_gt_positive"] != "NaN"].loc[
    df[df["delta_jer_gt_positive"] != "NaN"]["delta_jer_gt_positive"].idxmax()
]
print(f"Highest JER (GT>0): {highest_jer_gtpos['jer_gt_positive']:.2f}%")
print(
    f"  Experiment: {highest_jer_gtpos['prompt_type']}, {highest_jer_gtpos['attack_type']}, {highest_jer_gtpos['mitigation_type']}"
)
print(
    f"\nHighest Δ from Baseline: {highest_delta_jer_gtpos['delta_jer_gt_positive']:.2f}%"
)
print(
    f"  Experiment: {highest_delta_jer_gtpos['prompt_type']}, {highest_delta_jer_gtpos['attack_type']}, {highest_delta_jer_gtpos['mitigation_type']}"
)

Highest JER (GT>0): 77.56%
  Experiment: BASIC, scatter, system_prompt_hardening

Highest Δ from Baseline: 2.80%
  Experiment: BASIC, scatter, system_prompt_hardening


In [48]:
# Create GT>0 JER table
jer_gtpos_table = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "jer_gt_positive",
        "delta_jer_gt_positive",
    ]
].copy()
jer_gtpos_table = jer_gtpos_table.sort_values(
    ["prompt_type", "attack_type", "mitigation_type"]
)

# Format the table
jer_gtpos_table["jer_gt_positive"] = jer_gtpos_table["jer_gt_positive"].apply(
    lambda x: f"{x:.2f}%" if pd.notna(x) else "N/A"
)
jer_gtpos_table["delta_jer_gt_positive"] = jer_gtpos_table[
    "delta_jer_gt_positive"
].apply(lambda x: f"{x:+.2f}%" if pd.notna(x) and x != "NaN" else "N/A")

# Rename columns for display
jer_gtpos_table_display = jer_gtpos_table.rename(
    columns={
        "prompt_type": "Prompt",
        "attack_type": "Attack",
        "mitigation_type": "Mitigation",
        "jer_gt_positive": "JER (GT>0)",
        "delta_jer_gt_positive": "Δ from Baseline",
    }
)

print("JER for Ground Truth > 0 (Relevant Documents)")
print("=" * 60)
display(jer_gtpos_table_display)

JER for Ground Truth > 0 (Relevant Documents)


Unnamed: 0,Prompt,Attack,Mitigation,JER (GT>0),Δ from Baseline
1,BASIC,append,few_shot,76.30%,+1.53%
2,BASIC,append,none,75.21%,+0.45%
3,BASIC,append,system_prompt_hardening,76.71%,+1.94%
4,BASIC,append,user_prompt_hardening,74.06%,-0.71%
5,BASIC,none,few_shot,75.92%,+1.16%
0,BASIC,none,none,74.77%,
6,BASIC,none,system_prompt_hardening,76.41%,+1.64%
7,BASIC,none,user_prompt_hardening,74.47%,-0.30%
8,BASIC,prepend,few_shot,74.10%,-0.67%
9,BASIC,prepend,none,73.46%,-1.30%


## 7. Summary Statistics


In [49]:
# Summary statistics
print("Experiment Summary")
print("=" * 30)
print(f"Total experiments: {len(df)}")
print(f"Prompt types: {df['prompt_type'].unique().tolist()}")
print(f"Attack types: {df['attack_type'].unique().tolist()}")
print(f"Mitigation types: {df['mitigation_type'].unique().tolist()}")

# Find baseline
baseline = df[
    (df["prompt_type"] == "BASIC")
    & (df["attack_type"] == "none")
    & (df["mitigation_type"] == "none")
]
if not baseline.empty:
    print("\nBaseline (BASIC, none, none) metrics:")
    print(f"  Overall MAE: {baseline['overall_mae'].iloc[0]:.4f}")
    print(f"  Overall JER: {baseline['overall_jer'].iloc[0]:.2f}%")
    print(f"  MAE GT=0: {baseline['mae_gt_zero'].iloc[0]:.4f}")
    print(f"  JER GT=0: {baseline['jer_gt_zero'].iloc[0]:.2f}%")
    print(f"  MAE GT>0: {baseline['mae_gt_positive'].iloc[0]:.4f}")
    print(f"  JER GT>0: {baseline['jer_gt_positive'].iloc[0]:.2f}%")

# Best and worst performing experiments
print(f"\nBest Overall MAE: {df['overall_mae'].min():.4f}")
best_mae = df[df["overall_mae"] == df["overall_mae"].min()]
print(
    f"  Experiment: {best_mae['prompt_type'].iloc[0]}, {best_mae['attack_type'].iloc[0]}, {best_mae['mitigation_type'].iloc[0]}"
)

print(f"\nWorst Overall MAE: {df['overall_mae'].max():.4f}")
worst_mae = df[df["overall_mae"] == df["overall_mae"].max()]
print(
    f"  Experiment: {worst_mae['prompt_type'].iloc[0]}, {worst_mae['attack_type'].iloc[0]}, {worst_mae['mitigation_type'].iloc[0]}"
)

print(f"\nBest Overall JER: {df['overall_jer'].min():.2f}%")
best_jer = df[df["overall_jer"] == df["overall_jer"].min()]
print(
    f"  Experiment: {best_jer['prompt_type'].iloc[0]}, {best_jer['attack_type'].iloc[0]}, {best_jer['mitigation_type'].iloc[0]}"
)

print(f"\nWorst Overall JER: {df['overall_jer'].max():.2f}%")
worst_jer = df[df["overall_jer"] == df["overall_jer"].max()]
print(
    f"  Experiment: {worst_jer['prompt_type'].iloc[0]}, {worst_jer['attack_type'].iloc[0]}, {worst_jer['mitigation_type'].iloc[0]}"
)

Experiment Summary
Total experiments: 48
Prompt types: ['BASIC', 'RATIONALE', 'UTILITY']
Attack types: ['none', 'append', 'prepend', 'scatter']
Mitigation types: ['none', 'few_shot', 'system_prompt_hardening', 'user_prompt_hardening']

Baseline (BASIC, none, none) metrics:
  Overall MAE: 1.7711
  Overall JER: 89.46%
  MAE GT=0: 2.2946
  JER GT=0: 98.55%
  MAE GT>0: 0.9247
  JER GT>0: 74.77%

Best Overall MAE: 1.6189
  Experiment: BASIC, none, few_shot

Worst Overall MAE: 2.2931
  Experiment: UTILITY, prepend, user_prompt_hardening

Best Overall JER: 87.61%
  Experiment: UTILITY, none, user_prompt_hardening

Worst Overall JER: 90.30%
  Experiment: RATIONALE, scatter, system_prompt_hardening


## 8. Delta Analysis (Performance vs Baseline)


In [50]:
# Analyze deltas (excluding baseline itself)
non_baseline = df[
    (df["prompt_type"] != "BASIC")
    | (df["attack_type"] != "none")
    | (df["mitigation_type"] != "none")
]

print("Delta Analysis (Performance vs Baseline)")
print("=" * 40)
print(f"Experiments compared to baseline: {len(non_baseline)}")

# Count improvements vs degradations
mae_improvements = len(non_baseline[non_baseline["delta_overall_mae"] < 0])
mae_degradations = len(non_baseline[non_baseline["delta_overall_mae"] > 0])
jer_improvements = len(non_baseline[non_baseline["delta_overall_jer"] < 0])
jer_degradations = len(non_baseline[non_baseline["delta_overall_jer"] > 0])

print(f"\nMAE Improvements (negative delta): {mae_improvements}")
print(f"MAE Degradations (positive delta): {mae_degradations}")
print(f"JER Improvements (negative delta): {jer_improvements}")
print(f"JER Degradations (positive delta): {jer_degradations}")

# Best improvements
if not non_baseline.empty:
    best_mae_improvement = non_baseline[
        non_baseline["delta_overall_mae"] == non_baseline["delta_overall_mae"].min()
    ]
    best_jer_improvement = non_baseline[
        non_baseline["delta_overall_jer"] == non_baseline["delta_overall_jer"].min()
    ]

    print(
        f"\nBest MAE Improvement: {best_mae_improvement['delta_overall_mae'].iloc[0]:.4f}"
    )
    print(
        f"  Experiment: {best_mae_improvement['prompt_type'].iloc[0]}, {best_mae_improvement['attack_type'].iloc[0]}, {best_mae_improvement['mitigation_type'].iloc[0]}"
    )

    print(
        f"\nBest JER Improvement: {best_jer_improvement['delta_overall_jer'].iloc[0]:.2f}%"
    )
    print(
        f"  Experiment: {best_jer_improvement['prompt_type'].iloc[0]}, {best_jer_improvement['attack_type'].iloc[0]}, {best_jer_improvement['mitigation_type'].iloc[0]}"
    )

Delta Analysis (Performance vs Baseline)
Experiments compared to baseline: 47

MAE Improvements (negative delta): 5
MAE Degradations (positive delta): 42
JER Improvements (negative delta): 27
JER Degradations (positive delta): 20

Best MAE Improvement: -0.1523
  Experiment: BASIC, none, few_shot

Best JER Improvement: -1.85%
  Experiment: UTILITY, none, user_prompt_hardening


In [51]:
# ==============================================================================
# LaTeX Table Generation for Thesis
# ==============================================================================

print("=" * 80)
print("LATEX TABLE OUTPUT FOR THESIS")
print("=" * 80)

# 1. Overall MAE Table
print("\n" + "=" * 80)
print("1. OVERALL MAE TABLE")
print("=" * 80 + "\n")
mae_latex = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "overall_mae",
        "delta_overall_mae",
    ]
].copy()
mae_latex = mae_latex.sort_values(["prompt_type", "attack_type", "mitigation_type"])
mae_latex.columns = ["Prompt", "Attack", "Mitigation", "MAE", "$\\Delta$ MAE"]
latex_code = mae_latex.to_latex(
    index=False, float_format="%.4f", escape=False, column_format="lllrr", na_rep="--"
)
print(latex_code)

# 2. Overall JER Table
print("\n" + "=" * 80)
print("2. OVERALL JER TABLE")
print("=" * 80 + "\n")
jer_latex = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "overall_jer",
        "delta_overall_jer",
    ]
].copy()
jer_latex = jer_latex.sort_values(["prompt_type", "attack_type", "mitigation_type"])
jer_latex.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "JER (\\%)",
    "$\\Delta$ JER (\\%)",
]
latex_code = jer_latex.to_latex(
    index=False, float_format="%.2f", escape=False, column_format="lllrr", na_rep="--"
)
print(latex_code)

# 3. MAE for GT=0 Table
print("\n" + "=" * 80)
print("3. MAE FOR GROUND TRUTH = 0 (IRRELEVANT DOCUMENTS)")
print("=" * 80 + "\n")
mae_gt0_latex = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "mae_gt_zero",
        "delta_mae_gt_zero",
    ]
].copy()
mae_gt0_latex = mae_gt0_latex.sort_values(
    ["prompt_type", "attack_type", "mitigation_type"]
)
mae_gt0_latex.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "MAE (GT=0)",
    "$\\Delta$ MAE (GT=0)",
]
latex_code = mae_gt0_latex.to_latex(
    index=False, float_format="%.4f", escape=False, column_format="lllrr", na_rep="--"
)
print(latex_code)

# 4. JER for GT=0 Table
print("\n" + "=" * 80)
print("4. JER FOR GROUND TRUTH = 0 (IRRELEVANT DOCUMENTS)")
print("=" * 80 + "\n")
jer_gt0_latex = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "jer_gt_zero",
        "delta_jer_gt_zero",
    ]
].copy()
jer_gt0_latex = jer_gt0_latex.sort_values(
    ["prompt_type", "attack_type", "mitigation_type"]
)
jer_gt0_latex.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "JER (GT=0) (\\%)",
    "$\\Delta$ JER (GT=0) (\\%)",
]
latex_code = jer_gt0_latex.to_latex(
    index=False, float_format="%.2f", escape=False, column_format="lllrr", na_rep="--"
)
print(latex_code)

# 5. MAE for GT>0 Table
print("\n" + "=" * 80)
print("5. MAE FOR GROUND TRUTH > 0 (RELEVANT DOCUMENTS)")
print("=" * 80 + "\n")
mae_gtpos_latex = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "mae_gt_positive",
        "delta_mae_gt_positive",
    ]
].copy()
mae_gtpos_latex = mae_gtpos_latex.sort_values(
    ["prompt_type", "attack_type", "mitigation_type"]
)
mae_gtpos_latex.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "MAE (GT$>$0)",
    "$\\Delta$ MAE (GT$>$0)",
]
latex_code = mae_gtpos_latex.to_latex(
    index=False, float_format="%.4f", escape=False, column_format="lllrr", na_rep="--"
)
print(latex_code)

# 6. JER for GT>0 Table
print("\n" + "=" * 80)
print("6. JER FOR GROUND TRUTH > 0 (RELEVANT DOCUMENTS)")
print("=" * 80 + "\n")
jer_gtpos_latex = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "jer_gt_positive",
        "delta_jer_gt_positive",
    ]
].copy()
jer_gtpos_latex = jer_gtpos_latex.sort_values(
    ["prompt_type", "attack_type", "mitigation_type"]
)
jer_gtpos_latex.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "JER (GT$>$0) (\\%)",
    "$\\Delta$ JER (GT$>$0) (\\%)",
]
latex_code = jer_gtpos_latex.to_latex(
    index=False, float_format="%.2f", escape=False, column_format="lllrr", na_rep="--"
)
print(latex_code)

print("\n" + "=" * 80)
print("END OF LATEX TABLE OUTPUT")
print("=" * 80)

LATEX TABLE OUTPUT FOR THESIS

1. OVERALL MAE TABLE

\begin{tabular}{lllrr}
\toprule
Prompt & Attack & Mitigation & MAE & $\Delta$ MAE \\
\midrule
BASIC & append & few_shot & 1.7977 & 0.0266 \\
BASIC & append & none & 1.9412 & 0.1701 \\
BASIC & append & system_prompt_hardening & 1.7928 & 0.0216 \\
BASIC & append & user_prompt_hardening & 2.0705 & 0.2994 \\
BASIC & none & few_shot & 1.6189 & -0.1523 \\
BASIC & none & none & 1.7711 & -- \\
BASIC & none & system_prompt_hardening & 1.6642 & -0.1070 \\
BASIC & none & user_prompt_hardening & 1.9605 & 0.1894 \\
BASIC & prepend & few_shot & 1.8096 & 0.0385 \\
BASIC & prepend & none & 1.9543 & 0.1832 \\
BASIC & prepend & system_prompt_hardening & 1.8871 & 0.1159 \\
BASIC & prepend & user_prompt_hardening & 2.1644 & 0.3932 \\
BASIC & scatter & few_shot & 1.7248 & -0.0463 \\
BASIC & scatter & none & 1.8943 & 0.1232 \\
BASIC & scatter & system_prompt_hardening & 1.7680 & -0.0031 \\
BASIC & scatter & user_prompt_hardening & 2.1181 & 0.3470 \\
RATIO

In [52]:
# Enhanced LaTeX tables with Best Improvement rows appended at bottom
import pandas as pd

# Ensure delta columns are numeric for min() computations
cols_mae = ["delta_overall_mae", "delta_mae_gt_zero", "delta_mae_gt_positive"]
cols_jer = ["delta_overall_jer", "delta_jer_gt_zero", "delta_jer_gt_positive"]
for c in cols_mae + cols_jer:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")


def append_best_footer(
    latex_code: str, label: str, value: float, value_format: str
) -> str:
    """Insert a bold footer row with the best improvement value before \\bottomrule."""
    if pd.isna(value):
        return latex_code
    formatted = value_format % value
    footer = f"\\midrule\n\\textbf{{{label}}} &  &  &  & {formatted} \\\n\\bottomrule"
    return latex_code.replace("\\bottomrule", footer)


print("=" * 80)
print("LATEX TABLES WITH BEST IMPROVEMENT ROWS")
print("=" * 80)

# 1) Overall MAE
mae_latex = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "overall_mae",
        "delta_overall_mae",
    ]
].copy()
mae_latex = mae_latex.sort_values(["prompt_type", "attack_type", "mitigation_type"])
mae_latex.columns = ["Prompt", "Attack", "Mitigation", "MAE", "$\\Delta$ MAE"]
latex_code = mae_latex.to_latex(
    index=False, float_format="%.4f", escape=False, column_format="lllrr", na_rep="--"
)
best_mae_impr = df["delta_overall_mae"].min()
latex_code = append_best_footer(
    latex_code, "Best MAE Improvement", best_mae_impr, "%+.4f"
)
print("\n" + "=" * 80)
print("1. OVERALL MAE TABLE (with Best Improvement)")
print("=" * 80 + "\n")
print(latex_code)

# 2) Overall JER
jer_latex = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "overall_jer",
        "delta_overall_jer",
    ]
].copy()
jer_latex = jer_latex.sort_values(["prompt_type", "attack_type", "mitigation_type"])
jer_latex.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "JER (\\%)",
    "$\\Delta$ JER (\\%)",
]
latex_code = jer_latex.to_latex(
    index=False, float_format="%.2f", escape=False, column_format="lllrr", na_rep="--"
)
best_jer_impr = df["delta_overall_jer"].min()
latex_code = append_best_footer(
    latex_code, "Best JER Improvement", best_jer_impr, "%+.2f"
)
print("\n" + "=" * 80)
print("2. OVERALL JER TABLE (with Best Improvement)")
print("=" * 80 + "\n")
print(latex_code)

# 3) MAE (GT=0)
mae0 = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "mae_gt_zero",
        "delta_mae_gt_zero",
    ]
].copy()
mae0 = mae0.sort_values(["prompt_type", "attack_type", "mitigation_type"])
mae0.columns = ["Prompt", "Attack", "Mitigation", "MAE (GT=0)", "$\\Delta$ MAE (GT=0)"]
latex_code = mae0.to_latex(
    index=False, float_format="%.4f", escape=False, column_format="lllrr", na_rep="--"
)
best_mae0_impr = df["delta_mae_gt_zero"].min()
latex_code = append_best_footer(
    latex_code, "Best MAE Improvement (GT=0)", best_mae0_impr, "%+.4f"
)
print("\n" + "=" * 80)
print("3. MAE (GT=0) TABLE (with Best Improvement)")
print("=" * 80 + "\n")
print(latex_code)

# 4) JER (GT=0)
jer0 = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "jer_gt_zero",
        "delta_jer_gt_zero",
    ]
].copy()
jer0 = jer0.sort_values(["prompt_type", "attack_type", "mitigation_type"])
jer0.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "JER (GT=0) (\\%)",
    "$\\Delta$ JER (GT=0) (\\%)",
]
latex_code = jer0.to_latex(
    index=False, float_format="%.2f", escape=False, column_format="lllrr", na_rep="--"
)
best_jer0_impr = df["delta_jer_gt_zero"].min()
latex_code = append_best_footer(
    latex_code, "Best JER Improvement (GT=0)", best_jer0_impr, "%+.2f"
)
print("\n" + "=" * 80)
print("4. JER (GT=0) TABLE (with Best Improvement)")
print("=" * 80 + "\n")
print(latex_code)

# 5) MAE (GT>0)
maep = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "mae_gt_positive",
        "delta_mae_gt_positive",
    ]
].copy()
maep = maep.sort_values(["prompt_type", "attack_type", "mitigation_type"])
maep.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "MAE (GT$>$0)",
    "$\\Delta$ MAE (GT$>$0)",
]
latex_code = maep.to_latex(
    index=False, float_format="%.4f", escape=False, column_format="lllrr", na_rep="--"
)
best_maep_impr = df["delta_mae_gt_positive"].min()
latex_code = append_best_footer(
    latex_code, "Best MAE Improvement (GT>0)", best_maep_impr, "%+.4f"
)
print("\n" + "=" * 80)
print("5. MAE (GT>0) TABLE (with Best Improvement)")
print("=" * 80 + "\n")
print(latex_code)

# 6) JER (GT>0)
jerp = df[
    [
        "prompt_type",
        "attack_type",
        "mitigation_type",
        "jer_gt_positive",
        "delta_jer_gt_positive",
    ]
].copy()
jerp = jerp.sort_values(["prompt_type", "attack_type", "mitigation_type"])
jerp.columns = [
    "Prompt",
    "Attack",
    "Mitigation",
    "JER (GT$>$0) (\\%)",
    "$\\Delta$ JER (GT$>$0) (\\%)",
]
latex_code = jerp.to_latex(
    index=False, float_format="%.2f", escape=False, column_format="lllrr", na_rep="--"
)
best_jerp_impr = df["delta_jer_gt_positive"].min()
latex_code = append_best_footer(
    latex_code, "Best JER Improvement (GT>0)", best_jerp_impr, "%+.2f"
)
print("\n" + "=" * 80)
print("6. JER (GT>0) TABLE (with Best Improvement)")
print("=" * 80 + "\n")
print(latex_code)

# =============================
# Summary table for all 6 metrics
# =============================
# Define baseline mask and non-baseline for improvements
is_baseline = (
    (df["prompt_type"] == "BASIC")
    & (df["attack_type"] == "none")
    & (df["mitigation_type"] == "none")
)
non_baseline = df[~is_baseline].copy()

# Ensure numeric types
for c in [
    "overall_mae",
    "overall_jer",
    "mae_gt_zero",
    "mae_gt_positive",
    "jer_gt_zero",
    "jer_gt_positive",
    "delta_overall_mae",
    "delta_overall_jer",
    "delta_mae_gt_zero",
    "delta_mae_gt_positive",
    "delta_jer_gt_zero",
    "delta_jer_gt_positive",
]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")
        non_baseline[c] = pd.to_numeric(non_baseline[c], errors="coerce")

# Compute best improvements (exclude baseline) and best overall (all)
rows = []
# Overall MAE
imp_row = non_baseline.loc[non_baseline["delta_overall_mae"].idxmin()]
best_row = df.loc[df["overall_mae"].idxmin()]
rows.append(
    {
        "Metric": "Overall MAE",
        "Best Improvement": f"{imp_row['delta_overall_mae']:.4f}",
        "Imp Prompt": imp_row["prompt_type"],
        "Imp Attack": imp_row["attack_type"],
        "Imp Mitigation": imp_row["mitigation_type"],
        "Best Overall": f"{best_row['overall_mae']:.4f}",
        "Best Prompt": best_row["prompt_type"],
        "Best Attack": best_row["attack_type"],
        "Best Mitigation": best_row["mitigation_type"],
    }
)
# Overall JER
imp_row = non_baseline.loc[non_baseline["delta_overall_jer"].idxmin()]
best_row = df.loc[df["overall_jer"].idxmin()]
rows.append(
    {
        "Metric": "Overall JER",
        "Best Improvement": f"{imp_row['delta_overall_jer']:.2f}%",
        "Imp Prompt": imp_row["prompt_type"],
        "Imp Attack": imp_row["attack_type"],
        "Imp Mitigation": imp_row["mitigation_type"],
        "Best Overall": f"{best_row['overall_jer']:.2f}%",
        "Best Prompt": best_row["prompt_type"],
        "Best Attack": best_row["attack_type"],
        "Best Mitigation": best_row["mitigation_type"],
    }
)
# MAE (GT=0)
imp_row = non_baseline.loc[non_baseline["delta_mae_gt_zero"].idxmin()]
best_row = df.loc[df["mae_gt_zero"].idxmin()]
rows.append(
    {
        "Metric": "MAE (GT=0)",
        "Best Improvement": f"{imp_row['delta_mae_gt_zero']:.4f}",
        "Imp Prompt": imp_row["prompt_type"],
        "Imp Attack": imp_row["attack_type"],
        "Imp Mitigation": imp_row["mitigation_type"],
        "Best Overall": f"{best_row['mae_gt_zero']:.4f}",
        "Best Prompt": best_row["prompt_type"],
        "Best Attack": best_row["attack_type"],
        "Best Mitigation": best_row["mitigation_type"],
    }
)
# JER (GT=0)
imp_row = non_baseline.loc[non_baseline["delta_jer_gt_zero"].idxmin()]
best_row = df.loc[df["jer_gt_zero"].idxmin()]
rows.append(
    {
        "Metric": "JER (GT=0)",
        "Best Improvement": f"{imp_row['delta_jer_gt_zero']:.2f}%",
        "Imp Prompt": imp_row["prompt_type"],
        "Imp Attack": imp_row["attack_type"],
        "Imp Mitigation": imp_row["mitigation_type"],
        "Best Overall": f"{best_row['jer_gt_zero']:.2f}%",
        "Best Prompt": best_row["prompt_type"],
        "Best Attack": best_row["attack_type"],
        "Best Mitigation": best_row["mitigation_type"],
    }
)
# MAE (GT>0)
imp_row = non_baseline.loc[non_baseline["delta_mae_gt_positive"].idxmin()]
best_row = df.loc[df["mae_gt_positive"].idxmin()]
rows.append(
    {
        "Metric": "MAE (GT>0)",
        "Best Improvement": f"{imp_row['delta_mae_gt_positive']:.4f}",
        "Imp Prompt": imp_row["prompt_type"],
        "Imp Attack": imp_row["attack_type"],
        "Imp Mitigation": imp_row["mitigation_type"],
        "Best Overall": f"{best_row['mae_gt_positive']:.4f}",
        "Best Prompt": best_row["prompt_type"],
        "Best Attack": best_row["attack_type"],
        "Best Mitigation": best_row["mitigation_type"],
    }
)
# JER (GT>0)
imp_row = non_baseline.loc[non_baseline["delta_jer_gt_positive"].idxmin()]
best_row = df.loc[df["jer_gt_positive"].idxmin()]
rows.append(
    {
        "Metric": "JER (GT>0)",
        "Best Improvement": f"{imp_row['delta_jer_gt_positive']:.2f}%",
        "Imp Prompt": imp_row["prompt_type"],
        "Imp Attack": imp_row["attack_type"],
        "Imp Mitigation": imp_row["mitigation_type"],
        "Best Overall": f"{best_row['jer_gt_positive']:.2f}%",
        "Best Prompt": best_row["prompt_type"],
        "Best Attack": best_row["attack_type"],
        "Best Mitigation": best_row["mitigation_type"],
    }
)

summary_df = pd.DataFrame(
    rows,
    columns=[
        "Metric",
        "Best Improvement",
        "Imp Prompt",
        "Imp Attack",
        "Imp Mitigation",
        "Best Overall",
        "Best Prompt",
        "Best Attack",
        "Best Mitigation",
    ],
)

print("\n" + "=" * 80)
print("SUMMARY OF BESTS (All Tables)")
print("=" * 80)
print(summary_df.to_string(index=False))

print("\n" + "=" * 80)
print("LATEX SUMMARY TABLE (All Tables)")
print("=" * 80)
print(summary_df.to_latex(index=False, escape=False, column_format="lrrrrrrrrr"))

LATEX TABLES WITH BEST IMPROVEMENT ROWS

1. OVERALL MAE TABLE (with Best Improvement)

\begin{tabular}{lllrr}
\toprule
Prompt & Attack & Mitigation & MAE & $\Delta$ MAE \\
\midrule
BASIC & append & few_shot & 1.7977 & 0.0266 \\
BASIC & append & none & 1.9412 & 0.1701 \\
BASIC & append & system_prompt_hardening & 1.7928 & 0.0216 \\
BASIC & append & user_prompt_hardening & 2.0705 & 0.2994 \\
BASIC & none & few_shot & 1.6189 & -0.1523 \\
BASIC & none & none & 1.7711 & -- \\
BASIC & none & system_prompt_hardening & 1.6642 & -0.1070 \\
BASIC & none & user_prompt_hardening & 1.9605 & 0.1894 \\
BASIC & prepend & few_shot & 1.8096 & 0.0385 \\
BASIC & prepend & none & 1.9543 & 0.1832 \\
BASIC & prepend & system_prompt_hardening & 1.8871 & 0.1159 \\
BASIC & prepend & user_prompt_hardening & 2.1644 & 0.3932 \\
BASIC & scatter & few_shot & 1.7248 & -0.0463 \\
BASIC & scatter & none & 1.8943 & 0.1232 \\
BASIC & scatter & system_prompt_hardening & 1.7680 & -0.0031 \\
BASIC & scatter & user_prompt_ha

### Overleaf table placement tips

- Use table placement hints to avoid footer overlap: `\begin{table}[htbp] ... \end{table}`
- For multi-page tables, prefer `longtable`:
  - Preamble: `\usepackage{longtable,booktabs}`
  - Replace `table` with `longtable` and include continued headers/footers per Overleaf docs.
- If a table still crowds the footer, force a break before it: `\clearpage` immediately before the table.
- Keep consistent vertical spacing: avoid trailing `\vspace` after tables; use `\caption` above `\label` within the environment.
- For wide tables, consider `\resizebox{\textwidth}{!}{...}` or landscape with `\usepackage{pdflscape}` and `\begin{landscape} ... \end{landscape}` to reduce float pressure.

