# LLM Query Injection Experiment Results Analysis

This notebook analyzes the results from the LLM query injection experiments, comparing performance across different prompt types, attack types, and mitigation strategies.

**Baseline**: BASIC prompt, no attack, no mitigation


In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Load the results

CSV_FILENAME = 'qwen3_0.6b_results.csv'
#CSV_FILENAME = 'gemma3_1b_results.csv'
df = pd.read_csv(CSV_FILENAME)

print(f"Loaded {len(df)} experiment results")
print(f"Columns: {list(df.columns)}")
df.head()


Loaded 48 experiment results
Columns: ['filename', 'prompt_type', 'attack_type', 'mitigation_type', 'overall_mae', 'mae_gt_zero', 'mae_gt_positive', 'overall_jer', 'jer_gt_zero', 'jer_gt_positive', 'delta_overall_mae', 'delta_mae_gt_zero', 'delta_mae_gt_positive', 'delta_overall_jer', 'delta_jer_gt_zero', 'delta_jer_gt_positive', 'valid_experiments', 'gt_zero_count', 'gt_positive_count', 'failed_experiments', 'total_experiments']


Unnamed: 0,filename,prompt_type,attack_type,mitigation_type,overall_mae,mae_gt_zero,mae_gt_positive,overall_jer,jer_gt_zero,jer_gt_positive,delta_overall_mae,delta_mae_gt_zero,delta_mae_gt_positive,delta_overall_jer,delta_jer_gt_zero,delta_jer_gt_positive,valid_experiments,gt_zero_count,gt_positive_count,failed_experiments,total_experiments
0,results_BASIC_none_none.json,BASIC,none,none,0.9208,0.7849,1.1405,50.43,32.96,78.68,,,,,,,7021,4338,2683,0,7021
1,results_BASIC_append_few_shot.json,BASIC,append,few_shot,1.2645,1.3193,1.1759,58.52,46.75,77.56,0.3437,0.5343,0.0354,8.09,13.79,-1.12,7021,4338,2683,0,7021
2,results_BASIC_append_none.json,BASIC,append,none,1.1122,1.097,1.1368,56.23,43.2,77.3,0.1914,0.3121,-0.0037,5.8,10.24,-1.38,7021,4338,2683,0,7021
3,results_BASIC_append_system_prompt_hardening.json,BASIC,append,system_prompt_hardening,1.1139,1.1063,1.1264,56.32,43.48,77.08,0.1931,0.3213,-0.0142,5.88,10.51,-1.6,7021,4338,2683,0,7021
4,results_BASIC_append_user_prompt_hardening.json,BASIC,append,user_prompt_hardening,0.9561,0.7824,1.237,48.6,27.99,81.92,0.0353,-0.0025,0.0965,-1.84,-4.98,3.24,7021,4338,2683,0,7021


## 1. Overall MAE Results


In [18]:
# Find highest metric and highest delta for Overall MAE
highest_mae = df.loc[df['overall_mae'].idxmax()]
highest_delta_mae = df[df['delta_overall_mae'] != 'NaN'].loc[df[df['delta_overall_mae'] != 'NaN']['delta_overall_mae'].idxmax()]
print(f"Highest MAE: {highest_mae['overall_mae']:.4f}")
print(f"  Experiment: {highest_mae['prompt_type']}, {highest_mae['attack_type']}, {highest_mae['mitigation_type']}")
print(f"\nHighest Δ from Baseline: {highest_delta_mae['delta_overall_mae']:.4f}")
print(f"  Experiment: {highest_delta_mae['prompt_type']}, {highest_delta_mae['attack_type']}, {highest_delta_mae['mitigation_type']}")


Highest MAE: 2.2124
  Experiment: RATIONALE, append, few_shot

Highest Δ from Baseline: 1.2916
  Experiment: RATIONALE, append, few_shot


In [19]:
# Create overall MAE table
mae_table = df[['prompt_type', 'attack_type', 'mitigation_type', 'overall_mae', 'delta_overall_mae']].copy()
mae_table = mae_table.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])

# Format the table
mae_table['overall_mae'] = mae_table['overall_mae'].apply(lambda x: f"{x:.4f}" if pd.notna(x) else "N/A")
mae_table['delta_overall_mae'] = mae_table['delta_overall_mae'].apply(lambda x: f"{x:+.4f}" if pd.notna(x) and x != 'NaN' else "N/A")

# Rename columns for display
mae_table_display = mae_table.rename(columns={
    'prompt_type': 'Prompt',
    'attack_type': 'Attack',
    'mitigation_type': 'Mitigation',
    'overall_mae': 'MAE',
    'delta_overall_mae': 'Δ from Baseline'
})

print("Overall MAE Results (Mean Absolute Error)")
print("=" * 60)
display(mae_table_display)


Overall MAE Results (Mean Absolute Error)


Unnamed: 0,Prompt,Attack,Mitigation,MAE,Δ from Baseline
1,BASIC,append,few_shot,1.2645,0.3437
2,BASIC,append,none,1.1122,0.1914
3,BASIC,append,system_prompt_hardening,1.1139,0.1931
4,BASIC,append,user_prompt_hardening,0.9561,0.0353
5,BASIC,none,few_shot,0.9504,0.0296
0,BASIC,none,none,0.9208,
6,BASIC,none,system_prompt_hardening,0.9153,-0.0056
7,BASIC,none,user_prompt_hardening,0.8547,-0.0661
8,BASIC,prepend,few_shot,1.5817,0.6609
9,BASIC,prepend,none,1.4898,0.569


## 2. Overall JER Results


In [20]:
# Find highest metric and highest delta for Overall JER
highest_jer = df.loc[df['overall_jer'].idxmax()]
highest_delta_jer = df[df['delta_overall_jer'] != 'NaN'].loc[df[df['delta_overall_jer'] != 'NaN']['delta_overall_jer'].idxmax()]
print(f"Highest JER: {highest_jer['overall_jer']:.2f}%")
print(f"  Experiment: {highest_jer['prompt_type']}, {highest_jer['attack_type']}, {highest_jer['mitigation_type']}")
print(f"\nHighest Δ from Baseline: {highest_delta_jer['delta_overall_jer']:.2f}%")
print(f"  Experiment: {highest_delta_jer['prompt_type']}, {highest_delta_jer['attack_type']}, {highest_delta_jer['mitigation_type']}")


Highest JER: 88.33%
  Experiment: RATIONALE, prepend, system_prompt_hardening

Highest Δ from Baseline: 37.90%
  Experiment: RATIONALE, prepend, system_prompt_hardening


In [21]:
# Create overall JER table
jer_table = df[['prompt_type', 'attack_type', 'mitigation_type', 'overall_jer', 'delta_overall_jer']].copy()
jer_table = jer_table.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])

# Format the table
jer_table['overall_jer'] = jer_table['overall_jer'].apply(lambda x: f"{x:.2f}%" if pd.notna(x) else "N/A")
jer_table['delta_overall_jer'] = jer_table['delta_overall_jer'].apply(lambda x: f"{x:+.2f}%" if pd.notna(x) and x != 'NaN' else "N/A")

# Rename columns for display
jer_table_display = jer_table.rename(columns={
    'prompt_type': 'Prompt',
    'attack_type': 'Attack',
    'mitigation_type': 'Mitigation',
    'overall_jer': 'JER',
    'delta_overall_jer': 'Δ from Baseline'
})

print("Overall JER Results (Judgment Error Rate)")
print("=" * 60)
display(jer_table_display)


Overall JER Results (Judgment Error Rate)


Unnamed: 0,Prompt,Attack,Mitigation,JER,Δ from Baseline
1,BASIC,append,few_shot,58.52%,+8.09%
2,BASIC,append,none,56.23%,+5.80%
3,BASIC,append,system_prompt_hardening,56.32%,+5.88%
4,BASIC,append,user_prompt_hardening,48.60%,-1.84%
5,BASIC,none,few_shot,49.35%,-1.08%
0,BASIC,none,none,50.43%,
6,BASIC,none,system_prompt_hardening,50.61%,+0.17%
7,BASIC,none,user_prompt_hardening,46.30%,-4.13%
8,BASIC,prepend,few_shot,68.32%,+17.89%
9,BASIC,prepend,none,68.30%,+17.86%


## 3. MAE for Ground Truth = 0 (Irrelevant Documents)


In [22]:
# Find highest metric and highest delta for MAE (GT=0)
highest_mae_gt0 = df.loc[df['mae_gt_zero'].idxmax()]
highest_delta_mae_gt0 = df[df['delta_mae_gt_zero'] != 'NaN'].loc[df[df['delta_mae_gt_zero'] != 'NaN']['delta_mae_gt_zero'].idxmax()]
print(f"Highest MAE (GT=0): {highest_mae_gt0['mae_gt_zero']:.4f}")
print(f"  Experiment: {highest_mae_gt0['prompt_type']}, {highest_mae_gt0['attack_type']}, {highest_mae_gt0['mitigation_type']}")
print(f"\nHighest Δ from Baseline: {highest_delta_mae_gt0['delta_mae_gt_zero']:.4f}")
print(f"  Experiment: {highest_delta_mae_gt0['prompt_type']}, {highest_delta_mae_gt0['attack_type']}, {highest_delta_mae_gt0['mitigation_type']}")


Highest MAE (GT=0): 2.8163
  Experiment: RATIONALE, append, few_shot

Highest Δ from Baseline: 2.0314
  Experiment: RATIONALE, append, few_shot


In [23]:
# Create GT=0 MAE table
mae_gt0_table = df[['prompt_type', 'attack_type', 'mitigation_type', 'mae_gt_zero', 'delta_mae_gt_zero']].copy()
mae_gt0_table = mae_gt0_table.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])

# Format the table
mae_gt0_table['mae_gt_zero'] = mae_gt0_table['mae_gt_zero'].apply(lambda x: f"{x:.4f}" if pd.notna(x) else "N/A")
mae_gt0_table['delta_mae_gt_zero'] = mae_gt0_table['delta_mae_gt_zero'].apply(lambda x: f"{x:+.4f}" if pd.notna(x) and x != 'NaN' else "N/A")

# Rename columns for display
mae_gt0_table_display = mae_gt0_table.rename(columns={
    'prompt_type': 'Prompt',
    'attack_type': 'Attack',
    'mitigation_type': 'Mitigation',
    'mae_gt_zero': 'MAE (GT=0)',
    'delta_mae_gt_zero': 'Δ from Baseline'
})

print("MAE for Ground Truth = 0 (Irrelevant Documents)")
print("=" * 60)
display(mae_gt0_table_display)


MAE for Ground Truth = 0 (Irrelevant Documents)


Unnamed: 0,Prompt,Attack,Mitigation,MAE (GT=0),Δ from Baseline
1,BASIC,append,few_shot,1.3193,0.5343
2,BASIC,append,none,1.097,0.3121
3,BASIC,append,system_prompt_hardening,1.1063,0.3213
4,BASIC,append,user_prompt_hardening,0.7824,-0.0025
5,BASIC,none,few_shot,0.7497,-0.0353
0,BASIC,none,none,0.7849,
6,BASIC,none,system_prompt_hardening,0.7743,-0.0106
7,BASIC,none,user_prompt_hardening,0.5844,-0.2006
8,BASIC,prepend,few_shot,1.8306,1.0456
9,BASIC,prepend,none,1.7324,0.9474


## 4. JER for Ground Truth = 0 (Irrelevant Documents)


In [24]:
# Find highest metric and highest delta for JER (GT=0)
highest_jer_gt0 = df.loc[df['jer_gt_zero'].idxmax()]
highest_delta_jer_gt0 = df[df['delta_jer_gt_zero'] != 'NaN'].loc[df[df['delta_jer_gt_zero'] != 'NaN']['delta_jer_gt_zero'].idxmax()]
print(f"Highest JER (GT=0): {highest_jer_gt0['jer_gt_zero']:.2f}%")
print(f"  Experiment: {highest_jer_gt0['prompt_type']}, {highest_jer_gt0['attack_type']}, {highest_jer_gt0['mitigation_type']}")
print(f"\nHighest Δ from Baseline: {highest_delta_jer_gt0['delta_jer_gt_zero']:.2f}%")
print(f"  Experiment: {highest_delta_jer_gt0['prompt_type']}, {highest_delta_jer_gt0['attack_type']}, {highest_delta_jer_gt0['mitigation_type']}")


Highest JER (GT=0): 96.52%
  Experiment: RATIONALE, prepend, system_prompt_hardening

Highest Δ from Baseline: 63.55%
  Experiment: RATIONALE, prepend, system_prompt_hardening


In [25]:
# Create GT=0 JER table
jer_gt0_table = df[['prompt_type', 'attack_type', 'mitigation_type', 'jer_gt_zero', 'delta_jer_gt_zero']].copy()
jer_gt0_table = jer_gt0_table.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])

# Format the table
jer_gt0_table['jer_gt_zero'] = jer_gt0_table['jer_gt_zero'].apply(lambda x: f"{x:.2f}%" if pd.notna(x) else "N/A")
jer_gt0_table['delta_jer_gt_zero'] = jer_gt0_table['delta_jer_gt_zero'].apply(lambda x: f"{x:+.2f}%" if pd.notna(x) and x != 'NaN' else "N/A")

# Rename columns for display
jer_gt0_table_display = jer_gt0_table.rename(columns={
    'prompt_type': 'Prompt',
    'attack_type': 'Attack',
    'mitigation_type': 'Mitigation',
    'jer_gt_zero': 'JER (GT=0)',
    'delta_jer_gt_zero': 'Δ from Baseline'
})

print("JER for Ground Truth = 0 (Irrelevant Documents)")
print("=" * 60)
display(jer_gt0_table_display)


JER for Ground Truth = 0 (Irrelevant Documents)


Unnamed: 0,Prompt,Attack,Mitigation,JER (GT=0),Δ from Baseline
1,BASIC,append,few_shot,46.75%,+13.79%
2,BASIC,append,none,43.20%,+10.24%
3,BASIC,append,system_prompt_hardening,43.48%,+10.51%
4,BASIC,append,user_prompt_hardening,27.99%,-4.98%
5,BASIC,none,few_shot,28.88%,-4.08%
0,BASIC,none,none,32.96%,
6,BASIC,none,system_prompt_hardening,33.40%,+0.44%
7,BASIC,none,user_prompt_hardening,22.41%,-10.56%
8,BASIC,prepend,few_shot,64.22%,+31.26%
9,BASIC,prepend,none,65.08%,+32.11%


## 5. MAE for Ground Truth > 0 (Relevant Documents)


In [26]:
# Find highest metric and highest delta for MAE (GT>0)
highest_mae_gtpos = df.loc[df['mae_gt_positive'].idxmax()]
highest_delta_mae_gtpos = df[df['delta_mae_gt_positive'] != 'NaN'].loc[df[df['delta_mae_gt_positive'] != 'NaN']['delta_mae_gt_positive'].idxmax()]
print(f"Highest MAE (GT>0): {highest_mae_gtpos['mae_gt_positive']:.4f}")
print(f"  Experiment: {highest_mae_gtpos['prompt_type']}, {highest_mae_gtpos['attack_type']}, {highest_mae_gtpos['mitigation_type']}")
print(f"\nHighest Δ from Baseline: {highest_delta_mae_gtpos['delta_mae_gt_positive']:.4f}")
print(f"  Experiment: {highest_delta_mae_gtpos['prompt_type']}, {highest_delta_mae_gtpos['attack_type']}, {highest_delta_mae_gtpos['mitigation_type']}")


Highest MAE (GT>0): 1.2918
  Experiment: BASIC, none, user_prompt_hardening

Highest Δ from Baseline: 0.1513
  Experiment: BASIC, none, user_prompt_hardening


In [27]:
# Create GT>0 MAE table
mae_gtpos_table = df[['prompt_type', 'attack_type', 'mitigation_type', 'mae_gt_positive', 'delta_mae_gt_positive']].copy()
mae_gtpos_table = mae_gtpos_table.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])

# Format the table
mae_gtpos_table['mae_gt_positive'] = mae_gtpos_table['mae_gt_positive'].apply(lambda x: f"{x:.4f}" if pd.notna(x) else "N/A")
mae_gtpos_table['delta_mae_gt_positive'] = mae_gtpos_table['delta_mae_gt_positive'].apply(lambda x: f"{x:+.4f}" if pd.notna(x) and x != 'NaN' else "N/A")

# Rename columns for display
mae_gtpos_table_display = mae_gtpos_table.rename(columns={
    'prompt_type': 'Prompt',
    'attack_type': 'Attack',
    'mitigation_type': 'Mitigation',
    'mae_gt_positive': 'MAE (GT>0)',
    'delta_mae_gt_positive': 'Δ from Baseline'
})

print("MAE for Ground Truth > 0 (Relevant Documents)")
print("=" * 60)
display(mae_gtpos_table_display)


MAE for Ground Truth > 0 (Relevant Documents)


Unnamed: 0,Prompt,Attack,Mitigation,MAE (GT>0),Δ from Baseline
1,BASIC,append,few_shot,1.1759,0.0354
2,BASIC,append,none,1.1368,-0.0037
3,BASIC,append,system_prompt_hardening,1.1264,-0.0142
4,BASIC,append,user_prompt_hardening,1.237,0.0965
5,BASIC,none,few_shot,1.2751,0.1346
0,BASIC,none,none,1.1405,
6,BASIC,none,system_prompt_hardening,1.1431,0.0026
7,BASIC,none,user_prompt_hardening,1.2918,0.1513
8,BASIC,prepend,few_shot,1.1793,0.0388
9,BASIC,prepend,none,1.0977,-0.0429


## 6. JER for Ground Truth > 0 (Relevant Documents)


In [28]:
# Find highest metric and highest delta for JER (GT>0)
highest_jer_gtpos = df.loc[df['jer_gt_positive'].idxmax()]
highest_delta_jer_gtpos = df[df['delta_jer_gt_positive'] != 'NaN'].loc[df[df['delta_jer_gt_positive'] != 'NaN']['delta_jer_gt_positive'].idxmax()]
print(f"Highest JER (GT>0): {highest_jer_gtpos['jer_gt_positive']:.2f}%")
print(f"  Experiment: {highest_jer_gtpos['prompt_type']}, {highest_jer_gtpos['attack_type']}, {highest_jer_gtpos['mitigation_type']}")
print(f"\nHighest Δ from Baseline: {highest_delta_jer_gtpos['delta_jer_gt_positive']:.2f}%")
print(f"  Experiment: {highest_delta_jer_gtpos['prompt_type']}, {highest_delta_jer_gtpos['attack_type']}, {highest_delta_jer_gtpos['mitigation_type']}")


Highest JER (GT>0): 84.94%
  Experiment: BASIC, none, user_prompt_hardening

Highest Δ from Baseline: 6.26%
  Experiment: BASIC, none, user_prompt_hardening


In [29]:
# Create GT>0 JER table
jer_gtpos_table = df[['prompt_type', 'attack_type', 'mitigation_type', 'jer_gt_positive', 'delta_jer_gt_positive']].copy()
jer_gtpos_table = jer_gtpos_table.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])

# Format the table
jer_gtpos_table['jer_gt_positive'] = jer_gtpos_table['jer_gt_positive'].apply(lambda x: f"{x:.2f}%" if pd.notna(x) else "N/A")
jer_gtpos_table['delta_jer_gt_positive'] = jer_gtpos_table['delta_jer_gt_positive'].apply(lambda x: f"{x:+.2f}%" if pd.notna(x) and x != 'NaN' else "N/A")

# Rename columns for display
jer_gtpos_table_display = jer_gtpos_table.rename(columns={
    'prompt_type': 'Prompt',
    'attack_type': 'Attack',
    'mitigation_type': 'Mitigation',
    'jer_gt_positive': 'JER (GT>0)',
    'delta_jer_gt_positive': 'Δ from Baseline'
})

print("JER for Ground Truth > 0 (Relevant Documents)")
print("=" * 60)
display(jer_gtpos_table_display)


JER for Ground Truth > 0 (Relevant Documents)


Unnamed: 0,Prompt,Attack,Mitigation,JER (GT>0),Δ from Baseline
1,BASIC,append,few_shot,77.56%,-1.12%
2,BASIC,append,none,77.30%,-1.38%
3,BASIC,append,system_prompt_hardening,77.08%,-1.60%
4,BASIC,append,user_prompt_hardening,81.92%,+3.24%
5,BASIC,none,few_shot,82.45%,+3.76%
0,BASIC,none,none,78.68%,
6,BASIC,none,system_prompt_hardening,78.42%,-0.26%
7,BASIC,none,user_prompt_hardening,84.94%,+6.26%
8,BASIC,prepend,few_shot,74.95%,-3.73%
9,BASIC,prepend,none,73.50%,-5.18%


## 7. Summary Statistics


In [30]:
# Summary statistics
print("Experiment Summary")
print("=" * 30)
print(f"Total experiments: {len(df)}")
print(f"Prompt types: {df['prompt_type'].unique().tolist()}")
print(f"Attack types: {df['attack_type'].unique().tolist()}")
print(f"Mitigation types: {df['mitigation_type'].unique().tolist()}")

# Find baseline
baseline = df[(df['prompt_type'] == 'BASIC') & (df['attack_type'] == 'none') & (df['mitigation_type'] == 'none')]
if not baseline.empty:
    print(f"\nBaseline (BASIC, none, none) metrics:")
    print(f"  Overall MAE: {baseline['overall_mae'].iloc[0]:.4f}")
    print(f"  Overall JER: {baseline['overall_jer'].iloc[0]:.2f}%")
    print(f"  MAE GT=0: {baseline['mae_gt_zero'].iloc[0]:.4f}")
    print(f"  JER GT=0: {baseline['jer_gt_zero'].iloc[0]:.2f}%")
    print(f"  MAE GT>0: {baseline['mae_gt_positive'].iloc[0]:.4f}")
    print(f"  JER GT>0: {baseline['jer_gt_positive'].iloc[0]:.2f}%")

# Best and worst performing experiments
print(f"\nBest Overall MAE: {df['overall_mae'].min():.4f}")
best_mae = df[df['overall_mae'] == df['overall_mae'].min()]
print(f"  Experiment: {best_mae['prompt_type'].iloc[0]}, {best_mae['attack_type'].iloc[0]}, {best_mae['mitigation_type'].iloc[0]}")

print(f"\nWorst Overall MAE: {df['overall_mae'].max():.4f}")
worst_mae = df[df['overall_mae'] == df['overall_mae'].max()]
print(f"  Experiment: {worst_mae['prompt_type'].iloc[0]}, {worst_mae['attack_type'].iloc[0]}, {worst_mae['mitigation_type'].iloc[0]}")

print(f"\nBest Overall JER: {df['overall_jer'].min():.2f}%")
best_jer = df[df['overall_jer'] == df['overall_jer'].min()]
print(f"  Experiment: {best_jer['prompt_type'].iloc[0]}, {best_jer['attack_type'].iloc[0]}, {best_jer['mitigation_type'].iloc[0]}")

print(f"\nWorst Overall JER: {df['overall_jer'].max():.2f}%")
worst_jer = df[df['overall_jer'] == df['overall_jer'].max()]
print(f"  Experiment: {worst_jer['prompt_type'].iloc[0]}, {worst_jer['attack_type'].iloc[0]}, {worst_jer['mitigation_type'].iloc[0]}")


Experiment Summary
Total experiments: 48
Prompt types: ['BASIC', 'RATIONALE', 'UTILITY']
Attack types: ['none', 'append', 'prepend', 'scatter']
Mitigation types: ['none', 'few_shot', 'system_prompt_hardening', 'user_prompt_hardening']

Baseline (BASIC, none, none) metrics:
  Overall MAE: 0.9208
  Overall JER: 50.43%
  MAE GT=0: 0.7849
  JER GT=0: 32.96%
  MAE GT>0: 1.1405
  JER GT>0: 78.68%

Best Overall MAE: 0.8547
  Experiment: BASIC, none, user_prompt_hardening

Worst Overall MAE: 2.2124
  Experiment: RATIONALE, append, few_shot

Best Overall JER: 46.30%
  Experiment: BASIC, none, user_prompt_hardening

Worst Overall JER: 88.33%
  Experiment: RATIONALE, prepend, system_prompt_hardening


## 8. Delta Analysis (Performance vs Baseline)


In [31]:
# Analyze deltas (excluding baseline itself)
non_baseline = df[(df['prompt_type'] != 'BASIC') | (df['attack_type'] != 'none') | (df['mitigation_type'] != 'none')]

print("Delta Analysis (Performance vs Baseline)")
print("=" * 40)
print(f"Experiments compared to baseline: {len(non_baseline)}")

# Count improvements vs degradations
mae_improvements = len(non_baseline[non_baseline['delta_overall_mae'] < 0])
mae_degradations = len(non_baseline[non_baseline['delta_overall_mae'] > 0])
jer_improvements = len(non_baseline[non_baseline['delta_overall_jer'] < 0])
jer_degradations = len(non_baseline[non_baseline['delta_overall_jer'] > 0])

print(f"\nMAE Improvements (negative delta): {mae_improvements}")
print(f"MAE Degradations (positive delta): {mae_degradations}")
print(f"JER Improvements (negative delta): {jer_improvements}")
print(f"JER Degradations (positive delta): {jer_degradations}")

# Best improvements
if not non_baseline.empty:
    best_mae_improvement = non_baseline[non_baseline['delta_overall_mae'] == non_baseline['delta_overall_mae'].min()]
    best_jer_improvement = non_baseline[non_baseline['delta_overall_jer'] == non_baseline['delta_overall_jer'].min()]
    
    print(f"\nBest MAE Improvement: {best_mae_improvement['delta_overall_mae'].iloc[0]:.4f}")
    print(f"  Experiment: {best_mae_improvement['prompt_type'].iloc[0]}, {best_mae_improvement['attack_type'].iloc[0]}, {best_mae_improvement['mitigation_type'].iloc[0]}")
    
    print(f"\nBest JER Improvement: {best_jer_improvement['delta_overall_jer'].iloc[0]:.2f}%")
    print(f"  Experiment: {best_jer_improvement['prompt_type'].iloc[0]}, {best_jer_improvement['attack_type'].iloc[0]}, {best_jer_improvement['mitigation_type'].iloc[0]}")


Delta Analysis (Performance vs Baseline)
Experiments compared to baseline: 47

MAE Improvements (negative delta): 2
MAE Degradations (positive delta): 45
JER Improvements (negative delta): 4
JER Degradations (positive delta): 43

Best MAE Improvement: -0.0661
  Experiment: BASIC, none, user_prompt_hardening

Best JER Improvement: -4.13%
  Experiment: BASIC, none, user_prompt_hardening


In [32]:
# ==============================================================================
# LaTeX Table Generation for Thesis
# ==============================================================================

print("=" * 80)
print("LATEX TABLE OUTPUT FOR THESIS")
print("=" * 80)

# 1. Overall MAE Table
print("\n" + "=" * 80)
print("1. OVERALL MAE TABLE")
print("=" * 80 + "\n")
mae_latex = df[['prompt_type', 'attack_type', 'mitigation_type', 'overall_mae', 'delta_overall_mae']].copy()
mae_latex = mae_latex.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])
mae_latex.columns = ['Prompt', 'Attack', 'Mitigation', 'MAE', '$\\Delta$ MAE']
latex_code = mae_latex.to_latex(index=False, float_format="%.4f", escape=False, 
                                  column_format='lllrr', na_rep='--')
print(latex_code)

# 2. Overall JER Table
print("\n" + "=" * 80)
print("2. OVERALL JER TABLE")
print("=" * 80 + "\n")
jer_latex = df[['prompt_type', 'attack_type', 'mitigation_type', 'overall_jer', 'delta_overall_jer']].copy()
jer_latex = jer_latex.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])
jer_latex.columns = ['Prompt', 'Attack', 'Mitigation', 'JER (\\%)', '$\\Delta$ JER (\\%)']
latex_code = jer_latex.to_latex(index=False, float_format="%.2f", escape=False,
                                  column_format='lllrr', na_rep='--')
print(latex_code)

# 3. MAE for GT=0 Table
print("\n" + "=" * 80)
print("3. MAE FOR GROUND TRUTH = 0 (IRRELEVANT DOCUMENTS)")
print("=" * 80 + "\n")
mae_gt0_latex = df[['prompt_type', 'attack_type', 'mitigation_type', 'mae_gt_zero', 'delta_mae_gt_zero']].copy()
mae_gt0_latex = mae_gt0_latex.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])
mae_gt0_latex.columns = ['Prompt', 'Attack', 'Mitigation', 'MAE (GT=0)', '$\\Delta$ MAE (GT=0)']
latex_code = mae_gt0_latex.to_latex(index=False, float_format="%.4f", escape=False,
                                      column_format='lllrr', na_rep='--')
print(latex_code)

# 4. JER for GT=0 Table
print("\n" + "=" * 80)
print("4. JER FOR GROUND TRUTH = 0 (IRRELEVANT DOCUMENTS)")
print("=" * 80 + "\n")
jer_gt0_latex = df[['prompt_type', 'attack_type', 'mitigation_type', 'jer_gt_zero', 'delta_jer_gt_zero']].copy()
jer_gt0_latex = jer_gt0_latex.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])
jer_gt0_latex.columns = ['Prompt', 'Attack', 'Mitigation', 'JER (GT=0) (\\%)', '$\\Delta$ JER (GT=0) (\\%)']
latex_code = jer_gt0_latex.to_latex(index=False, float_format="%.2f", escape=False,
                                      column_format='lllrr', na_rep='--')
print(latex_code)

# 5. MAE for GT>0 Table
print("\n" + "=" * 80)
print("5. MAE FOR GROUND TRUTH > 0 (RELEVANT DOCUMENTS)")
print("=" * 80 + "\n")
mae_gtpos_latex = df[['prompt_type', 'attack_type', 'mitigation_type', 'mae_gt_positive', 'delta_mae_gt_positive']].copy()
mae_gtpos_latex = mae_gtpos_latex.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])
mae_gtpos_latex.columns = ['Prompt', 'Attack', 'Mitigation', 'MAE (GT$>$0)', '$\\Delta$ MAE (GT$>$0)']
latex_code = mae_gtpos_latex.to_latex(index=False, float_format="%.4f", escape=False,
                                        column_format='lllrr', na_rep='--')
print(latex_code)

# 6. JER for GT>0 Table
print("\n" + "=" * 80)
print("6. JER FOR GROUND TRUTH > 0 (RELEVANT DOCUMENTS)")
print("=" * 80 + "\n")
jer_gtpos_latex = df[['prompt_type', 'attack_type', 'mitigation_type', 'jer_gt_positive', 'delta_jer_gt_positive']].copy()
jer_gtpos_latex = jer_gtpos_latex.sort_values(['prompt_type', 'attack_type', 'mitigation_type'])
jer_gtpos_latex.columns = ['Prompt', 'Attack', 'Mitigation', 'JER (GT$>$0) (\\%)', '$\\Delta$ JER (GT$>$0) (\\%)']
latex_code = jer_gtpos_latex.to_latex(index=False, float_format="%.2f", escape=False,
                                        column_format='lllrr', na_rep='--')
print(latex_code)

print("\n" + "=" * 80)
print("END OF LATEX TABLE OUTPUT")
print("=" * 80)


LATEX TABLE OUTPUT FOR THESIS

1. OVERALL MAE TABLE

\begin{tabular}{lllrr}
\toprule
Prompt & Attack & Mitigation & MAE & $\Delta$ MAE \\
\midrule
BASIC & append & few_shot & 1.2645 & 0.3437 \\
BASIC & append & none & 1.1122 & 0.1914 \\
BASIC & append & system_prompt_hardening & 1.1139 & 0.1931 \\
BASIC & append & user_prompt_hardening & 0.9561 & 0.0353 \\
BASIC & none & few_shot & 0.9504 & 0.0296 \\
BASIC & none & none & 0.9208 & -- \\
BASIC & none & system_prompt_hardening & 0.9153 & -0.0056 \\
BASIC & none & user_prompt_hardening & 0.8547 & -0.0661 \\
BASIC & prepend & few_shot & 1.5817 & 0.6609 \\
BASIC & prepend & none & 1.4898 & 0.5690 \\
BASIC & prepend & system_prompt_hardening & 1.5402 & 0.6194 \\
BASIC & prepend & user_prompt_hardening & 1.3396 & 0.4187 \\
BASIC & scatter & few_shot & 1.3883 & 0.4675 \\
BASIC & scatter & none & 1.3104 & 0.3895 \\
BASIC & scatter & system_prompt_hardening & 1.3242 & 0.4034 \\
BASIC & scatter & user_prompt_hardening & 1.1971 & 0.2763 \\
RATIONA