In [None]:
# Library

import torch
import math
import re
import numpy as np
import pandas as pd
from torch.distributions import Laplace

from transformers import AutoTokenizer
from transformers import AutoModel
from transformers import AutoModelForCausalLM
from transformers import GPT2LMHeadModel
from transformers import AutoModelForSequenceClassification

from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from scipy.stats import vonmises_fisher
import torch.nn.functional as F

from typing import Dict, List, Optional

from openai import OpenAI
import glob

from collections import Counter

from sentence_transformers import SentenceTransformer
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# One-time Matplotlib typography setup; run once per notebook.
import shutil
import matplotlib as mpl

# Real LaTeX rendering is switched on only when a `latex` executable is on PATH
# (auto-detect; False on Colab by default).
USE_TEX = shutil.which("latex") is not None

mpl.rcParams["text.usetex"] = USE_TEX
mpl.rcParams["axes.unicode_minus"] = False

if not USE_TEX:
    # LaTeX-like look without LaTeX installed: serif text + STIX mathtext.
    # For a Computer Modern look instead, change 'stix' -> 'cm'.
    mpl.rcParams.update({
        "text.usetex": False,
        "font.family": "serif",
        "font.serif": ["STIX Two Text", "STIXGeneral", "DejaVu Serif", "Times New Roman"],
        "mathtext.fontset": "stix",
        "mathtext.rm": "serif",
        "mathtext.it": "serif:italic",
        "mathtext.bf": "serif:bold",
    })
else:
    # Real LaTeX path: serif fonts plus the math/units packages in the preamble.
    mpl.rcParams.update({
        "font.family": "serif",
        "font.serif": ["Computer Modern Roman", "CMU Serif", "Times New Roman", "DejaVu Serif"],
        "text.latex.preamble": r"\usepackage{amsmath}\usepackage{bm}\usepackage{siunitx}"
    })

In [None]:
import os

# Take the key from the OPENAI_API_KEY environment variable so no real secret is
# ever committed with the notebook. The original placeholder string is kept only
# as a fallback, so the cell still constructs a client when the variable is
# unset or empty (requests made with the placeholder will be rejected).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY") or "Your_API_Key")


In [None]:
# --- GPT-2 (base): tokenizer, LM-head model and its input-embedding matrix ---
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2", output_hidden_states=True)

# Token-embedding weights, detached from autograd.
embedding_table = model.get_input_embeddings().weight.detach()

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
model.eval()

# Base tokenizer pad fix (optional): attribute-safe variant of the same fallback.
if (
    getattr(tokenizer, "pad_token_id", None) is None
    and getattr(tokenizer, "eos_token", None) is not None
):
    tokenizer.pad_token = tokenizer.eos_token

# --- distilgpt2: lighter causal LM, placed on GPU when one is available ---
gpt2_tok = AutoTokenizer.from_pretrained("distilgpt2")
if gpt2_tok.pad_token is None:
    gpt2_tok.pad_token = gpt2_tok.eos_token

gpt2_model = AutoModelForCausalLM.from_pretrained("distilgpt2")
gpt2_model = gpt2_model.to("cuda" if torch.cuda.is_available() else "cpu")
gpt2_model = gpt2_model.eval()

# --- Row-normalized embedding table (normalize along dim=1) for search ---
norm_embedding_table = torch.nn.functional.normalize(embedding_table, dim=1)

In [None]:
# Gather every CSV matching the "inf polar" and "inf laplace" sweep patterns.
csv_files_polar = glob.glob("/content/AGnews_inf_polar_sweep_avg_epsilon_*.csv")
csv_files_laplace = glob.glob("/content/AGnews_inf_laplace_sweep_avg_epsilon_*.csv")

csv_files = [*csv_files_polar, *csv_files_laplace]
print("Files found by glob:", csv_files)

# Key each frame by its path with the ".csv" text removed; later cells parse the
# epsilon value and the method name back out of that key.
dataframes = dict()
for file in csv_files:
    df_name = file.replace(".csv", "")
    dataframes[df_name] = pd.read_csv(file)

print("Number of dataframes loaded:", len(dataframes))

In [None]:
# Get the answers

def normalize_answer(s):
    """Canonicalize an answer string for comparison.

    Steps, in order: lowercase the text, delete every character that is neither
    a word character nor whitespace, replace the standalone articles
    ``a``/``an``/``the`` with a space, then collapse whitespace runs to single
    spaces (which also trims both ends).
    """
    lowered = s.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punctuation)
    return ' '.join(without_articles.split())

def get_tokens(s):
    """Return the whitespace-separated tokens of the normalized string.

    Any falsy input (``None``, ``""``, ...) yields an empty list without being
    normalized.
    """
    return normalize_answer(s).split() if s else []

def compute_f1(prediction, truth):
    """Token-level F1 between a prediction and a single reference answer.

    Both strings are tokenized with ``get_tokens``. Overlap is counted as a
    multiset intersection, so a repeated token only counts as often as it
    appears on both sides.

    Returns:
        1.0 when both token lists are empty, 0.0 when exactly one is empty or
        nothing overlaps, otherwise the harmonic mean of precision and recall.
    """
    pred_tokens = get_tokens(prediction)
    truth_tokens = get_tokens(truth)

    # Empty-side handling: two empties agree perfectly, one empty cannot match.
    if not (pred_tokens or truth_tokens):
        return 1.0
    if not (pred_tokens and truth_tokens):
        return 0.0

    shared = Counter(pred_tokens) & Counter(truth_tokens)
    num_common = sum(shared.values())
    if num_common == 0:
        return 0.0

    precision = num_common / len(pred_tokens)
    recall = num_common / len(truth_tokens)
    return (2 * precision * recall) / (precision + recall)

def evaluate_answer(predicted_answer: str, true_answers: list[str]):
    """Score one prediction against every acceptable reference answer.

    Args:
        predicted_answer: The generated answer string.
        true_answers: The reference answer strings.

    Returns:
        ``(max_f1, exact_match)``: the best ``compute_f1`` score over all
        references, and whether the normalized prediction equals at least one
        normalized reference. An empty ``true_answers`` gives ``(0.0, False)``.
    """
    normalized_prediction = normalize_answer(predicted_answer)

    best_f1 = 0.0
    matched_exactly = False

    for reference in true_answers:
        is_match = normalized_prediction == normalize_answer(reference)
        if is_match:
            matched_exactly = True

        # F1 is computed from the raw strings; compute_f1 normalizes internally.
        best_f1 = max(best_f1, compute_f1(predicted_answer, reference))

    return best_f1, matched_exactly


def calculate_cosine_similarity(prediction: str, truths: list[str], model) -> float:
    """Best cosine similarity between the prediction and any reference answer.

    ``model`` must provide ``encode(text, convert_to_tensor=True)``. Each string
    is embedded separately and compared with ``util.pytorch_cos_sim``.

    Returns:
        0.0 when ``prediction`` or ``truths`` is empty; otherwise the largest
        similarity found over the non-empty references. The running maximum
        starts at 0.0, so a negative similarity is never returned.
    """
    if not (prediction and truths):
        return 0.0

    prediction_embedding = model.encode(prediction, convert_to_tensor=True)

    best_similarity = 0.0
    for reference in truths:
        if reference:
            reference_embedding = model.encode(reference, convert_to_tensor=True)
            score = util.pytorch_cos_sim(prediction_embedding, reference_embedding).item()
            best_similarity = max(best_similarity, score)

    return best_similarity


In [None]:
# Sentence-embedding model ('all-MiniLM-L6-v2'). It is the `model` argument that the
# STAMP and uniform evaluation cells pass to calculate_cosine_similarity.
model_sentence = SentenceTransformer('all-MiniLM-L6-v2')

In [None]:
# Quick check: mean exact-match accuracy stored in AGnews_stamp_Laplace_tau_0.5.csv.
# The path is relative, so the file is looked up in the current working directory.
file_to_load = "AGnews_stamp_Laplace_tau_0.5.csv"
try:
    df_agnews_stamp_laplace = pd.read_csv(file_to_load)
    print(f"Successfully loaded {file_to_load}")

    # Accuracy is the mean of the per-row 'em_privatized' column, when it exists.
    if 'em_privatized' not in df_agnews_stamp_laplace.columns:
        print("Error: 'em_privatized' column not found in the dataframe.")
    else:
        average_accuracy = df_agnews_stamp_laplace['em_privatized'].mean()
        print(f"Average Accuracy (Exact Match) for {file_to_load}: {average_accuracy:.4f}")

except FileNotFoundError:
    print(f"Error: {file_to_load} not found. Please ensure the file exists in the correct location.")
except Exception as e:
    # Any other failure (parsing, non-numeric column, ...) is reported, not raised.
    print(f"An error occurred: {e}")

In [None]:
# --- AG News classifier used to label the privatized texts ---
# NOTE: this rebinds the notebook-level `tokenizer` and `model` names.
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("textattack/distilbert-base-uncased-ag-news")
model = AutoModelForSequenceClassification.from_pretrained("textattack/distilbert-base-uncased-ag-news")

num_rounds = 3
all_results = []

print("Loaded AG News classifier model: textattack/distilbert-base-uncased-ag-news")

# --- Evaluation: every round scores every row of every loaded dataframe ---
for round_num in range(num_rounds):
    print(f"\n--- Starting Round {round_num + 1}/{num_rounds} ---")
    results_list = []

    for df_name, df in dataframes.items():
        print(f"Processing DataFrame: {df_name}")
        for index, row in df.iterrows():
            if "question" in df.columns:
                question = row["question"]
            else:
                question = ""

            # Ground truth comes from 'label' when present, otherwise 'rating'
            # (assumed to hold the AG News category index 0-3).
            if "label" in df.columns:
                true_answers = [str(row["label"])]
            else:
                true_answers = [str(row["rating"])]

            privatized_context = row["privatized_context"]
            original_context = row["original_context"]

            try:
                # Tokenize, run the classifier without gradients, take the argmax class.
                inputs = tokenizer(
                    privatized_context,
                    return_tensors="pt",
                    truncation=True,
                    padding=True,
                )
                with torch.no_grad():
                    outputs = model(**inputs)
                predicted_label = outputs.logits.argmax(dim=1).item()

                # Compared as text by evaluate_answer.
                answer_privatized = str(predicted_label)

            except Exception as e:
                # A row that cannot be classified gets an empty prediction.
                print(f"Error classifying privatized context in row {index}: {e}")
                answer_privatized = ""

            f1_privatized, em_privatized = evaluate_answer(answer_privatized, true_answers)

            results_list.append(dict(
                dataframe=df_name,
                question=question,
                true_answers=true_answers,
                original_context=original_context,
                privatized_context=privatized_context,
                answer_privatized=answer_privatized,
                f1_privatized=f1_privatized,
                em_privatized=em_privatized,
            ))

    # One result frame per round.
    df_round_results = pd.DataFrame(results_list)
    all_results.append(df_round_results)

print(f"\n✅ Finished {num_rounds} rounds of AG News evaluation.")

In [None]:
# --- Merge the per-round frames and write them to a single CSV ---

# Output file (relative path, so it lands in the current working directory).
csv_output_path_agnews = "AGnews_evaluation_results_inf_rounds.csv"

# Stack every round's frame; the row index is rebuilt from 0.
df_all_results_combined_agnews = pd.concat(all_results, ignore_index=True)
df_all_results_combined_agnews.to_csv(csv_output_path_agnews, index=False)

print(f"\n✅ All AG News evaluation results saved to {csv_output_path_agnews}")

In [None]:
# --- Load the AG News GPT-4 Fill dataset ---
# On success the frame is previewed with display(); when the file is missing the
# name is bound to None, which the evaluation cell below checks before running.
file_path_agnews_gpt4_fill = "/content/AGnews_gpt4_fill_tau_0.5.csv"  # adjust filename if different

try:
    df_agnews_gpt4_fill = pd.read_csv(file_path_agnews_gpt4_fill)
    print(f"✅ Loaded dataset: {file_path_agnews_gpt4_fill}")
    display(df_agnews_gpt4_fill.head())
except FileNotFoundError:
    # Only a missing file is handled here; any other read error propagates.
    print(f"❌ Error: File not found at {file_path_agnews_gpt4_fill}")
    df_agnews_gpt4_fill = None


In [None]:

# AG News classifier (rebinds the notebook-level `tokenizer` / `model`).
tokenizer = AutoTokenizer.from_pretrained("textattack/distilbert-base-uncased-ag-news")
model = AutoModelForSequenceClassification.from_pretrained("textattack/distilbert-base-uncased-ag-news")

# The whole evaluation is skipped when the GPT-4 Fill CSV could not be loaded.
if df_agnews_gpt4_fill is not None:
    results_list_agnews_gpt4_fill = []

    # First candidate column name that exists is used as the ground-truth label.
    label_col = None
    for cand in ["label", "labels", "rating", "true_label", "category"]:
        if cand in df_agnews_gpt4_fill.columns:
            label_col = cand
            break

    if label_col is None:
        raise KeyError("No label column found in AG News GPT-4 Fill dataset!")

    for index, row in df_agnews_gpt4_fill.iterrows():
        if "question" in df_agnews_gpt4_fill.columns:
            question = row["question"]
        else:
            question = ""

        true_label_original = row[label_col]
        true_answers = [str(true_label_original)]
        privatized_context = row["privatized_context_gpt4_fill"]

        try:
            inputs = tokenizer(
                privatized_context,
                return_tensors="pt",
                truncation=True,
                padding=True,
            )
            with torch.no_grad():
                outputs = model(**inputs)
            predicted_label = outputs.logits.argmax(dim=1).item()
            answer_privatized = str(predicted_label)
        except Exception as e:
            # Unclassifiable rows are kept with an empty prediction.
            print(f"Error classifying row {index}: {e}")
            answer_privatized = ""

        f1_privatized, em_privatized = evaluate_answer(answer_privatized, true_answers)

        results_list_agnews_gpt4_fill.append(dict(
            question=question,
            true_label=true_label_original,
            true_answers=true_answers,
            privatized_context=privatized_context,
            predicted_label=answer_privatized,
            f1_privatized=f1_privatized,
            em_privatized=em_privatized,
        ))

    df_agnews_gpt4_fill_results = pd.DataFrame(results_list_agnews_gpt4_fill)
    display(df_agnews_gpt4_fill_results.head())

    # Dataset-level means; the plotting cells read 'mean_em_privatized_agnews'
    # from this dict as the GPT-4 Fill baseline.
    average_scores_agnews_gpt4_fill = dict(
        mean_f1_privatized_agnews=df_agnews_gpt4_fill_results['f1_privatized'].mean(),
        mean_em_privatized_agnews=df_agnews_gpt4_fill_results['em_privatized'].mean(),
    )
    display(pd.DataFrame([average_scores_agnews_gpt4_fill]))

    csv_output_path_agnews_gpt4_fill = "AGnews_gpt4_fill_results.csv"
    df_agnews_gpt4_fill_results.to_csv(csv_output_path_agnews_gpt4_fill, index=False)
    print(f"✅ Saved AG News GPT-4 Fill results to {csv_output_path_agnews_gpt4_fill}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import re

# --- Load CSVs ---
inf_results_path = "/content/AGnews_evaluation_results_inf_rounds.csv"
baseline_path = "/content/AGnews_gpt4_fill_results.csv"

df_inf_results = pd.read_csv(inf_results_path)
df_baseline = pd.read_csv(baseline_path)

print(f"Loaded {len(df_inf_results)} INF results and {len(df_baseline)} baseline results.")

# Epsilon and the method are both derived from the 'dataframe' (source-file
# name) column, so fail early with a clear message instead of hitting a bare
# KeyError halfway through the cell.
if "dataframe" not in df_inf_results.columns:
    raise KeyError(
        f"'dataframe' column not found in {inf_results_path}; "
        "cannot derive epsilon/method for the plot."
    )


def _epsilon_from_name(name):
    """Return the number following 'epsilon_' in a file-like name, or NaN.

    Non-string values and names without a parsable number give NaN rather than
    raising; a trailing '.' (e.g. from a leftover extension) is not captured.
    """
    if not isinstance(name, str):
        return float("nan")
    match = re.search(r"epsilon_(\d*\.?\d+)", name)
    return float(match.group(1)) if match else float("nan")


# --- Extract epsilon values from the filename column ---
df_inf_results["epsilon"] = df_inf_results["dataframe"].apply(_epsilon_from_name)

# --- Identify method type ---
df_inf_results["method"] = df_inf_results["dataframe"].apply(
    lambda x: "Inf Laplace" if "laplace" in str(x).lower()
    else "Inf Polar" if "polar" in str(x).lower()
    else "Unknown"
)

# --- Clean & aggregate mean accuracy (Exact Match) ---
# Rows with an unparsable epsilon (NaN) are excluded by groupby's default
# handling of missing keys; groupby also returns the groups sorted by epsilon.
df_inf_results = df_inf_results.dropna(subset=["em_privatized"])
df_aggregated_inf_metrics = (
    df_inf_results.groupby(["epsilon", "method"])
    .agg(mean_accuracy=("em_privatized", "mean"))
    .reset_index()
)

# --- Compute GPT-4 Fill Baseline ---
if "em_privatized" in df_baseline.columns:
    baseline_acc = df_baseline["em_privatized"].mean()
else:
    # In case the column name differs: average every numeric/boolean column whose
    # name contains "em". numeric_only=True keeps text columns that happen to
    # match from raising a TypeError.
    baseline_acc = df_baseline.filter(like="em").mean(numeric_only=True).mean()

print(f"✅ Baseline Accuracy (GPT-4 Fill): {baseline_acc:.4f}")

# --- Plot ---
plt.figure(figsize=(12, 7))

# Inf Laplace
df_laplace = df_aggregated_inf_metrics[df_aggregated_inf_metrics["method"] == "Inf Laplace"]
plt.plot(df_laplace["epsilon"], df_laplace["mean_accuracy"],
         label="Inf Laplace", marker="o", linestyle="-")

# Inf Polar
df_polar = df_aggregated_inf_metrics[df_aggregated_inf_metrics["method"] == "Inf Polar"]
plt.plot(df_polar["epsilon"], df_polar["mean_accuracy"],
         label="Inf Polar", marker="o", linestyle="--")

# Baseline line
plt.axhline(y=baseline_acc, color="red", linestyle=":", linewidth=2, label="Baseline (GPT-4 Fill)")

plt.title("AG News — Accuracy (Exact Match) vs Epsilon")
plt.xlabel("Epsilon")
plt.ylabel("Accuracy")
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend(title="Method")
plt.show()

In [None]:

# --- 1) Non-private reference: classifier accuracy on the first 50 AG News test samples ---
agnews = load_dataset("ag_news", split="test[:50]")

tokenizer = AutoTokenizer.from_pretrained("textattack/distilbert-base-uncased-ag-news")
model = AutoModelForSequenceClassification.from_pretrained("textattack/distilbert-base-uncased-ag-news")

correct = 0
for example in agnews:
    text = example["text"]
    true_label = example["label"]
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    pred = outputs.logits.argmax(dim=1).item()
    # Count one hit whenever the argmax class equals the dataset label.
    correct += int(pred == true_label)

baseline_agnews50_acc = correct / len(agnews)
print(f"✅ Baseline (Non-Private AG News, 50 samples): {baseline_agnews50_acc:.3f}")

# --- 2) INF Laplace & INF Polar curves (requires df_aggregated_inf_metrics from the earlier cell) ---
plt.figure(figsize=(12, 7))

df_laplace = df_aggregated_inf_metrics.loc[df_aggregated_inf_metrics['method'] == 'Inf Laplace']
plt.plot(
    df_laplace['epsilon'], df_laplace['mean_accuracy'],
    label='Inf Laplace', marker='o', linestyle='-',
)

df_polar = df_aggregated_inf_metrics.loc[df_aggregated_inf_metrics['method'] == 'Inf Polar']
plt.plot(
    df_polar['epsilon'], df_polar['mean_accuracy'],
    label='Inf Polar', marker='o', linestyle='--',
)

# --- 3) GPT-4 Fill baseline, drawn only when the evaluation cell has produced it ---
if 'average_scores_agnews_gpt4_fill' not in globals() or 'mean_em_privatized_agnews' not in average_scores_agnews_gpt4_fill:
    print("⚠️ GPT-4 Fill baseline not found — check variable names.")
else:
    baseline_gpt4fill_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(
        y=baseline_gpt4fill_acc, color='red', linestyle=':', linewidth=2,
        label='GPT-4 Fill (Baseline)',
    )
    print(f"✅ GPT-4 Fill Baseline: {baseline_gpt4fill_acc:.3f}")

# --- 4) Non-private AG News baseline (DistilBERT classifier) ---
plt.axhline(
    y=baseline_agnews50_acc, color='green', linestyle='-.', linewidth=2,
    label='Non-Private Baseline (AG News 50 samples)',
)

# --- Axes, legend, render ---
plt.title("AG News — Accuracy (Exact Match) vs Epsilon")
plt.xlabel("Epsilon")
plt.ylabel("Accuracy (Exact Match)")
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend(title="Method")
plt.show()


In [None]:

# --- Gather every CSV matching the STAMP polar / STAMP Laplace sweep patterns ---
# NOTE: this rebinds `csv_files*` and `dataframes`, replacing the set loaded earlier.
csv_files_polar = glob.glob("/content/AGnews_stamp_polar_sweep_avg_epsilon_*.csv")
csv_files_laplace = glob.glob("/content/AGnews_stamp_Laplace_sweep_avg_epsilon_*.csv")

csv_files = [*csv_files_polar, *csv_files_laplace]
print("Files found by glob:", csv_files)

# Frames are keyed by their path with the ".csv" text removed.
dataframes = dict()
for file in csv_files:
    df_name = file.replace(".csv", "")
    dataframes[df_name] = pd.read_csv(file)

print(f"✅ Number of AG News STAMP dataframes loaded: {len(dataframes)}")

In [None]:

# --- Load AG News classifier ---
tokenizer = AutoTokenizer.from_pretrained("textattack/distilbert-base-uncased-ag-news")
model = AutoModelForSequenceClassification.from_pretrained("textattack/distilbert-base-uncased-ag-news")

num_rounds = 3
all_results_non_inf = []

# Column layout of every per-round frame. Passing it explicitly keeps the frame
# well-formed (same columns) even when a round ends up scoring zero rows.
result_columns_non_inf = [
    "dataframe", "question", "true_label", "true_answers", "predicted_label",
    "original_context", "privatized_context",
    "f1_privatized", "em_privatized", "cosine_privatized",
]

for round_num in range(num_rounds):
    print(f"--- Starting Round {round_num + 1}/{num_rounds} ---")
    results_list = []
    skipped_missing_label = 0  # rows of this round that had no usable ground truth

    for df_name, df in dataframes.items():  # AG News STAMP dataframes
        print(f"Processing DataFrame: {df_name}")

        for index, row in df.iterrows():
            question = row.get("question", "")

            # --- Identify the true label column dynamically ---
            true_label = None
            for possible_key in ["label", "true_label", "rating", "answer", "true_answers"]:
                if possible_key in row and pd.notna(row[possible_key]):
                    raw_label = row[possible_key]
                    # A float-typed label (e.g. 2.0) would stringify as "2.0", which
                    # normalize_answer turns into "20" and can never match the
                    # predicted "2"; write whole-number floats as integers.
                    if isinstance(raw_label, (float, np.floating)) and float(raw_label).is_integer():
                        raw_label = int(raw_label)
                    true_label = str(raw_label)
                    break

            if true_label is None:
                # No ground truth: the row cannot be scored. Skip it (and report
                # the count below) rather than grading it against an invented
                # class 0, which would distort the accuracy.
                skipped_missing_label += 1
                continue

            true_answers = [true_label]

            privatized_context = row.get("privatized_context", "")
            original_context = row.get("original_context", "")

            # --- Classify privatized context ---
            try:
                inputs = tokenizer(privatized_context, return_tensors="pt", truncation=True, padding=True)
                with torch.no_grad():
                    outputs = model(**inputs)
                pred_label = torch.argmax(outputs.logits, dim=1).item()
                answer_privatized = str(pred_label)
            except Exception as e:
                # Unclassifiable rows keep an empty prediction and score as a miss.
                print(f"Error classifying privatized context row {index}: {e}")
                answer_privatized = ""

            # --- Evaluate ---
            f1_privatized, em_privatized = evaluate_answer(answer_privatized, true_answers)
            cosine_privatized = calculate_cosine_similarity(answer_privatized, true_answers, model_sentence)

            # --- Record all details ---
            results_list.append({
                "dataframe": df_name,
                "question": question,
                "true_label": true_label,
                "true_answers": true_answers,
                "predicted_label": answer_privatized,
                "original_context": original_context,
                "privatized_context": privatized_context,
                "f1_privatized": f1_privatized,
                "em_privatized": em_privatized,
                "cosine_privatized": cosine_privatized,
            })

    if skipped_missing_label:
        print(f"⚠️ Round {round_num + 1}: skipped {skipped_missing_label} rows with no ground-truth label.")

    df_round_results = pd.DataFrame(results_list, columns=result_columns_non_inf)
    all_results_non_inf.append(df_round_results)

print(f"\n✅ Finished {num_rounds} rounds of evaluation for AG News non-inf files.")

# --- Combine all rounds ---
df_all_results_combined_non_inf = pd.concat(all_results_non_inf, ignore_index=True)

# --- Save combined results ---
csv_output_path_non_inf = "AGnews_evaluation_results_non_inf_STAMP_rounds.csv"
df_all_results_combined_non_inf.to_csv(csv_output_path_non_inf, index=False)

print(f"✅ All AG News STAMP evaluation results saved to {csv_output_path_non_inf}")


In [None]:
# --- Prepare Data ---
def _trailing_epsilon(name):
    """Parse the text after the last '_' of a file-like name as a float.

    Example: "AGnews_stamp_polar_sweep_avg_epsilon_200" -> 200.0.
    Returns NaN when the name has no '_' or the trailing part is not a number,
    so one oddly named file cannot abort the whole cell with a ValueError.
    """
    text = str(name)
    if '_' not in text:
        return float('nan')
    try:
        return float(text.rsplit('_', 1)[-1])
    except ValueError:
        return float('nan')


# Extract epsilon value from filename (e.g., "AGnews_stamp_polar_sweep_avg_epsilon_200")
df_all_results_combined_non_inf['epsilon'] = (
    df_all_results_combined_non_inf['dataframe'].apply(_trailing_epsilon)
)

# Identify method (Laplace or Polar); anything not containing "laplace" counts as Polar.
df_all_results_combined_non_inf['method'] = df_all_results_combined_non_inf['dataframe'].apply(
    lambda x: 'STAMP Laplace' if 'laplace' in str(x).lower() else 'STAMP Polar'
)

# --- Aggregate mean accuracy by epsilon and method ---
# Rows whose epsilon could not be parsed are dropped explicitly before grouping.
df_agg_non_inf = (
    df_all_results_combined_non_inf
    .dropna(subset=['epsilon'])
    .groupby(['epsilon', 'method'])
    .agg(mean_accuracy=('em_privatized', 'mean'))
    .reset_index()
    .sort_values('epsilon')
)

# --- Plot ---
plt.figure(figsize=(12, 7))

# STAMP Polar
df_polar = df_agg_non_inf[df_agg_non_inf['method'] == 'STAMP Polar']
plt.plot(df_polar['epsilon'], df_polar['mean_accuracy'],
         label='STAMP Polar', marker='o', linestyle='--', linewidth=2)

# STAMP Laplace
df_laplace = df_agg_non_inf[df_agg_non_inf['method'] == 'STAMP Laplace']
plt.plot(df_laplace['epsilon'], df_laplace['mean_accuracy'],
         label='STAMP Laplace', marker='o', linestyle='-', linewidth=2)

# --- Baseline: GPT-4 Fill + AGnews classifier (drawn only if already computed) ---
if 'average_scores_agnews_gpt4_fill' in globals() and 'mean_em_privatized_agnews' in average_scores_agnews_gpt4_fill:
    baseline_gpt4fill_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(y=baseline_gpt4fill_acc, color='red', linestyle=':', linewidth=2,
                label='Baseline (GPT-4 Fill + AGnews Classifier)')
    print(f"Baseline (GPT-4 Fill): {baseline_gpt4fill_acc:.3f}")
else:
    print("⚠️ Baseline accuracy (GPT-4 Fill) not found — please run that evaluation first.")

# --- Style ---
plt.title('AG News — STAMP Accuracy vs. Epsilon')
plt.xlabel('Epsilon')
plt.ylabel('Accuracy (Exact Match)')
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend(title='Method')
plt.show()

In [None]:
# --- Gather every CSV matching the uniform polar / uniform laplace sweep patterns ---
# Both the lowercase and the capitalized "Laplace" spellings are searched.
# NOTE: this rebinds `csv_files*` and `dataframes`, replacing the set loaded earlier.
csv_files_polar = glob.glob("/content/AGnews_uniform_polar_sweep_avg_epsilon_*.csv")
csv_files_laplace = (
    glob.glob("/content/AGnews_uniform_laplace_sweep_avg_epsilon_*.csv")
    + glob.glob("/content/AGnews_uniform_Laplace_sweep_avg_epsilon_*.csv")
)

csv_files = [*csv_files_polar, *csv_files_laplace]
print("Files found by glob:", csv_files)

# --- Frames keyed by their path with the ".csv" text removed ---
dataframes = dict()
for file in csv_files:
    df_name = file.replace(".csv", "")
    dataframes[df_name] = pd.read_csv(file)

print(f"✅ Number of AG News Uniform dataframes loaded: {len(dataframes)}")


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np
import torch
import pandas as pd

# --- Load AG News classifier ---
tokenizer = AutoTokenizer.from_pretrained("textattack/distilbert-base-uncased-ag-news")
model = AutoModelForSequenceClassification.from_pretrained("textattack/distilbert-base-uncased-ag-news")

num_rounds = 3
all_results_uniform = []

# Column layout of every per-round frame. Passing it explicitly keeps the frame
# well-formed (same columns) even when a round ends up scoring zero rows.
result_columns_uniform = [
    "dataframe", "question", "true_label", "true_answers", "predicted_label",
    "original_context", "privatized_context",
    "f1_privatized", "em_privatized", "cosine_privatized",
]

for round_num in range(num_rounds):
    print(f"--- Starting Round {round_num + 1}/{num_rounds} ---")
    results_list = []
    skipped_missing_label = 0  # rows of this round that had no usable ground truth

    for df_name, df in dataframes.items():  # Uniform Laplace / Polar
        print(f"Processing DataFrame: {df_name}")

        for index, row in df.iterrows():
            question = row.get("question", "")

            # Detect correct label key automatically (label, true_label, rating, etc.)
            true_label = None
            for key in ["label", "true_label", "rating", "answer", "true_answers"]:
                if key in row and pd.notna(row[key]):
                    raw_label = row[key]
                    # A float-typed label (e.g. 2.0) would stringify as "2.0", which
                    # normalize_answer turns into "20" and can never match the
                    # predicted "2"; write whole-number floats as integers.
                    if isinstance(raw_label, (float, np.floating)) and float(raw_label).is_integer():
                        raw_label = int(raw_label)
                    true_label = str(raw_label)
                    break

            if true_label is None:
                # No ground truth: the row cannot be scored. Skip it (and report
                # the count below) rather than grading it against an invented
                # class 0, which would distort the accuracy.
                skipped_missing_label += 1
                continue

            true_answers = [true_label]

            privatized_context = row.get("privatized_context", "")
            original_context = row.get("original_context", "")

            # --- Classify privatized context ---
            try:
                inputs = tokenizer(privatized_context, return_tensors="pt", truncation=True, padding=True)
                with torch.no_grad():
                    outputs = model(**inputs)
                pred_label = torch.argmax(outputs.logits, dim=1).item()
                answer_privatized = str(pred_label)
            except Exception as e:
                # Unclassifiable rows keep an empty prediction and score as a miss.
                print(f"Error classifying privatized context row {index}: {e}")
                answer_privatized = ""

            # --- Evaluate prediction ---
            f1_privatized, em_privatized = evaluate_answer(answer_privatized, true_answers)
            cosine_privatized = calculate_cosine_similarity(answer_privatized, true_answers, model_sentence)

            # --- Record results ---
            results_list.append({
                "dataframe": df_name,
                "question": question,
                "true_label": true_label,
                "true_answers": true_answers,
                "predicted_label": answer_privatized,
                "original_context": original_context,
                "privatized_context": privatized_context,
                "f1_privatized": f1_privatized,
                "em_privatized": em_privatized,
                "cosine_privatized": cosine_privatized,
            })

    if skipped_missing_label:
        print(f"⚠️ Round {round_num + 1}: skipped {skipped_missing_label} rows with no ground-truth label.")

    df_round_results = pd.DataFrame(results_list, columns=result_columns_uniform)
    all_results_uniform.append(df_round_results)

print(f"\n✅ Finished {num_rounds} rounds of evaluation for AG News uniform datasets.")

# --- Combine all rounds ---
df_all_results_combined_uniform = pd.concat(all_results_uniform, ignore_index=True)

# --- Save results ---
csv_output_path_uniform = "AGnews_evaluation_results_uniform_rounds.csv"
df_all_results_combined_uniform.to_csv(csv_output_path_uniform, index=False)

print(f"✅ All AG News uniform evaluation results saved to {csv_output_path_uniform}")


In [None]:

# --- STAMP Aggregation ---
def extract_epsilon(name):
    """Return the numeric suffix after the last '_' of ``name`` as a float.

    The suffix is accepted only if it consists of digits once at most one '.'
    has been removed; anything else yields NaN. Any error (for example a
    non-string ``name``) also yields NaN instead of raising.
    """
    try:
        suffix = name.split("_")[-1]
        if suffix.replace('.', '', 1).isdigit():
            return float(suffix)
        return np.nan
    except Exception:
        return np.nan

# --- STAMP: derive epsilon / method from the source-file key, then aggregate ---
# Both columns are (re)written in place on the combined STAMP results frame.
df_all_results_combined_non_inf['epsilon'] = (
    df_all_results_combined_non_inf['dataframe'].map(extract_epsilon)
)
df_all_results_combined_non_inf['method'] = df_all_results_combined_non_inf['dataframe'].map(
    lambda name: 'STAMP Polar' if 'laplace' not in name.lower() else 'STAMP Laplace'
)

# Mean exact-match accuracy per (epsilon, method); rows without a parsable
# epsilon are dropped first.
df_stamp_agg = (
    df_all_results_combined_non_inf
    .dropna(subset=['epsilon'])
    .groupby(['epsilon', 'method'], as_index=False)
    .agg(mean_accuracy=('em_privatized', 'mean'))
    .sort_values(['method', 'epsilon'])
)

print("✅ STAMP aggregation complete")
display(df_stamp_agg.head())

# --- UNIFORM: same derivation and aggregation on the uniform results frame ---
df_all_results_combined_uniform['epsilon'] = (
    df_all_results_combined_uniform['dataframe'].map(extract_epsilon)
)
df_all_results_combined_uniform['method'] = df_all_results_combined_uniform['dataframe'].map(
    lambda name: 'Uniform Polar' if 'laplace' not in name.lower() else 'Uniform Laplace'
)

df_uniform_agg = (
    df_all_results_combined_uniform
    .dropna(subset=['epsilon'])
    .groupby(['epsilon', 'method'], as_index=False)
    .agg(mean_accuracy=('em_privatized', 'mean'))
    .sort_values(['method', 'epsilon'])
)

print("✅ UNIFORM aggregation complete")
display(df_uniform_agg.head())


In [None]:

# Polar mechanism only: STAMP vs Uniform accuracy curves, plus the two baselines.
plt.figure(figsize=(12, 7))

# --- STAMP Polar curve ---
df_stamp_polar = df_stamp_agg.loc[df_stamp_agg['method'] == 'STAMP Polar']
plt.plot(
    df_stamp_polar['epsilon'], df_stamp_polar['mean_accuracy'],
    label='STAMP Polar', marker='o', linestyle='-',
)

# --- Uniform Polar curve ---
df_uniform_polar = df_uniform_agg.loc[df_uniform_agg['method'] == 'Uniform Polar']
plt.plot(
    df_uniform_polar['epsilon'], df_uniform_polar['mean_accuracy'],
    label='Uniform Polar', marker='s', linestyle='--',
)

# --- Baselines ---
# Non-private classifier accuracy (first 50 AG News test samples).
plt.axhline(
    y=baseline_agnews50_acc, color='green', linestyle='-.', linewidth=2,
    label='Baseline (Non-Private AG News 50)',
)

# GPT-4 Fill baseline, drawn only when its evaluation cell has been run.
if 'average_scores_agnews_gpt4_fill' not in globals() or 'mean_em_privatized_agnews' not in average_scores_agnews_gpt4_fill:
    print("⚠️ GPT-4 Fill baseline not found — skipping that line.")
else:
    baseline_gpt4fill_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(
        y=baseline_gpt4fill_acc, color='red', linestyle=':', linewidth=2,
        label='Baseline (GPT-4 Fill)',
    )

# --- Axes, legend, render ---
plt.title("AG News: Polar — Uniform vs STAMP")
plt.xlabel("Epsilon")
plt.ylabel("Accuracy (Exact Match)")
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend(title="Method")
plt.show()


In [None]:

# STAMP only: Polar vs Laplace accuracy curves, plus the two baselines.
plt.figure(figsize=(12, 7))

# --- STAMP Polar curve ---
df_stamp_polar = df_stamp_agg.loc[df_stamp_agg['method'] == 'STAMP Polar']
plt.plot(
    df_stamp_polar['epsilon'], df_stamp_polar['mean_accuracy'],
    label='STAMP Polar', marker='o', linestyle='-',
)

# --- STAMP Laplace curve ---
df_stamp_laplace = df_stamp_agg.loc[df_stamp_agg['method'] == 'STAMP Laplace']
plt.plot(
    df_stamp_laplace['epsilon'], df_stamp_laplace['mean_accuracy'],
    label='STAMP Laplace', marker='^', linestyle='--',
)

# --- Baselines ---
# Non-private classifier accuracy (first 50 AG News test samples).
plt.axhline(
    y=baseline_agnews50_acc, color='green', linestyle='-.', linewidth=2,
    label='Baseline (Non-Private AG News 50)',
)

# GPT-4 Fill + AG News classifier baseline, drawn only when already computed.
if 'average_scores_agnews_gpt4_fill' not in globals() or 'mean_em_privatized_agnews' not in average_scores_agnews_gpt4_fill:
    print("⚠️ GPT-4 Fill baseline not found — skipping that line.")
else:
    baseline_gpt4fill_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(
        y=baseline_gpt4fill_acc, color='red', linestyle=':', linewidth=2,
        label='Baseline (GPT-4 Fill)',
    )

# --- Axes, legend, render ---
plt.title("AG News: STAMP — Polar vs Laplace")
plt.xlabel("Epsilon")
plt.ylabel("Accuracy (Exact Match)")
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend(title="Method")
plt.show()


In [None]:
# Uniform only: Polar vs Laplace accuracy curves, plus the two baselines.
plt.figure(figsize=(12, 7))

# --- Uniform Polar curve ---
df_uniform_polar = df_uniform_agg.loc[df_uniform_agg['method'] == 'Uniform Polar']
plt.plot(
    df_uniform_polar['epsilon'], df_uniform_polar['mean_accuracy'],
    label='Uniform Polar', marker='o', linestyle='-',
)

# --- Uniform Laplace curve ---
df_uniform_laplace = df_uniform_agg.loc[df_uniform_agg['method'] == 'Uniform Laplace']
plt.plot(
    df_uniform_laplace['epsilon'], df_uniform_laplace['mean_accuracy'],
    label='Uniform Laplace', marker='^', linestyle='--',
)

# --- Baselines ---
# Non-private classifier accuracy (first 50 AG News test samples).
plt.axhline(
    y=baseline_agnews50_acc, color='green', linestyle='-.', linewidth=2,
    label='Baseline (Non-Private AG News 50)',
)

# GPT-4 Fill + AG News classifier baseline, drawn only when already computed.
if 'average_scores_agnews_gpt4_fill' not in globals() or 'mean_em_privatized_agnews' not in average_scores_agnews_gpt4_fill:
    print("⚠️ GPT-4 Fill baseline not found — skipping that line.")
else:
    baseline_gpt4fill_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(
        y=baseline_gpt4fill_acc, color='red', linestyle=':', linewidth=2,
        label='Baseline (GPT-4 Fill)',
    )

# --- Axes, legend, render ---
plt.title("AG News: Uniform — Polar vs Laplace")
plt.xlabel("Epsilon")
plt.ylabel("Accuracy (Exact Match)")
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend(title="Method")
plt.show()


In [None]:

# Combined figure: STAMP, Uniform and Inf mechanisms (Polar and Laplace noise)
# on one set of axes, together with the two horizontal baselines.
# Reads `df_stamp_agg`, `df_uniform_agg`, `df_inf_results` and
# `baseline_agnews50_acc` from earlier cells.
plt.figure(figsize=(14, 8))

# --- STAMP ---
df_stamp_polar = df_stamp_agg[df_stamp_agg['method'] == 'STAMP Polar']
plt.plot(df_stamp_polar['epsilon'], df_stamp_polar['mean_accuracy'],
         label='STAMP Polar', marker='o', linestyle='-')

df_stamp_laplace = df_stamp_agg[df_stamp_agg['method'] == 'STAMP Laplace']
plt.plot(df_stamp_laplace['epsilon'], df_stamp_laplace['mean_accuracy'],
         label='STAMP Laplace', marker='^', linestyle='--')

# --- UNIFORM ---
df_uniform_polar = df_uniform_agg[df_uniform_agg['method'] == 'Uniform Polar']
plt.plot(df_uniform_polar['epsilon'], df_uniform_polar['mean_accuracy'],
         label='Uniform Polar', marker='s', linestyle='-.')

df_uniform_laplace = df_uniform_agg[df_uniform_agg['method'] == 'Uniform Laplace']
plt.plot(df_uniform_laplace['epsilon'], df_uniform_laplace['mean_accuracy'],
         label='Uniform Laplace', marker='d', linestyle=':')


# --- INF ---
def _inf_epsilon_from_name(name):
    """Return the epsilon encoded in a results-file name.

    An explicit ``epsilon_<number>`` token wins. This matters because a name
    that merely tags the Inf mechanism (contains "inf") would otherwise be
    mapped to ``float('inf')``, which cannot be placed on the x-axis.
    NOTE(review): assumes the Inf result names carry an ``epsilon_<number>``
    token like the other result files; confirm against the actual names.

    Names without such a token keep the previous rule: ``inf`` when the name
    contains "inf", otherwise the last ``_``-separated piece (minus ".csv")
    parsed as a float.
    """
    text = str(name)
    match = re.search(r"epsilon_(\d+(?:\.\d+)?|\.\d+)", text)
    if match:
        return float(match.group(1))
    if 'inf' in text.lower():
        return float('inf')
    return float(text.split('_')[-1].replace('.csv', ''))


# The columns are written onto df_inf_results itself, as before.
df_inf_results['epsilon'] = df_inf_results['dataframe'].apply(_inf_epsilon_from_name)
df_inf_results['method'] = df_inf_results['dataframe'].apply(
    lambda x: 'Inf Laplace' if 'laplace' in str(x).lower() else 'Inf Polar'
)
# groupby sorts by its keys, so each curve is already ordered by epsilon.
df_inf_agg = (
    df_inf_results.groupby(['epsilon', 'method'])
    .agg(mean_accuracy=('em_privatized', 'mean'))
    .reset_index()
)

df_inf_polar = df_inf_agg[df_inf_agg['method'] == 'Inf Polar']
plt.plot(df_inf_polar['epsilon'], df_inf_polar['mean_accuracy'],
         label='Inf Polar', marker='x', linestyle='-')

df_inf_laplace = df_inf_agg[df_inf_agg['method'] == 'Inf Laplace']
plt.plot(df_inf_laplace['epsilon'], df_inf_laplace['mean_accuracy'],
         label='Inf Laplace', marker='x', linestyle='--')

# --- Baselines ---
plt.axhline(y=baseline_agnews50_acc, color='green', linestyle='-.', linewidth=2,
            label='Baseline (Non-Private AG News 50)')
# The GPT-4 Fill line is optional: draw it only when its scores dict is present.
if 'average_scores_agnews_gpt4_fill' in globals() and 'mean_em_privatized_agnews' in average_scores_agnews_gpt4_fill:
    plt.axhline(y=average_scores_agnews_gpt4_fill['mean_em_privatized_agnews'],
                color='red', linestyle=':', linewidth=2,
                label='Baseline (GPT-4 Fill + AGNews Classifier)')
else:
    print("⚠️ GPT-4 Fill baseline not found — skipping baseline line.")

# --- Style ---
plt.title("AG News: Comparison of All Mechanisms (Polar vs Laplace, STAMP vs Uniform vs Inf)")
plt.xlabel("Epsilon")
plt.ylabel("Accuracy (Exact Match)")
plt.ylim(0, 1.1)
plt.grid(True)
# Legend sits outside the axes; tight_layout keeps it inside the figure.
plt.legend(title="Mechanism", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()


In [None]:
# Read every CSV that sits directly under /content/ into `dataframes`,
# keyed by the file path with the "/content/" prefix and ".csv" removed.

csv_files = glob.glob("/content/*.csv")
print(f"Found {len(csv_files)} CSV files in /content/: {csv_files}")

dataframes = {}
for csv_path in csv_files:
    try:
        frame_key = csv_path.replace("/content/", "").replace(".csv", "")
        loaded_frame = pd.read_csv(csv_path)
        dataframes[frame_key] = loaded_frame
        print(f"Successfully loaded {csv_path} into dataframe '{frame_key}'")
        # Preview the first rows of each file as it is loaded.
        display(loaded_frame.head())
    except Exception as e:
        # A file that fails to load is reported and skipped; the loop goes on.
        print(f"Error loading {csv_path}: {e}")

print(f"✅ Number of dataframes loaded: {len(dataframes)}")

In [None]:
import matplotlib.pyplot as plt
import re
import numpy as np

# Parse epsilon and mechanism from the 'dataframe' (source name) column of the
# combined Inf results, average exact-match accuracy per (epsilon, method),
# and plot it against epsilon. Needs the `dataframes` dict from the CSV-loading cell.
df_inf_results_combined = dataframes['AGnews_evaluation_results_inf_rounds']


def _inf_epsilon_or_nan(name):
    """Return the number following ``epsilon_`` in ``name``, or NaN.

    NaN is returned for non-string values and for names with no numeric
    ``epsilon_<number>`` token (for example "epsilon_inf"), so one odd name
    cannot abort the whole cell. The pattern stops at the end of the number,
    so the dot of a trailing ".csv" is not swallowed ("epsilon_0.5.csv" -> 0.5).
    """
    if not isinstance(name, str):
        return np.nan
    match = re.search(r"epsilon_(\d+(?:\.\d+)?|\.\d+)", name)
    return float(match.group(1)) if match else np.nan


if "dataframe" in df_inf_results_combined.columns:
    df_inf_results_combined["epsilon"] = df_inf_results_combined["dataframe"].apply(
        _inf_epsilon_or_nan
    )

# Identify method type from the same name column.
df_inf_results_combined["method"] = df_inf_results_combined["dataframe"].apply(
    lambda x: "Inf Laplace" if "laplace" in str(x).lower()
    else "Inf Polar" if "polar" in str(x).lower()
    else "Unknown"
)

# Mean exact-match accuracy per (epsilon, method); rows without a score or
# without a parsable epsilon are left out.
df_aggregated_inf_metrics = (
    df_inf_results_combined.dropna(subset=["em_privatized", "epsilon"])
    .groupby(["epsilon", "method"])
    .agg(mean_accuracy=("em_privatized", "mean"))
    .reset_index()
)

plt.figure(figsize=(12, 7))

# Inf Laplace
df_laplace = df_aggregated_inf_metrics[df_aggregated_inf_metrics['method'] == 'Inf Laplace']
plt.plot(df_laplace['epsilon'], df_laplace['mean_accuracy'],
         label='Inf Laplace', marker='o', linestyle='-')

# Inf Polar
df_polar = df_aggregated_inf_metrics[df_aggregated_inf_metrics['method'] == 'Inf Polar']
plt.plot(df_polar['epsilon'], df_polar['mean_accuracy'],
         label='Inf Polar', marker='o', linestyle='--')

# GPT-4 Fill baseline: prefer the in-memory scores dict, otherwise fall back
# to the mean of the loaded results frame.
if 'average_scores_agnews_gpt4_fill' in globals() and \
   'mean_em_privatized_agnews' in average_scores_agnews_gpt4_fill:
    baseline_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(y=baseline_acc, color='red', linestyle=':', linewidth=2,
                label='Baseline (GPT-4 Fill)')
    print(f"Baseline accuracy (GPT-4 Fill): {baseline_acc:.3f}")
elif 'AGnews_gpt4_fill_results' in dataframes and 'em_privatized' in dataframes['AGnews_gpt4_fill_results'].columns:
    baseline_acc = dataframes['AGnews_gpt4_fill_results']['em_privatized'].mean()
    plt.axhline(y=baseline_acc, color='red', linestyle=':', linewidth=2,
                label='Baseline (GPT-4 Fill)')
    print(f"Baseline accuracy (GPT-4 Fill from dataframe): {baseline_acc:.3f}")
else:
    print("⚠️ Baseline accuracy not found — check variable name or dataframe!")


plt.title('AG News - Accuracy (Exact Match) vs Epsilon (Inf)')
plt.xlabel('Epsilon')
plt.ylabel('Accuracy (Exact Match)')
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend(title='Method')
plt.show()

In [None]:
# Read the combined STAMP (non-inf) evaluation results back from their CSV
# and preview the first rows; a missing file is reported, not raised.
csv_output_path_non_inf = "AGnews_evaluation_results_non_inf_STAMP_rounds.csv"
try:
    df_all_results_combined_non_inf = pd.read_csv(csv_output_path_non_inf)
except FileNotFoundError:
    print(f"Error: File not found at {csv_output_path_non_inf}. Please ensure cell 23d172a1 was executed successfully.")
else:
    print(f"Successfully loaded data from {csv_output_path_non_inf}")
    display(df_all_results_combined_non_inf.head())

In [None]:
import re
import numpy as np

# --- Prepare Data ---
# Adds 'epsilon' and 'method' columns to df_all_results_combined_non_inf
# (parsed from its 'dataframe' name column) and builds df_agg_non_inf, the
# mean exact-match accuracy per (epsilon, method).


def _stamp_epsilon_or_nan(name):
    """Return the number following ``epsilon_`` in ``name``, or NaN.

    NaN is returned for non-string values and for names with no numeric
    ``epsilon_<number>`` token. The pattern stops at the end of the number,
    so the dot of a trailing ".csv" is not swallowed ("epsilon_0.5.csv" -> 0.5).
    """
    if not isinstance(name, str):
        return np.nan
    match = re.search(r"epsilon_(\d+(?:\.\d+)?|\.\d+)", name)
    return float(match.group(1)) if match else np.nan


df_all_results_combined_non_inf['epsilon'] = (
    df_all_results_combined_non_inf['dataframe'].apply(_stamp_epsilon_or_nan)
)

# Method label: anything not marked "laplace" is treated as Polar.
# str() keeps a non-string name from raising; such rows get NaN epsilon
# above and are dropped before aggregation.
df_all_results_combined_non_inf['method'] = df_all_results_combined_non_inf['dataframe'].apply(
    lambda x: 'STAMP Laplace' if 'laplace' in str(x).lower() else 'STAMP Polar'
)

# Aggregate mean accuracy by epsilon + method
df_agg_non_inf = (
    df_all_results_combined_non_inf.dropna(subset=['epsilon'])  # skip rows with no parsable epsilon
    .groupby(['epsilon', 'method'])
    .agg(mean_accuracy=('em_privatized', 'mean'))
    .reset_index()
    .sort_values('epsilon')
)

print("Aggregated Non-INF (STAMP) Results:")
display(df_agg_non_inf)

In [None]:
import matplotlib.pyplot as plt

# --- Plot ---
# STAMP Polar vs STAMP Laplace from df_agg_non_inf, in the compact layout
# with larger fonts; each baseline is drawn only if it can be found.
plt.figure(figsize=(8, 5))

# STAMP Polar curve
df_polar = df_agg_non_inf.loc[df_agg_non_inf['method'] == 'STAMP Polar']
plt.plot(
    df_polar['epsilon'],
    df_polar['mean_accuracy'],
    marker='o',
    linestyle='-',
    label='STAMP Polar',
)

# STAMP Laplace curve
df_laplace = df_agg_non_inf.loc[df_agg_non_inf['method'] == 'STAMP Laplace']
plt.plot(
    df_laplace['epsilon'],
    df_laplace['mean_accuracy'],
    marker='^',
    linestyle='--',
    label='STAMP Laplace',
)

# --- Baselines ---
# Non-private AG News baseline (first 50 samples), when the variable exists.
if 'baseline_agnews50_acc' not in globals():
    print("⚠️ Non-Private AG News baseline not found.")
else:
    plt.axhline(
        y=baseline_agnews50_acc,
        label='Baseline (Non-Private AG News 50)',
        color='green',
        linestyle='-.',
        linewidth=2,
    )

# GPT-4 Fill baseline: the in-memory scores dict first, then the loaded
# results frame in `dataframes`, otherwise no line.
if 'average_scores_agnews_gpt4_fill' in globals() and 'mean_em_privatized_agnews' in average_scores_agnews_gpt4_fill:
    baseline_gpt4fill_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(
        y=baseline_gpt4fill_acc,
        label='Baseline (GPT-4 Fill)',
        color='red',
        linestyle=':',
        linewidth=2,
    )
    print(f"Baseline (GPT-4 Fill): {baseline_gpt4fill_acc:.3f}")
elif 'AGnews_gpt4_fill_results' in dataframes and 'em_privatized' in dataframes['AGnews_gpt4_fill_results'].columns:
    baseline_gpt4fill_acc = dataframes['AGnews_gpt4_fill_results']['em_privatized'].mean()
    plt.axhline(
        y=baseline_gpt4fill_acc,
        label='Baseline (GPT-4 Fill)',
        color='red',
        linestyle=':',
        linewidth=2,
    )
    print(f"Baseline (GPT-4 Fill from dataframe): {baseline_gpt4fill_acc:.3f}")
else:
    print("⚠️ GPT-4 Fill baseline not found — skipping that line.")

# --- Style ---
plt.xlabel(r"Average per-token privacy budget $\epsilon$", fontsize=17)
plt.ylabel("Accuracy (Exact Match)", fontsize=17)
plt.title("AG News: STAMP— Polar vs Laplace", fontsize=20)
plt.grid(True)
plt.ylim(0, 1.1)
plt.tick_params(axis='both', which='major', labelsize=15)
plt.legend(title='', fontsize=15)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import re
import numpy as np
import pandas as pd

# --- STAMP Polar data ---
# Taken from df_agg_non_inf when an earlier cell produced it; otherwise empty.
if 'df_agg_non_inf' in globals():
    df_stamp_polar = df_agg_non_inf[df_agg_non_inf['method'] == 'STAMP Polar']
else:
    print("⚠️ STAMP aggregated data (df_agg_non_inf) not found. Please run previous cells.")
    df_stamp_polar = pd.DataFrame()  # empty placeholder so the name always exists


# --- Uniform data ---
# Reuse df_all_results_combined_uniform if it is already in the kernel,
# otherwise read it from its CSV (an empty frame stands in for a missing file).
if 'df_all_results_combined_uniform' not in globals():
    csv_output_path_uniform = "AGnews_evaluation_results_uniform_rounds.csv"
    try:
        df_all_results_combined_uniform = pd.read_csv(csv_output_path_uniform)
        print(f"Successfully loaded data from {csv_output_path_uniform}")
    except FileNotFoundError:
        print(f"Error: File not found at {csv_output_path_uniform}.")
        df_all_results_combined_uniform = pd.DataFrame()

df_uniform_polar = pd.DataFrame()
df_uniform_laplace = pd.DataFrame()

if not df_all_results_combined_uniform.empty:
    # Derive 'epsilon' and 'method' from the 'dataframe' name column unless
    # both columns are already present.
    if 'epsilon' not in df_all_results_combined_uniform.columns or 'method' not in df_all_results_combined_uniform.columns:
        def extract_epsilon(name):
            """Return the number following ``epsilon_`` in ``name``, or NaN.

            The pattern stops at the end of the number, so the dot of a
            trailing ".csv" is not swallowed ("epsilon_0.5.csv" -> 0.5
            instead of an unparsable "0.5." that used to become NaN).
            """
            match = re.search(r"epsilon_(\d+(?:\.\d+)?|\.\d+)", str(name))
            return float(match.group(1)) if match else np.nan

        df_all_results_combined_uniform['epsilon'] = df_all_results_combined_uniform['dataframe'].apply(extract_epsilon)
        # Anything not marked "laplace" is treated as Polar; str() keeps a
        # non-string name from raising.
        df_all_results_combined_uniform['method'] = df_all_results_combined_uniform['dataframe'].apply(
            lambda x: 'Uniform Laplace' if 'laplace' in str(x).lower() else 'Uniform Polar'
        )

    # Mean accuracy per (epsilon, method) for the Uniform runs.
    df_uniform_agg = (
        df_all_results_combined_uniform.dropna(subset=['epsilon'])  # skip rows with no parsable epsilon
        .groupby(['epsilon', 'method'])
        .agg(mean_accuracy=('em_privatized', 'mean'))
        .reset_index()
        .sort_values('epsilon')
    )
    df_uniform_polar = df_uniform_agg[df_uniform_agg['method'] == 'Uniform Polar']
    df_uniform_laplace = df_uniform_agg[df_uniform_agg['method'] == 'Uniform Laplace']
else:
    print("Uniform data not available for plotting.")


# --- Plot ---
plt.figure(figsize=(8, 5))

# Uniform Polar
if not df_uniform_polar.empty:
    plt.plot(df_uniform_polar['epsilon'], df_uniform_polar['mean_accuracy'],
             label='Uniform Polar', marker='o', linestyle='-')

# Uniform Laplace
if not df_uniform_laplace.empty:
    plt.plot(df_uniform_laplace['epsilon'], df_uniform_laplace['mean_accuracy'],
             label='Uniform Laplace', marker='^', linestyle='--')


# --- Baselines ---
# Non-private AG News baseline (first 50 samples), when the variable exists.
if 'baseline_agnews50_acc' in globals():
    plt.axhline(y=baseline_agnews50_acc, color='green', linestyle='-.', linewidth=2,
                label='Baseline (Non-Private AG News 50)')
else:
    print("⚠️ Non-Private AG News baseline not found.")

# GPT-4 Fill baseline: the in-memory scores dict first, then the loaded
# results frame in `dataframes`, otherwise no line.
if 'average_scores_agnews_gpt4_fill' in globals() and 'mean_em_privatized_agnews' in average_scores_agnews_gpt4_fill:
    baseline_gpt4fill_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(y=baseline_gpt4fill_acc, color='red', linestyle=':', linewidth=2,
                label='Baseline (GPT-4 Fill)')
    print(f"Baseline (GPT-4 Fill): {baseline_gpt4fill_acc:.3f}")
elif 'AGnews_gpt4_fill_results' in dataframes and 'em_privatized' in dataframes['AGnews_gpt4_fill_results'].columns:
    baseline_gpt4fill_acc = dataframes['AGnews_gpt4_fill_results']['em_privatized'].mean()
    plt.axhline(y=baseline_gpt4fill_acc, color='red', linestyle=':', linewidth=2,
                label='Baseline (GPT-4 Fill)')
    print(f"Baseline (GPT-4 Fill from dataframe): {baseline_gpt4fill_acc:.3f}")
else:
    print("⚠️ GPT-4 Fill baseline not found — skipping that line.")


# --- Style ---
plt.title("AG News: Uniform — Polar vs Laplace", fontsize=20)
plt.xlabel(r"Average per-token privacy budget $\epsilon$", fontsize=17)
plt.ylabel("Accuracy (Exact Match)", fontsize=17)
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend(title='', fontsize=15)
plt.tick_params(axis='both', which='major', labelsize=15)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import re
import numpy as np

# Define a function to extract epsilon safely
def extract_epsilon(name):
    """Extract the epsilon value from a filename-like string.

    Looks for an ``epsilon_<number>`` token anywhere in ``str(name)`` and
    returns the number as a float. The number may be an integer ("10"),
    a decimal ("0.5") or a leading-dot decimal (".5").

    The pattern stops at the end of the number, so the dot of a following
    extension is not captured: "run_epsilon_0.5.csv" yields 0.5 rather than
    the unparsable "0.5." that a greedy ``[\\d.]+`` would capture.

    Returns:
        float: the parsed epsilon, or ``np.nan`` when no such token exists
        (including non-string inputs such as ``None``).
    """
    match = re.search(r"epsilon_(\d+(?:\.\d+)?|\.\d+)", str(name))
    # The captured text is always a valid float literal, so no try/except is needed.
    return float(match.group(1)) if match else np.nan

# --- Load STAMP and Uniform results from disk ---
# Each frame starts out empty and is replaced only when its CSV reads cleanly;
# a missing or empty file is reported and the empty frame is kept.
csv_output_path_non_inf = "AGnews_evaluation_results_non_inf_STAMP_rounds.csv"
df_all_results_combined_non_inf = pd.DataFrame()
try:
    df_all_results_combined_non_inf = pd.read_csv(csv_output_path_non_inf)
except FileNotFoundError:
    print(f"Error: File not found at {csv_output_path_non_inf}. Please ensure evaluation cell was executed.")
except pd.errors.EmptyDataError:
    print(f"Error: The file {csv_output_path_non_inf} is empty. Cannot load data.")
else:
    print(f"Successfully loaded data from {csv_output_path_non_inf}")


csv_output_path_uniform = "AGnews_evaluation_results_uniform_rounds.csv"
df_all_results_combined_uniform = pd.DataFrame()
try:
    df_all_results_combined_uniform = pd.read_csv(csv_output_path_uniform)
except FileNotFoundError:
    print(f"Error: File not found at {csv_output_path_uniform}. Please ensure evaluation cell was executed.")
except pd.errors.EmptyDataError:
    print(f"Error: The file {csv_output_path_uniform} is empty. Cannot load data.")
else:
    print(f"Successfully loaded data from {csv_output_path_uniform}")


# --- Aggregate STAMP Data ---
# Adds 'epsilon' and 'method' columns (parsed from the 'dataframe' name column)
# and averages exact-match accuracy per (epsilon, method).
df_stamp_agg = pd.DataFrame()  # stays empty when no STAMP results were loaded
if not df_all_results_combined_non_inf.empty:
    df_all_results_combined_non_inf['epsilon'] = df_all_results_combined_non_inf['dataframe'].apply(extract_epsilon)
    # Anything not marked "laplace" is treated as Polar; str() keeps a
    # non-string name from raising.
    df_all_results_combined_non_inf['method'] = df_all_results_combined_non_inf['dataframe'].apply(
        lambda x: 'STAMP Laplace' if 'laplace' in str(x).lower() else 'STAMP Polar'
    )
    df_stamp_agg = (
        df_all_results_combined_non_inf
        .dropna(subset=['epsilon'])  # skip rows with no parsable epsilon
        .groupby(['epsilon', 'method'], as_index=False)
        .agg(mean_accuracy=('em_privatized', 'mean'))
        .sort_values(['method', 'epsilon'])
    )
    print("✅ STAMP aggregation complete")
else:
    print("⚠️ STAMP data not available for aggregation.")


# --- Aggregate UNIFORM Data ---
# Same pipeline as STAMP above, so both tables share one column layout.
df_uniform_agg = pd.DataFrame()  # stays empty when no Uniform results were loaded
if not df_all_results_combined_uniform.empty:
    df_all_results_combined_uniform['epsilon'] = df_all_results_combined_uniform['dataframe'].apply(extract_epsilon)
    df_all_results_combined_uniform['method'] = df_all_results_combined_uniform['dataframe'].apply(
        lambda x: 'Uniform Laplace' if 'laplace' in str(x).lower() else 'Uniform Polar'
    )
    # as_index=False already returns the group keys as ordinary columns, so no
    # reset_index() follows; calling it would add a stray 'index' column.
    df_uniform_agg = (
        df_all_results_combined_uniform
        .dropna(subset=['epsilon'])  # skip rows with no parsable epsilon
        .groupby(['epsilon', 'method'], as_index=False)
        .agg(mean_accuracy=('em_privatized', 'mean'))
        .sort_values(['method', 'epsilon'])
    )
    print("✅ UNIFORM aggregation complete")
else:
    print("⚠️ UNIFORM data not available for aggregation.")


# --- Plot ---
# Polar mechanism only: STAMP vs Uniform allocation, plus whichever baselines
# can be found. Styling and plt.show() run only if at least one source exists.
plt.figure(figsize=(8, 5))

# --- STAMP Polar ---
if df_stamp_agg.empty:
    print("⚠️ STAMP aggregated data (df_stamp_agg) not available for plotting STAMP Polar.")
else:
    df_stamp_polar = df_stamp_agg.loc[df_stamp_agg['method'] == 'STAMP Polar']
    plt.plot(
        df_stamp_polar['epsilon'],
        df_stamp_polar['mean_accuracy'],
        marker='o',
        linestyle='-',
        label='STAMP Polar',
    )

# --- Uniform Polar ---
if df_uniform_agg.empty:
    print("⚠️ Uniform aggregated data (df_uniform_agg) not available for plotting Uniform Polar.")
else:
    df_uniform_polar = df_uniform_agg.loc[df_uniform_agg['method'] == 'Uniform Polar']
    plt.plot(
        df_uniform_polar['epsilon'],
        df_uniform_polar['mean_accuracy'],
        marker='s',
        linestyle='--',
        label='Uniform Polar',
    )

# --- Baselines ---
# Non-private AG News baseline (50 samples), when the variable exists.
has_nonprivate_baseline = 'baseline_agnews50_acc' in globals()
if has_nonprivate_baseline:
    plt.axhline(
        y=baseline_agnews50_acc,
        label='Baseline (Non-Private AG News 50)',
        color='green',
        linestyle='-.',
        linewidth=2,
    )
else:
    print("⚠️ Non-Private AG News baseline not found.")

# GPT-4 Fill baseline: the in-memory scores dict first, then the loaded
# results frame in `dataframes` (only consulted when the dict is unavailable).
gpt4_from_globals = (
    'average_scores_agnews_gpt4_fill' in globals()
    and 'mean_em_privatized_agnews' in average_scores_agnews_gpt4_fill
)
gpt4_from_frames = (
    not gpt4_from_globals
    and 'AGnews_gpt4_fill_results' in dataframes
    and 'em_privatized' in dataframes['AGnews_gpt4_fill_results'].columns
)
if gpt4_from_globals:
    baseline_gpt4fill_acc = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    plt.axhline(
        y=baseline_gpt4fill_acc,
        label='Baseline (GPT-4 Fill)',
        color='red',
        linestyle=':',
        linewidth=2,
    )
    print(f"Baseline (GPT-4 Fill): {baseline_gpt4fill_acc:.3f}")
elif gpt4_from_frames:
    baseline_gpt4fill_acc = dataframes['AGnews_gpt4_fill_results']['em_privatized'].mean()
    plt.axhline(
        y=baseline_gpt4fill_acc,
        label='Baseline (GPT-4 Fill)',
        color='red',
        linestyle=':',
        linewidth=2,
    )
    print(f"Baseline (GPT-4 Fill from dataframe): {baseline_gpt4fill_acc:.3f}")
else:
    print("⚠️ GPT-4 Fill baseline not found — skipping that line.")

# Style and show the figure only when some curve or baseline source was available.
nothing_to_show = (
    df_stamp_agg.empty
    and df_uniform_agg.empty
    and not has_nonprivate_baseline
    and not gpt4_from_globals
    and not gpt4_from_frames
)
if nothing_to_show:
    print("No data available to plot.")
else:
    # --- Style ---
    plt.xlabel(r"Average per-token privacy budget $\epsilon$", fontsize=17)
    plt.ylabel("Accuracy (Exact Match)", fontsize=17)
    plt.title("AG News: Polar — Uniform vs STAMP", fontsize=20)
    plt.grid(True)
    plt.ylim(0, 1.1)
    plt.tick_params(axis='both', which='major', labelsize=15)
    plt.legend(title="", fontsize=15)
    plt.show()

In [None]:
# Build one accuracy table: the aggregated STAMP results (one row per
# epsilon/method) followed by one row per available baseline.
# Every input is optional; a missing one is reported and left out.

# Combine STAMP data
df_stamp_combined = pd.DataFrame()
if 'df_agg_non_inf' in globals() and not df_agg_non_inf.empty:
    df_stamp_combined = df_agg_non_inf.copy()
else:
    print("⚠️ STAMP aggregated data not found. Cannot include STAMP data in the combined table.")

# Get baseline values
baseline_agnews50_acc_value = None
if 'baseline_agnews50_acc' in globals():
    baseline_agnews50_acc_value = baseline_agnews50_acc
    print(f"Using Non-Private AG News 50 samples baseline: {baseline_agnews50_acc_value:.3f}")
else:
    print("⚠️ Non-Private AG News 50 samples baseline not found.")

# `dataframes` comes from the CSV-loading cell; treat it as optional like the
# other globals so this cell does not raise NameError when that cell was skipped.
loaded_frames = globals().get('dataframes', {})

baseline_gpt4fill_acc_value = None
if 'average_scores_agnews_gpt4_fill' in globals() and 'mean_em_privatized_agnews' in average_scores_agnews_gpt4_fill:
    baseline_gpt4fill_acc_value = average_scores_agnews_gpt4_fill['mean_em_privatized_agnews']
    print(f"Using GPT-4 Fill baseline: {baseline_gpt4fill_acc_value:.3f}")
elif 'AGnews_gpt4_fill_results' in loaded_frames and 'em_privatized' in loaded_frames['AGnews_gpt4_fill_results'].columns:
    baseline_gpt4fill_acc_value = loaded_frames['AGnews_gpt4_fill_results']['em_privatized'].mean()
    print(f"Using GPT-4 Fill baseline (from dataframe): {baseline_gpt4fill_acc_value:.3f}")
else:
    print("⚠️ GPT-4 Fill baseline not found.")


# Baselines have no epsilon of their own. They are stored with NaN (not None)
# so the 'epsilon' column stays numeric and they sort after the STAMP rows.
table_columns = ['epsilon', 'method', 'mean_accuracy']
baseline_rows = []
if baseline_agnews50_acc_value is not None:
    baseline_rows.append({'epsilon': float('nan'), 'method': 'Baseline (Non-Private AG News 50)', 'mean_accuracy': baseline_agnews50_acc_value})
if baseline_gpt4fill_acc_value is not None:
    baseline_rows.append({'epsilon': float('nan'), 'method': 'Baseline (GPT-4 Fill)', 'mean_accuracy': baseline_gpt4fill_acc_value})

df_baselines = pd.DataFrame(baseline_rows, columns=table_columns)

# Combine only the parts that have rows. With nothing available, fall back to
# an empty table that still has the expected columns, so the sort below
# cannot fail on a missing 'epsilon' column.
non_empty_parts = [part for part in (df_stamp_combined, df_baselines) if not part.empty]
if non_empty_parts:
    df_combined_table = pd.concat(non_empty_parts, ignore_index=True)
else:
    df_combined_table = pd.DataFrame(columns=table_columns)

# Sort by epsilon and then by method for better readability
df_combined_table = df_combined_table.sort_values(['epsilon', 'method'], na_position='last').reset_index(drop=True)

print("\nCombined Accuracy Table:")
# Format 'mean_accuracy' to 3 decimal places
display(df_combined_table.style.format({'mean_accuracy': '{:.3f}'}))