# Persuasion Persistence Study - Unified Visualization

## Overview
This notebook provides comprehensive visualization and analysis for persuasion persistence experiments.

## Sections
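1. **Configuration & Setup** - Imports, plot styling, global constants
2. **Data Loading & Preprocessing** - JSONL import, baseline filtering, column validation
3. **Metric Calculation** - Friction scores, percentile ranks, baseline-normalized behavioral metrics
4. **Statistical Tests** - Mann-Whitney U, effect sizes (mean Δ), persona deltas, tactic summaries
5. **Core Plotting Functions** - Friction plots, behavioral comparisons, heatmaps, prefill comparisons
6. **Example Usage** - Experiment-specific analysis
   - Coding tasks (opinion_persuasion & prefill_only)
   - Web research tasks (misaligned & aligned)
7. **Custom Analysis** - Space for your own cells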

## Supported Experiment Types
- **Coding**: Opinion-persuasion vs Prefill-only
- **Web (Misaligned)**: Unrelated opinion persuasion
- **Web (Aligned)**: Task-aligned behavior persuasion

## 1. Configuration & Setup

In [None]:
# Core imports
import json
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Optional

# Statistical analysis
from scipy.stats import mannwhitneyu, ttest_ind
from scipy import stats

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch

# Configuration
# Plot defaults; they apply to every figure created after this cell runs.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)  # default (width, height) for new figures
plt.rcParams['font.size'] = 10

# Suppress warnings
# NOTE: filterwarnings('ignore') is called without a category, so it silences
# every warning for the rest of the session, deprecation notices included.
import warnings
warnings.filterwarnings('ignore')

print("✅ Imports loaded successfully")

In [None]:
# ============================================================
# GLOBAL CONFIGURATION
# ============================================================

# Value of the "tactic" column that identifies baseline rows
BASELINE = "baseline"

# Small epsilon for numerical stability (default of compute_friction_score's `eps`)
EPS = 1e-6

# Hex colors keyed by tactic name
TACTIC_COLORS = dict(
    baseline="#666666",
    logical_appeal="#1f77b4",
    authority_endorsement="#ff7f0e",
    evidence_based="#2ca02c",
    priming_urgency="#d62728",
    anchoring="#9467bd",
    neutral_injection="#8c564b",
)

# Hex colors keyed by persona name
PERSONA_COLORS = dict(
    gpt="#1f77b4",
    claude="#ff7f0e",
    llama="#2ca02c",
    mistral="#d62728",
    qwen="#9467bd",
    gemini="#8c564b",
    neutral="#666666",
)

# Raw metric columns used by default for coding tasks
CODING_RAW_METRICS = [
    "num_errors", "num_code_revisions", "coding_duration_s", "revision_entropy",
    "strategy_switch_rate", "overcommitment", "mean_revision_size", "final_revision_delta",
]

# Raw metric columns used by default for web tasks
WEB_RAW_METRICS = [
    "num_urls", "num_unique_urls", "num_domains", "domain_entropy",
    "num_searches", "num_summaries", "avg_latency_s", "total_duration_s",
]

print("✅ Configuration loaded")

## 2. Data Loading & Preprocessing

In [None]:
# ============================================================
# DATA LOADING UTILITIES
# ============================================================

def load_jsonl(path: str, backbone: str = "gpt") -> pd.DataFrame:
    """Read a JSON-Lines file and tag every row with a backbone label.

    Args:
        path: Location of the JSONL file (one JSON record per line).
        backbone: Value written to the ``backbone`` column of every row.

    Returns:
        Dataframe of the parsed records with a ``backbone`` column.
    """
    records = pd.read_json(path, lines=True)
    return records.assign(backbone=backbone)

def load_multiple_files(file_dict: Dict[str, str]) -> pd.DataFrame:
    """Read several JSONL files and stack them into one dataframe.

    Paths that do not exist are reported and skipped; each file that is read
    gets its dictionary key as the ``backbone`` label.

    Args:
        file_dict: Dict mapping persona/backbone name to file path

    Returns:
        Combined dataframe with a fresh 0..n-1 index

    Raises:
        ValueError: If none of the paths could be loaded.
    """
    frames = []
    for label, file_path in file_dict.items():
        if not Path(file_path).exists():
            print(f"  ✗ File not found: {file_path}")
            continue

        frame = load_jsonl(file_path, backbone=label)
        frames.append(frame)
        print(f"  ✓ Loaded {label}: {len(frame)} rows")

    if len(frames) == 0:
        raise ValueError("No files were successfully loaded")

    combined = pd.concat(frames, ignore_index=True)
    print(f"\n✅ Combined: {len(combined)} total rows")
    return combined

def filter_baseline(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` without the rows whose tactic is ``BASELINE``."""
    is_not_baseline = df["tactic"] != BASELINE
    return df.loc[is_not_baseline].copy()

def validate_required_columns(df: pd.DataFrame, required: List[str]):
    """Verify that every name in ``required`` is a column of ``df``.

    Prints a confirmation when all columns are present.

    Raises:
        ValueError: Listing the missing columns, in the order given.
    """
    absent = [name for name in required if name not in df.columns]
    if not absent:
        print(f"✅ All required columns present: {required}")
        return
    raise ValueError(f"Missing required columns: {absent}")

## 3. Metric Calculation Functions

In [None]:
# ============================================================
# FRICTION SCORE CALCULATION
# ============================================================

def percentile_rank_nan_safe(x):
    """Percentile-rank the non-missing entries of ``x``; missing entries stay NaN.

    Returns a float NumPy array with one value per input element.
    """
    series = pd.Series(x)
    valid = series.notna().to_numpy()

    ranks = np.full(len(series), np.nan, dtype=float)
    # Only the observed values take part in the ranking.
    ranks[valid] = series[valid].rank(pct=True).to_numpy()
    return ranks

def compute_friction_score(
    df: pd.DataFrame,
    raw_metrics: List[str],
    baseline_label: str = "baseline",
    eps: float = EPS,
) -> pd.DataFrame:
    """Add a ``friction_score`` column: the mean within-persona percentile rank.

    For every metric present in ``df`` a ``{metric}_rank`` column is added,
    holding the percentile rank of each row among rows of the same persona.
    ``friction_score`` is the row-wise mean of those rank columns, skipping
    NaN. Metrics missing from ``df`` are reported and ignored. A short summary
    (row count, mean, std) is printed.

    Args:
        df: Input dataframe; must contain a ``persona`` column. Not modified.
        raw_metrics: List of raw metric column names
        baseline_label: Name of baseline condition (accepted but not used here)
        eps: Small epsilon for numerical stability (accepted but not used here)

    Returns:
        Copy of ``df`` with the rank columns and ``friction_score`` added
    """
    scored = df.copy()

    # One within-persona percentile-rank column per available metric.
    for metric in raw_metrics:
        if metric in scored.columns:
            per_persona = scored.groupby("persona")[metric]
            scored[f"{metric}_rank"] = per_persona.transform(percentile_rank_nan_safe)
        else:
            print(f"⚠️  Warning: {metric} not found in dataframe")

    # Average whichever rank columns exist for the requested metrics.
    candidate_cols = (f"{m}_rank" for m in raw_metrics)
    rank_cols = [col for col in candidate_cols if col in scored.columns]
    scored["friction_score"] = scored[rank_cols].mean(axis=1, skipna=True)

    friction = scored["friction_score"]
    print(f"✅ Friction score computed (n={len(scored)})")
    print(f"   Mean: {friction.mean():.3f}")
    print(f"   Std: {friction.std():.3f}")

    return scored

def compute_normalized_metrics(
    df: pd.DataFrame,
    raw_metrics: List[str],
    baseline_label: str = "baseline",
) -> pd.DataFrame:
    """Compute baseline-normalized metrics: (x - baseline_mean) / baseline_std.

    The baseline mean and standard deviation are computed per persona from
    the rows whose ``tactic`` equals ``baseline_label``. A row's normalized
    value is NaN when its persona has no baseline rows, or when the baseline
    standard deviation is zero or undefined (e.g. a single baseline row).
    Metrics that are not columns of ``df`` are skipped silently.

    Args:
        df: Input dataframe with ``persona`` and ``tactic`` columns. Not modified.
        raw_metrics: List of raw metric column names
        baseline_label: Name of baseline condition

    Returns:
        Dataframe with added {metric}_norm columns
    """
    df = df.copy()

    baseline_df = df[df["tactic"] == baseline_label]

    for metric in raw_metrics:
        if metric not in df.columns:
            continue

        # Per-persona baseline statistics, broadcast back onto every row by
        # persona. Personas without baseline rows map to NaN.
        baseline_values = baseline_df.groupby("persona")[metric]
        row_mean = df["persona"].map(baseline_values.mean())
        row_std = df["persona"].map(baseline_values.std())

        # A zero spread would divide by zero; treat it as undefined instead.
        row_std = row_std.where(row_std != 0)

        df[f"{metric}_norm"] = (df[metric] - row_mean) / row_std

    print("✅ Normalized metrics computed")
    return df

## 4. Statistical Tests

In [None]:
# ============================================================
# STATISTICAL TESTS
# ============================================================

def pooled_np_p_test(df: pd.DataFrame, score_col: str = "friction_score") -> Dict:
    """Two-sided Mann-Whitney U test of not-persuaded vs persuaded scores.

    Rows are split on ``persuaded == 0`` / ``persuaded == 1`` and NaN scores
    are dropped. If either group ends up empty, only the group sizes are
    reported and every statistic is NaN.

    Args:
        df: Dataframe with 'persuaded' column and score column
        score_col: Name of score column to compare

    Returns:
        Dict with keys n_np, n_p, mean_np, mean_p, delta (persuaded minus
        not-persuaded mean), u_stat and p_value
    """
    not_persuaded = df.loc[df["persuaded"] == 0, score_col].dropna()
    persuaded = df.loc[df["persuaded"] == 1, score_col].dropna()

    # Start from the "cannot test" answer and fill in statistics if possible.
    result = {
        "n_np": len(not_persuaded),
        "n_p": len(persuaded),
        "mean_np": np.nan,
        "mean_p": np.nan,
        "delta": np.nan,
        "u_stat": np.nan,
        "p_value": np.nan,
    }

    if result["n_np"] and result["n_p"]:
        u_stat, p_value = mannwhitneyu(not_persuaded, persuaded, alternative="two-sided")
        result.update(
            mean_np=not_persuaded.mean(),
            mean_p=persuaded.mean(),
            delta=persuaded.mean() - not_persuaded.mean(),
            u_stat=u_stat,
            p_value=p_value,
        )

    return result

def persona_delta_summary(df: pd.DataFrame, score_col: str = "friction_score") -> pd.DataFrame:
    """Compute per-persona differences between persuaded and not-persuaded.

    For each persona, rows are split on ``persuaded == 0`` / ``persuaded == 1``
    (NaN scores dropped) and compared with a two-sided Mann-Whitney U test.
    Personas missing either group are left out of the summary.

    Args:
        df: Dataframe with 'persona', 'persuaded' columns
        score_col: Score column to analyze

    Returns:
        Summary dataframe with one row per testable persona and the columns
        persona, n_np, n_p, mean_np, mean_p, delta, u_stat, p_value. The
        columns are present even when no persona could be tested.
    """
    columns = ["persona", "n_np", "n_p", "mean_np", "mean_p", "delta", "u_stat", "p_value"]
    rows = []

    for persona, g in df.groupby("persona"):
        np_vals = g[g["persuaded"] == 0][score_col].dropna()
        p_vals = g[g["persuaded"] == 1][score_col].dropna()

        # The test needs observations on both sides.
        if len(np_vals) == 0 or len(p_vals) == 0:
            continue

        u_stat, p_value = mannwhitneyu(np_vals, p_vals, alternative="two-sided")

        rows.append({
            "persona": persona,
            "n_np": len(np_vals),
            "n_p": len(p_vals),
            "mean_np": np_vals.mean(),
            "mean_p": p_vals.mean(),
            "delta": p_vals.mean() - np_vals.mean(),
            "u_stat": u_stat,
            "p_value": p_value,
        })

    # Passing the columns explicitly keeps the schema stable for an empty
    # result, so callers can index e.g. summary["delta"] unconditionally.
    return pd.DataFrame(rows, columns=columns)

def tactic_summary(df: pd.DataFrame, score_col: str = "friction_score") -> pd.DataFrame:
    """Summarize persuasion outcomes by tactic.

    Rows whose tactic equals ``BASELINE`` are excluded. For every other
    tactic the summary reports the row count, the sum of ``persuaded``, their
    ratio, and the mean score of the not-persuaded and persuaded rows (NaN
    scores dropped; NaN when a group is empty).

    Args:
        df: Dataframe with 'tactic', 'persuaded' columns
        score_col: Score column to analyze

    Returns:
        Summary dataframe with one row per tactic and the columns tactic,
        n_total, n_persuaded, persuasion_rate, mean_np, mean_p, delta. The
        columns are present even when there is no tactic to report.
    """
    columns = [
        "tactic", "n_total", "n_persuaded", "persuasion_rate",
        "mean_np", "mean_p", "delta",
    ]
    rows = []

    for tactic, g in df.groupby("tactic"):
        if tactic == BASELINE:
            continue

        n_total = len(g)
        n_persuaded = g["persuaded"].sum()
        persuasion_rate = n_persuaded / n_total if n_total > 0 else 0

        np_vals = g[g["persuaded"] == 0][score_col].dropna()
        p_vals = g[g["persuaded"] == 1][score_col].dropna()

        mean_np = np_vals.mean() if len(np_vals) > 0 else np.nan
        mean_p = p_vals.mean() if len(p_vals) > 0 else np.nan

        rows.append({
            "tactic": tactic,
            "n_total": n_total,
            "n_persuaded": n_persuaded,
            "persuasion_rate": persuasion_rate,
            "mean_np": mean_np,
            "mean_p": mean_p,
            # NaN propagates when either group is empty.
            "delta": mean_p - mean_np,
        })

    # Passing the columns explicitly keeps the schema stable for an empty
    # result, so callers can index e.g. summary["delta"] unconditionally.
    return pd.DataFrame(rows, columns=columns)

## 5. Core Plotting Functions

In [None]:
# ============================================================
# FRICTION SCORE PLOTS
# ============================================================

def plot_friction_by_persuasion(
    df: pd.DataFrame,
    score_col: str = "friction_score",
    title: str = "Friction Score: Not Persuaded vs Persuaded",
):
    """Box plot comparing friction scores by persuasion status.

    The p-value and mean difference from ``pooled_np_p_test`` are shown in
    the top-left corner of the axes.

    Args:
        df: Dataframe with a 'persuaded' column and the score column
        score_col: Name of score column to plot
        title: Axes title

    Returns:
        The created Matplotlib figure
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    data_to_plot = [
        df[df["persuaded"] == 0][score_col].dropna(),
        df[df["persuaded"] == 1][score_col].dropna(),
    ]

    # Tick labels are set on the axis instead of via boxplot(labels=...):
    # that keyword was renamed to tick_labels in Matplotlib 3.9 and the old
    # name is deprecated. set_xticklabels works on old and new versions.
    bp = ax.boxplot(data_to_plot, patch_artist=True)
    ax.set_xticklabels(["Not Persuaded", "Persuaded"])

    for patch, color in zip(bp['boxes'], ['#1f77b4', '#ff7f0e']):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    ax.set_ylabel("Friction Score")
    ax.set_title(title)
    ax.grid(axis='y', alpha=0.3)

    # Add statistical test result
    test_result = pooled_np_p_test(df, score_col)
    ax.text(0.02, 0.98,
            f"p = {test_result['p_value']:.4f}\nΔ = {test_result['delta']:.3f}",
            transform=ax.transAxes, va='top', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    plt.tight_layout()
    return fig

def plot_friction_by_tactic(
    df: pd.DataFrame,
    score_col: str = "friction_score",
    title: str = "Friction Score by Tactic",
):
    """Grouped bar chart of the mean score per tactic, split by persuasion status.

    Baseline rows are excluded; tactics appear in sorted order.

    Returns:
        The created Matplotlib figure
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    tactics = sorted(t for t in df["tactic"].unique() if t != BASELINE)
    positions = np.arange(len(tactics))
    width = 0.35

    def mean_for(tactic, status):
        """Mean score of one tactic's rows with the given persuaded flag."""
        rows = df[df["tactic"] == tactic]
        return rows[rows["persuaded"] == status][score_col].mean()

    means_np = [mean_for(t, 0) for t in tactics]
    means_p = [mean_for(t, 1) for t in tactics]

    # Two bars per tactic, centred on the tick position.
    ax.bar(positions - width/2, means_np, width, label='Not Persuaded', color='#1f77b4', alpha=0.7)
    ax.bar(positions + width/2, means_p, width, label='Persuaded', color='#ff7f0e', alpha=0.7)

    ax.set_title(title)
    ax.set_xlabel('Tactic')
    ax.set_ylabel('Mean Friction Score')
    ax.set_xticks(positions)
    ax.set_xticklabels(tactics, rotation=45, ha='right')
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    return fig

def plot_friction_by_persona(
    df: pd.DataFrame,
    score_col: str = "friction_score",
    title: str = "Friction Score by Persona",
):
    """Grouped bar chart of the mean score per persona, split by persuasion status.

    Personas appear in sorted order.

    Returns:
        The created Matplotlib figure
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    personas = sorted(df["persona"].unique())
    centers = np.arange(len(personas))
    width = 0.35

    # Mean score per persona, collected separately for each persuaded flag.
    means = {0: [], 1: []}
    for persona in personas:
        subset = df[df["persona"] == persona]
        for status, bucket in means.items():
            bucket.append(subset[subset["persuaded"] == status][score_col].mean())

    ax.bar(centers - width/2, means[0], width, label='Not Persuaded', alpha=0.7)
    ax.bar(centers + width/2, means[1], width, label='Persuaded', alpha=0.7)

    ax.set_title(title)
    ax.set_xlabel('Persona')
    ax.set_ylabel('Mean Friction Score')
    ax.set_xticks(centers)
    ax.set_xticklabels(personas)
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    return fig

In [None]:
# ============================================================
# BEHAVIORAL METRICS PLOTS
# ============================================================

def plot_behavioral_heatmap(
    df: pd.DataFrame,
    metrics: List[str],
    title: str = "Behavioral Metrics Heatmap",
):
    """Heatmap showing normalized behavioral metrics by tactic and persuasion status.

    Each non-baseline tactic contributes two rows, "(NP)" and "(P)". A cell is
    the group's mean of the metric, standardized with the mean and standard
    deviation of that metric over all rows of ``df``. Cells are NaN for
    metrics missing from ``df`` and 0 when the group is empty or the overall
    standard deviation is not positive. The color scale is fixed to [-2, 2].

    Args:
        df: Dataframe with 'tactic', 'persuaded' and metric columns
        metrics: Metric column names, one heatmap column each
        title: Axes title

    Returns:
        The created Matplotlib figure
    """
    fig, ax = plt.subplots(figsize=(10, 8))

    tactics = sorted([t for t in df["tactic"].unique() if t != BASELINE])

    # The overall mean/std of a metric do not depend on the tactic, so they
    # are computed once here instead of once per tactic.
    overall = {m: (df[m].mean(), df[m].std()) for m in metrics if m in df.columns}

    def standardized_mean(values, center, spread):
        """Group mean in units of the overall std; 0 when it cannot be computed."""
        if not spread > 0 or len(values) == 0:
            return 0
        return (values.mean() - center) / spread

    heatmap_data = []
    row_labels = []

    for tactic in tactics:
        g = df[df["tactic"] == tactic]
        g_np = g[g["persuaded"] == 0]
        g_p = g[g["persuaded"] == 1]

        np_row = []
        p_row = []

        for metric in metrics:
            if metric not in overall:
                np_row.append(np.nan)
                p_row.append(np.nan)
                continue

            overall_mean, overall_std = overall[metric]
            np_row.append(standardized_mean(g_np[metric].dropna(), overall_mean, overall_std))
            p_row.append(standardized_mean(g_p[metric].dropna(), overall_mean, overall_std))

        heatmap_data.append(np_row)
        heatmap_data.append(p_row)
        row_labels.append(f"{tactic} (NP)")
        row_labels.append(f"{tactic} (P)")

    heatmap_array = np.array(heatmap_data)

    im = ax.imshow(heatmap_array, cmap='RdYlGn_r', aspect='auto', vmin=-2, vmax=2)

    ax.set_xticks(np.arange(len(metrics)))
    ax.set_yticks(np.arange(len(row_labels)))
    ax.set_xticklabels(metrics, rotation=45, ha='right')
    ax.set_yticklabels(row_labels)

    plt.colorbar(im, ax=ax, label='Normalized Value (σ)')
    ax.set_title(title)

    plt.tight_layout()
    return fig

def plot_metric_comparison(
    df: pd.DataFrame,
    metric: str,
    title: Optional[str] = None,
):
    """Box plot comparing a single metric by persuasion status.

    Args:
        df: Dataframe with a 'persuaded' column and the metric column
        metric: Name of the column to plot (also used as the y-axis label)
        title: Axes title; defaults to "{metric}: Not Persuaded vs Persuaded"

    Returns:
        The created Matplotlib figure
    """
    if title is None:
        title = f"{metric}: Not Persuaded vs Persuaded"

    fig, ax = plt.subplots(figsize=(8, 6))

    data_to_plot = [
        df[df["persuaded"] == 0][metric].dropna(),
        df[df["persuaded"] == 1][metric].dropna(),
    ]

    # Tick labels are set on the axis instead of via boxplot(labels=...):
    # that keyword was renamed to tick_labels in Matplotlib 3.9 and the old
    # name is deprecated. set_xticklabels works on old and new versions.
    bp = ax.boxplot(data_to_plot, patch_artist=True)
    ax.set_xticklabels(["Not Persuaded", "Persuaded"])

    for patch, color in zip(bp['boxes'], ['#1f77b4', '#ff7f0e']):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    ax.set_ylabel(metric)
    ax.set_title(title)
    ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    return fig

In [None]:
# ============================================================
# PREFILL-SPECIFIC PLOTS
# ============================================================

def plot_prefill_comparison(
    df: pd.DataFrame,
    metric: str,
    title: Optional[str] = None,
):
    """Compare metric across prefill conditions (C0, NP, P).

    One box is drawn for each condition that has at least one non-NaN value
    of ``metric``. Each condition keeps its own color regardless of which
    other conditions are present.

    Args:
        df: Dataframe with a 'prefill_condition' column and the metric column
        metric: Name of the column to plot (also used as the y-axis label)
        title: Axes title; defaults to "{metric} by Prefill Condition"

    Returns:
        The created Matplotlib figure, or None (after printing a warning)
        when the 'prefill_condition' column is missing or no condition has
        data. No figure is created in those cases.
    """
    if "prefill_condition" not in df.columns:
        print("⚠️  No prefill_condition column found")
        return None

    if title is None:
        title = f"{metric} by Prefill Condition"

    # Color is tied to the condition itself so it does not shift when an
    # earlier condition has no data.
    condition_colors = {"C0": '#666666', "NP": '#1f77b4', "P": '#ff7f0e'}

    data_to_plot = []
    labels = []
    colors = []

    for cond, color in condition_colors.items():
        vals = df[df["prefill_condition"] == cond][metric].dropna()
        if len(vals) > 0:
            data_to_plot.append(vals)
            labels.append(cond)
            colors.append(color)

    # Check for data before creating the figure so that the early return
    # does not leave an empty figure open.
    if not data_to_plot:
        print("⚠️  No data found for prefill conditions")
        return None

    fig, ax = plt.subplots(figsize=(8, 6))

    # Tick labels are set on the axis instead of via boxplot(labels=...):
    # that keyword was renamed to tick_labels in Matplotlib 3.9 and the old
    # name is deprecated. set_xticklabels works on old and new versions.
    bp = ax.boxplot(data_to_plot, patch_artist=True)
    ax.set_xticklabels(labels)

    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    ax.set_xlabel('Prefill Condition')
    ax.set_ylabel(metric)
    ax.set_title(title)
    ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    return fig

## 6. Example Usage

Below are example analysis workflows for different experiment types.

### 6.1 Coding Tasks (Opinion-Persuasion Mode)

In [None]:
# Example: Load and analyze coding task data (opinion-persuasion mode)
# The pipeline steps below are commented out so this cell runs without data
# files; uncomment them in order once the JSONL files listed here exist.

# Load data
# Each key is passed to load_jsonl as the `backbone` label for that file's rows.
coding_files = {
    "gpt": "coding-gpt-all.jsonl",
    "claude": "coding-claude-all.jsonl",
    "llama": "coding-llama-all.jsonl",
}

# df_coding = load_multiple_files(coding_files)

# Validate required columns
# required_cols = ["persona", "tactic", "persuaded", "persisted"] + CODING_RAW_METRICS
# validate_required_columns(df_coding, required_cols)

# Compute friction score
# (ranks are computed on all rows, baseline included, before filtering)
# df_coding = compute_friction_score(df_coding, CODING_RAW_METRICS)

# Filter to non-baseline
# df_coding_nobase = filter_baseline(df_coding)

# Statistical tests
# print("\n=== Pooled Test ===")
# print(pooled_np_p_test(df_coding_nobase))

# print("\n=== Per-Persona Delta ===")
# print(persona_delta_summary(df_coding_nobase))

# print("\n=== Per-Tactic Summary ===")
# print(tactic_summary(df_coding_nobase))

# Visualizations
# plot_friction_by_persuasion(df_coding_nobase, title="Coding: Friction by Persuasion Status")
# plot_friction_by_tactic(df_coding_nobase, title="Coding: Friction by Tactic")
# plot_friction_by_persona(df_coding_nobase, title="Coding: Friction by Persona")
# plot_behavioral_heatmap(df_coding_nobase, CODING_RAW_METRICS, title="Coding: Behavioral Metrics Heatmap")

print("✅ Example coding analysis (uncomment to run)")

### 6.2 Coding Tasks (Prefill-Only Mode)

In [None]:
# Example: Analyze prefill-only experiment
# plot_prefill_comparison needs a "prefill_condition" column with the values
# "C0", "NP" and/or "P"; it prints a warning and returns None otherwise.

# Load data
# df_prefill = pd.read_json("prefill-coding-all.jsonl", lines=True)

# Compute metrics for each prefill condition
# for metric in CODING_RAW_METRICS:
#     plot_prefill_comparison(df_prefill, metric, title=f"Coding Prefill: {metric}")

# Compare friction across conditions
# NOTE(review): compute_friction_score groups by "persona", so df_prefill must
# contain that column — confirm the prefill export includes it.
# df_prefill = compute_friction_score(df_prefill, CODING_RAW_METRICS)
# plot_prefill_comparison(df_prefill, "friction_score", title="Coding Prefill: Friction Score")

print("✅ Example prefill analysis (uncomment to run)")

### 6.3 Web Tasks

In [None]:
# Example: Load and analyze web research task data
# The steps below use the "persona", "tactic" and "persuaded" columns in
# addition to the metrics in WEB_RAW_METRICS.

# Load data
# (read directly with pandas, so no "backbone" column is added here)
# df_web = pd.read_json("web-research-all.jsonl", lines=True)

# Compute friction score using web metrics
# df_web = compute_friction_score(df_web, WEB_RAW_METRICS)

# Filter to non-baseline
# df_web_nobase = filter_baseline(df_web)

# Visualizations
# plot_friction_by_persuasion(df_web_nobase, title="Web: Friction by Persuasion Status")
# plot_behavioral_heatmap(df_web_nobase, WEB_RAW_METRICS, title="Web: Behavioral Metrics Heatmap")

# Individual metric comparisons
# for metric in ["num_unique_urls", "num_domains", "domain_entropy"]:
#     plot_metric_comparison(df_web_nobase, metric)

print("✅ Example web analysis (uncomment to run)")

## 7. Custom Analysis

Add your own analysis cells below.

In [None]:
# Your custom analysis here
