In [1]:
import numpy as np
import pandas as pd
import pickle
from time import time
from rsa_optimal_exp_core import World, LiteralListener, PragmaticListener_obs_n

# ============================================================================
# CONFIGURATION
# ============================================================================

UTTERANCE_SEQUENCES = {
    "informative": [
        ["most,successful", "no,unsuccessful", "all,successful", "no,unsuccessful", "all,successful"],
        ["most,unsuccessful", "all,unsuccessful", "no,successful", "all,unsuccessful", "no,successful"],
    ],
    "pers_plus": [
        ["some,successful", "some,successful", "some,unsuccessful", "some,successful", "some,successful"],
        ["most,successful", "some,successful", "some,unsuccessful", "some,successful", "some,unsuccessful"],
        ["most,successful", "some,unsuccessful", "some,successful", "some,unsuccessful", "some,successful"],
    ],
    "pers_minus": [
        ["some,unsuccessful", "some,unsuccessful", "some,successful", "some,unsuccessful", "some,unsuccessful"],
        ["most,unsuccessful", "some,unsuccessful", "some,successful", "some,unsuccessful", "some,successful"],
        ["most,unsuccessful", "some,successful", "some,unsuccessful", "some,successful", "some,unsuccessful"],
    ],
}

THETA_VALUES = np.linspace(0, 1, 21)  # 0%, 5%, ..., 100%
ALPHA_GRID = np.logspace(np.log10(1), np.log10(50), 100)

# ============================================================================
# MAIN COMPUTATION
# ============================================================================
    
world = World(n=1, m=5, theta_values=THETA_VALUES)

lookup = {}
start_time = time()

# Iterate over all sequences
for speaker_cond, sequences in UTTERANCE_SEQUENCES.items():
    for seq_idx, utterances in enumerate(sequences):
        key = (speaker_cond, seq_idx)
        print(f"\nProcessing {key}: {utterances}")
        
        lookup[key] = {
            "utterances": utterances,
            "literal": None,
            "credulous_T": {},
            "credulous_F": {},
            "vigilant_T": {},
            "vigilant_F": {},
        }
        
        # --- Literal Listener ---
        listener = LiteralListener(world)
        posteriors = []
        for u in utterances:
            listener.listen_and_update(u)
            posteriors.append(listener.current_belief_theta.copy())
        lookup[key]["literal"] = np.array(posteriors)
        print(f"  Literal done")
        
        # --- Pragmatic Listeners ---
        configs = [
            ("credulous_T", "coop", True),
            ("credulous_F", "coop", False),
            ("vigilant_T", "strat", True),
            ("vigilant_F", "strat", False),
        ]
        
        for model_name, omega, update_internal in configs:
            print(f"  {model_name}...", end=" ", flush=True)
            t0 = time()
            
            for alpha in ALPHA_GRID:
                listener = PragmaticListener_obs_n(
                    world=world,
                    level=1,
                    omega=omega,
                    update_internal=update_internal,
                    alpha=alpha,
                    beta=0.0,
                )
                posteriors = []
                for u in utterances:
                    listener.listen_and_update(u)
                    posteriors.append(listener.current_belief_theta.copy())
                lookup[key][model_name][alpha] = np.array(posteriors)
            
            print(f"done ({time() - t0:.1f}s)")
    
    print(f"\nTotal time: {time() - start_time:.1f}s")


Processing ('informative', 0): ['most,successful', 'no,unsuccessful', 'all,successful', 'no,unsuccessful', 'all,successful']
  Literal done
  credulous_T... 



done (1.3s)
  credulous_F... done (0.5s)
  vigilant_T... done (2.9s)
  vigilant_F... done (0.8s)

Processing ('informative', 1): ['most,unsuccessful', 'all,unsuccessful', 'no,successful', 'all,unsuccessful', 'no,successful']
  Literal done
  credulous_T... done (1.3s)
  credulous_F... done (0.5s)
  vigilant_T... done (2.4s)
  vigilant_F... done (0.7s)

Total time: 10.4s

Processing ('pers_plus', 0): ['some,successful', 'some,successful', 'some,unsuccessful', 'some,successful', 'some,successful']
  Literal done
  credulous_T... done (1.3s)
  credulous_F... done (0.5s)
  vigilant_T... done (2.4s)
  vigilant_F... done (0.8s)

Processing ('pers_plus', 1): ['most,successful', 'some,successful', 'some,unsuccessful', 'some,successful', 'some,unsuccessful']
  Literal done
  credulous_T... done (1.2s)
  credulous_F... done (0.5s)
  vigilant_T... done (2.3s)
  vigilant_F... done (0.7s)

Processing ('pers_plus', 2): ['most,successful', 'some,unsuccessful', 'some,successful', 'some,unsuccessful', 

In [2]:
import copy
EPSILON = 0.01

theta_values = THETA_VALUES
alpha_grid = ALPHA_GRID
n_theta = len(THETA_VALUES)

print(f"Applying epsilon smoothing with ε = {EPSILON}")
print(f"Uniform floor: {EPSILON / n_theta:.6f}")

smoothed_lookup = copy.deepcopy(lookup)
# Apply smoothing to all predictions
for key in smoothed_lookup:
    # Literal
    smoothed_lookup[key]["literal"] = (1 - EPSILON) * smoothed_lookup[key]["literal"] + EPSILON / n_theta
    
    # Pragmatic models
    for model_name in ["credulous_T", "credulous_F", "vigilant_T", "vigilant_F"]:
        for alpha in alpha_grid:
            smoothed_lookup[key][model_name][alpha] = (1 - EPSILON) * smoothed_lookup[key][model_name][alpha] + EPSILON / n_theta
    

Applying epsilon smoothing with ε = 0.01
Uniform floor: 0.000476


In [3]:
df = pd.read_csv("./processed_listener_n1_anonymized.csv")
print(f"Loaded data: {len(df)} rows")

# ============================================================================
# COMPREHENSION CHECK FILTERING
# ============================================================================

comp_cols = ["comp1_some_correct", "comp1_most_correct", "comp2_correct", 
             "comp_multi_correct", "comp3_correct"]

# Count correct per participant
df["n_comp_correct"] = df[comp_cols].sum(axis=1)

print("Comprehension check distribution:")
print(df["n_comp_correct"].value_counts().sort_index())

# Option A: Did not fail on last two (most important)
df["pass_last_two"] = df["comp_multi_correct"] & df["comp3_correct"]

# Option B: Did not fail 4 out of 5 (passed at least 2)
df["pass_at_least_3"] = df["n_comp_correct"] >= 3

print(f"\nOriginal: {len(df)} participants")
print(f"Pass last two: {df['pass_last_two'].sum()} participants")
print(f"Pass at least 3: {df['pass_at_least_3'].sum()} participants")
# Cross-tabulation
print("\nCross-tab (last_two x at_least_3):")
print(pd.crosstab(df["pass_last_two"], df["pass_at_least_3"]))

# Choose filter (can switch between them)
#df = df[df["pass_last_two"]]
#df = df[df["pass_at_least_3"]]

# Check balance across conditions
print("\nFiltered sample by condition:")
print(df.groupby(["listener_belief_condition", "speaker_condition"]).size())

print(f"Loaded data: {len(df)} rows")

Loaded data: 614 rows
Comprehension check distribution:
n_comp_correct
0     12
1     83
2    121
3    127
4    144
5    127
Name: count, dtype: int64

Original: 614 participants
Pass last two: 269 participants
Pass at least 3: 398 participants

Cross-tab (last_two x at_least_3):
pass_at_least_3  False  True 
pass_last_two                
False              213    132
True                 3    266

Filtered sample by condition:
listener_belief_condition  speaker_condition
credulous                  informative          66
                           pers_minus           53
                           pers_plus            76
naturalistic               informative          71
                           pers_minus           64
                           pers_plus            80
vigilant                   informative          61
                           pers_minus           67
                           pers_plus            76
dtype: int64
Loaded data: 614 rows


In [4]:

# ============================================================================
# CLEAN DATA
# ============================================================================

# Filter to completed participants who passed attention check
df = df[df["completed_all_rounds"] == True]
df = df[df["attention_check_passed"] == True]
print(f"After filtering: {len(df)} participants")

# Extract relevant columns
# Responses are in r1_effectiveness, ..., r5_effectiveness (0-100 scale)
response_cols = [f"r{i}_effectiveness" for i in range(1, 6)]

# Check for missing values in key columns
key_cols = ["speaker_condition", "listener_belief_condition", "sequence_idx"] + response_cols
df = df.dropna(subset=key_cols)
print(f"After dropping NaN: {len(df)} participants")

# Convert sequence_idx to int
df["sequence_idx"] = df["sequence_idx"].astype(int)

# Convert responses to theta indices (0-100 → 0-20)
# Response of 0% → index 0, 5% → index 1, ..., 100% → index 20
for col in response_cols:
    df[col] = df[col].astype(int)
    
# Create response matrix (n_participants x 5 rounds)
responses = df[response_cols].values  # shape (n_participants, 5), values 0-100

# Convert to theta indices (divide by 5)
response_indices = (responses / 5).astype(int)  # values 0-20

# Clamp to valid range (in case of any edge cases)
response_indices = np.clip(response_indices, 0, n_theta - 1)

print(f"Response matrix shape: {response_indices.shape}")


After filtering: 588 participants
After dropping NaN: 588 participants
Response matrix shape: (588, 5)


In [5]:
# ============================================================================
# COMPUTE LOG-LIKELIHOODS (alpha optimized at per participant level)
# ============================================================================

# Get sequence keys for each participant
sequence_keys = list(zip(df["speaker_condition"], df["sequence_idx"]))

# Model names
model_names = ["literal", "credulous_T", "credulous_F", "vigilant_T", "vigilant_F"]

# Store results
results = []

for i, (idx, row) in enumerate(df.iterrows()):
    key = (row["speaker_condition"], int(row["sequence_idx"]))
    resp_idx = response_indices[i]  # shape (5,), values 0-20
    
    participant_result = {
        "participant_idx": idx,
        "speaker_condition": row["speaker_condition"],
        "listener_belief_condition": row["listener_belief_condition"],
        "sequence_idx": int(row["sequence_idx"]),
    }
    
    # --- Literal model (no alpha) ---
    posteriors = smoothed_lookup[key]["literal"]  # shape (5, 21)
    # Get probability of each response
    probs = posteriors[np.arange(5), resp_idx]  # shape (5,)
    ll = np.sum(np.log(probs))
    participant_result["ll_literal"] = ll
    
    # --- Pragmatic models (for each alpha) ---
    for model_name in ["credulous_T", "credulous_F", "vigilant_T", "vigilant_F"]:
        best_ll = -np.inf
        best_alpha = None
        
        for alpha in alpha_grid:
            posteriors = smoothed_lookup[key][model_name][alpha]  # shape (5, 21)
            probs = posteriors[np.arange(5), resp_idx]
            ll = np.sum(np.log(probs))
            
            if ll > best_ll:
                best_ll = ll
                best_alpha = alpha
        
        participant_result[f"ll_{model_name}"] = best_ll
        participant_result[f"alpha_{model_name}"] = best_alpha
    
    results.append(participant_result)
    
    if (i + 1) % 50 == 0:
        print(f"Processed {i + 1}/{len(df)} participants")

print(f"Processed all {len(df)} participants")

# ============================================================================
# CREATE RESULTS DATAFRAME
# ============================================================================

results_df = pd.DataFrame(results)

# Add responses for reference
for r in range(1, 6):
    results_df[f"r{r}_effectiveness"] = df[f"r{r}_effectiveness"].values

print("\n" + "=" * 60)
print("RESULTS SUMMARY")
print("=" * 60)

# Summary by listener condition
for listener_cond in ["credulous", "vigilant", "naturalistic"]:
    subset = results_df[results_df["listener_belief_condition"] == listener_cond]
    print(f"\n{listener_cond.upper()} (n={len(subset)}):")
    
    ll_cols = ["ll_literal", "ll_credulous_T", "ll_credulous_F", "ll_vigilant_T", "ll_vigilant_F"]
    for col in ll_cols:
        mean_ll = subset[col].mean()
        print(f"  {col}: mean={mean_ll:.2f}")
    
    # Which model fits best (highest mean LL)?
    mean_lls = {col: subset[col].mean() for col in ll_cols}
    best_model = max(mean_lls, key=mean_lls.get)
    print(f"  Best fitting: {best_model}")

Processed 50/588 participants
Processed 100/588 participants
Processed 150/588 participants
Processed 200/588 participants
Processed 250/588 participants
Processed 300/588 participants
Processed 350/588 participants
Processed 400/588 participants
Processed 450/588 participants
Processed 500/588 participants
Processed 550/588 participants
Processed all 588 participants

RESULTS SUMMARY

CREDULOUS (n=187):
  ll_literal: mean=-12.23
  ll_credulous_T: mean=-12.63
  ll_credulous_F: mean=-12.22
  ll_vigilant_T: mean=-11.39
  ll_vigilant_F: mean=-11.49
  Best fitting: ll_vigilant_T

VIGILANT (n=197):
  ll_literal: mean=-13.28
  ll_credulous_T: mean=-13.50
  ll_credulous_F: mean=-13.10
  ll_vigilant_T: mean=-12.47
  ll_vigilant_F: mean=-12.57
  Best fitting: ll_vigilant_T

NATURALISTIC (n=204):
  ll_literal: mean=-12.47
  ll_credulous_T: mean=-12.92
  ll_credulous_F: mean=-12.44
  ll_vigilant_T: mean=-11.70
  ll_vigilant_F: mean=-11.82
  Best fitting: ll_vigilant_T


In [6]:
# ============================================================================
# COMPUTE LOG-LIKELIHOODS (alpha optimized at listener condition level)
# ============================================================================

ll_cols = ["ll_literal", "ll_credulous_T", "ll_credulous_F", "ll_vigilant_T", "ll_vigilant_F"]
listener_conditions = ["credulous", "vigilant", "naturalistic"]
pragmatic_models = ["credulous_T", "credulous_F", "vigilant_T", "vigilant_F"]

# Store per-participant log-likelihoods for each model x alpha
# Structure: {listener_cond: {model: {alpha: [ll_per_participant]}}}
ll_by_condition = {cond: {} for cond in listener_conditions}

for listener_cond in listener_conditions:
    subset = df[df["listener_belief_condition"] == listener_cond]
    subset_indices = subset.index.tolist()
    
    # Literal (no alpha)
    lls = []
    for i, idx in enumerate(subset_indices):
        row = df.loc[idx]
        key = (row["speaker_condition"], int(row["sequence_idx"]))
        resp_idx = response_indices[df.index.get_loc(idx)]
        
        posteriors = smoothed_lookup[key]["literal"]
        probs = posteriors[np.arange(5), resp_idx]
        lls.append(np.sum(np.log(probs)))
    ll_by_condition[listener_cond]["literal"] = {"none": lls}
    
    # Pragmatic models
    for model_name in pragmatic_models:
        ll_by_condition[listener_cond][model_name] = {}
        
        for alpha in alpha_grid:
            lls = []
            for i, idx in enumerate(subset_indices):
                row = df.loc[idx]
                key = (row["speaker_condition"], int(row["sequence_idx"]))
                resp_idx = response_indices[df.index.get_loc(idx)]
                
                posteriors = smoothed_lookup[key][model_name][alpha]
                probs = posteriors[np.arange(5), resp_idx]
                lls.append(np.sum(np.log(probs)))
            
            ll_by_condition[listener_cond][model_name][alpha] = lls

# Find best alpha for each listener_cond x model (using mean LL)
best_alphas = {cond: {} for cond in listener_conditions}
mean_lls = {cond: {} for cond in listener_conditions}

for listener_cond in listener_conditions:
    # Literal
    mean_lls[listener_cond]["literal"] = np.mean(ll_by_condition[listener_cond]["literal"]["none"])
    best_alphas[listener_cond]["literal"] = None
    
    # Pragmatic
    for model_name in pragmatic_models:
        best_mean_ll = -np.inf
        best_alpha = None
        
        for alpha in alpha_grid:
            m = np.mean(ll_by_condition[listener_cond][model_name][alpha])
            if m > best_mean_ll:
                best_mean_ll = m
                best_alpha = alpha
        
        mean_lls[listener_cond][model_name] = best_mean_ll
        best_alphas[listener_cond][model_name] = best_alpha

# ============================================================================
# RESULTS
# ============================================================================

print("\n" + "=" * 60)
print("RESULTS (alpha optimized per listener condition)")
print("=" * 60)

for listener_cond in listener_conditions:
    n = len(df[df["listener_belief_condition"] == listener_cond])
    print(f"\n{listener_cond.upper()} (n={n}):")
    
    for model_name in ["literal"] + pragmatic_models:
        m = mean_lls[listener_cond][model_name]
        a = best_alphas[listener_cond][model_name]
        if a is None:
            print(f"  {model_name}: mean_ll={m:.3f}")
        else:
            print(f"  {model_name}: mean_ll={m:.3f}, alpha={a:.3f}")
    
    # Best model
    best_model = max(mean_lls[listener_cond], key=mean_lls[listener_cond].get)
    print(f"  --> Best: {best_model}")


RESULTS (alpha optimized per listener condition)

CREDULOUS (n=187):
  literal: mean_ll=-12.229
  credulous_T: mean_ll=-12.802, alpha=1.000
  credulous_F: mean_ll=-12.384, alpha=1.000
  vigilant_T: mean_ll=-12.481, alpha=1.000
  vigilant_F: mean_ll=-12.406, alpha=1.000
  --> Best: literal

VIGILANT (n=197):
  literal: mean_ll=-13.278
  credulous_T: mean_ll=-13.704, alpha=1.000
  credulous_F: mean_ll=-13.296, alpha=1.000
  vigilant_T: mean_ll=-13.484, alpha=1.000
  vigilant_F: mean_ll=-13.412, alpha=1.000
  --> Best: literal

NATURALISTIC (n=204):
  literal: mean_ll=-12.474
  credulous_T: mean_ll=-13.033, alpha=1.000
  credulous_F: mean_ll=-12.591, alpha=1.000
  vigilant_T: mean_ll=-12.715, alpha=1.000
  vigilant_F: mean_ll=-12.635, alpha=1.000
  --> Best: literal


In [7]:
# ============================================================================
# COMPUTE LOG-LIKELIHOODS (alpha optimized at speaker x listener condition level)
# ============================================================================

ll_cols = ["ll_literal", "ll_credulous_T", "ll_credulous_F", "ll_vigilant_T", "ll_vigilant_F"]
listener_conditions = ["credulous", "vigilant", "naturalistic"]
speaker_conditions = ["informative", "pers_plus", "pers_minus"]
pragmatic_models = ["credulous_T", "credulous_F", "vigilant_T", "vigilant_F"]

# Store per-participant log-likelihoods for each model x alpha
# Structure: {(speaker_cond, listener_cond): {model: {alpha: [ll_per_participant]}}}
ll_by_condition = {}

for speaker_cond in speaker_conditions:
    for listener_cond in listener_conditions:
        cell = (speaker_cond, listener_cond)
        ll_by_condition[cell] = {}
        
        subset = df[(df["speaker_condition"] == speaker_cond) & 
                    (df["listener_belief_condition"] == listener_cond)]
        subset_indices = subset.index.tolist()
        
        if len(subset_indices) == 0:
            continue
        
        # Literal (no alpha)
        lls = []
        for idx in subset_indices:
            row = df.loc[idx]
            key = (row["speaker_condition"], int(row["sequence_idx"]))
            resp_idx = response_indices[df.index.get_loc(idx)]
            
            posteriors = smoothed_lookup[key]["literal"]
            probs = posteriors[np.arange(5), resp_idx]
            lls.append(np.sum(np.log(probs)))
        ll_by_condition[cell]["literal"] = {"none": lls}
        
        # Pragmatic models
        for model_name in pragmatic_models:
            ll_by_condition[cell][model_name] = {}
            
            for alpha in alpha_grid:
                lls = []
                for idx in subset_indices:
                    row = df.loc[idx]
                    key = (row["speaker_condition"], int(row["sequence_idx"]))
                    resp_idx = response_indices[df.index.get_loc(idx)]
                    
                    posteriors = smoothed_lookup[key][model_name][alpha]
                    probs = posteriors[np.arange(5), resp_idx]
                    lls.append(np.sum(np.log(probs)))
                
                ll_by_condition[cell][model_name][alpha] = lls

# Find best alpha for each cell x model (using mean LL)
best_alphas = {}
mean_lls = {}

for speaker_cond in speaker_conditions:
    for listener_cond in listener_conditions:
        cell = (speaker_cond, listener_cond)
        best_alphas[cell] = {}
        mean_lls[cell] = {}
        
        if cell not in ll_by_condition or len(ll_by_condition[cell]) == 0:
            continue
        
        # Literal
        mean_lls[cell]["literal"] = np.mean(ll_by_condition[cell]["literal"]["none"])
        best_alphas[cell]["literal"] = None
        
        # Pragmatic
        for model_name in pragmatic_models:
            best_mean_ll = -np.inf
            best_alpha = None
            
            for alpha in alpha_grid:
                m = np.mean(ll_by_condition[cell][model_name][alpha])
                if m > best_mean_ll:
                    best_mean_ll = m
                    best_alpha = alpha
            
            mean_lls[cell][model_name] = best_mean_ll
            best_alphas[cell][model_name] = best_alpha

# ============================================================================
# RESULTS
# ============================================================================

print("\n" + "=" * 60)
print("RESULTS (alpha optimized per speaker x listener condition)")
print("=" * 60)

for speaker_cond in speaker_conditions:
    for listener_cond in listener_conditions:
        cell = (speaker_cond, listener_cond)
        subset = df[(df["speaker_condition"] == speaker_cond) & 
                    (df["listener_belief_condition"] == listener_cond)]
        n = len(subset)
        
        print(f"\n{speaker_cond.upper()} x {listener_cond.upper()} (n={n}):")
        
        for model_name in ["literal"] + pragmatic_models:
            m = mean_lls[cell][model_name]
            a = best_alphas[cell][model_name]
            if a is None:
                print(f"  {model_name}: mean_ll={m:.3f}")
            else:
                print(f"  {model_name}: mean_ll={m:.3f}, alpha={a:.3f}")
        
        # Best model
        best_model = max(mean_lls[cell], key=mean_lls[cell].get)
        print(f"  --> Best: {best_model}")


RESULTS (alpha optimized per speaker x listener condition)

INFORMATIVE x CREDULOUS (n=65):
  literal: mean_ll=-7.960
  credulous_T: mean_ll=-9.078, alpha=1.000
  credulous_F: mean_ll=-9.078, alpha=1.000
  vigilant_T: mean_ll=-8.456, alpha=1.000
  vigilant_F: mean_ll=-8.568, alpha=1.000
  --> Best: literal

INFORMATIVE x VIGILANT (n=58):
  literal: mean_ll=-9.964
  credulous_T: mean_ll=-10.945, alpha=1.000
  credulous_F: mean_ll=-10.945, alpha=1.000
  vigilant_T: mean_ll=-10.397, alpha=1.000
  vigilant_F: mean_ll=-10.496, alpha=1.000
  --> Best: literal

INFORMATIVE x NATURALISTIC (n=69):
  literal: mean_ll=-8.484
  credulous_T: mean_ll=-9.522, alpha=1.000
  credulous_F: mean_ll=-9.522, alpha=1.000
  vigilant_T: mean_ll=-8.950, alpha=1.000
  vigilant_F: mean_ll=-9.055, alpha=1.000
  --> Best: literal

PERS_PLUS x CREDULOUS (n=74):
  literal: mean_ll=-14.686
  credulous_T: mean_ll=-15.003, alpha=1.000
  credulous_F: mean_ll=-14.165, alpha=50.000
  vigilant_T: mean_ll=-14.441, alpha=6.9

In [8]:
# ============================================================================
# COMPUTE LOG-LIKELIHOODS (alpha optimized at sequence x listener condition level)
# ============================================================================

listener_conditions = ["credulous", "vigilant", "naturalistic"]
pragmatic_models = ["credulous_T", "credulous_F", "vigilant_T", "vigilant_F"]

# Get all unique sequences
sequences = [
    ("informative", 0), ("informative", 1),
    ("pers_plus", 0), ("pers_plus", 1), ("pers_plus", 2),
    ("pers_minus", 0), ("pers_minus", 1), ("pers_minus", 2),
]

# Store per-participant log-likelihoods for each model x alpha
# Structure: {(seq, listener_cond): {model: {alpha: [ll_per_participant]}}}
ll_by_condition = {}

for seq in sequences:
    speaker_cond, seq_idx = seq
    for listener_cond in listener_conditions:
        cell = (seq, listener_cond)
        ll_by_condition[cell] = {}
        
        subset = df[(df["speaker_condition"] == speaker_cond) & 
                    (df["sequence_idx"] == seq_idx) &
                    (df["listener_belief_condition"] == listener_cond)]
        subset_indices = subset.index.tolist()
        
        if len(subset_indices) == 0:
            continue
        
        # Literal (no alpha)
        lls = []
        for idx in subset_indices:
            row = df.loc[idx]
            key = (row["speaker_condition"], int(row["sequence_idx"]))
            resp_idx = response_indices[df.index.get_loc(idx)]
            
            posteriors = smoothed_lookup[key]["literal"]
            probs = posteriors[np.arange(5), resp_idx]
            lls.append(np.sum(np.log(probs)))
        ll_by_condition[cell]["literal"] = {"none": lls}
        
        # Pragmatic models
        for model_name in pragmatic_models:
            ll_by_condition[cell][model_name] = {}
            
            for alpha in alpha_grid:
                lls = []
                for idx in subset_indices:
                    row = df.loc[idx]
                    key = (row["speaker_condition"], int(row["sequence_idx"]))
                    resp_idx = response_indices[df.index.get_loc(idx)]
                    
                    posteriors = smoothed_lookup[key][model_name][alpha]
                    probs = posteriors[np.arange(5), resp_idx]
                    lls.append(np.sum(np.log(probs)))
                
                ll_by_condition[cell][model_name][alpha] = lls

# Find best alpha for each cell x model (using mean LL)
best_alphas = {}
mean_lls = {}

for seq in sequences:
    for listener_cond in listener_conditions:
        cell = (seq, listener_cond)
        best_alphas[cell] = {}
        mean_lls[cell] = {}
        
        if cell not in ll_by_condition or len(ll_by_condition[cell]) == 0:
            continue
        
        # Literal
        mean_lls[cell]["literal"] = np.mean(ll_by_condition[cell]["literal"]["none"])
        best_alphas[cell]["literal"] = None
        
        # Pragmatic
        for model_name in pragmatic_models:
            best_mean_ll = -np.inf
            best_alpha = None
            
            for alpha in alpha_grid:
                m = np.mean(ll_by_condition[cell][model_name][alpha])
                if m > best_mean_ll:
                    best_mean_ll = m
                    best_alpha = alpha
            
            mean_lls[cell][model_name] = best_mean_ll
            best_alphas[cell][model_name] = best_alpha

# ============================================================================
# RESULTS
# ============================================================================

print("\n" + "=" * 60)
print("RESULTS (alpha optimized per sequence x listener condition)")
print("=" * 60)

for seq in sequences:
    speaker_cond, seq_idx = seq
    for listener_cond in listener_conditions:
        cell = (seq, listener_cond)
        subset = df[(df["speaker_condition"] == speaker_cond) & 
                    (df["sequence_idx"] == seq_idx) &
                    (df["listener_belief_condition"] == listener_cond)]
        n = len(subset)
        
        if n == 0:
            continue
        
        print(f"\n{speaker_cond}[{seq_idx}] x {listener_cond.upper()} (n={n}):")
        
        for model_name in ["literal"] + pragmatic_models:
            m = mean_lls[cell][model_name]
            a = best_alphas[cell][model_name]
            if a is None:
                print(f"  {model_name}: mean_ll={m:.3f}")
            else:
                print(f"  {model_name}: mean_ll={m:.3f}, alpha={a:.3f}")
        
        # Best model
        best_model = max(mean_lls[cell], key=mean_lls[cell].get)
        print(f"  --> Best: {best_model}")


RESULTS (alpha optimized per sequence x listener condition)

informative[0] x CREDULOUS (n=29):
  literal: mean_ll=-9.145
  credulous_T: mean_ll=-10.241, alpha=1.000
  credulous_F: mean_ll=-10.241, alpha=1.000
  vigilant_T: mean_ll=-9.641, alpha=1.000
  vigilant_F: mean_ll=-9.757, alpha=1.000
  --> Best: literal

informative[0] x VIGILANT (n=26):
  literal: mean_ll=-10.911
  credulous_T: mean_ll=-11.696, alpha=1.000
  credulous_F: mean_ll=-11.696, alpha=1.000
  vigilant_T: mean_ll=-11.244, alpha=1.000
  vigilant_F: mean_ll=-11.323, alpha=1.000
  --> Best: literal

informative[0] x NATURALISTIC (n=36):
  literal: mean_ll=-8.544
  credulous_T: mean_ll=-9.635, alpha=1.000
  credulous_F: mean_ll=-9.635, alpha=1.000
  vigilant_T: mean_ll=-9.033, alpha=1.000
  vigilant_F: mean_ll=-9.142, alpha=1.000
  --> Best: literal

informative[1] x CREDULOUS (n=36):
  literal: mean_ll=-7.005
  credulous_T: mean_ll=-8.141, alpha=1.000
  credulous_F: mean_ll=-8.141, alpha=1.000
  vigilant_T: mean_ll=-7.5

In [9]:
# ============================================================================
# DISTRIBUTION DISTANCE APPROACH
# ============================================================================

import numpy as np
from scipy.spatial.distance import jensenshannon
from scipy.stats import pearsonr

listener_conditions = ["credulous", "vigilant", "naturalistic"]
pragmatic_models = ["credulous_T", "credulous_F", "vigilant_T", "vigilant_F"]
all_models = ["literal"] + pragmatic_models

sequences = [
    ("informative", 0), ("informative", 1),
    ("pers_plus", 0), ("pers_plus", 1), ("pers_plus", 2),
    ("pers_minus", 0), ("pers_minus", 1), ("pers_minus", 2),
]

n_theta = 21
n_rounds = 5

# ============================================================================
# COMPUTE EMPIRICAL DISTRIBUTIONS
# ============================================================================

# Structure: {(seq, listener_cond): np.array of shape (5, 21)}
empirical_dists = {}

for seq in sequences:
    speaker_cond, seq_idx = seq
    for listener_cond in listener_conditions:
        cell = (seq, listener_cond)
        
        subset = df[(df["speaker_condition"] == speaker_cond) & 
                    (df["sequence_idx"] == seq_idx) &
                    (df["listener_belief_condition"] == listener_cond)]
        
        if len(subset) == 0:
            continue
        
        # Get responses for each round
        dist = np.zeros((n_rounds, n_theta))
        for r in range(n_rounds):
            responses = subset[f"r{r+1}_effectiveness"].values  # 0-100
            indices = (responses / 5).astype(int)
            indices = np.clip(indices, 0, n_theta - 1)
            
            # Count histogram
            for idx in indices:
                dist[r, idx] += 1
            
            # Normalize to probability
            dist[r] /= dist[r].sum()
        
        empirical_dists[cell] = dist

print(f"Computed empirical distributions for {len(empirical_dists)} cells")

# ============================================================================
# COMPUTE DISTANCES FOR EACH MODEL x ALPHA
# ============================================================================

def compute_distance(empirical, predicted, metric="js"):
    """Compute distance between two distributions."""
    if metric == "js":
        return jensenshannon(empirical, predicted)
    elif metric == "correlation":
        r, _ = pearsonr(empirical, predicted)
        return 1 - r  # Convert to distance (0 = perfect match)
    elif metric == "mse":
        return np.mean((empirical - predicted) ** 2)

# Structure: {(seq, listener_cond): {model: {alpha: mean_distance_across_rounds}}}
distances = {}

for cell in empirical_dists:
    seq, listener_cond = cell
    distances[cell] = {}
    
    emp = empirical_dists[cell]  # shape (5, 21)
    
    # Literal (no alpha)
    pred = smoothed_lookup[seq]["literal"]  # shape (5, 21)
    round_dists = [compute_distance(emp[r], pred[r]) for r in range(n_rounds)]
    distances[cell]["literal"] = {"none": np.mean(round_dists)}
    
    # Pragmatic models
    for model_name in pragmatic_models:
        distances[cell][model_name] = {}
        
        for alpha in alpha_grid:
            pred = smoothed_lookup[seq][model_name][alpha]
            round_dists = [compute_distance(emp[r], pred[r]) for r in range(n_rounds)]
            distances[cell][model_name][alpha] = np.mean(round_dists)

# ============================================================================
# FIND BEST ALPHA AND SUMMARIZE
# ============================================================================

best_alphas = {}
best_distances = {}

for cell in distances:
    best_alphas[cell] = {}
    best_distances[cell] = {}
    
    # Literal
    best_distances[cell]["literal"] = distances[cell]["literal"]["none"]
    best_alphas[cell]["literal"] = None
    
    # Pragmatic
    for model_name in pragmatic_models:
        best_dist = np.inf
        best_alpha = None
        
        for alpha in alpha_grid:
            d = distances[cell][model_name][alpha]
            if d < best_dist:
                best_dist = d
                best_alpha = alpha
        
        best_distances[cell][model_name] = best_dist
        best_alphas[cell][model_name] = best_alpha

# ============================================================================
# RESULTS BY CELL
# ============================================================================

print("\n" + "=" * 60)
print("RESULTS: JS DISTANCE (lower = better)")
print("=" * 60)

for seq in sequences:
    speaker_cond, seq_idx = seq
    for listener_cond in listener_conditions:
        cell = (seq, listener_cond)
        
        if cell not in best_distances:
            continue
        
        subset = df[(df["speaker_condition"] == speaker_cond) & 
                    (df["sequence_idx"] == seq_idx) &
                    (df["listener_belief_condition"] == listener_cond)]
        n = len(subset)
        
        print(f"\n{speaker_cond}[{seq_idx}] x {listener_cond.upper()} (n={n}):")
        
        for model_name in all_models:
            d = best_distances[cell][model_name]
            a = best_alphas[cell][model_name]
            if a is None:
                print(f"  {model_name}: JS={d:.4f}")
            else:
                print(f"  {model_name}: JS={d:.4f}, alpha={a:.3f}")
        
        best_model = min(best_distances[cell], key=best_distances[cell].get)
        print(f"  --> Best: {best_model}")

# ============================================================================
# AGGREGATE BY LISTENER CONDITION
# ============================================================================

print("\n" + "=" * 60)
print("AGGREGATE BY LISTENER CONDITION (mean JS distance)")
print("=" * 60)

for listener_cond in listener_conditions:
    cells = [c for c in best_distances if c[1] == listener_cond]
    
    print(f"\n{listener_cond.upper()} ({len(cells)} cells):")
    
    for model_name in all_models:
        mean_dist = np.mean([best_distances[c][model_name] for c in cells])
        print(f"  {model_name}: {mean_dist:.4f}")
    
    # Best model
    mean_dists = {m: np.mean([best_distances[c][m] for c in cells]) for m in all_models}
    best_model = min(mean_dists, key=mean_dists.get)
    print(f"  --> Best: {best_model}")

Computed empirical distributions for 24 cells

RESULTS: JS DISTANCE (lower = better)

informative[0] x CREDULOUS (n=29):
  literal: JS=0.4621
  credulous_T: JS=0.5135, alpha=1.000
  credulous_F: JS=0.5135, alpha=1.000
  vigilant_T: JS=0.4883, alpha=1.000
  vigilant_F: JS=0.4943, alpha=1.000
  --> Best: literal

informative[0] x VIGILANT (n=26):
  literal: JS=0.4233
  credulous_T: JS=0.4693, alpha=1.000
  credulous_F: JS=0.4693, alpha=1.000
  vigilant_T: JS=0.4459, alpha=1.000
  vigilant_F: JS=0.4512, alpha=1.000
  --> Best: literal

informative[0] x NATURALISTIC (n=36):
  literal: JS=0.4260
  credulous_T: JS=0.4836, alpha=1.000
  credulous_F: JS=0.4836, alpha=1.000
  vigilant_T: JS=0.4559, alpha=1.000
  vigilant_F: JS=0.4626, alpha=1.000
  --> Best: literal

informative[1] x CREDULOUS (n=36):
  literal: JS=0.3699
  credulous_T: JS=0.4336, alpha=1.000
  credulous_F: JS=0.4336, alpha=1.000
  vigilant_T: JS=0.4021, alpha=1.000
  vigilant_F: JS=0.4092, alpha=1.000
  --> Best: literal

info