In [2]:
import sys
from pathlib import Path
sys.path.append(str(Path.cwd() / "notebooks"))  # so we can import _utils from notebooks/

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from _utils import get_data_dir

# Resolve the data directory through the get_data_dir helper imported from _utils above.
DATA_DIR = get_data_dir()
# Bare expression: echoes the resolved path as this cell's output.
DATA_DIR


WindowsPath('C:/Users/AdamR/OneDrive/UCSB/VIU/HonorsThesis/data')

In [3]:
#list(DATA_DIR.glob("**/*"))

In [4]:
# === Load human + model CSVs for 50_50, 80_20, and 100_0 datasets ===
from pathlib import Path
import pandas as pd

# Root folder that is expected to hold one sub-folder per dataset.
ROOT = DATA_DIR  # DATA_DIR is assigned by the first cell of this notebook
# Dataset sub-folder names looked up directly under ROOT by the loading loop below.
FOLDERS = ["50_50", "80_20", "100_0"]

def safe_read_csv(path: Path, **kwargs) -> pd.DataFrame:
    """Read a CSV, retrying with the delimiter-sniffing Python engine on failure.

    The first attempt uses ``pandas.read_csv`` with ``low_memory=False`` and
    ``encoding_errors="ignore"``; either default can be overridden via
    ``kwargs``.  If that attempt fails to parse the file, it is re-read with
    ``engine="python"`` and -- unless the caller already chose a separator --
    ``sep=None`` so that pandas sniffs the delimiter.

    Parameters
    ----------
    path : Path
        Location of the CSV file.
    **kwargs
        Extra keyword arguments forwarded to ``pandas.read_csv``.

    Returns
    -------
    pd.DataFrame
        The parsed table.

    Raises
    ------
    Exception
        Whatever the fallback read raises if it fails as well; errors that are
        not parse/decoding problems (e.g. a missing file) propagate from the
        first attempt without a retry.
    """
    options = {"low_memory": False, "encoding_errors": "ignore", **kwargs}
    try:
        return pd.read_csv(path, **options)
    except (pd.errors.ParserError, UnicodeDecodeError, ValueError):
        fallback = dict(options)
        # These options exist only for the C parser; the Python engine rejects
        # them, which previously made the fallback itself raise.
        for c_only_option in ("low_memory", "float_precision"):
            fallback.pop(c_only_option, None)
        fallback["engine"] = "python"
        # Respect a separator the caller picked; otherwise let pandas sniff it.
        if "sep" not in fallback and "delimiter" not in fallback:
            fallback["sep"] = None
        return pd.read_csv(path, **fallback)

def load_dataset(folder: Path) -> dict:
    """Load ``human_data.csv`` and every model decision CSV inside ``decisions/``.

    Parameters
    ----------
    folder : Path
        Dataset folder containing ``human_data.csv`` and a ``decisions``
        sub-folder with one ``*.csv`` per model.

    Returns
    -------
    dict
        ``"human"``       -> DataFrame read from ``human_data.csv``
        ``"human_path"``  -> Path of that file
        ``"models"``      -> ``{csv stem: DataFrame}`` in sorted path order
        ``"model_paths"`` -> ``{csv stem: Path}`` with the same keys and order

    Raises
    ------
    FileNotFoundError
        If ``human_data.csv`` or the ``decisions`` sub-folder is missing.
    """
    human_path = folder / "human_data.csv"
    decisions_dir = folder / "decisions"

    if not human_path.exists():
        raise FileNotFoundError(f"Missing human_data.csv in {folder}")
    if not decisions_dir.exists():
        raise FileNotFoundError(f"Missing 'decisions/' subfolder in {folder}")

    human_df = safe_read_csv(human_path)

    # List the directory once so names, frames and paths all come from the same
    # snapshot (the previous version globbed twice and zipped the two listings).
    model_paths = {
        csv_path.stem: csv_path
        for csv_path in sorted(decisions_dir.glob("*.csv"))
    }
    models = {
        model_name: safe_read_csv(csv_path)
        for model_name, csv_path in model_paths.items()
    }

    return {
        "human": human_df,
        "human_path": human_path,
        "models": models,
        "model_paths": model_paths,
    }

# === Main loading loop ===
# Column layout of the summary index.  Naming the columns up front keeps the
# table well-formed (and sortable) even when no dataset folder was found.
INDEX_COLUMNS = ["dataset", "kind", "name", "n_rows", "n_cols", "path"]

datasets: dict[str, dict] = {}   # folder name -> bundle returned by load_dataset
records = []                     # one summary row per loaded CSV

for name in FOLDERS:
    folder = ROOT / name
    if not folder.exists():
        # Missing folders are skipped (with a notice) rather than aborting the load
        print(f"⚠️ Warning: Folder '{name}' not found under {ROOT}")
        continue

    data_bundle = load_dataset(folder)
    datasets[name] = data_bundle

    # Record human file
    h = data_bundle["human"]
    records.append({
        "dataset": name,
        "kind": "human",
        "name": "human_data",
        "n_rows": len(h),
        "n_cols": h.shape[1],
        "path": str(data_bundle["human_path"].resolve())
    })

    # Record model files; the path comes from the bundle instead of being
    # rebuilt from the folder layout a second time.
    for mname, mdf in data_bundle["models"].items():
        records.append({
            "dataset": name,
            "kind": "model",
            "name": mname,
            "n_rows": len(mdf),
            "n_cols": mdf.shape[1],
            "path": str(data_bundle["model_paths"][mname].resolve())
        })

# === Summary table ===
# One row per file, ordered by dataset, then kind, then name.
assignment_index = (
    pd.DataFrame(records, columns=INDEX_COLUMNS)
      .sort_values(["dataset", "kind", "name"])
      .reset_index(drop=True)
)

print(f"✅ Loaded datasets: {list(datasets.keys())}")


✅ Loaded datasets: ['50_50', '80_20', '100_0']


In [5]:
#column titles

def get_csv_columns(csv_path):
    """
    List the column names of a table.

    ``csv_path`` may be a pandas DataFrame (its columns are returned directly)
    or a string/Path pointing at a CSV file (only the header is parsed).
    Raises FileNotFoundError when the file does not exist.
    """
    if not isinstance(csv_path, pd.DataFrame):
        location = Path(csv_path)
        if location.exists():
            # nrows=0 parses the header line only
            header_only = pd.read_csv(location, nrows=0, encoding_errors="ignore")
            return [*header_only.columns]
        raise FileNotFoundError(f"File not found: {location}")

    # Already a DataFrame: nothing to read from disk
    return [*csv_path.columns]



In [6]:
#Get each dataset into df variables

# Human response tables, one per dataset folder loaded by the loop above.
human50_50 = datasets["50_50"]["human"]
human80_20 = datasets["80_20"]["human"]
human100_0 = datasets["100_0"]["human"]
# get_csv_columns also accepts a DataFrame; here it simply lists the frame's columns.
human50_50_columns = get_csv_columns(human50_50)

# Stack the three human tables row-wise with a fresh 0..n-1 index.
human_all = pd.concat([human50_50, human80_20, human100_0], ignore_index=True)

# NOTE(review): despite its name this holds the dict of ALL model DataFrames for
# the 50_50 dataset (keyed by CSV stem), not one model's table -- confirm which
# key was intended before using it as a DataFrame.
claude35haiku = datasets["50_50"]["models"]

# Quick look at the first rows and the column names of the 50_50 human table.
display(human50_50.head(5))
print(human50_50_columns)

Unnamed: 0,stimID,condition,response,side_selected,cue_points,line1_angle,line2_angle,valid_cue,TP,participantID
0,100,condition_2,6,1,2,14.314827,1.921956,False,True,SA
1,845,condition_2,5,1,2,15.054317,4.22223,False,True,SA
2,245,condition_2,4,1,1,14.314827,6.508956,True,True,SA
3,72,condition_2,4,2,2,8.775056,15.054317,True,True,SA
4,469,condition_2,4,2,2,4.22223,19.885165,True,True,SA


['stimID', 'condition', 'response', 'side_selected', 'cue_points', 'line1_angle', 'line2_angle', 'valid_cue', 'TP', 'participantID']


In [23]:
import pandas as pd
import numpy as np

# HUMAN_DF expected columns:
# ['stimID', 'participantID', 'response', 'TP', 'condition']
# response is a 1–6 confidence rating (>= threshold => "present")

def _compute_metrics(g, choice_col="present_choice"):
    n_signal = int((g["TP"] == 1).sum())
    n_noise  = int((g["TP"] == 0).sum())
    n_total  = int(len(g))
    hit_rate = float(np.mean(g.loc[g["TP"] == 1, choice_col])) if n_signal > 0 else np.nan
    fa_rate  = float(np.mean(g.loc[g["TP"] == 0, choice_col])) if n_noise  > 0 else np.nan
    acc      = float(np.mean(g[choice_col] == g["TP"])) if n_total > 0 else np.nan
    return pd.Series({
        "hit_rate": hit_rate, "fa_rate": fa_rate, "accuracy": acc,
        "n_signal": n_signal, "n_noise": n_noise, "n_total": n_total
    })

def human_metrics_per_participant(HUMAN_DF: pd.DataFrame, threshold: int = 4):
    df = HUMAN_DF.copy()
    df["TP"] = pd.to_numeric(df["TP"], errors="coerce").astype(int)
    df["response"] = pd.to_numeric(df["response"], errors="coerce")
    df["present_choice"] = (df["response"] >= threshold).astype(int)

    # Per-condition per-participant
    per_cond = (
        df.groupby(["participantID", "condition"], as_index=False)
          .apply(lambda g: _compute_metrics(g))
          .reset_index(drop=True)
          .sort_values(["participantID", "condition"])
    )

    # Overall (across all conditions) per-participant
    overall = (
        df.groupby(["participantID"], as_index=False)
          .apply(lambda g: _compute_metrics(g))
          .reset_index(drop=True)
          .assign(condition="ALL")
          .sort_values(["participantID"])
    )

    return per_cond, overall
# Score every participant on the pooled human data, treating ratings >= 4 as "present",
# then show the across-condition summary.
human_per_cond, human_overall = human_metrics_per_participant(human_all, threshold=4)
display(human_overall[["participantID", "hit_rate", "fa_rate", "accuracy"]])


  .apply(lambda g: _compute_metrics(g))
  .apply(lambda g: _compute_metrics(g))


Unnamed: 0,participantID,hit_rate,fa_rate,accuracy
0,AG,0.824667,0.500667,0.662
1,AW,0.703333,0.461333,0.621
2,AZ,0.646,0.586,0.53
3,BC,0.674667,0.603333,0.535667
4,CY,0.576,0.501333,0.537333
5,GS,0.53,0.456,0.537
6,HG,0.694667,0.379333,0.657667
7,JH,0.631333,0.279333,0.676
8,KM,0.647333,0.205333,0.721
9,KZ,0.372,0.138,0.617


In [8]:
# Majority-vote accuracy for three HUMAN DataFrames, then combined
# Expects in-memory DataFrames:
#   human50_50, human80_20, human100_0
#
# Assumed schema in each df (same as before):
#   stimID, condition, response (1–6), side_selected, cue_points,
#   line1_angle, line2_angle, valid_cue (0|1), TP (0|1), participantID

import pandas as pd
import numpy as np

def compute_trial_table(df: pd.DataFrame, cond_label: str | None = None, tie_rule: str = ">"):
    """
    Build a trial-level table with majority-vote prediction per (stimID, condition),
    with truth defined per (stimID, condition). Raw uniqueness is enforced at
    (stimID, condition, participantID).
    
    tie_rule: ">" means ties (==0.5) -> 0 (absent); ">=" means ties -> 1 (present).
    If cond_label is provided, it overrides/sets the 'condition' for all trials in df.
    Returns: (trial_table: pd.DataFrame, overall_acc: float)
    """
    required = {
        "stimID", "response", "TP", "participantID",
        "side_selected", "cue_points", "line1_angle", "line2_angle", "valid_cue"
    }
    missing = required - set(df.columns)
    if missing:
        raise KeyError(f"Missing required columns: {sorted(missing)}")

    df = df.copy()

    # Ensure 'condition' present (use override if provided)
    if cond_label is not None:
        df["condition"] = cond_label
    elif "condition" not in df.columns:
        raise KeyError("Column 'condition' missing and no cond_label provided.")

    # Parse response (map 1–6 -> binary present/absent)
    if not np.issubdtype(df["response"].dtype, np.number):
        df["response"] = pd.to_numeric(df["response"], errors="coerce")
    if df["response"].isna().any():
        bad = df.loc[df["response"].isna(), ["stimID", "participantID"]].drop_duplicates()
        raise ValueError(
            f"Some 'response' values could not be parsed as numbers (1–6). "
            f"Examples (stimID, participantID): {bad.head(5).to_dict('records')}"
        )
    df["human_decision"] = (df["response"] >= 4).astype(int)

    # --- Enforce raw uniqueness at (stimID, condition, participantID) ---
    # If a participant has multiple rows for the same (stimID, condition),
    # collapse to that participant's MEAN binary decision for that trial.
    per_participant = (
        df.groupby(["stimID", "condition", "participantID"], as_index=False)
          .agg(human_decision=("human_decision", "mean"),
               TP=("TP", lambda s: s.mode().iloc[0] if not s.mode().empty else int(round(s.mean()))))
    )
    # human_decision is in [0,1]; keep as float until voting

    # Majority vote per (stimID, condition) across participants
    prop_present = (
        per_participant.groupby(["stimID", "condition"], as_index=False)["human_decision"]
                       .mean()
                       .rename(columns={"human_decision": "prop_present"})
    )
    if tie_rule not in {">", ">="}:
        raise ValueError("tie_rule must be '>' or '>='.")
    prop_present["pred"] = (
        (prop_present["prop_present"] >= 0.5) if tie_rule == ">=" else (prop_present["prop_present"] > 0.5)
    ).astype(int)

    # Truth per (stimID, condition) from raw df (mode fallback)
    tp_per_trial = df.groupby(["stimID", "condition"])["TP"]
    tp_mode = tp_per_trial.agg(lambda s: s.mode().iloc[0] if not s.mode().empty else int(round(s.mean()))).astype(int)
    tp_incons = tp_per_trial.nunique()
    n_bad = int((tp_incons > 1).sum())
    if n_bad > 0:
        print(f"Warning: {n_bad} (stimID, condition) group(s) had inconsistent TP; using per-group mode.")

    # Number of unique participants who voted per (stimID, condition)
    n_voters = per_participant.groupby(["stimID", "condition"])["participantID"].nunique().rename("n_participants")

    # Assemble trial-level table
    trial_table = (
        prop_present.merge(tp_mode.rename("truth"), on=["stimID", "condition"], how="left")
                    .merge(n_voters, on=["stimID", "condition"], how="left")
                    .drop(columns=["prop_present"])
                    .sort_values(["condition", "stimID"])
                    .reset_index(drop=True)
    )

    # Accuracy
    overall_acc = (trial_table["pred"] == trial_table["truth"]).mean()

    # Composite key to protect uniqueness across conditions
    trial_table["trial_key"] = trial_table["condition"].astype(str) + "|" + trial_table["stimID"].astype(str)

    return trial_table, float(overall_acc)


# ---- Majority vote per dataset (each frame is relabelled with its dataset name) ----
tt_50, acc_50 = compute_trial_table(human50_50, tie_rule=">", cond_label="50/50")
tt_80, acc_80 = compute_trial_table(human80_20, tie_rule=">", cond_label="80/20")
tt_100, acc_100 = compute_trial_table(human100_0, tie_rule=">", cond_label="100/0")

# One report line per dataset, emitted with a single print
print("\n".join(
    f"Human majority-vote accuracy — {label}: {acc_value:.4f} "
    f"(unique trials: {table[['stimID','condition']].drop_duplicates().shape[0]})"
    for label, table, acc_value in (
        ("50/50", tt_50, acc_50),
        ("80/20", tt_80, acc_80),
        ("100/0", tt_100, acc_100),
    )
))

# ---- Pool the three trial tables ----
combined = pd.concat((tt_50, tt_80, tt_100), ignore_index=True)

# trial_key ("condition|stimID") should be unique after pooling; report if not
if combined["trial_key"].duplicated().any():
    dups = combined.loc[combined["trial_key"].duplicated(), "trial_key"].nunique()
    print(f"Note: {dups} duplicated trial_key values found in combined data.")

# Share of pooled trials where the majority vote equals the truth label
overall_acc = combined["pred"].eq(combined["truth"]).mean()

# Per-condition trial count, mean number of voters and accuracy
by_condition = (
    combined.assign(correct=combined["pred"].eq(combined["truth"]).astype(int))
            .groupby("condition", as_index=False)
            .agg(trials=("stimID", "nunique"),
                 participants_per_trial=("n_participants", "mean"),
                 accuracy=("correct", "mean"))
            .sort_values("condition")
)

print(f"\nHuman majority-vote accuracy — COMBINED: {overall_acc:.4f}")
display(by_condition)


Human majority-vote accuracy — 50/50: 0.6820 (unique trials: 1000)
Human majority-vote accuracy — 80/20: 0.6650 (unique trials: 1000)
Human majority-vote accuracy — 100/0: 0.7810 (unique trials: 1000)

Human majority-vote accuracy — COMBINED: 0.7093


Unnamed: 0,condition,trials,participants_per_trial,accuracy
0,100/0,1000,12.0,0.781
1,50/50,1000,12.0,0.682
2,80/20,1000,12.0,0.665


In [9]:
# Majority-with-Exceptions accuracy for HUMAN trials
# --------------------------------------------------
# Expected columns in `human_df` (single DF or concatenation of all conditions):
#   stimID, condition (label as stored in the CSVs, e.g. condition_1|condition_2|condition_3), response (1–6), TP (0|1), participantID, ...

import pandas as pd
import numpy as np

# --- CONFIG ---
present_exception_threshold = 6    # set to 5 to "leave room for 5 and 6"
absent_exception_threshold  = None # e.g., 1 or 2 if you want an ABSENT-side exception
exception_priority = "present"     # "present" or "absent"
tie_rule = ">"                     # ">" -> ties -> ABSENT; ">=" -> ties -> PRESENT

# Fail fast on a mistyped tie rule: the decision function further down treats
# anything other than ">=" as ">", so a typo would otherwise go unnoticed.
if tie_rule not in {">", ">="}:
    raise ValueError("tie_rule must be '>' or '>='.")

# Set this to your DataFrame name if different
human_df = human_all  # or pd.concat([human50_50, human80_20, human100_0], ignore_index=True)

# --- Sanity checks ---
required = {"stimID", "condition", "response", "TP", "participantID"}
missing = required - set(human_df.columns)
if missing:
    raise KeyError(f"Missing required columns: {sorted(missing)}")

# Work on a copy so the columns added below never land on human_all
df = human_df.copy()

# Numeric response
if not np.issubdtype(df["response"].dtype, np.number):
    df["response"] = pd.to_numeric(df["response"], errors="coerce")
if df["response"].isna().any():
    bad = df.loc[df["response"].isna(), ["stimID", "condition", "participantID"]].drop_duplicates()
    raise ValueError(
        "Some 'response' values could not be parsed as numbers (1–6). "
        f"Examples (stimID, condition, participantID): {bad.head(5).to_dict('records')}"
    )

# Binary decision for fallback majority (ratings >= 4 count as "present")
df["dec_bin"] = (df["response"] >= 4).astype(int)

# --- Collapse to per-participant votes to enforce uniqueness at (stimID, condition, participantID) ---
# For exception rules:
#   - present-exception looks for any response >= threshold  -> use per-participant MAX
#   - absent-exception  looks for any response <= threshold  -> use per-participant MIN
# Fallback majority uses mean of dec_bin per participant to avoid double-counting duplicates.
per_participant = (
    df.groupby(["stimID", "condition", "participantID"], as_index=False)
      .agg(resp_max=("response", "max"),
           resp_min=("response", "min"),
           dec_bin_mean=("dec_bin", "mean"))
)

# --- Truth per (stimID, condition) from the raw df (mode fallback) ---
tp_per_sc = df.groupby(["stimID", "condition"])["TP"]
tp_mode = tp_per_sc.agg(lambda s: s.mode().iloc[0] if not s.mode().empty else int(round(s.mean()))).astype(int)
# Count trials whose raw rows disagree about TP, so the mode above is not silent
tp_incons = tp_per_sc.nunique()
n_bad = int((tp_incons > 1).sum())
if n_bad > 0:
    print(f"Warning: {n_bad} (stimID, condition) group(s) had inconsistent TP; using per-group mode.")

# --- Group-level decision per (stimID, condition) ---
def decide_group_pred_from_pp(g_pp: pd.DataFrame) -> pd.Series:
    """Group decision for one (stimID, condition) trial from per-participant aggregates.

    Reads the cell-level settings ``present_exception_threshold``,
    ``absent_exception_threshold``, ``exception_priority`` and ``tie_rule``.
    A present-exception (any ``resp_max`` at or above its threshold) forces
    pred=1, an absent-exception (any ``resp_min`` at or below its threshold)
    forces pred=0; when both fire, ``exception_priority`` picks the winner, and
    any other priority value falls back to the majority vote.  With no decisive
    exception the mean of ``dec_bin_mean`` is compared with 0.5 under ``tie_rule``.
    """
    # Exception flags; a threshold of None switches that exception off
    if present_exception_threshold is not None:
        has_present_exc = (g_pp["resp_max"] >= present_exception_threshold).any()
    else:
        has_present_exc = False
    if absent_exception_threshold is not None:
        has_absent_exc = (g_pp["resp_min"] <= absent_exception_threshold).any()
    else:
        has_absent_exc = False

    majority_share = g_pp["dec_bin_mean"].mean()

    # Which exception, if any, settles the trial
    if has_present_exc and has_absent_exc:
        winner = exception_priority if exception_priority in ("present", "absent") else "none"
    elif has_present_exc:
        winner = "present"
    elif has_absent_exc:
        winner = "absent"
    else:
        winner = "none"

    if winner == "present":
        pred = 1
    elif winner == "absent":
        pred = 0
    elif tie_rule == ">=":
        # Majority fallback, ties go to PRESENT
        pred = int(majority_share >= 0.5)
    else:
        # Majority fallback, ties go to ABSENT
        pred = int(majority_share > 0.5)

    return pd.Series({
        "pred": int(pred),
        "n_participants": int(g_pp["participantID"].nunique()),
        "present_exception": bool(has_present_exc),
        "absent_exception": bool(has_absent_exc),
        "exception_type": winner,
        "prop_present_majority": float(majority_share)
    })

# One decision per (stimID, condition).  Only the columns the decision function
# reads are handed to apply(); this keeps the grouping columns out of the
# applied frame (pandas >= 2.2 emits a DeprecationWarning otherwise), and
# reset_index() turns the group keys back into ordinary columns.
group_decisions = (
    per_participant.groupby(["stimID", "condition"])[
        ["participantID", "resp_max", "resp_min", "dec_bin_mean"]
    ]
    .apply(decide_group_pred_from_pp)
    .reset_index()
)

# Attach truth
trial_table = group_decisions.merge(tp_mode.rename("truth"), on=["stimID", "condition"], how="left")

# Diagnostics & accuracy
trial_table["correct"] = (trial_table["pred"] == trial_table["truth"]).astype(int)
trial_table["trial_key"] = trial_table["condition"].astype(str) + "|" + trial_table["stimID"].astype(str)

overall_acc = trial_table["correct"].mean()

# Per-condition accuracy plus how often an exception (rather than the majority)
# decided the trial
by_condition = (
    trial_table.groupby("condition", as_index=False)
               .agg(trials=("stimID", "nunique"),
                    participants_per_trial=("n_participants", "mean"),
                    accuracy=("correct", "mean"),
                    pct_exception=("exception_type", lambda s: (s != "none").mean()),
                    pct_present_exc=("present_exception", "mean"),
                    pct_absent_exc=("absent_exception", "mean"))
               .sort_values("condition")
)

# Exception diagnostics (counts & shares)
exc_counts = trial_table["exception_type"].value_counts(dropna=False).rename("count")
exc_share  = (exc_counts / len(trial_table)).rename("share")

print(
    f"Majority-with-Exceptions (present≥{present_exception_threshold}"
    f"{'' if absent_exception_threshold is None else f', absent≤{absent_exception_threshold}'}; "
    f"priority={exception_priority}; tie='{tie_rule}')"
)
print(f"Overall accuracy: {overall_acc:.4f} | Trials (stimID×condition): {trial_table[['stimID','condition']].drop_duplicates().shape[0]}")

display(by_condition)
display(pd.concat([exc_counts, exc_share], axis=1))


Majority-with-Exceptions (present≥6; priority=present; tie='>')
Overall accuracy: 0.6433 | Trials (stimID×condition): 3000


  .apply(decide_group_pred_from_pp)


Unnamed: 0,condition,trials,participants_per_trial,accuracy,pct_exception,pct_present_exc,pct_absent_exc
0,condition_1,1000,12.0,0.627,0.762,0.762,0.0
1,condition_2,1000,12.0,0.593,0.821,0.821,0.0
2,condition_3,1000,12.0,0.71,0.735,0.735,0.0


Unnamed: 0_level_0,count,share
exception_type,Unnamed: 1_level_1,Unnamed: 2_level_1
present,2318,0.772667
none,682,0.227333


In [10]:
# Averaging Rule accuracy for HUMAN trials
# ----------------------------------------
# Schema expected:
#   stimID, condition (label as stored in the CSVs, e.g. condition_1|condition_2|condition_3), response (1–6), TP (0|1), participantID
#
# Rule:
#   - Collapse to per-participant mean response for each (stimID, condition).
#   - Compute the mean across participants (mean_conf) for each (stimID, condition).
#   - If mean_conf > criterion → predict PRESENT; else → ABSENT (tie control via tie_rule).

import pandas as pd
import numpy as np

# === Config ===
# NOTE(review): `human_df`, `df`, `per_participant`, `tp_mode`, `overall_acc`,
# `by_condition` and `tie_rule` are also assigned by other decision-rule cells in
# this notebook, so their values depend on which cell ran last.
human_df = human_all.copy()        # or pd.concat([human50_50, human80_20, human100_0], ignore_index=True)
tie_rule = ">"                     # ">" = strict, ">=" = equal means -> PRESENT

# Criterion options:
# 1) Fixed numeric threshold (e.g., 3.5)
# 2) Adaptive:
#       criterion_strategy = "global_median"        -> one global threshold
#       criterion_strategy = "per_condition_median" -> threshold per condition
criterion = 3.5
criterion_strategy = None          # set to "global_median" or "per_condition_median" to override `criterion`

# === Sanity check ===
required = {"stimID", "condition", "response", "TP", "participantID"}
missing = required - set(human_df.columns)
if missing:
    raise KeyError(f"Missing required columns: {sorted(missing)}")

# Second copy: human_df is itself already a copy of human_all.
df = human_df.copy()

# Convert response to numeric if needed; anything unparseable becomes NaN and is
# rejected right below rather than being averaged away.
if not np.issubdtype(df["response"].dtype, np.number):
    df["response"] = pd.to_numeric(df["response"], errors="coerce")
if df["response"].isna().any():
    bad = df.loc[df["response"].isna(), ["stimID", "condition", "participantID"]].drop_duplicates()
    raise ValueError(
        "Some 'response' values could not be parsed as numbers (1–6). "
        f"Examples (stimID, condition, participantID): {bad.head(5).to_dict('records')}"
    )

# === 1) Enforce uniqueness at (stimID, condition, participantID) via per-participant mean ===
per_participant = (
    df.groupby(["stimID", "condition", "participantID"], as_index=False)
      .agg(resp_mean=("response", "mean"))
)

# === 2) Trial-level (stimID, condition) mean across participants ===
# Every participant counts once per trial because duplicates were averaged in step 1.
trial_mean = (
    per_participant.groupby(["stimID", "condition"], as_index=False)
                   .agg(mean_conf=("resp_mean", "mean"),
                        n_participants=("participantID", "nunique"))
)

# Attach truth via mode (robust to accidental inconsistencies)
tp_mode = (
    df.groupby(["stimID", "condition"])["TP"]
      .agg(lambda s: s.mode().iloc[0] if not s.mode().empty else int(round(s.mean())))
      .astype(int)
      .rename("truth")
      .reset_index()
)
trial_mean = trial_mean.merge(tp_mode, on=["stimID", "condition"], how="left")

# === 3) Choose criterion (fixed or adaptive) ===
# Whatever the strategy, the threshold ends up in a per-row "criterion" column.
if criterion_strategy is None:
    # Fixed numeric threshold for all conditions
    trial_mean["criterion"] = float(criterion)
elif criterion_strategy == "global_median":
    # One global threshold based on trial-level mean_conf
    global_thresh = float(trial_mean["mean_conf"].median())
    trial_mean["criterion"] = global_thresh
elif criterion_strategy == "per_condition_median":
    # Per-condition thresholds based on trial-level mean_conf within each condition
    cond_thresh = (
        trial_mean.groupby("condition")["mean_conf"].median().rename("criterion").reset_index()
    )
    trial_mean = trial_mean.merge(cond_thresh, on="condition", how="left")
else:
    raise ValueError("criterion_strategy must be None, 'global_median', or 'per_condition_median'.")

# === 4) Apply Averaging Rule decision ===
# tie_rule is only validated here, i.e. after all aggregation above has run.
if tie_rule == ">=":
    trial_mean["pred"] = (trial_mean["mean_conf"] >= trial_mean["criterion"]).astype(int)
elif tie_rule == ">":
    trial_mean["pred"] = (trial_mean["mean_conf"] > trial_mean["criterion"]).astype(int)
else:
    raise ValueError("tie_rule must be '>' or '>='.")

# === 5) Evaluate accuracy & summarize ===
trial_mean["correct"] = (trial_mean["pred"] == trial_mean["truth"]).astype(int)
# "condition|stimID" composite key, one per trial row
trial_mean["trial_key"] = trial_mean["condition"].astype(str) + "|" + trial_mean["stimID"].astype(str)

overall_acc = trial_mean["correct"].mean()

# Per-condition trial count, voters per trial, accuracy, spread of the mean
# rating and the (mean) criterion that was applied.
by_condition = (
    trial_mean.groupby("condition", as_index=False)
              .agg(trials=("stimID", "nunique"),
                   participants_per_trial=("n_participants", "mean"),
                   accuracy=("correct", "mean"),
                   mean_conf_mean=("mean_conf", "mean"),
                   mean_conf_std=("mean_conf", "std"),
                   criterion_used=("criterion", "mean"))
              .sort_values("condition")
)

# === Output ===
crit_msg = f"fixed={criterion}" if criterion_strategy is None else f"adaptive={criterion_strategy}"
print(f"Averaging Rule (criterion: {crit_msg}, tie='{tie_rule}')")
print(f"Overall accuracy: {overall_acc:.4f} | Trials (stimID×condition): {trial_mean[['stimID','condition']].drop_duplicates().shape[0]}")

display(by_condition)


Averaging Rule (criterion: fixed=3.5, tie='>')
Overall accuracy: 0.7277 | Trials (stimID×condition): 3000


Unnamed: 0,condition,trials,participants_per_trial,accuracy,mean_conf_mean,mean_conf_std,criterion_used
0,condition_1,1000,12.0,0.686,3.65725,0.855552,3.5
1,condition_2,1000,12.0,0.692,3.564917,0.735121,3.5
2,condition_3,1000,12.0,0.805,3.477667,0.99107,3.5


In [11]:
# Weighted Linear Combination (WLC) rule for HUMAN trials
# -------------------------------------------------------
# Expected columns in HUMAN_DF:
#   stimID, participantID, response (1–6), TP (0/1), condition
#
# Uniqueness & aggregation:
#   - Raw uniqueness enforced at (stimID, condition, participantID) by averaging duplicates.
#   - Trial rows are (stimID, condition) so each stimID yields 3 trials (one per condition).
#
# Method:
#   1) Build trial x participant matrix of responses.
#   2) Compute participant-wise Δμ = mean(response|TP=1) - mean(response|TP=0).
#   3) Estimate covariance Σ across participants (responses centered by participant).
#   4) Compute weights w = (Σ + λI)^(-1) Δμ  (ridge-regularized).
#   5) Decision variable per trial: D = X_centered @ w.
#   6) Sweep a scalar criterion to maximize accuracy (tie behavior via TIE_RULE).
#   7) Accuracy overall and by condition; return diagnostics.

import numpy as np
import pandas as pd

# ======== CONFIG ========
HUMAN_DF = human_all.copy()   # or pd.concat([human50_50, human80_20, human100_0], ignore_index=True)
CENTER_BY_PARTICIPANT = True  # center each participant's ratings (remove response bias)
STANDARDIZE = False           # optional z-score per participant (usually not needed for WLC)
REG_LAMBDA = 1e-6             # ridge regularization strength added to diag(Σ)
TIE_RULE = ">"                # '>' -> equal-to-threshold -> absent ; '>=' -> equal -> present
# ========================

# --- Sanity checks ---
req = {"stimID", "participantID", "response", "TP", "condition"}
missing = req - set(HUMAN_DF.columns)
if missing:
    raise KeyError(f"Missing required columns: {sorted(missing)}")

# Second copy: HUMAN_DF is itself already a copy of human_all.
df = HUMAN_DF.copy()

# Coerce dtypes
# NOTE(review): a TP value that cannot be parsed becomes NaN under errors="coerce",
# and the int cast then fails -- unlike `response`, there is no explicit check with
# a readable message here.
df["TP"] = pd.to_numeric(df["TP"], errors="coerce").astype(int)
df["response"] = pd.to_numeric(df["response"], errors="coerce")
if df["response"].isna().any():
    bad_n = int(df["response"].isna().sum())
    bad = df.loc[df["response"].isna(), ["stimID", "condition", "participantID"]].drop_duplicates()
    raise ValueError(
        f"{bad_n} response value(s) could not be parsed as numeric. "
        f"Examples: {bad.head(5).to_dict('records')}"
    )

# --- Enforce raw uniqueness at (stimID, condition, participantID) ---
# If duplicates exist for the same participant & (stimID, condition), average them.
df_uni = (
    df.groupby(["stimID", "condition", "participantID"], as_index=False)
      .agg(response=("response", "mean"))
)

# --- 1) Trial x participant matrix (responses) at (stimID, condition) ---
# Rows are (stimID, condition) pairs, columns are participants.  df_uni already has
# one row per cell, so aggfunc="mean" just passes that value through; a participant
# with no row for a trial leaves NaN.
R = df_uni.pivot_table(
        index=["stimID", "condition"],
        columns="participantID",
        values="response",
        aggfunc="mean"
    ).sort_index()

# --- Truth per (stimID, condition) from raw df (mode fallback) ---
# reindex(R.index) puts the truth labels in the same row order as R.
truth_sc = (
    df.groupby(["stimID", "condition"])["TP"]
      .agg(lambda s: s.mode().iloc[0] if not s.mode().empty else int(round(s.mean())))
      .astype(int)
      .reindex(R.index)
)

# --- 2) Participant discrimination Δμ ---
# Compute per-participant means split by TP on the raw df (not R), then difference.
means_by_tp = (
    df.groupby(["participantID", "TP"])["response"]
      .mean()
      .unstack("TP")  # columns {0,1}
)
# A TP class that never occurs gets a NaN column so the subtraction still works
if 0 not in means_by_tp.columns: means_by_tp[0] = np.nan
if 1 not in means_by_tp.columns: means_by_tp[1] = np.nan
# Participants lacking one of the two classes get Δμ = 0, i.e. no discrimination
Delta_mu = (means_by_tp[1] - means_by_tp[0]).fillna(0.0)

# Align Δμ to R's participant order
Delta_mu = Delta_mu.reindex(R.columns).fillna(0.0).to_numpy(dtype=float)  # shape (P,)

# --- 3) Prepare data matrix for covariance ---
# Per-participant (column-wise) mean over the ratings that were actually observed
col_means = R.mean(axis=0, skipna=True)

# Impute a missing rating with that participant's own mean BEFORE centering.
# With centering on this is the same as the former "fill with 0 after centering";
# with centering off it avoids injecting a literal 0, which lies outside the
# rating scale and would distort Σ and D.
X = R.fillna(col_means)

if CENTER_BY_PARTICIPANT:
    X = X.subtract(col_means, axis=1)

if STANDARDIZE:
    # Spread comes from the observed ratings only (imputed cells excluded);
    # a zero spread becomes NaN so the division cannot blow up.
    col_stds = R.std(axis=0, ddof=1, skipna=True).replace(0, np.nan)
    X = X.divide(col_stds, axis=1)

# Whatever is still NaN (a participant with no ratings at all, or zero spread
# under STANDARDIZE) carries no information and is set to 0.
X = X.fillna(0.0)

# Convert to numpy (trials x participants)
X_np = X.to_numpy(dtype=float)     # shape (N, P)
N, P = X_np.shape

# --- 4) Covariance Σ and weights w ---
Sigma = np.cov(X_np, rowvar=False, ddof=1)  # (P, P)
# Ridge term keeps the system solvable when Σ is (near-)singular
Sigma_reg = Sigma + REG_LAMBDA * np.eye(P)

try:
    w = np.linalg.solve(Sigma_reg, Delta_mu)   # (P,)
except np.linalg.LinAlgError:
    # Still singular: fall back to the pseudo-inverse solution
    w = np.linalg.pinv(Sigma_reg) @ Delta_mu

# --- 5) Decision variable per trial ---
D = X_np @ w  # shape (N,)

# --- 6) Fit a scalar criterion by sweeping to maximize accuracy ---
# NOTE(review): the criterion is chosen on the same trials (and truth labels) that
# are scored further down, so the accuracy reported for this rule is an in-sample
# figure.
y = truth_sc.to_numpy()
# np.unique returns the distinct decision values in ascending order
D_sorted = np.unique(D)

if len(D_sorted) == 1:
    # Every trial has the same D, so no threshold can separate them:
    # predict the more frequent truth label for all trials (ties -> 1).
    crit_best = D_sorted[0]
    pred = np.full_like(D, fill_value=int(y.mean() >= 0.5))
else:
    # Candidate thresholds: midpoints between neighbouring distinct D values, plus
    # -inf / +inf, which give the all-present / all-absent predictions.
    mids = (D_sorted[:-1] + D_sorted[1:]) / 2.0
    cands = np.concatenate(([-np.inf], mids, [np.inf]))

    best_acc, crit_best, pred = -1.0, None, None
    for c in cands:
        if TIE_RULE == ">=":
            p = (D >= c).astype(int)
        else:
            # Any TIE_RULE other than ">=" is treated as ">"
            p = (D >  c).astype(int)
        acc = (p == y).mean()
        # Strict ">" keeps the first (lowest) candidate among equally accurate ones
        if acc > best_acc:
            best_acc, crit_best, pred = acc, c, p

# --- Trial table: one row per (stimID, condition), keys restored from X's index ---
trial_table = pd.DataFrame(
    {"D": D, "pred": pred.astype(int), "truth": y},
    index=X.index,
).reset_index()
trial_table["correct"] = (trial_table["pred"] == trial_table["truth"]).astype(int)
trial_table["trial_key"] = trial_table["condition"].astype(str) + "|" + trial_table["stimID"].astype(str)

# --- Summaries: overall accuracy, then accuracy and D spread per condition ---
overall_acc = trial_table["correct"].mean()
by_condition = (
    trial_table.groupby("condition", as_index=False)
               .agg(trials=("stimID", "nunique"),
                    accuracy=("correct", "mean"),
                    D_mean=("D", "mean"),
                    D_std=("D", "std"))
               .sort_values("condition")
)

# --- Diagnostics: each participant's weight next to their Δμ, largest weight first ---
Delta_mu_series = pd.Series(Delta_mu, index=R.columns, name="Delta_mu")

weights_df = pd.DataFrame({"weight": w}, index=R.columns)  # index = participantID
weights_df = weights_df.join(Delta_mu_series, how="left").reset_index()
weights_df = weights_df.rename(columns={"index": "participantID"})
weights_df = weights_df.sort_values("weight", ascending=False).reset_index(drop=True)

# Rank of the unregularized covariance, reported alongside the ridge strength
rank_sigma = np.linalg.matrix_rank(Sigma)
print(f"WLC fit complete | Trials (stimID×condition): {N} | Participants: {P} | rank(Σ)={rank_sigma} | λ={REG_LAMBDA:g}")
print(f"Best criterion (tie='{TIE_RULE}'): {crit_best:.6g}")
print(f"Overall accuracy: {overall_acc:.4f}")

print("\nTop weights (head):")
display(weights_df.head(10))
display(by_condition)


WLC fit complete | Trials (stimID×condition): 3000 | Participants: 12 | rank(Σ)=12 | λ=1e-06
Best criterion (tie='>'): -0.0904937
Overall accuracy: 0.8060

Top weights (head):


Unnamed: 0,participantID,weight,Delta_mu
0,KM,0.462318,1.709333
1,JH,0.243044,1.290667
2,KZ,0.190095,1.006667
3,AG,0.168199,1.514
4,UR,0.132811,1.127333
5,SA,0.070051,0.647333
6,GS,0.048714,0.202667
7,AZ,0.015612,0.467333
8,AW,0.011665,0.779333
9,HG,-0.009786,1.588


Unnamed: 0,condition,trials,accuracy,D_mean,D_std
0,condition_1,1000,0.784,0.050847,1.144754
1,condition_2,1000,0.759,-0.059705,1.16292
2,condition_3,1000,0.875,0.008858,1.553994
