# Script Description

This script takes two inputs – the high- and low-condition model outputs, each loaded from a JSON file into a dataframe – and produces a JSON file that serves as the item table for constructing trials in the listener-side experiment.

## Output Format

The resulting JSON contains the following fields:

-	Q1 – the first quantifier used in the utterance (e.g., "some", "most", "none").
-	Q2 – the second quantifier (e.g., "some", "all").
-	A – the adverbial phrase describing the correctness of answers ("right" or "wrong").

These three fields (Q1, Q2, A) are used to construct the linguistic stimulus, e.g.:
“some of the students got some of the answers wrong.”

-	observation – an array of five non-negative integers representing the correctness pattern in the exam (in the data shown below, the entries take values such as 0, 3, 9 and 12).
This is used to generate the state table shown to participants as the “true” results of the exam. An observation has the form, e.g., [12, 12, 0, 0, 0].
-	condition – the ground-truth speaker type for that utterance–state pair.
This specifies which argumentative strategy is correct according to the model and has three levels:
	-	"high" – high-argumentative speaker
	-	"low" – low-argumentative speaker
	-	"info" – information-oriented (neutral) speaker

The JSON output is directly usable as the item specification for trial generation in the listener experiment.

In [None]:
import json
import pandas as pd
from pathlib import Path

# -----------------------------
# File paths
# -----------------------------
# Input JSON files for the "high" and "low" conditions. Both paths are
# relative, so they resolve against the current working directory.
file_high = Path("./analysis_values/most_discriminative_stimuli_for_listener_exp_high_fulldata.json")
file_low  = Path("./analysis_values/most_discriminative_stimuli_for_listener_exp_low_fulldata.json")
# -----------------------------

✓ Loaded JSON files successfully.

Keys in HIGH data: ['model', 'Q', 'A1', 'A2', 'observation', 'factor', 'factor_ci_low', 'factor_ci_high', 'extremeness']
Keys in LOW data:  ['model', 'Q', 'A1', 'A2', 'observation', 'factor', 'factor_ci_low', 'factor_ci_high', 'extremeness']

High DataFrame shape: (1040, 9)
Low DataFrame shape:  (1040, 9)

Combined DataFrame: (2080, 10)


Unnamed: 0,model,Q,A1,A2,observation,factor,factor_ci_low,factor_ci_high,extremeness,condition
0,lr,none,none,right,"[9, 9, 3, 3, 3]",0.047333,-0.132041,0.215257,0.047333,high
1,lr,none,none,right,"[12, 12, 12, 12, 12]",-3.882112,-4.313824,-3.516698,3.882112,high
2,lr,none,none,right,"[9, 9, 9, 9, 9]",-1.422144,-1.665711,-1.202539,1.422144,high
3,lr,none,none,right,"[12, 12, 9, 9, 9]",-1.575497,-1.817445,-1.356301,1.575497,high
4,lr,none,none,right,"[3, 3, 3, 3, 3]",0.585175,0.400875,0.761797,0.585175,high


In [21]:
# Show the 30 rows of df_both with the smallest "factor" values
df_both.nsmallest(30, "factor")

Unnamed: 0,model,Q,A1,A2,observation,factor,factor_ci_low,factor_ci_high,extremeness,condition
502,maximin,all,some,wrong,"[9, 9, 9, 9, 9]",-4.022498,-4.257759,-3.807873,4.022498,high
1534,maximin,all,some,right,"[3, 3, 3, 3, 3]",-4.020956,-4.25195,-3.803349,4.020956,low
1287,lr,all,some,wrong,"[0, 0, 0, 0, 0]",-3.908896,-4.24156,-3.619767,3.908896,low
231,lr,all,some,right,"[12, 12, 12, 12, 12]",-3.908278,-4.216568,-3.628684,3.908278,high
1794,nonparametric,all,some,right,"[3, 3, 3, 3, 3]",-3.895506,-4.11799,-3.691837,3.895506,low
762,nonparametric,all,some,wrong,"[9, 9, 9, 9, 9]",-3.89156,-4.113378,-3.687855,3.89156,high
1,lr,none,none,right,"[12, 12, 12, 12, 12]",-3.882112,-4.313824,-3.516698,3.882112,high
41,lr,none,all,wrong,"[12, 12, 12, 12, 12]",-3.882112,-4.313824,-3.516698,3.882112,high
1057,lr,none,none,wrong,"[0, 0, 0, 0, 0]",-3.879812,-4.33191,-3.491645,3.879812,low
1077,lr,none,all,right,"[0, 0, 0, 0, 0]",-3.879812,-4.33191,-3.491645,3.879812,low


In [24]:

def build_listener_items(
    file_high: str,
    file_low: str,
    n_per_condition: int = 12,
) -> pd.DataFrame:
    """
    Build the listener-experiment item table from the high/low model outputs.

    Selects ``n_per_condition`` high-arg, ``n_per_condition`` low-arg and
    ``n_per_condition`` info items.

    Steps:
    0. Load both JSON files into DataFrames.
    1. Average ``factor`` across models for every (Q, A1, A2, observation)
       combination, separately for the high and the low file.
    2. For 'high': take the n_per_condition highest averaged factors.
    3. For 'low' : take the n_per_condition highest averaged factors.
    4. Inner-merge the high and low averages (only combinations present in
       both files survive), average the two factors, and take the
       n_per_condition lowest items as 'info'.
    5. Rename Q, A1, A2 -> Q1, Q2, A and stack the three sets.

    Args:
        file_high: Path to the JSON file with the high-condition model output
            (anything accepted by ``open``, e.g. a ``str`` or a ``Path``).
        file_low: Path to the JSON file with the low-condition model output.
        n_per_condition: Number of items selected for each condition.

    Returns:
        A DataFrame with a fresh 0..N-1 index, rows ordered high, low, info,
        and columns Q1, Q2, A, observation, condition, factor_high,
        factor_low, avg_factor. Each factor column is only filled for the
        condition it belongs to (factor_high for 'high', factor_low for
        'low', avg_factor for 'info') and is NaN elsewhere.
    """
    group_cols = ["Q", "A1", "A2", "obs_tuple"]

    def load_frame(path):
        # The context manager guarantees the file handle is closed, even if
        # JSON decoding fails.
        with open(path, encoding="utf-8") as handle:
            frame = pd.DataFrame(json.load(handle))
        # Lists are not hashable; tuples can be used as groupby/merge keys.
        frame["obs_tuple"] = frame["observation"].apply(tuple)
        return frame

    def mean_factor(frame, out_col):
        # One row per item, with the factor averaged over all models.
        return (
            frame.groupby(group_cols)
                 .agg(**{out_col: ("factor", "mean")})
                 .reset_index()
        )

    def tidy(df, factor_col_name):
        # rename() returns a new frame, so the caller's frame is not mutated.
        out = df.rename(columns={"Q": "Q1", "A1": "Q2", "A2": "A"})
        # reconstruct observation as list
        out["observation"] = out["obs_tuple"].apply(list)
        # keep only what we need (+ the factor column for inspection)
        keep_cols = ["Q1", "Q2", "A", "observation", "condition"]
        if factor_col_name in out:
            keep_cols.append(factor_col_name)
        return out[keep_cols]

    # ---------- 0. Load ----------
    df_high = load_frame(file_high)
    df_low = load_frame(file_low)

    # ---------- 1. Average across models for each condition ----------
    high_avg = mean_factor(df_high, "factor_high")
    low_avg = mean_factor(df_low, "factor_low")

    # ---------- 2. Select high-arg items ----------
    top_high = (
        high_avg.sort_values("factor_high", ascending=False)
                .head(n_per_condition)
                .copy()
    )
    top_high["condition"] = "high"

    # ---------- 3. Select low-arg items ----------
    top_low = (
        low_avg.sort_values("factor_low", ascending=False)
               .head(n_per_condition)
               .copy()
    )
    top_low["condition"] = "low"

    # ---------- 4. Select info items (average across high+low) ----------
    merged = pd.merge(high_avg, low_avg, on=group_cols, how="inner")
    merged["avg_factor"] = (merged["factor_high"] + merged["factor_low"]) / 2.0

    info_items = (
        merged.sort_values("avg_factor", ascending=True)
              .head(n_per_condition)
              .copy()
    )
    info_items["condition"] = "info"

    # ---------- 5. Harmonize, rename and combine all three sets ----------
    items = pd.concat(
        [
            tidy(top_high, "factor_high"),
            tidy(top_low, "factor_low"),
            tidy(info_items, "avg_factor"),
        ],
        ignore_index=True,
    )

    return items


# Example usage:
# items_df = build_listener_items(
#     "most_discriminative_stimuli_for_listener_exp_high_fulldata.json",
#     "most_discriminative_stimuli_for_listener_exp_low_fulldata.json",
#     n_per_condition=10,
# )
# print(items_df)
# items_df.to_json("listener_items.json", orient="records", indent=2)

In [34]:
# Build the full item table. Despite its name, `info_items` holds the items of
# all three conditions: with n_per_condition=12 the rows are concatenated in
# the order high, low, info (row labels 0-11, 12-23, 24-35).
info_items = build_listener_items(file_high, file_low, n_per_condition=12)
print(info_items.to_string(index=True))

# Rows to modify
# NOTE: all row labels below refer to the table printed above, i.e. before
# any row is dropped or the index is reset.
# row 26: change observation from [12, 12, 0, 0, 0] to [12, 9, 9, 9, 9] (item: most some wrong)
info_items.at[26, 'observation'] = [12, 9, 9, 9, 9]

# row 27: change observation from [12, 12, 12, 0, 0] to [3, 3, 3, 3, 0] (item: most some right)
info_items.at[27, 'observation'] = [3, 3, 3, 3, 0]

# row 34: change observation from [12, 12, 12, 0, 0] to [12, 12, 0, 0, 0] and set Q2 to "all" (item: some all right)
info_items.at[34, 'observation'] = [12, 12, 0, 0, 0]
info_items.at[34, 'Q2'] = "all"

# row 35: observation is left unchanged (per the original note: [12, 12, 12, 0, 0]); only Q2 is set to "all" (item: some all wrong)
info_items.at[35, 'Q2'] = "all"


# Rows to exclude (two per condition, leaving 10 items in each condition)
row_to_exclude = [6, # high
                  8, # high
                17, # low
                20, # low
                31, # info
                33, # info
                ]
info_items = info_items.drop(row_to_exclude).reset_index(drop=True)
# Rearrange according to condition
info_items = info_items.sort_values(by=['condition']).reset_index(drop=True)
print("\nAfter manual edits:\n")
print(info_items.to_string(index=True))

# Save to csv file (index is not written)
info_items.to_csv("../experiments/listener_side/items/final_listener_items.csv", index=False)


      Q1    Q2      A           observation condition  factor_high  factor_low  avg_factor
0   some  some  wrong       [0, 0, 0, 0, 0]      high     2.090269         NaN         NaN
1   most  some  wrong       [0, 0, 0, 0, 0]      high     1.906597         NaN         NaN
2   some  most  right       [9, 9, 0, 0, 0]      high     1.739603         NaN         NaN
3   some  none  wrong     [12, 12, 0, 0, 0]      high     1.676246         NaN         NaN
4   some   all  right     [12, 12, 0, 0, 0]      high     1.596101         NaN         NaN
5   none   all  right       [0, 0, 0, 0, 0]      high     1.591761         NaN         NaN
6   none  none  wrong       [0, 0, 0, 0, 0]      high     1.591761         NaN         NaN
7    all  some  wrong       [0, 0, 0, 0, 0]      high     1.509167         NaN         NaN
8   some  some  right       [3, 3, 0, 0, 0]      high     1.435395         NaN         NaN
9   some  most  wrong       [0, 0, 0, 0, 0]      high     1.397498         NaN         NaN