# Multilingual Semantics Probe

## Step 1: Corpus Generation

In [268]:
from __future__ import annotations

import json
from dataclasses import dataclass
from typing import Dict, List

import pandas as pd
import os

In [269]:
# Directory that receives every generated stimulus file.
STIMULI_DIR = "./stimuli"

# True  -> existential subject, universal object  ("A X verbed every Y.")
# False -> universal subject, existential object  ("Every X verbed a Y.")
EXISTENTIAL_UNIVERSAL = True

# Short tag embedded in output file names for the selected quantifier order.
quantifier_type_str = "eu" if EXISTENTIAL_UNIVERSAL else "ue"

# makedirs(exist_ok=True) is safe to re-run, has no check-then-create race,
# and keeps working if STIMULI_DIR is later pointed at a nested path.
os.makedirs(STIMULI_DIR, exist_ok=True)

In [270]:
# --- English lexicon ---
# Nouns that fill the {subj} slot of the English templates.
EN_SUBJECTS = ["shark", "robot", "chef", "dog", "kangaroo"]

# Nouns that fill the {obj} slot of the English templates.
EN_OBJECTS = ["pirate", "student", "doctor", "tourist", "politician"]

# Simple past forms, inserted verbatim into the {verb_past} slot.
EN_VERBS_PAST = ["ate", "helped", "pushed", "chased"]

# --- Mandarin lexicon ---
# Bare nouns only (no quantifiers inside); each list is written in the same
# order as its English counterpart above.
ZH_SUBJECTS = ["鲨鱼", "机器人", "厨师", "狗", "袋鼠"]
ZH_OBJECTS = ["海盗", "学生", "医生", "游客", "政治家"]

# Verb stems compatible with 了
ZH_VERBS = ["吃", "帮助", "推", "追"]

# Noun -> classifier map; nouns missing from it fall back to 个 at lookup time.
ZH_CLASSIFIER: Dict[str, str] = {
    **dict.fromkeys(["鲨鱼", "狗", "袋鼠"], "只"),
    **dict.fromkeys(["机器人", "厨师", "海盗", "学生", "医生", "游客", "政治家"], "个"),
}

In [271]:
# Sentence templates for the selected quantifier order. The English templates
# take {subj}, {verb_past}, {obj}; the Mandarin ones take {cl}, {subj}, {verb},
# {obj}, where {cl} is the classifier supplied for the subject noun.
if EXISTENTIAL_UNIVERSAL:
    EN_TEMPLATES = [
        # Classic ambiguous English form
        "A {subj} {verb_past} every {obj}.",
    ]

    ZH_TEMPLATES = [
        # Canonical Mandarin surface-scope reading
        "有一{cl}{subj}{verb}了每个{obj}。",
    ]
else:
    EN_TEMPLATES = [
        # Classic ambiguous English form
        "Every {subj} {verb_past} a {obj}.",
    ]

    ZH_TEMPLATES = [
        # Universal-Existential Construction.
        # {cl} already provides the classifier, so 每 must not be followed by a
        # hard-coded 个 as well (that produced e.g. 每个只鲨鱼).
        "每{cl}{subj}{verb}了一个{obj}。",
    ]

In [272]:
@dataclass(frozen=True)
class Stimulus:
    """One generated sentence together with the lexical choices that produced it.

    Instances are immutable (``frozen=True``).
    """

    # Language code of the sentence; the generators in this file emit "en" and "zh".
    language: str
    # Language-prefixed template label, e.g. "en_0" / "zh_0".
    template_id: str
    # Subject noun, object noun and verb exactly as inserted into the template.
    subj: str
    obj: str
    verb: str
    # The fully rendered sentence.
    sentence: str

    # Positions of the template / subject / object / verb in their source lists.
    # They contain no language-specific text, so matching items across languages
    # share the same four numbers.
    tid: int
    subj_i: int
    obj_i: int
    verb_i: int



def get_classifier(noun: str, cl_map: Dict[str, str]) -> str:
    """Return the classifier registered for ``noun``, or the general classifier 个 if none is."""
    if noun in cl_map:
        return cl_map[noun]
    return "个"


def generate_english(
    subjects: List[str],
    objects: List[str],
    verbs_past: List[str],
) -> List[Stimulus]:
    """Render every template x subject x object x verb combination in English.

    Templates are read from the module-level ``EN_TEMPLATES``. Results are
    ordered with the template varying slowest and the verb varying fastest;
    each ``Stimulus`` records the list positions of its ingredients.
    """
    return [
        Stimulus(
            language="en",
            template_id=f"en_{tid}",
            subj=subj,
            obj=obj,
            verb=verb,
            sentence=tmpl.format(subj=subj, obj=obj, verb_past=verb),
            tid=tid,
            subj_i=subj_i,
            obj_i=obj_i,
            verb_i=verb_i,
        )
        for tid, tmpl in enumerate(EN_TEMPLATES)
        for subj_i, subj in enumerate(subjects)
        for obj_i, obj in enumerate(objects)
        for verb_i, verb in enumerate(verbs_past)
    ]

def generate_mandarin(
    subjects: List[str],
    objects: List[str],
    verbs: List[str],
    cl_map: Dict[str, str],
) -> List[Stimulus]:
    """Render every template x subject x object x verb combination in Mandarin.

    Templates are read from the module-level ``ZH_TEMPLATES``. The ``{cl}``
    slot is filled with the classifier that ``cl_map`` gives for the subject
    noun (via ``get_classifier``). Ordering matches ``generate_english``:
    template slowest, verb fastest.
    """
    return [
        Stimulus(
            language="zh",
            template_id=f"zh_{tid}",
            subj=subj,
            obj=obj,
            verb=verb,
            sentence=tmpl.format(
                cl=get_classifier(subj, cl_map),
                subj=subj,
                obj=obj,
                verb=verb,
            ),
            tid=tid,
            subj_i=subj_i,
            obj_i=obj_i,
            verb_i=verb_i,
        )
        for tid, tmpl in enumerate(ZH_TEMPLATES)
        for subj_i, subj in enumerate(subjects)
        for obj_i, obj in enumerate(objects)
        for verb_i, verb in enumerate(verbs)
    ]


In [273]:
# All English items first, then all Mandarin items.
stimuli = [
    *generate_english(EN_SUBJECTS, EN_OBJECTS, EN_VERBS_PAST),
    *generate_mandarin(ZH_SUBJECTS, ZH_OBJECTS, ZH_VERBS, ZH_CLASSIFIER),
]

# One row per stimulus, one column per dataclass field.
continuation_df = pd.DataFrame([vars(s) for s in stimuli])

# Language-invariant semantic ID: built only from list positions, so the
# English and Mandarin versions of the same item share it.
continuation_df["pair_id"] = continuation_df.apply(
    lambda r: f"t{r.tid}_s{r.subj_i}_o{r.obj_i}_v{r.verb_i}",
    axis=1,
)

# Final unique ID (language prefix + pair_id), placed as the first column.
continuation_df.insert(
    loc=0,
    column="stimulus_id",
    value=continuation_df.apply(
        lambda r: f"{r.language}-{r.pair_id}",
        axis=1,
    ),
)

In [274]:
print("Total stimuli:", len(continuation_df))
print(continuation_df["language"].value_counts())

# Preview up to five sentences per language; the fixed seed keeps the sample
# stable across re-runs.
for lang in ("en", "zh"):
    is_lang = continuation_df["language"] == lang
    display(
        continuation_df.loc[is_lang, ["stimulus_id", "sentence"]].sample(
            min(5, is_lang.sum()),
            random_state=0,
        )
    )

Total stimuli: 200
language
en    100
zh    100
Name: count, dtype: int64


Unnamed: 0,stimulus_id,sentence
26,en-t0_s1_o1_v2,A robot pushed every student.
86,en-t0_s4_o1_v2,A kangaroo pushed every student.
2,en-t0_s0_o0_v2,A shark pushed every pirate.
55,en-t0_s2_o3_v3,A chef chased every tourist.
75,en-t0_s3_o3_v3,A dog chased every tourist.


Unnamed: 0,stimulus_id,sentence
126,zh-t0_s1_o1_v2,有一个机器人推了每个学生。
186,zh-t0_s4_o1_v2,有一只袋鼠推了每个学生。
102,zh-t0_s0_o0_v2,有一只鲨鱼推了每个海盗。
155,zh-t0_s2_o3_v3,有一个厨师追了每个游客。
175,zh-t0_s3_o3_v3,有一只狗追了每个游客。


In [275]:
# Serialize the bare stimuli twice: CSV for inspection, JSONL for downstream use.
csv_path = os.path.join(STIMULI_DIR,f"stimuli_{quantifier_type_str}.csv")
jsonl_path = os.path.join(STIMULI_DIR, f"stimuli_{quantifier_type_str}.jsonl")

continuation_df.to_csv(csv_path, index=False)

# ensure_ascii=False keeps the Mandarin text readable instead of \u-escaped.
with open(jsonl_path, "w", encoding="utf-8") as f:
    f.writelines(
        json.dumps(row, ensure_ascii=False) + "\n"
        for row in continuation_df.to_dict(orient="records")
    )

print(f"Wrote stimuli_{quantifier_type_str}.csv and stimuli_{quantifier_type_str}.jsonl")

Wrote stimuli_eu.csv and stimuli_eu.jsonl


### Add Natural Language Continuations

In [276]:
# Follow-up sentences keyed by the scope reading they are meant to support.
# {noun} is filled in by add_continuations (singular for "surface", plural for
# "inverse" in English).
EN_CONTINUATIONS = dict(
    surface=" There was only one {noun}.",
    inverse=" There were many {noun}.",
)

# Mandarin counterparts, kept equally short. Plural is usually left implicit
# in Mandarin, so "很多" serves as the lexical cue for "many".
ZH_CONTINUATIONS = dict(
    surface=" 只有一{cl}{noun}。",
    inverse=" 有很多{cl}{noun}。",
)

In [277]:
def add_continuations(df: pd.DataFrame) -> pd.DataFrame:
    """Expand every stimulus row into one row per continuation type.

    The continuation is about the existentially quantified noun: the subject
    when ``EXISTENTIAL_UNIVERSAL`` is true, otherwise the object. Templates
    come from ``EN_CONTINUATIONS`` / ``ZH_CONTINUATIONS``.

    Args:
        df: Stimulus table with at least ``language``, ``subj``, ``obj`` and
            ``sentence`` columns.

    Returns:
        A new frame with two rows per input row ("surface" then "inverse"),
        carrying all input columns plus ``continuation_type``,
        ``continuation_text`` and ``full_text``.

    Raises:
        ValueError: If a row's ``language`` is neither "en" nor "zh".
    """
    rows = []
    for r in df.itertuples(index=False):
        base = r._asdict()

        # The noun the continuation counts; chosen once so that the English
        # plural and the Mandarin classifier are both derived from the same noun.
        noun = base["subj"] if EXISTENTIAL_UNIVERSAL else base["obj"]

        if base["language"] == "en":
            # naive pluralization: add "s"
            # If you care about irregular plurals later, add a map.
            cont_map = {
                "surface": EN_CONTINUATIONS["surface"].format(noun=noun),
                "inverse": EN_CONTINUATIONS["inverse"].format(noun=noun + "s"),
            }

        elif base["language"] == "zh":
            # Look the classifier up for the counted noun. Previously it was
            # always the subject's classifier, which is wrong once the counted
            # noun is the object (universal-existential order).
            cl = get_classifier(noun, ZH_CLASSIFIER)
            cont_map = {
                "surface": ZH_CONTINUATIONS["surface"].format(cl=cl, noun=noun),
                "inverse": ZH_CONTINUATIONS["inverse"].format(cl=cl, noun=noun),
            }
        else:
            raise ValueError(f"Unknown language: {base['language']}")

        for cont_type, cont_text in cont_map.items():
            ex = dict(base)
            ex["continuation_type"] = cont_type            # "surface" or "inverse"
            ex["continuation_text"] = cont_text            # the thing you'll score
            ex["full_text"] = base["sentence"] + cont_text # convenient for debugging
            rows.append(ex)

    return pd.DataFrame(rows)

In [278]:
# Two rows (surface / inverse) per stimulus.
df_cont = add_continuations(continuation_df)

# Human-readable item key "subj|obj|verb", placed right after the first column.
# The guard keeps the cell from failing if the column is already present.
if "concept_id" not in df_cont.columns:
    concept_series = (df_cont["subj"] + "|" + df_cont["obj"]) + ("|" + df_cont["verb"])
    df_cont.insert(loc=1, column="concept_id", value=concept_series)

In [279]:
# Persist the stimulus+continuation table as CSV and as JSONL (one record per line).
cont_csv_path = os.path.join(STIMULI_DIR, f"stimuli_with_continuations_{quantifier_type_str}.csv")
cont_jsonl_path = os.path.join(STIMULI_DIR, f"stimuli_with_continuations_{quantifier_type_str}.jsonl")

df_cont.to_csv(cont_csv_path, index=False)

# ensure_ascii=False keeps the Mandarin text readable instead of \u-escaped.
with open(cont_jsonl_path, "w", encoding="utf-8") as f:
    f.writelines(
        json.dumps(row, ensure_ascii=False) + "\n"
        for row in df_cont.to_dict(orient="records")
    )

print(f"Wrote stimuli_with_continuations_{quantifier_type_str}.csv and stimuli_with_continuations_{quantifier_type_str}.jsonl")

Wrote stimuli_with_continuations_eu.csv and stimuli_with_continuations_eu.jsonl


## Step 2: Evaluate Log Probs of Continuations

In [None]:
import numpy as np
import json
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM

### Load in Continuations

In [281]:
# Read back exactly the file Step 1 wrote: derive the path from STIMULI_DIR
# instead of repeating the directory name as a separate literal.
JSONL_PATH = os.path.join(STIMULI_DIR, f"stimuli_with_continuations_{quantifier_type_str}.jsonl")

def read_jsonl(path):
    """Lazily yield one parsed JSON value per non-blank line of a UTF-8 file.

    Lines are stripped of surrounding whitespace first; lines that are empty
    after stripping are skipped.
    """
    with open(path, mode="r", encoding="utf-8") as handle:
        for raw_line in handle:
            payload = raw_line.strip()
            if not payload:
                continue
            yield json.loads(payload)

In [282]:
# Materialise the JSONL records and rebuild the long-format table from them.
# Note: this rebinds continuation_df to the stimulus+continuation rows.
rows = [record for record in read_jsonl(JSONL_PATH)]
continuation_df = pd.DataFrame(data=rows)

### Load Model and Tokenizer

In [283]:
# Hugging Face hub id of the causal LM to score with.
MODEL_NAME = "uer/gpt2-chinese-cluecorpussmall"
# When True, prompt+continuation text is lower-cased before tokenization.
LOWER_CASE = True

# Run on the GPU in bfloat16 when CUDA is available, otherwise on the CPU in float32.
if torch.cuda.is_available():
    DEVICE, DTYPE = "cuda", torch.bfloat16
else:
    DEVICE, DTYPE = "cpu", torch.float32

In [284]:
# Number of (prompt, continuation) rows scored per model forward pass.
BATCH_SIZE = 16

In [285]:
# NOTE(review): not referenced by any other cell visible in this notebook (results
# are written as JSONL further down) — confirm whether this is still needed.
OUT_CSV = "logprob_surface_vs_inverse.csv"

In [286]:
# Load the tokenizer that belongs to MODEL_NAME (fast implementation requested).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Batched scoring pads inputs to a common length, so a pad token must exist;
# fall back to the EOS token when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [287]:
# Load the pretrained causal LM in the configured precision, then move it to DEVICE.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=DTYPE)
model = model.to(DEVICE)

# Switch to evaluation mode (disables dropout); as the last expression this
# also displays the module structure.
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(21128, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=21128, bias=False)
)

### Calculate Continuation Log Prob
1) Tokenize Prompt and Continuation

2) Calculate Log Probs for prompt + continuation

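3) Calculate continuation log probs: [a] gather the log prob of each actual next token at positions 0..T-2; [b] sum them over the continuation span
- For an input of T tokens, the logits at position t predict token t+1, so log_probs[:, :T-1, :] scores tokens 1..T-1 (the first token is never predicted, and the prediction made at the last position is unused)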

```

a shark ate every pirate
                    ^ stop here; the logits at this last position predict a token past the end of the input, so they are dropped
```

```python
for b in range(B):
    for t in range(T):
        target_log_probs[b,t] = shifted_log_probs[b, t, target_tokens[b,t]]
```


In [288]:
def get_log_probs(prompts: list[str], continuations: list[str]) -> list[dict]:
    """Score each continuation given its prompt with the notebook's causal LM.

    Uses the module-level ``tokenizer``, ``model``, ``DEVICE`` and ``LOWER_CASE``.

    Args:
        prompts: Context sentences, one per example.
        continuations: Text appended to the matching prompt; must have the
            same number of entries as ``prompts``.

    Returns:
        One dict per example, in input order, with keys
        ``cont_log_probs_sum`` (sum of the continuation tokens' log-probs),
        ``cont_log_probs_mean`` (their mean; NaN when the slice is empty) and
        ``n_cont_tokens`` (token count of prompt+continuation minus token
        count of the prompt alone).

    Raises:
        ValueError: If ``prompts`` and ``continuations`` differ in length.
    """
    if len(prompts) != len(continuations):
        # zip() would silently drop the unmatched tail.
        raise ValueError(
            f"prompts ({len(prompts)}) and continuations ({len(continuations)}) "
            "must have the same length"
        )
    if not prompts:
        return []

    full_prompts = [p + c for p, c in zip(prompts, continuations)]

    # Normalise case for BOTH views. The prompt-only encoding is used purely to
    # find where the continuation starts inside the full encoding, so it must
    # be tokenized from the same text the model actually sees.
    if LOWER_CASE:
        prompts = [p.lower() for p in prompts]
        full_prompts = [p.lower() for p in full_prompts]

    # 1) Tokenize Prompt and Continuation
    tokenizer_kwargs = dict(
        return_tensors="pt",
        # Handle different length prompts
        padding=True,
        truncation=True,
        # Avoid [EOS]/[BOS] from being inserted
        add_special_tokens=False,
    )
    enc_base_prompts = tokenizer(prompts, **tokenizer_kwargs)
    enc_full_prompts = tokenizer(full_prompts, **tokenizer_kwargs)

    input_ids = enc_full_prompts["input_ids"].to(DEVICE)
    attention_mask = enc_full_prompts["attention_mask"].to(DEVICE)

    # 2) Calculate log-probs for prompt + continuation.
    # Pure scoring: no_grad avoids building an autograd graph for every batch.
    with torch.no_grad():
        out = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = out.logits.to(torch.float32)  # [B, T, V]
        log_probs = torch.log_softmax(logits, dim=-1)  # [B, T, V]

    # 3) Position t predicts token t+1, so align log_probs[:, :-1] with
    # input_ids[:, 1:] and pick out the log-prob of each actual next token.
    target_tokens = input_ids[:, 1:]  # [B, T-1]
    shifted_log_probs = log_probs[:, :-1, :]  # [B, T-1, V]
    target_log_probs = torch.gather(
        input=shifted_log_probs,
        dim=-1,
        index=target_tokens.unsqueeze(-1),  # [B, T-1, 1]
    ).squeeze(-1)  # [B, T-1]

    base_prompt_lens = enc_base_prompts["attention_mask"].sum(dim=1).tolist()
    full_prompt_lens = enc_full_prompts["attention_mask"].sum(dim=1).tolist()

    # With a prompt of m tokens and a full input of L tokens, the continuation
    # tokens sit at input positions m..L-1, i.e. at indices m-1..L-2 of the
    # shifted target_log_probs.
    # NOTE(review): this assumes right-side padding and that the prompt's
    # tokens are a prefix of the full input's tokens — confirm for the
    # tokenizer in use.
    cont_log_probs_list = []
    for b, (base_prompt_length, full_prompt_length) in enumerate(
        zip(base_prompt_lens, full_prompt_lens)
    ):
        # Clamp at 0 so an empty prompt/input cannot turn into a negative
        # (from-the-end) slice bound.
        span_start = max(base_prompt_length - 1, 0)
        span_end = max(full_prompt_length - 1, 0)

        cont_log_probs = target_log_probs[b, span_start:span_end]

        cont_log_probs_list.append(
            {
                "cont_log_probs_sum": cont_log_probs.sum().item(),
                "cont_log_probs_mean": cont_log_probs.mean().item(),
                "n_cont_tokens": full_prompt_length - base_prompt_length,  # Sanity Check
            }
        )
    return cont_log_probs_list

In [289]:
def process_log_probs(df: pd.DataFrame, b_sz: int = BATCH_SIZE):
    """Score every row of ``df`` in batches and append the score columns.

    Args:
        df: Frame with ``sentence`` (prompt) and ``continuation_text`` columns.
        b_sz: Number of rows passed to ``get_log_probs`` per call.

    Returns:
        A copy of ``df`` with a fresh 0..n-1 index and the columns returned by
        ``get_log_probs`` concatenated on the right, row-aligned with ``df``.
    """
    # Fresh positional index so the column-wise concat below lines up with the
    # RangeIndex of the stats frame.
    df = df.reset_index(drop=True).copy()
    all_stats = []

    for start in range(0, len(df), b_sz):
        # Slice the ARGUMENT, not the notebook-global continuation_df: the old
        # code scored the global table regardless of which frame was passed in.
        # iloc clips at the end of the frame, so no explicit min() is needed.
        batch = df.iloc[start:start + b_sz]

        all_stats.extend(
            get_log_probs(
                prompts=batch["sentence"].tolist(),
                continuations=batch["continuation_text"].tolist(),
            )
        )

    stats_df = pd.DataFrame(all_stats)

    return pd.concat([df, stats_df], axis=1)

In [290]:
# Long -> wide reshape: one row per stimulus with surface/inverse side by side.
def collapse_surface_inverse(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse per-continuation score rows into one comparison row per stimulus.

    Only rows whose ``continuation_type`` is "surface" or "inverse" are used.
    The result holds the shared metadata columns (first value per
    ``stimulus_id``), every numeric and text column once per continuation type
    (``<column>_<type>``), and inverse-minus-surface comparison columns:
    ``delta_*`` (log-prob difference), ``ratio_*`` (``exp(delta)``) and
    ``preference_*`` ("inverse", "surface" or "tie").
    """
    num_cols = ["cont_log_probs_sum", "cont_log_probs_mean", "n_cont_tokens"]
    text_cols = ["continuation_text", "full_text"]

    keep = df["continuation_type"].isin(["surface", "inverse"])
    long_df = df.loc[keep].copy()

    # Everything that does not vary with the continuation type is metadata.
    per_type_cols = {"continuation_type", *num_cols, *text_cols}
    meta_cols = [name for name in long_df.columns if name not in per_type_cols]
    meta = long_df.groupby("stimulus_id", as_index=False)[meta_cols].first()

    def _widen(value_cols):
        # Pivot one dtype family at a time so numeric columns keep numeric dtypes.
        wide = long_df[["stimulus_id", "continuation_type", *value_cols]].pivot(
            index="stimulus_id", columns="continuation_type", values=value_cols
        )
        wide.columns = [f"{col}_{ctype}" for col, ctype in wide.columns]
        return wide.reset_index()

    def _preferred(delta):
        # Positive delta: the inverse continuation is the more probable one.
        return np.select(
            [delta > 0, delta < 0],
            ["inverse", "surface"],
            default="tie",
        )

    out = meta
    for value_cols in (num_cols, text_cols):
        out = out.merge(_widen(value_cols), on="stimulus_id", how="left")

    out["delta_sum"] = out["cont_log_probs_sum_inverse"] - out["cont_log_probs_sum_surface"]
    out["ratio_sum"] = np.exp(out["delta_sum"])

    out["delta_mean"] = out["cont_log_probs_mean_inverse"] - out["cont_log_probs_mean_surface"]
    out["ratio_mean"] = np.exp(out["delta_mean"])

    out["preference_sum"] = _preferred(out["delta_sum"])
    out["preference_mean"] = _preferred(out["delta_mean"])

    return out

### Save Outputs
- Add model name as a column

In [291]:
from pathlib import Path
# Directory that receives the scored outputs. mkdir is a no-op when it already
# exists (exist_ok=True) and creates missing parents as needed (parents=True).
RESULTS_DIR = Path("./results")
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

In [292]:
import re
# Replace every character that is not a word character, "-" or "." with "_" so
# the model name can be embedded in file names (e.g. the "/" in a hub id).
safe_model = re.sub(r"[^\w\-\.]", "_", MODEL_NAME)

In [293]:
# Score every (sentence, continuation) row, then tag the rows with the
# file-name-safe model name as the first column.
scored_long = process_log_probs(df=continuation_df, b_sz=BATCH_SIZE)
scored_long.insert(loc=0, column="model", value=safe_model)

# One row per stimulus with surface vs. inverse side by side.
scored_wide = collapse_surface_inverse(df=scored_long)

In [294]:
# One row per stimulus: shared metadata, per-type scores and texts, and the
# inverse-minus-surface comparison columns.
scored_wide

Unnamed: 0,model,stimulus_id,concept_id,language,template_id,subj,obj,verb,sentence,tid,...,continuation_text_inverse,continuation_text_surface,full_text_inverse,full_text_surface,delta_sum,ratio_sum,delta_mean,ratio_mean,preference_sum,preference_mean
0,uer_gpt2-chinese-cluecorpussmall,en-t0_s0_o0_v0,shark|pirate|ate,en,en_0,shark,pirate,ate,A shark ate every pirate.,0,...,There were many sharks.,There was only one shark.,A shark ate every pirate. There were many sharks.,A shark ate every pirate. There was only one s...,2.095390,8.128613,1.069577,2.914146,inverse,inverse
1,uer_gpt2-chinese-cluecorpussmall,en-t0_s0_o0_v1,shark|pirate|helped,en,en_0,shark,pirate,helped,A shark helped every pirate.,0,...,There were many sharks.,There was only one shark.,A shark helped every pirate. There were many s...,A shark helped every pirate. There was only on...,1.271116,3.564830,1.008132,2.740476,inverse,inverse
2,uer_gpt2-chinese-cluecorpussmall,en-t0_s0_o0_v2,shark|pirate|pushed,en,en_0,shark,pirate,pushed,A shark pushed every pirate.,0,...,There were many sharks.,There was only one shark.,A shark pushed every pirate. There were many s...,A shark pushed every pirate. There was only on...,1.041332,2.832989,0.953306,2.594273,inverse,inverse
3,uer_gpt2-chinese-cluecorpussmall,en-t0_s0_o0_v3,shark|pirate|chased,en,en_0,shark,pirate,chased,A shark chased every pirate.,0,...,There were many sharks.,There was only one shark.,A shark chased every pirate. There were many s...,A shark chased every pirate. There was only on...,1.033434,2.810701,0.961239,2.614933,inverse,inverse
4,uer_gpt2-chinese-cluecorpussmall,en-t0_s0_o1_v0,shark|student|ate,en,en_0,shark,student,ate,A shark ate every student.,0,...,There were many sharks.,There was only one shark.,A shark ate every student. There were many sha...,A shark ate every student. There was only one ...,2.197477,9.002275,1.089395,2.972474,inverse,inverse
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,uer_gpt2-chinese-cluecorpussmall,zh-t0_s4_o3_v3,袋鼠|游客|追,zh,zh_0,袋鼠,游客,追,有一只袋鼠追了每个游客。,0,...,有很多只袋鼠。,只有一只袋鼠。,有一只袋鼠追了每个游客。 有很多只袋鼠。,有一只袋鼠追了每个游客。 只有一只袋鼠。,-7.002993,0.000909,-1.000427,0.367722,surface,surface
196,uer_gpt2-chinese-cluecorpussmall,zh-t0_s4_o4_v0,袋鼠|政治家|吃,zh,zh_0,袋鼠,政治家,吃,有一只袋鼠吃了每个政治家。,0,...,有很多只袋鼠。,只有一只袋鼠。,有一只袋鼠吃了每个政治家。 有很多只袋鼠。,有一只袋鼠吃了每个政治家。 只有一只袋鼠。,-5.853102,0.002871,-0.836157,0.433373,surface,surface
197,uer_gpt2-chinese-cluecorpussmall,zh-t0_s4_o4_v1,袋鼠|政治家|帮助,zh,zh_0,袋鼠,政治家,帮助,有一只袋鼠帮助了每个政治家。,0,...,有很多只袋鼠。,只有一只袋鼠。,有一只袋鼠帮助了每个政治家。 有很多只袋鼠。,有一只袋鼠帮助了每个政治家。 只有一只袋鼠。,-5.490833,0.004124,-0.784405,0.456391,surface,surface
198,uer_gpt2-chinese-cluecorpussmall,zh-t0_s4_o4_v2,袋鼠|政治家|推,zh,zh_0,袋鼠,政治家,推,有一只袋鼠推了每个政治家。,0,...,有很多只袋鼠。,只有一只袋鼠。,有一只袋鼠推了每个政治家。 有很多只袋鼠。,有一只袋鼠推了每个政治家。 只有一只袋鼠。,-6.617328,0.001337,-0.945333,0.388550,surface,surface


In [295]:
# Write both result tables as JSON Lines (one record per line);
# force_ascii=False keeps the Mandarin text unescaped.
json_targets = {
    RESULTS_DIR / f"scored_long_{quantifier_type_str}_{safe_model}.jsonl": scored_long,
    RESULTS_DIR / f"scored_wide_{quantifier_type_str}_{safe_model}.jsonl": scored_wide,
}

for out_path, frame in json_targets.items():
    frame.to_json(
        out_path,
        orient="records",
        lines=True,
        force_ascii=False,
    )