In [1]:
import json
import random
import re
from dataclasses import asdict
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Literal

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig

In [3]:
@dataclass
class ParticipantProfile:
    """Static description of one simulated participant.

    Instances are created by `build_participants_decomposing` without a role;
    `assign_trust_roles` later fills in `trust_role`.
    """
    # Sequential identifier formatted as "P" + zero-padded counter (e.g. "P0000").
    pid: str
    # Copied from the "demographic_group_no" entry of the participant's group.
    demographic_group_no: int
    # First "subcategory" listed under the group's "location" category.
    location: str
    # First "subcategory" listed under the group's "occupation" category.
    occupation: str
    # Role in the trust game; None until roles have been assigned.
    trust_role: Optional[Literal["trustor", "trustee"]] = None


In [4]:
def build_participants_decomposing(exp_json: dict) -> List[ParticipantProfile]:
    """Expand an experiment spec into one `ParticipantProfile` per participant.

    For every entry of `exp_json["demographic_info"]` the function reads the
    group size, the group number and the first subcategory of the "location"
    and "occupation" blocks, then creates that many profiles with consecutive
    ids ("P0000", "P0001", ...).

    Raises:
        AssertionError: if a group lacks a location or occupation block, or if
            the number of profiles differs from
            `exp_json["total_participants_count"]`.
    """
    roster: List[ParticipantProfile] = []

    for group in exp_json["demographic_info"]:
        head_count = group["num_of_participants"]
        group_id = group["demographic_group_no"]

        # Pull the two demographic traits out of the nested category blocks;
        # when a category appears more than once the last block wins.
        traits = {"location": None, "occupation": None}
        for block in group["demographic_info"]:
            category = block["category"]
            if category in ("location", "occupation"):
                traits[category] = block["category_info"][0]["subcategory"]

        assert traits["location"] is not None
        assert traits["occupation"] is not None

        # Ids continue from however many profiles exist so far.
        first_number = len(roster)
        roster.extend(
            ParticipantProfile(
                pid=f"P{number:04d}",
                demographic_group_no=group_id,
                location=traits["location"],
                occupation=traits["occupation"],
            )
            for number in range(first_number, first_number + head_count)
        )

    assert len(roster) == exp_json["total_participants_count"]
    return roster


In [5]:
def assign_trust_roles(participants: List[ParticipantProfile], seed: int = 0):
    """Randomly split every demographic group into trustors and trustees.

    Participants are bucketed by `demographic_group_no`; each bucket is
    shuffled and its first half (rounded down) becomes "trustor", the rest
    "trustee". Profiles are modified in place by setting `trust_role`.

    Args:
        participants: profiles to label.
        seed: seed for the shuffle, making the assignment reproducible.

    Returns:
        None. The result is written to each participant's `trust_role`.
    """
    # A private generator yields the same shuffle order as seeding the module
    # RNG with `seed`, but does not reset the process-wide random state that
    # other code may be using.
    rng = random.Random(seed)

    by_group = {}
    for p in participants:
        by_group.setdefault(p.demographic_group_no, []).append(p)

    for members in by_group.values():
        rng.shuffle(members)

        # With an odd group size the extra participant becomes a trustee.
        half = len(members) // 2
        for position, p in enumerate(members):
            p.trust_role = "trustor" if position < half else "trustee"


In [6]:
import json

# Load every experiment spec. The encoding is given explicitly so the read
# does not depend on the platform's locale default.
with open("experiments_v3.json", "r", encoding="utf-8") as f:
    all_experiments = json.load(f)

# Select the experiment simulated in this notebook; fail with a readable
# message (instead of a bare StopIteration) when the id is not in the file.
EXP = next(
    (e for e in all_experiments if e["experiment_id"] == "decomposing_trust_2006"),
    None,
)
if EXP is None:
    raise ValueError(
        "experiment 'decomposing_trust_2006' not found in experiments_v3.json"
    )


participants = build_participants_decomposing(EXP)
assign_trust_roles(participants, seed=42)

# Sanity-check the cohort: overall size, per-site counts, and role split.
from collections import Counter
print("Total:", len(participants))
print("By site:", Counter(p.location for p in participants))
print("Trust roles:", Counter(p.trust_role for p in participants))


Total: 359
By site: Counter({'Capetown, South Africa': 129, 'Moscow, Russia': 118, 'Boston, United States': 112})
Trust roles: Counter({'trustee': 180, 'trustor': 179})


In [7]:
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Literal

In [8]:
@dataclass
class DecomposingTrustState:
    """Mutable per-participant record filled in while the tasks are run.

    Holds the participant's profile, the transcript of task records appended
    by the `run_*` functions, and the values extracted from the trust game.
    """
    participant: ParticipantProfile
    # One dict per completed task, appended in execution order.
    transcript: List[Dict] = field(default_factory=list)

    # Trust Game ONLY (operational, needed during generation)
    # Role copied from the participant profile by `init_states`.
    trust_role: Optional[Literal["trustor", "trustee"]] = None
    # Amount the trustor sends (set for trustors only; None if not extracted).
    trust_send_operational: Optional[int] = None             # trustor only
    # Amount the trustor expects to get back (None if not extracted).
    trust_expect_operational: Optional[int] = None           # trustor belief
    # Trustee's extracted return schedule (set for trustees only).
    trust_return_schedule_operational: Optional[Dict] = None # trustee only


In [9]:
def init_states(participants: List[ParticipantProfile]) -> List[DecomposingTrustState]:
    """Create one fresh `DecomposingTrustState` per participant.

    Each state wraps its profile and copies the profile's `trust_role`; all
    other fields keep their dataclass defaults.
    """
    return [
        DecomposingTrustState(participant=profile, trust_role=profile.trust_role)
        for profile in participants
    ]


In [10]:
def dictator_prompt(state: DecomposingTrustState) -> str:
    """Return the dictator-game prompt for one participant.

    The participant's location and occupation (from `state.participant`) are
    interpolated into a fixed template asking how much of 100 units to give
    and for a short justification.
    """
    p = state.participant
    return f"""
You are participating in an economics experiment.

About you:
- Location: {p.location}
- Occupation: {p.occupation}

You are given 100 units of money.
You may give any amount between 0 and 100 to another participant.
The other participant cannot respond.

Please:
1) Decide how much you want to give.
2) Briefly explain your reason in one or two sentences.

Respond naturally, as a real participant would.
"""


In [11]:
def triple_dictator_prompt(state: DecomposingTrustState) -> str:
    """Return the triple-dictator-game prompt for one participant.

    Same layout as the dictator prompt, except the template states that any
    amount given is tripled before the recipient receives it. Location and
    occupation come from `state.participant`.
    """
    # NOTE(review): unlike `dictator_prompt`, this template never states the
    # allowed 0-100 range explicitly - confirm whether that is intentional.
    p = state.participant
    return f"""
You are participating in an economics experiment.

About you:
- Location: {p.location}
- Occupation: {p.occupation}

You are given 100 units of money.
Any amount you give will be tripled before the other participant receives it.
The other participant cannot respond.

Please:
1) Decide how much you want to give.
2) Briefly explain your reason in one or two sentences.

Respond naturally, as a real participant would.
"""



In [12]:
def trustor_prompt(state: DecomposingTrustState) -> str:
    """Return the trust-game prompt for a participant acting as Player 1.

    The template asks for three things: the amount to send (0-100, tripled in
    transit), the amount expected back from Player 2, and a brief reason.
    Location and occupation come from `state.participant`.
    """
    p = state.participant
    return f"""
You are participating in an economics experiment.

About you:
- Location: {p.location}
- Occupation: {p.occupation}

You are Player 1.

You are given 100 units of money.
You may send any amount between 0 and 100 to Player 2.

Any amount you send will be tripled before Player 2 receives it.
Player 2 may later return some amount to you.

Please:
1) Decide how much money you want to send.
2) State how much you expect Player 2 to return to you.
3) Briefly explain your reasoning in one or two sentences.

Respond naturally, as a real participant would.
"""


In [13]:
def trustee_prompt(state: DecomposingTrustState) -> str:
    """Return the trust-game prompt for a participant acting as Player 2.

    The template asks for a return decision for every possible sent amount
    from 10 to 100 in steps of 10, i.e. a full return schedule rather than a
    single reply. Location and occupation come from `state.participant`.
    """
    p = state.participant
    return f"""
You are participating in an economics experiment.

About you:
- Location: {p.location}
- Occupation: {p.occupation}

You are Player 2.

Player 1 may send you any amount between 0 and 100 units.
Any amount sent to you will be tripled before you receive it.

Think carefully about how you would respond to different levels of trust.

For each of the following amounts that Player 1 could send:
10, 20, 30, 40, 50, 60, 70, 80, 90, 100

Please state how much you would return to Player 1 after receiving the tripled amount.

Important:
- Please give a decision for **every amount listed above**.
- You may explain briefly how your returns change as the sent amount increases.
- Make sure none of the listed amounts are skipped.

Respond as a real participant would.
"""



In [14]:
def risk_prompt(state: DecomposingTrustState) -> str:
    """Return the risk-task prompt for one participant.

    Six decisions are listed, each pitting the same 50/50 gamble (300 or 0
    units) against a sure payoff of 40, 60, 80, 100, 120 or 140 units. The
    template asks for a single `CHOICES:` line and no explanation.
    """
    certain_payoffs = (40, 60, 80, 100, 120, 140)

    # One menu line per decision, numbered from 1.
    menu = []
    for number, payoff in enumerate(certain_payoffs, start=1):
        menu.append(f"Decision {number}: choose GAMBLE or SURE {payoff}")
    lines = "\n".join(menu)

    p = state.participant
    return f"""
You are participating in an economics experiment.

About you:
- Location: {p.location}
- Occupation: {p.occupation}

You will make 6 independent decisions.

For each decision, choose ONE option:

GAMBLE:
- 50% chance to receive 300 units
- 50% chance to receive 0 units

SURE:
- Receive the stated amount for sure

{lines}

Important:
- Do NOT explain your choices.
- Give your final decisions only.

Reply with exactly one line:
CHOICES: c1,c2,c3,c4,c5,c6
(each ci must be GAMBLE or SURE)
"""


In [15]:
def run_dictator_game(state: DecomposingTrustState):
    """Run the dictator game for one participant and log it.

    Builds the prompt, generates a reply with `run_llm`, and appends a record
    with the prompt, the raw output and a "completed" status to
    `state.transcript`.
    """
    prompt = dictator_prompt(state)
    record = {
        "task": "dictator_game",
        "prompt": prompt,
        "output": run_llm(prompt),
        "status": "completed",
    }
    state.transcript.append(record)


In [16]:
def run_triple_dictator_game(state: DecomposingTrustState):
    """Run the triple dictator game for one participant and log it.

    Builds the prompt, generates a reply with `run_llm`, and appends a record
    with the prompt, the raw output and a "completed" status to
    `state.transcript`.
    """
    prompt = triple_dictator_prompt(state)
    record = {
        "task": "triple_dictator_game",
        "prompt": prompt,
        "output": run_llm(prompt),
        "status": "completed",
    }
    state.transcript.append(record)


In [18]:
def run_trust_game(state: DecomposingTrustState):
    """Run the trust game for one participant in their assigned role.

    Trustors get the Player 1 prompt; the amount sent and the amount expected
    back are then extracted from the reply and stored on `state`. Trustees get
    the Player 2 prompt; their return schedule is extracted and stored. In
    both cases a transcript record is appended afterwards. A state whose role
    is neither "trustor" nor "trustee" is left untouched.

    Uses the notebook-level `model` and `tok` for the extraction calls.
    """
    role = state.trust_role

    if role == "trustor":
        prompt = trustor_prompt(state)
    elif role == "trustee":
        prompt = trustee_prompt(state)
    else:
        # No role assigned: nothing is generated and nothing is recorded.
        return

    output = run_llm(prompt)

    if role == "trustor":
        sent = tg_extract_scalar_llm(
            raw_text=output,
            min_value=0,
            max_value=100,
            target="send",
            model=model,
            tokenizer=tok,
        )

        # Upper bound accepted for the stated expectation: 100 plus three
        # times the extracted amount, or 100 + 300 when nothing was extracted.
        expect_cap = 100 + (300 if sent is None else 3 * sent)
        expected = tg_extract_scalar_llm(
            raw_text=output,
            min_value=0,
            max_value=expect_cap,
            target="expect",
            model=model,
            tokenizer=tok,
        )

        state.trust_send_operational = sent
        state.trust_expect_operational = expected
    else:
        # Trustee: strategy method, one return per possible sent amount.
        schedule = tg_extract_schedule_llm(
            raw_text=output,
            amounts=list(range(10, 101, 10)),
            max_receive_multiplier=3,
            model=model,
            tokenizer=tok,
        )
        state.trust_return_schedule_operational = schedule

    state.transcript.append({
        "task": "trust_game",
        "role": role,
        "prompt": prompt,
        "output": output,
        "status": "completed",
    })


In [19]:
def run_risk_task(state: DecomposingTrustState):
    """Run the risk task for one participant and log it.

    Builds the prompt, generates a reply with `run_llm`, and appends a record
    with the prompt, the raw output and a "completed" status to
    `state.transcript`.
    """
    prompt = risk_prompt(state)
    record = {
        "task": "risk_task",
        "prompt": prompt,
        "output": run_llm(prompt),
        "status": "completed",
    }
    state.transcript.append(record)


In [20]:
def run_participant(state: DecomposingTrustState):
    """Run all four tasks for one participant, in a fixed order.

    Order: dictator game, triple dictator game, trust game, risk task. Each
    step appends to `state.transcript`; the same `state` object is returned.
    """
    stages = (
        run_dictator_game,
        run_triple_dictator_game,
        run_trust_game,
        run_risk_task,
    )
    for stage in stages:
        stage(state)

    return state


In [21]:
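# The protocol has four tasks: dictator game, triple dictator game, trust game
# (trustor decision and trustee return schedule), and the risk task.
# Only the trust game needs values extracted in real time during generation,
# so it gets two dedicated LLM extractors (scalar and schedule); the remaining
# tasks are covered by the general-purpose extractors defined further down.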

def tg_extract_scalar_llm(
    raw_text: str,
    min_value: int,
    max_value: int,
    target: Literal["send", "expect"],
    model,
    tokenizer,
    max_new_tokens: int = 32,
) -> int | None:
    if target == "send":
        task = "the amount the person decided to SEND"
        rule = "Ignore examples or other numbers."
    else:
        task = "the amount the person EXPECTS to RECEIVE BACK"
        rule = "If a range is stated (e.g., 20–30), extract the LOWER bound of the range."
    
    prompt = f"""From the text below, extract {task}.
    
    {rule}
    Return a single integer.
    
    Valid range: {min_value}–{max_value}.
    If unclear, output NONE.
    
    Text:
    \"\"\"{raw_text}\"\"\"
    
    Output exactly:
    AMOUNT: <integer>
    or
    AMOUNT: NONE
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    in_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    gen = out[0][in_len:]
    ans = tokenizer.decode(gen, skip_special_tokens=True).strip()

    m = re.search(r"AMOUNT:\s*(NONE|\d+)", ans, flags=re.IGNORECASE)
    if not m:
        return None
    v = m.group(1)
    if v.upper() == "NONE":
        return None

    val = int(v)
    return val if min_value <= val <= max_value else None


In [22]:
def tg_extract_schedule_llm(
    raw_text: str,
    amounts: list[int],              # e.g., [10,20,...,100]
    max_receive_multiplier: int,     # 3 in this paper
    model,
    tokenizer,
    max_new_tokens: int = 256,
) -> dict[int, int] | None:
    
    # For each x, trustee can return 0..(3*x)
    ranges = ", ".join([f"{x}:0-{max_receive_multiplier*x}" for x in amounts])

    prompt = f"""From the text below, extract the trustee's return schedule.

We need returns for these sent amounts: {amounts}.
For each sent amount x, return y must be an integer in [0, {max_receive_multiplier}*x].

If the schedule is missing or unclear, output NONE.

Text:
\"\"\"{raw_text}\"\"\"

Output exactly one line:
SCHEDULE: x=y, x=y, ...
or
SCHEDULE: NONE

Valid per-x ranges: {ranges}
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    in_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    gen = out[0][in_len:]
    ans = tokenizer.decode(gen, skip_special_tokens=True).strip()

    m = re.search(r"SCHEDULE:\s*(.*)", ans, flags=re.IGNORECASE)
    if not m:
        return None
    body = m.group(1).strip()
    if body.upper().startswith("NONE"):
        return None

    # Parse "10=5, 20=10, ..."
    pairs = re.findall(r"(\d+)\s*=\s*(\d+)", body)
    sched = {}
    for k, v in pairs:
        x = int(k); y = int(v)
        sched[x] = y

    # Validate completeness + bounds
    for x in amounts:
        if x not in sched:
            return None
        y = sched[x]
        if not (0 <= y <= max_receive_multiplier * x):
            return None

    return sched


In [23]:
def general_extract_amount_llm(
    raw_text: str,
    min_value: int,
    max_value: int,
    model,
    tokenizer,
) -> int | None:
    prompt = f"""Extract the amount the person says they will give from the text.

Range: {min_value}-{max_value}.
If no clear amount is stated, output NONE.

Text:
\"\"\"{raw_text}\"\"\"

Output:
AMOUNT: <integer>
or
AMOUNT: NONE
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    in_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=32,
            do_sample=False,
        )

    gen = out[0][in_len:]
    ans = tokenizer.decode(gen, skip_special_tokens=True).strip()

    m = re.search(r"AMOUNT:\s*(NONE|\d+)", ans, flags=re.IGNORECASE)
    if not m:
        return None
    v = m.group(1)
    if v.upper() == "NONE":
        return None
    val = int(v)
    return val if (min_value <= val <= max_value) else None


In [24]:
def general_extract_risk_choices_llm(
    raw_text: str,
    n_choices: int,
    model,
    tokenizer,
) -> list[str] | None:
    prompt = f"""From the text below, extract the participant's choices for {n_choices} decisions.
Each choice must be exactly GAMBLE or SURE.
If unclear, output NONE.

Text:
\"\"\"{raw_text}\"\"\"

Output exactly one line:
CHOICES: c1,c2,c3,c4,c5,c6
or
CHOICES: NONE
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    in_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=64,
            do_sample=False,
        )

    gen = out[0][in_len:]
    ans = tokenizer.decode(gen, skip_special_tokens=True).strip()

    m = re.search(r"CHOICES:\s*(.*)", ans, flags=re.IGNORECASE)
    if not m:
        return None
    body = m.group(1).strip()
    if body.upper().startswith("NONE"):
        return None

    parts = [p.strip().upper() for p in body.split(",")]
    if len(parts) != n_choices:
        return None
    if any(p not in ("GAMBLE", "SURE") for p in parts):
        return None
    return parts


In [None]:
# Checkpoint used both for simulating participants and for the extractors.
model_id = "Qwen/Qwen2.5-7B-Instruct"
tok = AutoTokenizer.from_pretrained(model_id)

# 8-bit loading is requested through an explicit BitsAndBytesConfig; passing
# `load_in_8bit=True` directly to `from_pretrained` is the deprecated spelling.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
    dtype=torch.float16,
    offload_folder="offload",
)

# Switch to inference mode (disables dropout and similar training behaviour).
model.eval()

In [29]:
def run_llm(prompt: str, max_new_tokens: int = 256) -> str:
    """Sample one completion for `prompt` from the notebook-level model.

    Uses the global `tok` and `model`. Generation samples with temperature
    0.3 and top-p 0.9, so repeated calls can return different text.

    Args:
        prompt: text to feed to the model.
        max_new_tokens: maximum number of tokens to generate.

    Returns:
        The newly generated text only (prompt tokens removed), stripped of
        surrounding whitespace.
    """
    # NOTE(review): the prompt is tokenized as plain text; if `tok` belongs to
    # a chat/instruct model, confirm whether its chat template should be
    # applied first.
    encoded = tok(prompt, return_tensors="pt").to(model.device)
    prompt_len = encoded["input_ids"].shape[1]

    sampling = dict(
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.3,
        top_p=0.9,
    )
    with torch.no_grad():
        sequences = model.generate(**encoded, **sampling)

    # Keep only what was produced after the prompt.
    completion = sequences[0][prompt_len:]
    return tok.decode(completion, skip_special_tokens=True).strip()


In [None]:
# Collects one plain-dict snapshot (via `asdict`) per participant so the
# results can be serialized later.
# NOTE(review): despite the "smoke" name, the loop below runs every state
# produced by `init_states(participants)`, not a small sample.
smoke_states = []
states = init_states(participants)

for s in states:
    # Runs all four tasks; results accumulate on `s` in place.
    run_participant(s)

    #print(s.participant.pid, s.trust_role)
    # Progress log: one line per task recorded in this participant's transcript.
    for t in s.transcript:
        print("  ", t["task"], t["status"])

    # Snapshot after all tasks so the transcript and extracted values are included.
    smoke_states.append(asdict(s))


   dictator_game completed
   triple_dictator_game completed
   trust_game completed
   risk_task completed


In [None]:
# Persist every participant snapshot as pretty-printed JSON.
with open("decomposing_trust.json", "w") as out_file:
    json.dump(smoke_states, out_file, indent=2)