In [20]:
import os, json, re, ast
import pandas as pd
import requests
from tqdm import tqdm

JSON_DIR = "."
CONTROLLED_CSV = "controlled_agents_llama3.csv"
OUT_CSV = "llama3_qwen_agreement_noid.csv"

USE_OLLAMA = True
OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "qwen2.5:7b"


json_files = sorted([f for f in os.listdir(JSON_DIR) if f.startswith("topic_") and f.endswith(".jsonl")])
print(f"✅ Found {len(json_files)} JSONL files:", json_files)

controlled = pd.read_csv(CONTROLLED_CSV, encoding="utf-8")
print("✅ controlled_agents_llama3.csv columns:", controlled.columns.tolist())

def get_qwen_agreement(text_a, text_b, context="preference"):
    prompt = f"""
You are an impartial evaluator.
Rate how much these two people agree with each other (1–5)
based on their {context} statements.

1 = totally disagree
5 = totally agree

A: {text_a}
B: {text_b}

Please output ONLY the number (1–5).
"""
    try:
        r = requests.post(
            OLLAMA_URL,
            json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
            timeout=90
        )
        data = r.json()
        text = data.get("response", "")
        match = re.search(r"[1-5]", text)
        return int(match.group()) if match else None
    except Exception as e:
        print("⚠️ Error:", e)
        return None

results = []

def safe_get_agent(obj):
    if isinstance(obj, dict):
        return obj.get("A", {}), obj.get("B", {})
    elif isinstance(obj, list) and len(obj) >= 2:
        return obj[0], obj[1]
    return {}, {}

for jf in tqdm(json_files, desc="📂 Reading JSONL"):
    with open(os.path.join(JSON_DIR, jf), "r", encoding="utf-8") as f:
        for line in f:
            try:
                d = json.loads(line)
            except:
                continue

            topic_id = d.get("topic_id")
            agents = d.get("agents", [])
            A, B = safe_get_agent(agents)

            A_pref = A.get("pref")
            B_pref = B.get("pref")

            def find_pref_response(agent):
                cond = (
                    (controlled["topic_id"] == topic_id) &
                    (controlled["age"] == pd.to_numeric(agent.get("age"), errors="coerce")) &
                    (controlled["gender"] == agent.get("gender")) &
                    (controlled["region"] == agent.get("region")) &
                    (controlled["occupation"] == agent.get("occupation"))
                )
                row = controlled[cond]
                return str(row.iloc[0]["Preference_Response"]) if len(row) > 0 else None

            A_pref_response = find_pref_response(A)
            B_pref_response = find_pref_response(B)

            rounds = d.get("rounds", [])
            A_post = rounds[-1].get("A", "") if rounds else ""
            B_post = rounds[-1].get("B", "") if rounds else ""

            pre_score = get_qwen_agreement(A_pref_response or "", B_pref_response or "", context="preference")
            post_score = get_qwen_agreement(A_post, B_post, context="post-debate reflection")

            results.append({
                "topic_id": topic_id,
                "A_pref": A_pref,
                "B_pref": B_pref,
                "A_pref_response": A_pref_response,
                "B_pref_response": B_pref_response,
                "pre_agreement": pre_score,
                "post_agreement": post_score
            })

out_df = pd.DataFrame(results)
cols = [
    "topic_id", "A_pref", "B_pref",
    "A_pref_response", "B_pref_response",
    "pre_agreement", "post_agreement"
]
out_df = out_df[cols]

out_df.to_csv(OUT_CSV, index=False, encoding="utf-8-sig")
print(f"\n✅ Done! Saved to '{OUT_CSV}' ({len(out_df)} rows)")
out_df.head(10)

✅ Found 6 JSONL files: ['topic_1_llama3.1.jsonl', 'topic_2_llama3.1.jsonl', 'topic_3_llama3.1.jsonl', 'topic_4_llama3.1.jsonl', 'topic_5llama3.1.jsonl', 'topic_6_llama3.1.jsonl']
✅ controlled_agents_llama3.csv columns: ['topic_id', 'statement', 'age', 'gender', 'region', 'occupation', 'topic_preference', 'Preference_Response', 'Empathy_Response']


📂 Reading JSONL: 100%|███████████████████████████| 6/6 [02:18<00:00, 23.15s/it]


✅ Done! Saved to 'llama3_qwen_agreement_noid.csv' (90 rows)





Unnamed: 0,topic_id,A_pref,B_pref,A_pref_response,B_pref_response,pre_agreement,post_agreement
0,1,1,5,,,2,2
1,1,1,5,,,3,3
2,1,1,5,,,3,2
3,1,1,5,,,3,3
4,1,1,5,,,4,3
5,1,2,4,,,3,2
6,1,2,4,,,3,2
7,1,2,4,,,3,3
8,1,2,4,,,4,2
9,1,2,4,,,4,3


In [21]:
import pandas as pd

# 讀你的控制檔
controlled = pd.read_csv("controlled_agents_llama3.csv")
print("Controlled columns:", controlled.columns.tolist())

# 看 topic_id=1 的 agents 長什麼樣子
print("\n=== controlled_agents_llama3.csv (topic_id=1) ===")
print(controlled[controlled["topic_id"]==1][["age","gender","region","occupation","topic_preference"]].head(10))

Controlled columns: ['topic_id', 'statement', 'age', 'gender', 'region', 'occupation', 'topic_preference', 'Preference_Response', 'Empathy_Response']

=== controlled_agents_llama3.csv (topic_id=1) ===
   age  gender        region          occupation  topic_preference
0   44  Female         Italy        Psychologist                 1
1   25    Male  South Africa      Police officer                 1
2   28    Male         Japan       Social worker                 1
3   46    Male       Germany  University student                 1
4   36    Male   South Korea           Architect                 1
5   63    Male            UK          IT manager                 1
6   45  Female         Egypt       Civil servant                 2
7   63    Male        France      Police officer                 2
8   25    Male  South Africa      Police officer                 2
9   28    Male         Japan       Social worker                 2


In [22]:
import json

with open("topic_1_llama3.1.jsonl", "r", encoding="utf-8") as f:
    for i, line in enumerate(f):
        if i >= 1:
            break
        data = json.loads(line)
        print(json.dumps(data["agents"], indent=2))

[
  {
    "id": 3,
    "occupation": "University student",
    "region": "Germany",
    "pref": 1,
    "gender": "Male"
  },
  {
    "id": 24,
    "occupation": "Social worker",
    "region": "Singapore",
    "pref": 5,
    "gender": "Male"
  }
]


In [1]:
import os
import json
import re
import pandas as pd
import requests
from tqdm import tqdm

# === 基本設定 ===
JSON_DIR = "."
CONTROL_FILE = "controlled_agents_llama3.csv"
OUTPUT_FILE = "llama3_qwen_agreement_matched.csv"

OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "qwen2.5:7b"

# === Qwen 打分函式 ===
def get_qwen_agreement(text_a, text_b, context="preference"):
    prompt = f"""
You are an impartial evaluator.
Rate how much these two people agree with each other (1–5)
based on their {context} statements.

1 = totally disagree
5 = totally agree

A: {text_a}
B: {text_b}

Please output ONLY a single number (1–5).
"""
    try:
        r = requests.post(
            OLLAMA_URL,
            json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
            timeout=90
        )
        data = r.json()
        text = data.get("response", "")
        match = re.search(r"[1-5]", text)
        return int(match.group()) if match else None
    except Exception as e:
        print("⚠️ Error:", e)
        return None

# === 載入 CSV ===
controlled = pd.read_csv(CONTROL_FILE, encoding="utf-8")
json_files = sorted([f for f in os.listdir(JSON_DIR) if f.startswith("topic_") and f.endswith(".jsonl")])
print(f"✅ Found {len(json_files)} JSONL files:", json_files)

# === 建立配對函式 ===
def find_pref_response(agent, topic_id):
    region = str(agent.get("region", "")).strip().lower()
    occ = str(agent.get("occupation", "")).strip().lower()
    gender = str(agent.get("gender", "")).strip().lower()
    age = agent.get("age", None)
    pref = agent.get("pref", None)

    # 先用 region + occupation + gender + topic_id
    cond = (
        (controlled["topic_id"] == topic_id) &
        (controlled["region"].str.lower() == region) &
        (controlled["occupation"].str.lower() == occ) &
        (controlled["gender"].str.lower() == gender)
    )

    candidates = controlled[cond]

    # 如果 JSON 有 age，用來進一步過濾
    if age is not None and len(candidates) > 1:
        candidates = candidates[candidates["age"] == age]

    # 如果還是多筆，就選擇 topic_preference 最接近 pref 的
    if len(candidates) > 1 and pref is not None:
        candidates["pref_diff"] = abs(candidates["topic_preference"] - int(pref))
        candidates = candidates.sort_values(by="pref_diff").head(1)

    if len(candidates) > 0:
        return str(candidates.iloc[0]["Preference_Response"]), "exact"
    else:
        return None, "missing"

# === 主流程 ===
results = []

for jf in tqdm(json_files, desc="📂 Processing topics"):
    with open(os.path.join(JSON_DIR, jf), "r", encoding="utf-8") as f:
        for line in f:
            try:
                d = json.loads(line)
                topic_id = d.get("topic_id")
                agents = d.get("agents", [])
                if len(agents) < 2:
                    continue

                A = agents[0]
                B = agents[1]
                A_pref = A.get("pref")
                B_pref = B.get("pref")

                # 找出各自對應的 Preference_Response
                A_resp, A_src = find_pref_response(A, topic_id)
                B_resp, B_src = find_pref_response(B, topic_id)

                # 抓最後一輪作為 post reflection
                rounds = d.get("rounds", [])
                A_post = rounds[-1].get("A", "") if rounds else ""
                B_post = rounds[-1].get("B", "") if rounds else ""

                # 打 pre/post 分數
                pre_score = get_qwen_agreement(A_resp or "", B_resp or "", context="preference")
                post_score = get_qwen_agreement(A_post, B_post, context="post-debate reflection")

                results.append({
                    "topic_id": topic_id,
                    "A_pref": A_pref,
                    "B_pref": B_pref,
                    "A_region": A.get("region"),
                    "B_region": B.get("region"),
                    "A_occupation": A.get("occupation"),
                    "B_occupation": B.get("occupation"),
                    "A_pref_response": A_resp,
                    "B_pref_response": B_resp,
                    "A_match_source": A_src,
                    "B_match_source": B_src,
                    "pre_agreement": pre_score,
                    "post_agreement": post_score
                })

            except Exception as e:
                print(f"⚠️ Error in {jf}: {e}")
                continue

# === 輸出結果 ===
out_df = pd.DataFrame(results)
cols = [
    "topic_id", "A_pref", "B_pref",
    "A_region", "B_region",
    "A_occupation", "B_occupation",
    "A_pref_response", "B_pref_response",
    "A_match_source", "B_match_source",
    "pre_agreement", "post_agreement"
]
out_df = out_df[cols]
out_df.to_csv(OUTPUT_FILE, index=False, encoding="utf-8-sig")

print(f"\n✅ Done! Saved to '{OUTPUT_FILE}' ({len(out_df)} rows)")
out_df.head(10)

✅ Found 6 JSONL files: ['topic_1_llama3.1.jsonl', 'topic_2_llama3.1.jsonl', 'topic_3_llama3.1.jsonl', 'topic_4_llama3.1.jsonl', 'topic_5llama3.1.jsonl', 'topic_6_llama3.1.jsonl']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidates["pref_diff"] = abs(candidates["topic_preference"] - int(pref))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidates["pref_diff"] = abs(candidates["topic_preference"] - int(pref))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidates["pref_diff"] = abs(candidates["topic_preferenc


✅ Done! Saved to 'llama3_qwen_agreement_matched.csv' (90 rows)





Unnamed: 0,topic_id,A_pref,B_pref,A_region,B_region,A_occupation,B_occupation,A_pref_response,B_pref_response,A_match_source,B_match_source,pre_agreement,post_agreement
0,1,1,5,Germany,Singapore,University student,Social worker,"Reason 1: As a university student, I have witn...","Reason 1: As a social worker, I have witnessed...",exact,exact,1,2
1,1,1,5,Japan,Singapore,Social worker,Social worker,"Reason 1: In my experience as a social worker,...","Reason 1: As a social worker, I have witnessed...",exact,exact,4,3
2,1,1,5,South Africa,USA,Police officer,Musician,====================================\n\nReason...,"Reason 1: As a musician, I'm accustomed to a c...",exact,exact,4,1
3,1,1,5,UK,Germany,IT manager,Social worker,"Reason 1: As an IT manager, I'm deeply concern...","Reason 1: As a Social worker, I have seen firs...",exact,exact,1,3
4,1,1,5,Germany,Argentina,University student,Marketing manager,"Reason 1: As a university student, I have witn...","Reason 1: In Argentina, where I grew up, there...",exact,exact,1,3
5,1,2,4,South Africa,Kenya,Police officer,Data analyst,"Reason 1: As a Police officer, I've seen first...","Reason 1: As a Data analyst, I value data accu...",exact,exact,4,2
6,1,2,4,Singapore,Kenya,Social worker,Data analyst,"Reason 1: As a social worker, I've seen firsth...","Reason 1: As a Data analyst, I value data accu...",exact,exact,3,2
7,1,2,4,Egypt,Japan,Civil servant,Entrepreneur,"Reason 1: As a civil servant in Egypt, I am tr...","Reason 1: As a Japanese, I grew up with the co...",exact,exact,4,3
8,1,2,4,France,Kenya,Police officer,Data analyst,"Reason 1: As a Police officer, I believe in up...","Reason 1: As a Data analyst, I value data accu...",exact,exact,3,1
9,1,2,4,Egypt,Poland,Civil servant,Graphic designer,"Reason 1: As a civil servant in Egypt, I am tr...","Reason 1: As a graphic designer, I value indiv...",exact,exact,3,3


In [2]:
import os
import json
import re
import pandas as pd
import requests
import warnings
from tqdm import tqdm

warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)

# === 基本設定 ===
JSON_DIR = "."
CONTROL_FILE = "controlled_agents_llama3.csv"
OUTPUT_FILE = "llama3_qwen_agreement_final.csv"

OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "qwen2.5:7b"

# === Qwen 打分函式 ===
def get_qwen_agreement(text_a, text_b, context="preference"):
    prompt = f"""
You are an impartial evaluator.
Rate how much these two people agree with each other (1–5)
based on their {context} statements.

1 = totally disagree
5 = totally agree

A: {text_a}
B: {text_b}

Please output ONLY a single number (1–5).
"""
    try:
        r = requests.post(
            OLLAMA_URL,
            json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
            timeout=90
        )
        data = r.json()
        text = data.get("response", "")
        match = re.search(r"[1-5]", text)
        return int(match.group()) if match else None
    except Exception as e:
        print("⚠️ Error:", e)
        return None

# === 載入 CSV ===
controlled = pd.read_csv(CONTROL_FILE, encoding="utf-8")
json_files = sorted([f for f in os.listdir(JSON_DIR) if f.startswith("topic_") and f.endswith(".jsonl")])
print(f"✅ Found {len(json_files)} JSONL files:", json_files)

# === 對應函式 ===
def find_pref_response(agent, topic_id):
    region = str(agent.get("region", "")).strip().lower()
    occ = str(agent.get("occupation", "")).strip().lower()
    gender = str(agent.get("gender", "")).strip().lower()
    age = agent.get("age", None)
    pref = agent.get("pref", None)

    cond = (
        (controlled["topic_id"] == topic_id) &
        (controlled["region"].str.lower() == region) &
        (controlled["occupation"].str.lower() == occ) &
        (controlled["gender"].str.lower() == gender)
    )

    candidates = controlled[cond]

    if age is not None and len(candidates) > 1:
        candidates = candidates[candidates["age"] == age]

    if len(candidates) > 1 and pref is not None:
        candidates = candidates.copy()
        candidates["pref_diff"] = abs(candidates["topic_preference"] - int(pref))
        candidates = candidates.sort_values(by="pref_diff").head(1)

    if len(candidates) > 0:
        return str(candidates.iloc[0]["Preference_Response"]), "exact"
    else:
        return None, "missing"

# === 主流程 ===
results = []

for jf in tqdm(json_files, desc="📂 Processing topics"):
    with open(os.path.join(JSON_DIR, jf), "r", encoding="utf-8") as f:
        for line in f:
            try:
                d = json.loads(line)
                topic_id = d.get("topic_id")
                agents = d.get("agents", [])
                if len(agents) < 2:
                    continue

                A = agents[0]
                B = agents[1]
                A_pref = A.get("pref")
                B_pref = B.get("pref")

                A_resp, A_src = find_pref_response(A, topic_id)
                B_resp, B_src = find_pref_response(B, topic_id)

                rounds = d.get("rounds", [])
                A_post = rounds[-1].get("A", "") if rounds else ""
                B_post = rounds[-1].get("B", "") if rounds else ""

                pre_score = get_qwen_agreement(A_resp or "", B_resp or "", context="preference")
                post_score = get_qwen_agreement(A_post, B_post, context="post-debate reflection")
                delta = (post_score - pre_score) if (pre_score is not None and post_score is not None) else None

                results.append({
                    "topic_id": topic_id,
                    "A_pref": A_pref,
                    "B_pref": B_pref,
                    "A_region": A.get("region"),
                    "B_region": B.get("region"),
                    "A_occupation": A.get("occupation"),
                    "B_occupation": B.get("occupation"),
                    "A_pref_response": A_resp,
                    "B_pref_response": B_resp,
                    "A_match_source": A_src,
                    "B_match_source": B_src,
                    "pre_agreement": pre_score,
                    "post_agreement": post_score,
                    "Δagreement": delta
                })

            except Exception as e:
                print(f"⚠️ Error in {jf}: {e}")
                continue

# === 輸出結果 ===
out_df = pd.DataFrame(results)
cols = [
    "topic_id", "A_pref", "B_pref",
    "A_region", "B_region",
    "A_occupation", "B_occupation",
    "A_pref_response", "B_pref_response",
    "A_match_source", "B_match_source",
    "pre_agreement", "post_agreement", "Δagreement"
]
out_df = out_df[cols]
out_df.to_csv(OUTPUT_FILE, index=False, encoding="utf-8-sig")

print(f"\n✅ Done! Saved to '{OUTPUT_FILE}' ({len(out_df)} rows)")
out_df.head(10)

✅ Found 6 JSONL files: ['topic_1_llama3.1.jsonl', 'topic_2_llama3.1.jsonl', 'topic_3_llama3.1.jsonl', 'topic_4_llama3.1.jsonl', 'topic_5llama3.1.jsonl', 'topic_6_llama3.1.jsonl']


📂 Processing topics: 100%|███████████████████████| 6/6 [06:32<00:00, 65.44s/it]


✅ Done! Saved to 'llama3_qwen_agreement_final.csv' (90 rows)





Unnamed: 0,topic_id,A_pref,B_pref,A_region,B_region,A_occupation,B_occupation,A_pref_response,B_pref_response,A_match_source,B_match_source,pre_agreement,post_agreement,Δagreement
0,1,1,5,Germany,Singapore,University student,Social worker,"Reason 1: As a university student, I have witn...","Reason 1: As a social worker, I have witnessed...",exact,exact,1,1,0
1,1,1,5,Japan,Singapore,Social worker,Social worker,"Reason 1: In my experience as a social worker,...","Reason 1: As a social worker, I have witnessed...",exact,exact,5,3,-2
2,1,1,5,South Africa,USA,Police officer,Musician,====================================\n\nReason...,"Reason 1: As a musician, I'm accustomed to a c...",exact,exact,5,1,-4
3,1,1,5,UK,Germany,IT manager,Social worker,"Reason 1: As an IT manager, I'm deeply concern...","Reason 1: As a Social worker, I have seen firs...",exact,exact,1,3,2
4,1,1,5,Germany,Argentina,University student,Marketing manager,"Reason 1: As a university student, I have witn...","Reason 1: In Argentina, where I grew up, there...",exact,exact,1,3,2
5,1,2,4,South Africa,Kenya,Police officer,Data analyst,"Reason 1: As a Police officer, I've seen first...","Reason 1: As a Data analyst, I value data accu...",exact,exact,4,1,-3
6,1,2,4,Singapore,Kenya,Social worker,Data analyst,"Reason 1: As a social worker, I've seen firsth...","Reason 1: As a Data analyst, I value data accu...",exact,exact,3,2,-1
7,1,2,4,Egypt,Japan,Civil servant,Entrepreneur,"Reason 1: As a civil servant in Egypt, I am tr...","Reason 1: As a Japanese, I grew up with the co...",exact,exact,4,4,0
8,1,2,4,France,Kenya,Police officer,Data analyst,"Reason 1: As a Police officer, I believe in up...","Reason 1: As a Data analyst, I value data accu...",exact,exact,2,1,-1
9,1,2,4,Egypt,Poland,Civil servant,Graphic designer,"Reason 1: As a civil servant in Egypt, I am tr...","Reason 1: As a graphic designer, I value indiv...",exact,exact,3,2,-1
