In [25]:
import os
import time

import pandas as pd
import requests

# === Config ===
# URL that call_ollama POSTs every generation request to.
API_URL = "http://localhost:11434/api/generate"
# Model tag sent in each request body; also the default `model` of call_ollama.
MODEL_NAME = "llama3:instruct"
# CSV that generate_agents loads on start (to resume) and rewrites on every save.
OUTPUT_FILE = "agents_llama3.csv"

# === Load data ===
# Persona table; run_agent reads the age, gender, region and occupation columns.
backgrounds = pd.read_csv("agent_backgrounds.csv")
# Topic table; the topic_id and statement columns are read downstream.
topics = pd.read_csv("topics.csv", encoding="utf-8-sig")  # utf-8-sig drops a leading BOM if present

# === Utility ===
def call_ollama(prompt, model=MODEL_NAME, stream=False, max_retries=3, timeout=300):
    """Send prompt to local Ollama and return plain text response.

    Args:
        prompt: Text placed in the "prompt" field of the request body.
        model: Value of the "model" field; defaults to MODEL_NAME.
        stream: Value of the "stream" field. The reply is still parsed with
            a single res.json() call.
        max_retries: Number of attempts before giving up; at least one
            attempt is always made.
        timeout: Seconds handed to requests as the request timeout, so a
            stalled server cannot block the whole batch indefinitely.

    Returns:
        The "response" value of the reply, falling back to a nested
        data["response"] and finally to str() of the whole payload. When
        every attempt fails, the string "[Error: <last exception>]" is
        returned instead of raising.
    """
    payload = {"model": model, "prompt": prompt, "stream": stream}
    attempts = max(1, max_retries)
    last_error = None

    for attempt in range(attempts):
        try:
            res = requests.post(API_URL, json=payload, timeout=timeout)
            # Treat HTTP 4xx/5xx as a failure rather than parsing the error
            # body and handing it back as if it were a model reply.
            res.raise_for_status()
            data = res.json()
        except Exception as e:  # best-effort: one bad call must not abort a long batch
            last_error = e
            print(f"[Retry {attempt+1}/{attempts}] Error calling model: {e}")
            if attempt + 1 < attempts:
                time.sleep(2)  # short pause before the next attempt
            continue

        if not isinstance(data, dict):
            return str(data)
        # Guard the nested lookup: a non-dict "data" value has no .get().
        nested = data.get("data")
        nested_text = nested.get("response", "") if isinstance(nested, dict) else ""
        return data.get("response") or nested_text or str(data)

    return f"[Error: {last_error}]"

# === Main logic ===
def run_agent(bg, topic_row):
    """Interview one persona about one topic using two model calls.

    Args:
        bg: Mapping-like persona row providing "age", "gender", "region"
            and "occupation".
        topic_row: Mapping-like topic row providing "topic_id" and
            "statement".

    Returns:
        A dict with the topic id, the statement, the four persona fields and
        the two stripped model replies under "Preference_Response" and
        "Empathy_Response".
    """
    persona = (
        f"You are {bg['age']} years old, {bg['gender']}, from {bg['region']}, "
        f"working as a {bg['occupation']}."
    )
    claim = topic_row["statement"]

    # Step 1 — stance on the statement: three reasons plus a 1-5 score.
    # Two lines deliberately end with a space; the prompt text is unchanged.
    stance_prompt = "\n".join([
        persona,
        f'Statement: "{claim}"',
        "As this specific person, explain your personal position toward the statement with EXACTLY three concise reasons (2–3 sentences total). ",
        "Base your opinion on your background, job, and cultural context.",
        "Then rate how much you agree or disagree with it on a scale from 1 to 5 ",
        "(1 = strongly disagree, 3 = neutral or mixed feelings, 5 = strongly agree),",
        'and report this as your "Preference score".',
        "Format:",
        "Reason 1: ...",
        "Reason 2: ...",
        "Reason 3: ...",
        "Preference score: #",
    ])
    stance_reply = call_ollama(stance_prompt).strip()

    # Step 2 — five-item empathy self-rating for the same persona.
    empathy_prompt = "\n".join([
        persona,
        "Now rate yourself 1–5 for each of the following:",
        "E1. I try to understand other people’s perspectives when we disagree.",
        "E2. I can notice when someone feels uncomfortable even if unspoken.",
        "E3. I respond politely and thoughtfully to others’ feelings.",
        "E4. I consider how my words may affect others before speaking.",
        "E5. I feel motivated to help others when they’re struggling.",
        "Output format:",
        "E1: #",
        "E2: #",
        "E3: #",
        "E4: #",
        "E5: #",
    ])
    empathy_reply = call_ollama(empathy_prompt).strip()

    return dict(
        topic_id=topic_row["topic_id"],
        statement=claim,
        age=bg["age"],
        gender=bg["gender"],
        region=bg["region"],
        occupation=bg["occupation"],
        Preference_Response=stance_reply,
        Empathy_Response=empathy_reply,
    )

# === Generate agents with resume ===
def _is_failed_record(record):
    """Return True if either reply in the record is a call_ollama "[Error:" marker."""
    return any(
        str(record[key]).startswith("[Error:")
        for key in ("Preference_Response", "Empathy_Response")
    )


def _save_progress(df_existing, new_rows):
    """Append new_rows to df_existing, write OUTPUT_FILE atomically, return the result."""
    batch = pd.DataFrame(new_rows)
    # Concatenating onto a zero-row frame is deprecated in recent pandas and
    # turns every column into object dtype; start from the batch instead.
    if df_existing.empty:
        combined = batch
    else:
        combined = pd.concat([df_existing, batch], ignore_index=True)

    # Write to a sibling temp file and swap it in, so an interrupt during the
    # write cannot leave a truncated OUTPUT_FILE that breaks the next resume.
    tmp_path = f"{OUTPUT_FILE}.tmp"
    combined.to_csv(tmp_path, index=False)
    os.replace(tmp_path, OUTPUT_FILE)
    return combined


def generate_agents(topic_agent_counts):
    """Generate agents with resume support.

    Rows already present in OUTPUT_FILE count toward each topic's target, so
    an interrupted run continues where it stopped. Progress is written after
    every 5 newly generated agents and at the end of each topic.

    Args:
        topic_agent_counts: Mapping of topic_id to the number of agents
            wanted for that topic; topics that are missing or have a
            non-positive count are skipped.

    Returns:
        DataFrame holding the previously saved rows plus the rows generated
        by this call. Agents whose model call failed are not included (and
        not saved), so a later run generates them again.
    """
    # Load existing results if available
    if os.path.exists(OUTPUT_FILE):
        df_existing = pd.read_csv(OUTPUT_FILE)
        print(f"🔄 Loaded existing {len(df_existing)} records from {OUTPUT_FILE}")
    else:
        # Empty frame with all output columns so the per-topic count below works
        df_existing = pd.DataFrame(columns=[
            "topic_id", "statement", "age", "gender", "region", "occupation",
            "Preference_Response", "Empathy_Response"
        ])
        print("🆕 No existing file found — starting fresh.")

    new_rows = []

    for _, topic_row in topics.iterrows():
        topic_id = topic_row["topic_id"]
        n_agents = topic_agent_counts.get(topic_id, 0)
        if n_agents <= 0:
            continue

        # Count how many already exist for this topic
        done_count = int((df_existing["topic_id"] == topic_id).sum())
        if done_count >= n_agents:
            print(f"✅ Topic {topic_id} already has {done_count}/{n_agents} agents — skipping.")
            continue

        print(f"\n--- Generating {n_agents - done_count} remaining agents for: {topic_row['statement']} ---")

        for i in range(done_count, n_agents):
            bg = backgrounds.sample(n=1).iloc[0]
            record = run_agent(bg, topic_row)

            if _is_failed_record(record):
                # Saving an error marker would count it as a finished agent on
                # resume and leave it in the dataset; drop it instead.
                print(f"✗ Agent {i+1}/{n_agents} failed for topic {topic_id} — not saved, rerun to retry.")
            else:
                new_rows.append(record)
                print(f"✓ Agent {i+1}/{n_agents} done for topic {topic_id}")

            # Save progress every 5 agents or at the end of the topic
            is_last_for_topic = i == n_agents - 1
            if new_rows and (len(new_rows) % 5 == 0 or is_last_for_topic):
                df_existing = _save_progress(df_existing, new_rows)
                new_rows = []
                print(f"💾 Progress saved ({len(df_existing)} total records).")

            time.sleep(0.5)

    return df_existing

# === Run ===
# Ask for 30 agents on each of topic ids 1 through 6, then report the total.
topic_agent_counts = {topic_id: 30 for topic_id in range(1, 7)}
df_final = generate_agents(topic_agent_counts)
print(f"\n✅ All done. Total {len(df_final)} records saved to {OUTPUT_FILE}.")

🔄 Loaded existing 110 records from agents_llama3.csv
✅ Topic 1 already has 30/30 agents — skipping.
✅ Topic 2 already has 30/30 agents — skipping.
✅ Topic 3 already has 30/30 agents — skipping.

--- Generating 10 remaining agents for: Seat belt use can save lives in car accidents ---
✓ Agent 21/30 done for topic 4
✓ Agent 22/30 done for topic 4
✓ Agent 23/30 done for topic 4
✓ Agent 24/30 done for topic 4
✓ Agent 25/30 done for topic 4
💾 Progress saved (115 total records).
✓ Agent 26/30 done for topic 4
✓ Agent 27/30 done for topic 4
✓ Agent 28/30 done for topic 4
✓ Agent 29/30 done for topic 4
✓ Agent 30/30 done for topic 4
💾 Progress saved (120 total records).

--- Generating 30 remaining agents for: Every child should have access to education ---
✓ Agent 1/30 done for topic 5
✓ Agent 2/30 done for topic 5
✓ Agent 3/30 done for topic 5
✓ Agent 4/30 done for topic 5
✓ Agent 5/30 done for topic 5
💾 Progress saved (125 total records).
✓ Agent 6/30 done for topic 5
✓ Agent 7/30 done for 

In [27]:
import pandas as pd
import requests
import time
import os

# === Config ===
# URL that call_ollama POSTs every generation request to.
API_URL = "http://localhost:11434/api/generate"
# Model tag sent in each request body; also the default `model` of call_ollama.
MODEL_NAME = "qwen2.5:7b"
# CSV that generate_agents loads on start (to resume) and rewrites on every save.
OUTPUT_FILE = "agents_qwen.csv"

# === Load data ===
# Persona table; run_agent reads the age, gender, region and occupation columns.
backgrounds = pd.read_csv("agent_backgrounds.csv")
# Topic table; the topic_id and statement columns are read downstream.
topics = pd.read_csv("topics.csv", encoding="utf-8-sig")  # handle BOM safely

# === Utility ===
def call_ollama(prompt, model=MODEL_NAME, stream=False, max_retries=3, timeout=300):
    """Send prompt to local Ollama and return plain text response.

    Args:
        prompt: Text placed in the "prompt" field of the request body.
        model: Value of the "model" field; defaults to MODEL_NAME.
        stream: Value of the "stream" field. The reply is still parsed with
            a single res.json() call.
        max_retries: Number of attempts made before giving up.
        timeout: Seconds handed to requests as the request timeout, so a
            stalled server cannot block the whole batch indefinitely.

    Returns:
        The "response" value of the reply, falling back to a nested
        data["response"] and finally to str() of the whole payload. When no
        attempt succeeds, "[Error: Model failed after retries]" is returned
        instead of raising.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": stream,
        "options": {"temperature": 0.7, "top_p": 0.9},
    }

    for attempt in range(max_retries):
        try:
            res = requests.post(API_URL, json=payload, timeout=timeout)
            res.raise_for_status()
            data = res.json()
        except Exception as e:  # best-effort: one bad call must not abort a long batch
            print(f"[Retry {attempt+1}/{max_retries}] Error calling model: {e}")
            # Pause only when another attempt follows; sleeping after the
            # final failure just delays the error result.
            if attempt + 1 < max_retries:
                time.sleep(2)
            continue

        if not isinstance(data, dict):
            return str(data)
        # Guard the nested lookup: a non-dict "data" value has no .get(), and
        # retrying cannot change the shape of the payload.
        nested = data.get("data")
        nested_text = nested.get("response", "") if isinstance(nested, dict) else ""
        return data.get("response") or nested_text or str(data)

    return "[Error: Model failed after retries]"

# === Main logic ===
def run_agent(bg, topic_row):
    """Interview one persona about one topic using two model calls.

    Args:
        bg: Mapping-like persona row providing "age", "gender", "region"
            and "occupation".
        topic_row: Mapping-like topic row providing "topic_id" and
            "statement".

    Returns:
        A dict with the topic id, the statement, the four persona fields and
        the two stripped model replies under "Preference_Response" and
        "Empathy_Response".
    """
    persona = (
        f"You are {bg['age']} years old, {bg['gender']}, from {bg['region']}, "
        f"working as a {bg['occupation']}."
    )
    claim = topic_row["statement"]

    # Step 1 — stance on the statement: three reasons plus a 1-5 score.
    # Two lines deliberately end with a space; the prompt text is unchanged.
    stance_lines = [
        persona,
        f'Statement: "{claim}"',
        "As this specific person, explain your personal position toward the statement with EXACTLY three concise reasons (2–3 sentences total). ",
        "Base your opinion on your background, job, and cultural context.",
        "Then rate how much you agree or disagree with it on a scale from 1 to 5 ",
        "(1 = strongly disagree, 3 = neutral or mixed feelings, 5 = strongly agree),",
        'and report this as your "Preference score".',
        "Format:",
        "Reason 1: ...",
        "Reason 2: ...",
        "Reason 3: ...",
        "Preference score: #",
    ]
    stance_reply = call_ollama("\n".join(stance_lines)).strip()

    # Step 2 — five-item empathy self-rating for the same persona.
    empathy_lines = [
        persona,
        "Now rate yourself 1–5 for each of the following:",
        "E1. I try to understand other people’s perspectives when we disagree.",
        "E2. I can notice when someone feels uncomfortable even if unspoken.",
        "E3. I respond politely and thoughtfully to others’ feelings.",
        "E4. I consider how my words may affect others before speaking.",
        "E5. I feel motivated to help others when they’re struggling.",
        "Output format:",
        "E1: #",
        "E2: #",
        "E3: #",
        "E4: #",
        "E5: #",
    ]
    empathy_reply = call_ollama("\n".join(empathy_lines)).strip()

    return dict(
        topic_id=topic_row["topic_id"],
        statement=claim,
        age=bg["age"],
        gender=bg["gender"],
        region=bg["region"],
        occupation=bg["occupation"],
        Preference_Response=stance_reply,
        Empathy_Response=empathy_reply,
    )

# === Controlled generation ===
def _is_failed_record(record):
    """Return True if either reply in the record is a call_ollama "[Error:" marker."""
    return any(
        str(record[key]).startswith("[Error:")
        for key in ("Preference_Response", "Empathy_Response")
    )


def _save_progress(df_existing, new_rows):
    """Append new_rows to df_existing, write OUTPUT_FILE atomically, return the result."""
    batch = pd.DataFrame(new_rows)
    # Concatenating onto a zero-row frame is deprecated in recent pandas and
    # turns every column into object dtype; start from the batch instead.
    if df_existing.empty:
        combined = batch
    else:
        combined = pd.concat([df_existing, batch], ignore_index=True)

    # Write to a sibling temp file and swap it in, so an interrupt during the
    # write cannot leave a truncated OUTPUT_FILE that breaks the next resume.
    tmp_path = f"{OUTPUT_FILE}.tmp"
    combined.to_csv(tmp_path, index=False)
    os.replace(tmp_path, OUTPUT_FILE)
    return combined


def generate_agents(topic_agent_counts):
    """Generate agents with progress saving.

    Rows already present in OUTPUT_FILE count toward each topic's target, so
    an interrupted run continues where it stopped. Progress is written after
    every 5 newly generated agents and at the end of each topic.

    Args:
        topic_agent_counts: Mapping of topic_id to the number of agents
            wanted for that topic; topics that are missing or have a
            non-positive count are skipped.

    Returns:
        DataFrame holding the previously saved rows plus the rows generated
        by this call. Agents whose model call failed are not included (and
        not saved), so a later run generates them again.
    """
    if os.path.exists(OUTPUT_FILE):
        df_existing = pd.read_csv(OUTPUT_FILE)
        print(f"🔄 Loaded existing {len(df_existing)} records from {OUTPUT_FILE}")
    else:
        # Empty frame with all output columns so the per-topic count below works
        df_existing = pd.DataFrame(columns=[
            "topic_id", "statement", "age", "gender", "region", "occupation",
            "Preference_Response", "Empathy_Response"
        ])
        print("🆕 No existing file found — starting fresh.")

    new_rows = []

    for _, topic_row in topics.iterrows():
        topic_id = topic_row["topic_id"]
        n_agents = topic_agent_counts.get(topic_id, 0)
        if n_agents <= 0:
            continue

        # Rows already saved for this topic count toward its target
        done_count = int((df_existing["topic_id"] == topic_id).sum())
        if done_count >= n_agents:
            print(f"✅ Topic {topic_id} already has {done_count}/{n_agents} agents — skipping.")
            continue

        print(f"\n--- Generating {n_agents - done_count} remaining agents for: {topic_row['statement']} ---")

        for i in range(done_count, n_agents):
            bg = backgrounds.sample(n=1).iloc[0]
            record = run_agent(bg, topic_row)

            if _is_failed_record(record):
                # Saving an error marker would count it as a finished agent on
                # resume and leave it in the dataset; drop it instead.
                print(f"✗ Agent {i+1}/{n_agents} failed for topic {topic_id} — not saved, rerun to retry.")
            else:
                new_rows.append(record)
                print(f"✓ Agent {i+1}/{n_agents} done for topic {topic_id}")

            # save progress every 5 agents or at the end of the topic
            is_last_for_topic = i == n_agents - 1
            if new_rows and (len(new_rows) % 5 == 0 or is_last_for_topic):
                df_existing = _save_progress(df_existing, new_rows)
                new_rows = []
                print(f"💾 Progress saved ({len(df_existing)} total records).")

            time.sleep(0.5)

    return df_existing

# === Run ===
# Ask for 30 agents on each of topic ids 1 through 6, then report the total.
topic_agent_counts = dict.fromkeys(range(1, 7), 30)
df_final = generate_agents(topic_agent_counts)
print(f"\n✅ All done. Total {len(df_final)} records saved to {OUTPUT_FILE}.")

🆕 No existing file found — starting fresh.

--- Generating 30 remaining agents for: Citizen privacy takes precedence over national security ---
✓ Agent 1/30 done for topic 1
✓ Agent 2/30 done for topic 1
✓ Agent 3/30 done for topic 1
✓ Agent 4/30 done for topic 1
✓ Agent 5/30 done for topic 1
💾 Progress saved (5 total records).
✓ Agent 6/30 done for topic 1
✓ Agent 7/30 done for topic 1
✓ Agent 8/30 done for topic 1
✓ Agent 9/30 done for topic 1
✓ Agent 10/30 done for topic 1
💾 Progress saved (10 total records).
✓ Agent 11/30 done for topic 1
✓ Agent 12/30 done for topic 1
✓ Agent 13/30 done for topic 1
✓ Agent 14/30 done for topic 1
✓ Agent 15/30 done for topic 1
💾 Progress saved (15 total records).
✓ Agent 16/30 done for topic 1
✓ Agent 17/30 done for topic 1
✓ Agent 18/30 done for topic 1
✓ Agent 19/30 done for topic 1
✓ Agent 20/30 done for topic 1
💾 Progress saved (20 total records).
✓ Agent 21/30 done for topic 1
✓ Agent 22/30 done for topic 1
✓ Agent 23/30 done for topic 1
✓ Age