In [1]:
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))
    print("CUDA version:", torch.version.cuda)
    print("Torch version:", torch.__version__)

CUDA available: True
GPU name: NVIDIA A100 80GB PCIe
CUDA version: 12.8
Torch version: 2.8.0+cu128


In [2]:
%%capture
%pip install vllm # note that vllm also installs many dependencies such as transformers, torch, pydantic etc.
%pip install seaborn

In [3]:
!pip install --upgrade huggingface_hub transformers accelerate safetensors sentencepiece

Defaulting to user installation because normal site-packages is not writeable


In [None]:
from huggingface_hub import login
login("token")

In [2]:
from huggingface_hub import whoami
print(whoami())

{'type': 'user', 'id': '67dfef658fce6274d6a0fe73', 'name': 'Nanchen1', 'fullname': 'Chen', 'isPro': False, 'avatarUrl': 'https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/s-Scr2y72r-rSTNBYO6wc.png', 'orgs': [], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'llm1017', 'role': 'fineGrained', 'createdAt': '2025-10-17T15:42:28.210Z', 'fineGrained': {'canReadGatedRepos': True, 'global': [], 'scoped': [{'entity': {'_id': '67dfef658fce6274d6a0fe73', 'type': 'user', 'name': 'Nanchen1'}, 'permissions': ['repo.content.read', 'repo.write']}]}}}}


In [7]:
import pandas as pd
import random

N = 30  # Target number of background profiles to generate

# Gender pool
genders = ["Male", "Female"]

# Region pool grouped by continents (for diversity)
regions = {
    "Europe": ["Germany", "UK", "France", "Italy", "Spain", "Sweden", "Poland"],
    "Asia": ["China", "Japan", "India", "South Korea", "Singapore", "Indonesia"],
    "Americas": ["USA", "Canada", "Brazil", "Mexico", "Argentina"],
    "Africa": ["Nigeria", "Egypt", "South Africa", "Kenya"],
    "Oceania": ["Australia", "New Zealand"]
}

# Occupation pool grouped by broad categories
occupations = {
    "Education": ["Teacher", "University student", "Researcher", "Librarian"],
    "Healthcare": ["Doctor", "Nurse", "Psychologist", "Pharmacist"],
    "Technology": ["Software engineer", "Data analyst", "IT manager", "UX designer"],
    "Business": ["Entrepreneur", "Accountant", "Marketing manager", "Salesperson"],
    "Creative": ["Writer", "Graphic designer", "Musician", "Architect"],
    "Public Service": ["Civil servant", "Social worker", "Lawyer", "Police officer"]
}

# Function to generate a random age with weighted distribution
def random_age():
    """
    Randomly generate an age with a realistic weighted distribution:
    - 20–30 years old: 30% (young adults)
    - 31–50 years old: 50% (middle-aged adults)
    - 51–65 years old: 20% (older adults)
    """
    age_groups = [(20, 30, 0.3), (31, 50, 0.5), (51, 65, 0.2)]
    group = random.choices(age_groups, weights=[g[2] for g in age_groups])[0]
    return random.randint(group[0], group[1])

# Function to generate one random background profile
def random_background(i):
    """
    Randomly sample demographic attributes from predefined pools.
    Returns a dictionary containing:
    - Age
    - Gender
    - Region
    - Occupation
    - Continent
    - Occupation category
    """
    gender = random.choice(genders)
    continent = random.choice(list(regions.keys()))
    region = random.choice(regions[continent])
    occ_category = random.choice(list(occupations.keys()))
    occupation = random.choice(occupations[occ_category])
    age = random_age()

    return {
        "id": i + 1,
        "age": age,
        "gender": gender,
        "region": region,
        "occupation": occupation,
        "continent": continent,
        "occupation_category": occ_category
    }

# Generate N random profiles
backgrounds = [random_background(i) for i in range(N)]
df = pd.DataFrame(backgrounds)

# Remove duplicates (if any)
df = df.drop_duplicates(subset=["age", "gender", "region", "occupation"])
df.to_csv("agent_backgrounds.csv", index=False)
print(f" Saved {len(df)} background profiles to agent_backgrounds.csv")
print(df.head(10))

 Saved 30 background profiles to agent_backgrounds.csv
   id  age  gender        region        occupation continent  \
0   1   60  Female     Singapore     Social worker      Asia   
1   2   50  Female        Canada        Accountant  Americas   
2   3   46    Male  South Africa  Graphic designer    Africa   
3   4   48    Male   New Zealand    Police officer   Oceania   
4   5   24  Female   New Zealand           Teacher   Oceania   
5   6   30  Female        France  Graphic designer    Europe   
6   7   47  Female         Egypt          Musician    Africa   
7   8   25  Female       Nigeria      Psychologist    Africa   
8   9   35    Male        Brazil            Doctor  Americas   
9  10   65    Male   New Zealand           Teacher   Oceania   

  occupation_category  
0      Public Service  
1            Business  
2            Creative  
3      Public Service  
4           Education  
5            Creative  
6            Creative  
7          Healthcare  
8          Healthcare  


In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "meta-llama/Llama-3.1-8B-Instruct"

llama_tokenizer = AutoTokenizer.from_pretrained(model_id)
llama_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)
llama_tokenizer.pad_token = llama_tokenizer.eos_token

2025-10-18 20:33:23.032353: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-18 20:33:29.966486: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-18 20:33:47.947070: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:
import pandas as pd
import time
import torch
from transformers import pipeline

# === Use preloaded model ===
generator = pipeline(
    "text-generation",
    model=llama_model,        # already loaded
    tokenizer=llama_tokenizer,
    device_map="auto"
)

# === Config ===
OUTPUT_FILE = "controlled_agents_llama3.csv"
backgrounds = pd.read_csv("agent_backgrounds.csv")
topics = pd.read_csv("topics.csv")

# === Inference ===
def call_llama(prompt, max_new_tokens=512):
    """Generate text with LLaMA."""
    outputs = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )
    text = outputs[0]["generated_text"]
    return text[len(prompt):].strip()

# === Single agent generation ===
def run_agent(bg, topic_row, pref_level):
    """Generate one agent with a fixed preference level."""
    identity = f"You are {bg['age']} years old, {bg['gender']}, from {bg['region']}, working as a {bg['occupation']}."
    statement = topic_row["statement"]

    # Step 1 — preference reasoning
    prompt1 = f"""{identity}
Statement: "{statement}"
As this specific person, explain your personal position toward the statement with EXACTLY three concise reasons (2–3 sentences total).
Base your opinion on your background, job, and cultural context.

Your stance is fixed at a Preference score of {pref_level} on a scale of 1–5:
1 = strongly disagree,
3 = neutral or mixed feelings,
5 = strongly agree.

Write your reasoning consistently with this stance.
Then explicitly report your Preference score at the end.
Format:
Reason 1: ...
Reason 2: ...
Reason 3: ...
Preference score: {pref_level}"""
    response1 = call_llama(prompt1)

    # Step 2 — empathy assessment
    prompt2 = f"""{identity}
Now rate yourself 1–5 for each of the following:
E1. I try to understand other people’s perspectives when we disagree.
E2. I can notice when someone feels uncomfortable even if unspoken.
E3. I respond politely and thoughtfully to others’ feelings.
E4. I consider how my words may affect others before speaking.
E5. I feel motivated to help others when they’re struggling.
Output format:
E1: #
E2: #
E3: #
E4: #
E5: #"""
    response2 = call_llama(prompt2)

    return {
        "topic_id": topic_row["topic_id"],
        "statement": statement,
        "age": bg["age"],
        "gender": bg["gender"],
        "region": bg["region"],
        "occupation": bg["occupation"],
        "topic_preference": pref_level,
        "Preference_Response": response1,
        "Empathy_Response": response2
    }

# === Controlled generation ===
def generate_controlled_agents(n_per_level=2):
    """Generate agents with 5 preference levels × n_per_level per level."""
    results = []
    for _, topic_row in topics.iterrows():
        topic_id = topic_row["topic_id"]
        print(f"\n=== Topic {topic_id}: {topic_row['statement']} ===")

        for pref_level in [1, 2, 3, 4, 5]:
            print(f"--- Preference {pref_level} ---")
            for i in range(n_per_level):
                bg = backgrounds.sample(n=1).iloc[0]
                record = run_agent(bg, topic_row, pref_level)
                results.append(record)
                print(f"✓ Agent {i+1}/{n_per_level} done (pref={pref_level})")
                time.sleep(0.3)

    df = pd.DataFrame(results)
    df.to_csv(OUTPUT_FILE, index=False)
    print(f"\n✅ Saved {len(df)} records to {OUTPUT_FILE}.")
    return df

# === Run ===
df_final = generate_controlled_agents(n_per_level=6)

Device set to use cuda:0



=== Topic 1: Citizen privacy takes precedence over national security ===
--- Preference 1 ---
✓ Agent 1/6 done (pref=1)
✓ Agent 2/6 done (pref=1)
✓ Agent 3/6 done (pref=1)
✓ Agent 4/6 done (pref=1)
✓ Agent 5/6 done (pref=1)
✓ Agent 6/6 done (pref=1)
--- Preference 2 ---
✓ Agent 1/6 done (pref=2)
✓ Agent 2/6 done (pref=2)
✓ Agent 3/6 done (pref=2)
✓ Agent 4/6 done (pref=2)
✓ Agent 5/6 done (pref=2)
✓ Agent 6/6 done (pref=2)
--- Preference 3 ---
✓ Agent 1/6 done (pref=3)
✓ Agent 2/6 done (pref=3)
✓ Agent 3/6 done (pref=3)
✓ Agent 4/6 done (pref=3)
✓ Agent 5/6 done (pref=3)
✓ Agent 6/6 done (pref=3)
--- Preference 4 ---
✓ Agent 1/6 done (pref=4)
✓ Agent 2/6 done (pref=4)
✓ Agent 3/6 done (pref=4)
✓ Agent 4/6 done (pref=4)
✓ Agent 5/6 done (pref=4)
✓ Agent 6/6 done (pref=4)
--- Preference 5 ---
✓ Agent 1/6 done (pref=5)
✓ Agent 2/6 done (pref=5)
✓ Agent 3/6 done (pref=5)
✓ Agent 4/6 done (pref=5)
✓ Agent 5/6 done (pref=5)
✓ Agent 6/6 done (pref=5)

=== Topic 2: MMR vaccine causes autism =

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# === Model Config ===
model_id = "Qwen/Qwen2.5-7B-Instruct"  

# === Load Qwen ===
qwen_tokenizer = AutoTokenizer.from_pretrained(model_id)
qwen_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)

# === Padding fix ===
if qwen_tokenizer.pad_token_id is None:
    qwen_tokenizer.pad_token = qwen_tokenizer.eos_token

print("✅ Qwen model loaded successfully.")

2025-10-18 17:23:18.669561: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-18 17:23:25.723906: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-18 17:23:43.338198: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✅ Qwen model loaded successfully.


In [4]:
import pandas as pd
import time
import torch
from transformers import pipeline

# === Use preloaded Qwen model ===
generator = pipeline(
    "text-generation",
    model=qwen_model,         
    tokenizer=qwen_tokenizer, 
    device_map="auto"
)

# === Config ===
OUTPUT_FILE = "controlled_agents_qwen.csv"
backgrounds = pd.read_csv("agent_backgrounds.csv")
topics = pd.read_csv("topics.csv")

# === Inference ===
def call_qwen(prompt, max_new_tokens=512):
    """Generate text with preloaded Qwen."""
    outputs = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )
    text = outputs[0]["generated_text"]
    return text[len(prompt):].strip()

# === Single agent generation ===
def run_agent(bg, topic_row, pref_level):
    """Generate one agent with a fixed preference level."""
    identity = f"You are {bg['age']} years old, {bg['gender']}, from {bg['region']}, working as a {bg['occupation']}."
    statement = topic_row["statement"]

    # Step 1 — preference reasoning
    prompt1 = f"""{identity}
Statement: "{statement}"
As this specific person, explain your personal position toward the statement with EXACTLY three concise reasons (2–3 sentences total).
Base your opinion on your background, job, and cultural context.

Your stance is fixed at a Preference score of {pref_level} on a scale of 1–5:
1 = strongly disagree,
3 = neutral or mixed feelings,
5 = strongly agree.

Write your reasoning consistently with this stance.
Then explicitly report your Preference score at the end.
Format:
Reason 1: ...
Reason 2: ...
Reason 3: ...
Preference score: {pref_level}"""
    response1 = call_qwen(prompt1)

    # Step 2 — empathy assessment
    prompt2 = f"""{identity}
Now rate yourself 1–5 for each of the following:
E1. I try to understand other people’s perspectives when we disagree.
E2. I can notice when someone feels uncomfortable even if unspoken.
E3. I respond politely and thoughtfully to others’ feelings.
E4. I consider how my words may affect others before speaking.
E5. I feel motivated to help others when they’re struggling.
Output format:
E1: #
E2: #
E3: #
E4: #
E5: #"""
    response2 = call_qwen(prompt2)

    return {
        "topic_id": topic_row["topic_id"],
        "statement": statement,
        "age": bg["age"],
        "gender": bg["gender"],
        "region": bg["region"],
        "occupation": bg["occupation"],
        "topic_preference": pref_level,
        "Preference_Response": response1,
        "Empathy_Response": response2
    }

# === Controlled generation ===
def generate_controlled_agents(n_per_level=6):
    """Generate agents with 5 preference levels × n_per_level per level."""
    results = []
    for _, topic_row in topics.iterrows():
        topic_id = topic_row["topic_id"]
        print(f"\n=== Topic {topic_id}: {topic_row['statement']} ===")

        for pref_level in [1, 2, 3, 4, 5]:
            print(f"--- Preference {pref_level} ---")
            for i in range(n_per_level):
                bg = backgrounds.sample(n=1).iloc[0]
                record = run_agent(bg, topic_row, pref_level)
                results.append(record)
                print(f"✓ Agent {i+1}/{n_per_level} done (pref={pref_level})")
                time.sleep(0.3)

    df = pd.DataFrame(results)
    df.to_csv(OUTPUT_FILE, index=False)
    print(f"\n✅ Saved {len(df)} records to {OUTPUT_FILE}.")
    return df

# === Run ===
df_final = generate_controlled_agents(n_per_level=6)

Device set to use cuda:0



=== Topic 1: Citizen privacy takes precedence over national security ===
--- Preference 1 ---
✓ Agent 1/6 done (pref=1)
✓ Agent 2/6 done (pref=1)
✓ Agent 3/6 done (pref=1)
✓ Agent 4/6 done (pref=1)
✓ Agent 5/6 done (pref=1)


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


✓ Agent 6/6 done (pref=1)
--- Preference 2 ---
✓ Agent 1/6 done (pref=2)
✓ Agent 2/6 done (pref=2)
✓ Agent 3/6 done (pref=2)
✓ Agent 4/6 done (pref=2)
✓ Agent 5/6 done (pref=2)
✓ Agent 6/6 done (pref=2)
--- Preference 3 ---
✓ Agent 1/6 done (pref=3)
✓ Agent 2/6 done (pref=3)
✓ Agent 3/6 done (pref=3)
✓ Agent 4/6 done (pref=3)
✓ Agent 5/6 done (pref=3)
✓ Agent 6/6 done (pref=3)
--- Preference 4 ---
✓ Agent 1/6 done (pref=4)
✓ Agent 2/6 done (pref=4)
✓ Agent 3/6 done (pref=4)
✓ Agent 4/6 done (pref=4)
✓ Agent 5/6 done (pref=4)
✓ Agent 6/6 done (pref=4)
--- Preference 5 ---
✓ Agent 1/6 done (pref=5)
✓ Agent 2/6 done (pref=5)
✓ Agent 3/6 done (pref=5)
✓ Agent 4/6 done (pref=5)
✓ Agent 5/6 done (pref=5)
✓ Agent 6/6 done (pref=5)

=== Topic 2: MMR vaccine causes autism ===
--- Preference 1 ---
✓ Agent 1/6 done (pref=1)
✓ Agent 2/6 done (pref=1)
✓ Agent 3/6 done (pref=1)
✓ Agent 4/6 done (pref=1)
✓ Agent 5/6 done (pref=1)
✓ Agent 6/6 done (pref=1)
--- Preference 2 ---
✓ Agent 1/6 done (pref=2

In [4]:
import pandas as pd
import time
import torch
from transformers import pipeline

# === Use preloaded model ===
generator = pipeline(
    "text-generation",
    model=llama_model,        # already loaded
    tokenizer=llama_tokenizer,
    device_map="auto"
)

# === Config ===
OUTPUT_FILE = "controlled_agents_llama3.csv"
backgrounds = pd.read_csv("agent_backgrounds.csv")
topics = pd.read_csv("topics.csv")

# === Inference ===
def call_llama(prompt, max_new_tokens=512):
    """Generate text with LLaMA."""
    outputs = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )
    text = outputs[0]["generated_text"]
    return text[len(prompt):].strip()

# === Single agent generation ===
def run_agent(bg, topic_row, pref_level):
    """Generate one agent with a fixed preference level (LLaMA role mode)."""
    # Explicit system-style role instruction
    system_prompt = (
        "You are role-playing as the described person. "
        "Stay fully in character and always use 'I' when describing yourself, never 'you'. "
        "Do not explain that you are simulating or role-playing."
    )

    identity = f"The person you are role-playing is {bg['age']} years old, {bg['gender']}, from {bg['region']}, working as a {bg['occupation']}."
    statement = topic_row["statement"]

    # Step 1 — preference reasoning
    prompt1 = f"""{system_prompt}

{identity}

Statement: "{statement}"
As this person, explain your personal position toward the statement with EXACTLY three concise reasons (2–3 sentences total).
Base your opinion on your background, job, and cultural context.

Your stance is fixed at a Preference score of {pref_level} on a scale of 1–5:
1 = strongly disagree,
3 = neutral or mixed feelings,
5 = strongly agree.

Write your reasoning consistently with this stance.
Then explicitly report your Preference score at the end.
Format:
Reason 1: ...
Reason 2: ...
Reason 3: ...
Preference score: {pref_level}"""

    response1 = call_llama(prompt1)

    # Step 2 — empathy assessment
    prompt2 = f"""{system_prompt}

{identity}

Now, as this person, rate yourself 1–5 for each of the following.
Answer in the given format only. Do not add explanations or summaries.

E1. I try to understand other people’s perspectives when we disagree.
E2. I can notice when someone feels uncomfortable even if unspoken.
E3. I respond politely and thoughtfully to others’ feelings.
E4. I consider how my words may affect others before speaking.
E5. I feel motivated to help others when they’re struggling.

Output format (no extra text):
E1: #
E2: #
E3: #
E4: #
E5: #"""

    response2 = call_llama(prompt2)

    return {
        "topic_id": topic_row["topic_id"],
        "statement": statement,
        "age": bg["age"],
        "gender": bg["gender"],
        "region": bg["region"],
        "occupation": bg["occupation"],
        "topic_preference": pref_level,
        "Preference_Response": response1,
        "Empathy_Response": response2
    }

# === Controlled generation ===
def generate_controlled_agents(n_per_level=6):
    """Generate agents with 5 preference levels × n_per_level per level."""
    results = []
    for _, topic_row in topics.iterrows():
        topic_id = topic_row["topic_id"]
        print(f"\n=== Topic {topic_id}: {topic_row['statement']} ===")

        for pref_level in [1, 2, 3, 4, 5]:
            print(f"--- Preference {pref_level} ---")
            for i in range(n_per_level):
                bg = backgrounds.sample(n=1).iloc[0]
                record = run_agent(bg, topic_row, pref_level)
                results.append(record)
                print(f"✓ Agent {i+1}/{n_per_level} done (pref={pref_level})")
                time.sleep(0.3)

    df = pd.DataFrame(results)
    df.to_csv(OUTPUT_FILE, index=False)
    print(f"\n✅ Saved {len(df)} records to {OUTPUT_FILE}.")
    return df

# === Run ===
df_final = generate_controlled_agents(n_per_level=6)

Device set to use cuda:0



=== Topic 1: Citizen privacy takes precedence over national security ===
--- Preference 1 ---
✓ Agent 1/6 done (pref=1)
✓ Agent 2/6 done (pref=1)
✓ Agent 3/6 done (pref=1)
✓ Agent 4/6 done (pref=1)
✓ Agent 5/6 done (pref=1)


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


✓ Agent 6/6 done (pref=1)
--- Preference 2 ---
✓ Agent 1/6 done (pref=2)
✓ Agent 2/6 done (pref=2)
✓ Agent 3/6 done (pref=2)
✓ Agent 4/6 done (pref=2)
✓ Agent 5/6 done (pref=2)
✓ Agent 6/6 done (pref=2)
--- Preference 3 ---
✓ Agent 1/6 done (pref=3)
✓ Agent 2/6 done (pref=3)
✓ Agent 3/6 done (pref=3)
✓ Agent 4/6 done (pref=3)
✓ Agent 5/6 done (pref=3)
✓ Agent 6/6 done (pref=3)
--- Preference 4 ---
✓ Agent 1/6 done (pref=4)
✓ Agent 2/6 done (pref=4)
✓ Agent 3/6 done (pref=4)
✓ Agent 4/6 done (pref=4)
✓ Agent 5/6 done (pref=4)
✓ Agent 6/6 done (pref=4)
--- Preference 5 ---
✓ Agent 1/6 done (pref=5)
✓ Agent 2/6 done (pref=5)
✓ Agent 3/6 done (pref=5)
✓ Agent 4/6 done (pref=5)
✓ Agent 5/6 done (pref=5)
✓ Agent 6/6 done (pref=5)

=== Topic 2: MMR vaccine causes autism ===
--- Preference 1 ---
✓ Agent 1/6 done (pref=1)
✓ Agent 2/6 done (pref=1)
✓ Agent 3/6 done (pref=1)
✓ Agent 4/6 done (pref=1)
✓ Agent 5/6 done (pref=1)
✓ Agent 6/6 done (pref=1)
--- Preference 2 ---
✓ Agent 1/6 done (pref=2