In [None]:
from openai import OpenAI
from pydantic import BaseModel
from itertools import product
import pandas as pd

# get api key from .env file 
from dotenv import load_dotenv
import os
# Load variables from the .env file so the key lookup below can find them.
load_dotenv()

# The key is looked up by name and never hard-coded; the lookup yields None
# when OPENAI_API_KEY is not set.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)


In [198]:
# Candidate subdomains per domain. The scenario generator is handed a sample of
# these and asked to pick the best fit for the bias under test.
health_subdomains = [
    "Medication/Prescription Management",
    "Vaccination",
    "Doctor/Clinic Selection",
    "Medical Procedure/Treatment Choices",
    "Organ Donation & Consent",
    "Health Data Sharing",
    "Health Insurance",
    "Medical App Usage",
    "Supplement/Over-the-Counter Products",
    "Fitness/Wellness Programs"
]


education_subdomains = [
    "Course/Subject Selection",
    "Exam & Study Choices",
    "Assignment/Grading",
    "Scholarship/Program Application",
    "Group Project Leadership",
    "Student Behavior Interpretation",
    "Academic Event Participation",
    "College/Internship Applications"
]
# Backward-compatible alias: the list was originally published under this
# misspelled name, so keep it pointing at the same object.
eeducation_subdomains = education_subdomains

ecommerce_subdomains = [
    "Product Purchase Decisions",
    "Subscription Services",
    "Return/Refund Policies",
    "Bundle Purchases",
    "Flash Sales/Promotions",
    "Warranty/Insurance Add-Ons",
    "Influencer Recommendations",
    "Sweepstakes & Loyalty Programs",
    "Shipping & Delivery Choices"
]

domain_list = ["education and learning", "ecommerce", "healthcare and medical evaluation"]

# Bias names are interpolated verbatim into the LLM prompts and stored in the
# output table, so spelling matters ("endowment", not "endowement").
bias_list = [
    'Anchoring Bias',
    'status quo',
    'risk aversion',
    'endowment effect',
    'decoy effect',
    'framing effect',
    'sunk cost fallacy',
]

In [199]:

def generate_base_scenario(bias_type, domain, subdomains):
    """Request a biased/control scenario pair for one bias from the chat model.

    Args:
        bias_type: Name of the bias, interpolated into the prompt as-is.
        domain: Field the scenario should be set in, interpolated as-is.
        subdomains: Candidate subdomains; formatted into the prompt with the
            value's default string conversion.

    Returns:
        The ``content`` of the first choice's message in the completion. The
        same value is printed before it is returned.

    Relies on the module-level ``client``.
    """
    prompt = f"""
I am designing a psychological experiment to test {bias_type}.

The field is {domain}. The subdomains are {subdomains}. Select the best subdomain for this experiment.

Please generate a realistic scenario around 4 sentences total where a person has to choose between two options (Option A and Option B). 
The context should have a readability level of a 6th grader and be relatable to a general audience.

There should be:

- A biased condition where one option is framed to activate the bias,
- And a control condition where both options are framed neutrally.

Make sure the options are balanced in terms of real-world outcomes, and only the framing differs.

At the end of each scenario, include a clear question prompt (e.g., “Which option would you choose?” or “Should I keep Option A or switch to Option B?”).

Ensure the scenario is written in the first person (I). 

Also provide a brief explanation of why this is a good example of the bias in question.

Provide the response in a **structured format**, but do not use labels like 'option_a' or 'option_b'. Instead, follow this simple style for each condition:

- First a **short description of the scenario** (around 2 sentences)
- Then describe **Option A**,
- Then describe **Option B**,
- Finally, include the **question prompt**.



Option A should have the biased framing, but the content of both options should be somewhat different. Especially, ensure Option A and B for control group are not identical.
However, ensure the text for Option B for both conditions are the same. 
"""

    # System message sets the persona; the user message carries the full brief.
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant skilled at creating psychological experiment scenarios.",
    }
    user_message = {"role": "user", "content": prompt}

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
        temperature=0.7,
    )

    # Echo the generated text so it shows up in the cell output, then return it.
    scenario_text = response.choices[0].message.content
    print(scenario_text)
    return scenario_text


In [200]:
def enrich_scenario_with_parameters(base_scenario_text, bias_type, domain):
    """Ask the model to rewrite a scenario with ``{placeholder}`` parameters.

    The prompt embeds the base scenario and asks the model to propose
    parameters with candidate values, pick at least four, and insert them into
    the scenario text as placeholders without changing its structure.

    Args:
        base_scenario_text: Scenario text to enrich, embedded verbatim.
        bias_type: Name of the bias the scenario tests, interpolated as-is.
        domain: Field the scenario is set in, interpolated as-is.

    Returns:
        The ``content`` of the first choice's message in the completion. The
        same value is printed before it is returned.

    Relies on the module-level ``client``.
    """
    # Doubled braces ({{...}}) are f-string escapes: the model sees literal
    # {parameter_name} placeholders.
    prompt = f"""
I have the following scenario:

{base_scenario_text}

This scenario is generated to test {bias_type} in the context of {domain}.

Do the following:

1. First, generate a list of possible parameters (in the format {{parameter_name}}) that could be used to make this scenario richer and more flexible. 
For each parameter, provide a few possible values (e.g., parameter_name: [value1, value2, value3]).

2. Then, select at least 4 parameters from your list that fit best.

3. Edit the scenario text to include the selected parameters as placeholders (e.g., "A patient is picking up their {{medication_type}} at a {{pharmacy_type}}.").

Ensure that:
- The parameters are meaningful, but not too complex
- All possible combinations of parameter values make sense

At the end, provide:
- The revised scenario (with parameters inserted).
- The list of parameters and possible values (formatted as Python lists).

Do not change the structure of the scenario. Just add parameters to it.
At the end of each scenario, include a question that asks what the participant thinks I should do. 
    """

    completion = client.chat.completions.create(
        model="o3-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant skilled at enriching psychological experiment scenarios with parameters."},
            {"role": "user", "content": prompt}
        ]
    )

    # Echo the enriched text into the cell output, then hand it to the caller.
    enriched_text = completion.choices[0].message.content
    print(enriched_text)
    return enriched_text


In [201]:
# Generate one base scenario and one enriched scenario per bias for the
# healthcare domain, then collect everything into a single DataFrame.
import random

# Domain is fixed for this run; only the candidate subdomains vary per bias.
domain = "healthcare and medical evaluation"

# Accumulate plain records and build the DataFrame once after the loop.
# Growing a frame with pd.concat on every iteration re-copies all prior rows
# and, when the starting frame is empty, triggers pandas' empty-entry
# concatenation FutureWarning. If a call fails mid-run, the rows gathered so
# far remain available in `scenario_records`.
scenario_records = []

for bias_type in bias_list:
    # Offer the model 3 randomly sampled health subdomains to choose from.
    random_subdomains = random.sample(health_subdomains, 3)

    # First pass: base scenario text for this bias.
    scenario = generate_base_scenario(bias_type=bias_type, domain=domain, subdomains=random_subdomains)

    # Second pass: the same scenario rewritten with parameter placeholders.
    enriched_scenario = enrich_scenario_with_parameters(scenario, bias_type=bias_type, domain=domain)

    # NOTE: "subdomain" holds the 3 sampled candidates passed to the model,
    # not a single chosen subdomain.
    scenario_dict = {
        "bias": bias_type,
        "domain": domain,
        "subdomain": random_subdomains,
        "scenario": scenario,
        "enriched_scenario": enriched_scenario
    }
    scenario_records.append(scenario_dict)

# Explicit column list keeps the schema stable even when there are no records.
df = pd.DataFrame(
    scenario_records,
    columns=["bias", "domain", "subdomain", "scenario", "enriched_scenario"],
)


**Selected Subdomain**: Vaccination

---

**Biased Condition**

I need to decide whether to get a flu shot from my doctor. 

- Option A: My doctor tells me that 90 out of 100 people who get the flu shot never get the flu.
- Option B: My doctor tells me that without the flu shot, I have a 50% chance of not getting the flu this year.

Which option should I choose?

**Control Condition**

I need to decide whether to get a flu shot from my doctor.

- Option A: My doctor says that with the flu shot, I have a 90% chance of not getting the flu.
- Option B: My doctor says that without the flu shot, I have a 50% chance of not getting the flu this year.

Which option should I choose?

---

**Explanation of the Bias**

This scenario exemplifies anchoring bias by using a positive frame for Option A in the biased condition that emphasizes a high success rate ("90 out of 100 people who get the flu shot never get the flu"). This is likely to anchor the decision in a way that makes the flu shot appear

In [202]:
# Write the collected scenarios to scenarios.csv, leaving out the row index.
df.to_csv(path_or_buf="scenarios.csv", index=False)