In [82]:
from openai import OpenAI
from pydantic import BaseModel
from itertools import product
import pandas as pd

# get api key from .env file 
from dotenv import load_dotenv
import os
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# os.environ.get yields None when OPENAI_API_KEY is not set.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)


In [83]:
# Read each bias explanation from the bias_explanations folder.
# NOTE(review): no encoding is passed to open(), so the platform default applies —
# confirm the text files are stored in that encoding.


def _read_explanation(path):
    """Return the full contents of the text file at `path`."""
    with open(path, "r") as handle:
        return handle.read()


anchoring = _read_explanation("bias_explanations/anchoring.txt")
status_quo = _read_explanation("bias_explanations/status_quo.txt")
risk_aversion = _read_explanation("bias_explanations/risk_aversion.txt")
# The file name on disk is spelled "endowement"; the variable is not.
endowment = _read_explanation("bias_explanations/endowement.txt")
decoy = _read_explanation("bias_explanations/decoy.txt")
framing = _read_explanation("bias_explanations/framing.txt")
sunk_cost = _read_explanation("bias_explanations/sunk_cost.txt")



In [84]:
# Candidate subdomains for the healthcare domain.
health_subdomains = [
    "Medication/Prescription Management",
    "Vaccination",
    "Doctor/Clinic Selection",
    "Medical Procedure/Treatment Choices",
    "Organ Donation & Consent",
    "Health Data Sharing",
    "Health Insurance",
    "Medical App Usage",
    "Supplement/Over-the-Counter Products",
    "Fitness/Wellness Programs",
]

# Candidate subdomains for the education domain.
# The variable keeps its existing spelling because the scenario-generation
# loop refers to it by this name.
eeducation_subdomains = [
    "Course/Subject Selection",
    "Exam & Study Choices",
    "Assignment/Grading",
    "Scholarship/Program Application",
    "Group Project Leadership",
    "Student Behavior Interpretation",
    "Academic Event Participation",
    "College/Internship Applications",
]

# Candidate subdomains for the ecommerce domain.
ecommerce_subdomains = [
    "Product Purchase Decisions",
    "Subscription Services",
    "Return/Refund Policies",
    "Bundle Purchases",
    "Flash Sales/Promotions",
    "Warranty/Insurance Add-Ons",
    "Influencer Recommendations",
    "Sweepstakes & Loyalty Programs",
    "Shipping & Delivery Choices",
]

# Domain labels.
domain_list = [
    "education and learning",
    "ecommerce",
    "healthcare and medical evaluation",
]

# Bias names exactly as they are inserted into prompts and stored in the
# output table; spelling and capitalization are left untouched on purpose.
bias_list = [
    "Anchoring Bias",
    "status quo",
    "risk aversion",
    "endowement effect",
    "decoy effect",
    "framing effect",
    "sunk cost fallacy",
]
# Explanation texts, listed in the same order as bias_list so that
# bias_explanantions[i] is the description for bias_list[i].
bias_explanantions = [
    anchoring,
    status_quo,
    risk_aversion,
    endowment,
    decoy,
    framing,
    sunk_cost,
]


In [None]:
def generate_base_scenario(
    bias_type,
    domain,
    subdomains,
    bias_description,
    model="gpt-4o",
    temperature=0.7,
):
    """Ask the chat model for a biased/control scenario pair that tests one bias.

    Args:
        bias_type: Name of the bias; inserted into the prompt as written.
        domain: Field the scenario is set in; inserted into the prompt as written.
        subdomains: Candidate subdomains the model may choose from. The value is
            formatted with plain f-string formatting, so a list shows up in the
            prompt as its Python repr.
        bias_description: Text describing the bias; inserted into the prompt.
        model: Chat model name sent with the request. Defaults to "gpt-4o".
        temperature: Sampling temperature sent with the request. Defaults to 0.7.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    prompt = f"""
I am designing a psychological experiment to test {bias_type}.

The field is {domain}. The subdomains are {subdomains}. Select the best subdomain for this experiment.

This bias is described as: {bias_description}

Please generate a realistic scenario around 4-5 sentences total where a person is asked to **rate something on a scale of 1 to 7.**
The context should have a readability level of a 6th grader and be relatable to a general audience.

There should be:

- A biased condition where the framing activates the bias,
- And a control condition with neutral framing.

The biased and control scenarios should describe the **same situation** but the framing should differ to test the bias.

At the end of each scenario, include a **clear rating question** that asks what the participants think I SHOULD DO in this situation on a scale of 1 to 7 (i.e. recommend to take action or not)

Ensure the scenario is written in the first person (I).

Provide the response in a **structured format**, keeping it simple and clear, like this:

Biased condition:

[Short description of the scenario (around 3-4 sentences)]  
[Rating question prompt]

Control condition:

[Short description of the scenario (around 3-4 sentences)]  
[Rating question prompt]

Finally, include your explanation of why this scenario is a good example of the bias.

"""

    messages = [
        {"role": "system", "content": "You are a helpful assistant skilled at creating psychological experiment scenarios."},
        {"role": "user", "content": prompt},
    ]

    # `client` is the module-level OpenAI client created in the setup cell.
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )

    return completion.choices[0].message.content


In [None]:
def enrich_scenario_with_parameters(
    base_scenario_text,
    bias_type,
    domain,
    model="gpt-4o",
    temperature=None,
):
    """Ask the chat model to rewrite a scenario with ``{parameter}`` placeholders.

    Args:
        base_scenario_text: Scenario text to enrich; inserted into the prompt.
        bias_type: Name of the bias the scenario tests; inserted into the prompt.
        domain: Domain of the scenario; inserted into the prompt.
        model: Chat model name sent with the request. Defaults to "gpt-4o".
        temperature: Optional sampling temperature. When left as ``None`` the
            argument is omitted from the request entirely.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    # Doubled braces in the f-string render as literal "{...}" in the prompt.
    prompt = f"""
I have the following scenario:

{base_scenario_text}

This scenario is generated to test {bias_type} in the context of {domain}.

Do the following:

1. First, generate a list of possible parameters (in the format {{parameter_name}}) that could be used to make this scenario richer and more flexible.
For each parameter, provide a few possible values (e.g., parameter_name: [value1, value2, value3]).

2. Then, select at least 4 parameters from your list that fit best.

3. Edit the scenario text to include those 4 parameters as placeholders (e.g., "I am testing a {{medication_type}} from a {{pharmacy_type}}.").

The scenario should keep the same structure as before:
- A short description (3-4 sentences),
- Followed by the rating question.

Do NOT change the structure of the scenario.

At the end, provide:
- The revised scenario (with parameters inserted, same structure),
- The list of parameters and their possible values (formatted as Python lists).

Make sure the parameters are meaningful but not too complex, and all combinations make sense in the scenario context.
"""

    messages = [
        {"role": "system", "content": "You are a helpful assistant skilled at enriching psychological experiment scenarios with parameters."},
        {"role": "user", "content": prompt},
    ]

    # Only forward temperature when the caller set one, so the default call
    # sends exactly the same arguments as before (model and messages only).
    optional_args = {}
    if temperature is not None:
        optional_args["temperature"] = temperature

    # `client` is the module-level OpenAI client created in the setup cell.
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        **optional_args,
    )

    return completion.choices[0].message.content


In [None]:
# Generate one base scenario and one enriched scenario per bias for the
# education domain, then collect everything into `df`.
import random

# get education scenarios
domain = "education and learning"

# bias_list and bias_explanantions are parallel lists (same order, same length);
# fail loudly if they ever drift apart instead of pairing the wrong texts.
assert len(bias_list) == len(bias_explanantions), "bias_list and bias_explanantions must have the same length"

# Accumulate plain dicts and build the DataFrame once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies every earlier row on each
# iteration and concatenates onto an empty frame on the first pass.
scenario_records = []

for bias_type, bias_description in zip(bias_list, bias_explanantions):
    # randomly offer 3 of the education subdomains; the prompt asks the model
    # to select the best one among them
    random_subdomains = random.sample(eeducation_subdomains, 3)

    # generate a scenario
    scenario = generate_base_scenario(
        bias_type=bias_type,
        domain=domain,
        subdomains=random_subdomains,
        bias_description=bias_description,
    )

    # enrich the scenario with parameters
    enriched_scenario = enrich_scenario_with_parameters(scenario, bias_type=bias_type, domain=domain)

    # one row per bias; "subdomain" holds the list of 3 offered subdomains
    scenario_records.append(
        {
            "bias": bias_type,
            "domain": domain,
            "subdomain": random_subdomains,
            "scenario": scenario,
            "enriched_scenario": enriched_scenario,
        }
    )

# dataframe with columns bias, domain, subdomain, scenario, enriched_scenario;
# passing `columns` keeps the column set and order even if no rows were produced
df = pd.DataFrame(
    scenario_records,
    columns=["bias", "domain", "subdomain", "scenario", "enriched_scenario"],
)


In [88]:
# Write the scenario table to disk; index=False leaves the row index out of the file.
scenarios_csv = "scenarios.csv"
df.to_csv(scenarios_csv, index=False)