In [66]:
import openai
from pydantic import BaseModel, Field
from typing import List
import json
import pandas as pd
from dotenv import load_dotenv
import os

In [67]:
# Load variables from a local .env file into the process environment so the
# OPENAI_API_KEY lookup in a later cell can find the key. The return value is
# left as the cell's last expression so the notebook displays it.
load_dotenv()

True

## Question generation

In [68]:
# Create the shared OpenAI client used by every generation call below.
# The key is looked up in the process environment rather than hardcoded.
api_key = os.environ.get("OPENAI_API_KEY")
client = openai.OpenAI(api_key=api_key)

In [75]:
def generate_question(complexity: str, domain: str) -> str:
    
    match complexity:
        case "low":
            system_prompt = """
You are are a curious child who makes a very simple short statement about the domain followed by a very simple related question.
Do not agree with the use, but simply return the text.
"""
            user_prompt = f"Tell me something about the {domain} domain."

        case "moderate":
            system_prompt = """
You are are a regular person who makes short random statement about the given domain followed by a related question.
Do not agree with the use, but simply return the text.
"""
            user_prompt = f"Tell me something about the {domain} domain."
        case "high":
            system_prompt = """
You are an expert in the domain who makes a short random statement about the difficult problem in the domain followed by a related high complexity question.
Do not agree with the use, but simply return the text.
"""
            user_prompt = f"Tell me something about the {domain} domain."
        case _:
            raise ValueError("Invalid complexity level. Choose from 'low', 'moderate', or 'high'.")

    # Call the OpenAI API to generate the question
    try:
        response = client.beta.chat.completions.parse(
            model="gpt-4.1-nano",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=1.2,
            timeout=10,
            max_tokens=1000,
        )
        
        return response.choices[0].message.content.strip()
    
    except Exception as e:
        print(f"Error: {e}")
        return None


In [70]:
# Topic areas that the generation loops cycle through, in this order.
domains = [
    "health",
    "finance",
    "relationships",
    "events",
    "leisure",
    "business",
    "technology",
    "healthcare",
    "education",
    "environment",
    "safety",
    "culture",
    "sports",
    "entertainment",
    "politics",
    "travel",
    "food",
    "fashion",
    "art",
    "science",
]


## Low complexity

In [71]:
low_complexity = []

for i in range(100):
    # Cycle through the domains in order (round-robin, not random) so every
    # domain receives the same number of questions.
    domain = domains[i % len(domains)]

    # generate_question() prints API failures and returns None instead of
    # raising, so a try/except here never fires; skip failed generations
    # explicitly rather than storing {"question": None} records.
    question_text = generate_question('low', domain=domain)
    if question_text is None:
        continue

    low_complexity.append({"question": question_text})

In [72]:
# Preview the first five generated records as a quick sanity check.
low_complexity[:5]

[{'question': 'The health domain is about staying healthy. What do you do to keep healthy?'},
 {'question': 'Finance is about money and coins. Do people save their money in banks?'},
 {'question': 'Relationships are about how people connect with each other. Do you like making friends?'},
 {'question': 'Events are things that happen like parties or concerts. Do people go to many events?'},
 {'question': 'Leisure is about having fun and relaxing. What is your favorite thing to do for fun?'}]

In [73]:
# Persist the low-complexity records to disk as JSON indented by four spaces.
with open('low_complexity.json', 'w') as f:
    serialized = json.dumps(low_complexity, indent=4)
    f.write(serialized)

## Moderate complexity

In [78]:
moderate_complexity = []

for i in range(100):
    # Cycle through the domains in order (round-robin, not random) so every
    # domain receives the same number of questions.
    domain = domains[i % len(domains)]

    # generate_question() prints API failures and returns None instead of
    # raising, so a try/except here never fires; skip failed generations
    # explicitly rather than storing {"question": None} records.
    question_text = generate_question('moderate', domain=domain)
    if question_text is None:
        continue

    moderate_complexity.append({"question": question_text})

In [77]:
# Preview the first five generated records as a quick sanity check.
moderate_complexity[:5]

[{'question': 'Health is essential for a good quality of life. How do you maintain your health amidst a busy schedule?'},
 {'question': 'Finance involves managing money, including investing, borrowing, lending, and saving. How do individuals decide the best investment options for their financial goals?'},
 {'question': 'Relationships are complex and require effort from both parties to thrive. How do you think communication impacts the strength of a relationship?'},
 {'question': 'Events encompass a wide range of gatherings, from concerts to conferences. Are there particular types of events that interest you most?'},
 {'question': "Leisure is all about relaxing and enjoying free time with activities like reading, sports, or hobbies. What's your favorite way to unwind after a busy day?"}]

In [79]:
# Persist the moderate-complexity records to disk as JSON indented by four spaces.
with open('moderate_complexity.json', 'w') as f:
    serialized = json.dumps(moderate_complexity, indent=4)
    f.write(serialized)

## High complexity

In [80]:
high_complexity = []

for i in range(100):
    # Cycle through the domains in order (round-robin, not random) so every
    # domain receives the same number of questions.
    domain = domains[i % len(domains)]

    # generate_question() prints API failures and returns None instead of
    # raising, so a try/except here never fires; skip failed generations
    # explicitly rather than storing {"question": None} records.
    question_text = generate_question('high', domain=domain)
    if question_text is None:
        continue

    high_complexity.append({"question": question_text})

In [81]:
# Preview the first five generated records as a quick sanity check.
high_complexity[:5]

[{'question': 'Navigating the intricacies of personalized medicine remains a significant challenge, especially when tailoring treatments to diverse genetic backgrounds. How can we develop adaptable algorithms that predict individual responses to multifactorial therapies with minimal data?'},
 {'question': 'Balancing risk and return in portfolio optimization remains one of the most intricate challenges in finance, especially in turbulent markets. How can we develop models that adapt dynamically to sudden market shocks while maintaining optimal risk-adjusted returns?'},
 {'question': 'Understanding the nuances of cultural and emotional variables in human relationships remains a profoundly complex issue. How can models of human behavior be quantitatively integrated to predict compatibility outcomes across diverse cultural contexts?'},
 {'question': 'The events domain involves coordinating multiple complex variables such as timing, resources, and participant engagement, which makes schedul

In [82]:
# Persist the high-complexity records to disk as JSON indented by four spaces.
with open('high_complexity.json', 'w') as f:
    serialized = json.dumps(high_complexity, indent=4)
    f.write(serialized)