# In [None]:
"""
generate_shopping_questions.py
Author: Rena Gao

"""
import ast
import json
import os

from openai import OpenAI
from tqdm import tqdm

# -------------------------------
# 0. Initialize the OpenAI client
# -------------------------------
# Before running: export OPENAI_API_KEY="<your API key>"
# No key is passed explicitly here; the client is expected to pick it up
# from the environment.
client = OpenAI()

# -------------------------------
# 1. Shopping domain list
# -------------------------------
# NOTE: the original header for this section mentions 34 shopping domains,
# but only the five below are listed here.
shopping_domains = [
    "All_Beauty",
    "Amazon_Fashion",
    "Appliances",
    "Arts_Crafts_and_Sewing",
    "Automotive",
]

# -------------------------------
# 2. Prompt templates
# -------------------------------
# Question-generation prompt. "{domain}" is the only placeholder, so
# prompt_template.format(domain=...) renders without errors.
prompt_template = """

[System Prompt]
You are an e-commerce user interacting with an AI shopping assistant.
Generate 15 natural, diverse, realistic customer questions about the "{domain}" domain.
Each question should sound like it comes from a real shopper and should cover different intents
(e.g., product search, availability, delivery, return policy, price comparison, recommendation).
Output only a valid Python list of strings (no explanations, no extra text).
"""

# Draft prompt for an assistant that replies to the user and asks follow-up
# questions. It used to be concatenated onto prompt_template, where its extra
# placeholders and literal JSON braces made str.format(domain=...) raise
# KeyError, and where its "Output in JSON format" instruction contradicted the
# "Python list" instruction above. It is kept verbatim for reference only:
# nothing in this section uses it, its opening sentence is unfinished, and the
# literal braces of the JSON example must be escaped ("{{" / "}}") before it
# can be rendered with str.format.
assistant_reply_prompt_draft = """
You are a 
Your goal is not only to answer but also to continue the conversation naturally — 
ask appropriate follow-up questions when useful, showing genuine curiosity and understanding.

[User Context]
User profile:
- Name: {user_name}
- Interests: {user_interests}
- Communication tone: {tone_preference}
- Emotional state: {emotion_state}
- Past purchase / browsing summary: {history_summary}

[Session Context]
Current topic: {current_topic}
Recent user utterances: {recent_utterances}
Conversation goal: {goal_type}  # e.g. recommend, explain, compare

[Product Information]
Category: {product_category}
Subcategory: {product_subcategory}
Key features: {product_features}
Brand or price range: {brand_price_info}
Product tone: {product_tone}

[Instruction]
Generate a response that:
1. Provides a relevant and empathetic reply to the user’s current message.
2. Reflects understanding of user’s intent and emotions.
3. Naturally introduces a follow-up question **IF**:
   - The user’s intent or preferences are unclear;
   - There is potential to refine recommendation;
   - The user shows hesitation or curiosity.
4. Follow-up questions should be short, context-aware, and non-intrusive.
5. Keep tone aligned with the user’s communication style.

Output in JSON format:
{
  "response": "...",  
  "reasoning": "why this response fits user context",
  "follow_up": "..."  # optional, leave blank if not needed
}
"""

# -------------------------------
# 3. Generation function
# -------------------------------
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    if len(text) >= 6 and text.startswith("```") and text.endswith("```"):
        inner = text[3:-3]
        tag, newline, body = inner.partition("\n")
        # The first fenced line is either empty or a language tag ("python").
        if newline and (not tag.strip() or tag.strip().isalnum()):
            inner = body
        return inner.strip()
    return text


def _parse_question_list(content):
    """Convert the raw model reply into a list of questions.

    A reply that looks like a list literal is parsed with ast.literal_eval,
    which only accepts literals and therefore cannot execute code (unlike
    eval). Any other reply is treated as one question per line, with bullet
    characters stripped.

    Raises:
        ValueError / SyntaxError: if the reply looks like a list literal but
            is not a valid one.
    """
    content = _strip_code_fence(content)

    if content.startswith("[") and content.endswith("]"):
        return ast.literal_eval(content)

    # Fallback: simple line-based parsing of bulleted / plain-text replies.
    cleaned = (line.strip("-• \n") for line in content.split("\n"))
    return [line for line in cleaned if line]


def generate_questions(domain: str, model: str = "gpt-4o-mini") -> list:
    """
    Generate a list of shopping-related customer questions for a given domain.

    Args:
        domain: Shopping domain name substituted into the prompt template.
        model: Chat model name passed to the API.

    Returns:
        The parsed list of questions, or an empty list if the request or the
        parsing fails (the error is printed, not raised, so one failing
        domain does not abort a whole run).
    """
    # Plain substitution instead of str.format: any other brace in the
    # template (e.g. a JSON example) would make str.format raise.
    prompt = prompt_template.replace("{domain}", domain)

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful data generation assistant."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.9,
            max_tokens=800,
        )

        # The message content may be None; treat that as an empty reply.
        content = (response.choices[0].message.content or "").strip()
        return _parse_question_list(content)

    except Exception as e:
        print(f"[Error] Failed to generate for {domain}: {e}")
        return []

# -------------------------------
# 4. Main logic
# -------------------------------
if __name__ == "__main__":
    print("Generating shopping domain user questions...\n")

    # Query each domain in list order (with a progress bar) and key the
    # returned questions by domain name.
    progress = tqdm(shopping_domains, desc="Generating domains")
    all_data = {name: generate_questions(name) for name in progress}

    # -------------------------------
    # 5. Save as a JSON file
    # -------------------------------
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "shopping_user_questions.json")

    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(all_data, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Generation completed! File saved to: {output_path}")
