# OpenAI + Serp API

This notebook implements the tasks for a prompt-driven microsite generation engine, specifically targeting the "Promote" use case. Each method is aligned with its respective deliverable and is followed by the observed outcomes and impact based on implementation results. The solution incorporates trend-aware context retrieval using a web search API with Retrieval-Augmented Generation (RAG) and leverages a lightweight language model to generate concise, skimmable microsites, optimized for cost and designed without bio-link dependencies.

In [None]:
%pip install --upgrade --quiet  langchain-community langchain-openai


# hjfvdsgcv

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.5/64.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import pprint
from langchain_community.utilities import GoogleSerperAPIWrapper

# Re-export the Serper key that is already present in the environment.
# NOTE(review): os.environ.get() returns None when SERPER_API_KEY is unset,
# and assigning None into os.environ raises TypeError -- confirm the key is
# always provided before this cell runs.
os.environ["SERPER_API_KEY"] = os.environ.get("SERPER_API_KEY")

# Module-level search wrapper used by call_serper_api.
search = GoogleSerperAPIWrapper()

def call_serper_api(user_query):
    """Run a web search for ``user_query`` and return the search result.

    The query is forwarded to the module-level ``search`` wrapper and whatever
    ``search.run`` produces is handed back to the caller.
    """
    # Return the result of the call. The previous version threw the result
    # away and returned the bound method ``search.run`` itself.
    return search.run(user_query)

In [None]:
import json
from openai import OpenAI
from typing import Optional, List

# Copy the OpenAI key from the `OpenAI_API_KEY` environment variable into the
# `OPENAI_API_KEY` variable.
# NOTE(review): the source variable name is mixed-case; if it is unset,
# os.environ.get() returns None and this assignment raises TypeError --
# confirm the expected variable name.
os.environ["OPENAI_API_KEY"] = os.environ.get("OpenAI_API_KEY")

# Initialize the OpenAI client shared by the generation functions below
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    organization=os.environ.get("OpenAI_Organization_ID"),           # organization ID read from the environment (None if unset)
    project=os.environ.get("OpenAI_Project_ID")           # project ID read from the environment (None if unset)
)

# **Build prompt templates per intent (Sell, Promote, etc.) & Define JSON schema**




In [None]:
def build_prompt(intent: str,
                 user_input: str,
                 nano_output: Optional[dict] = None,
                 retrieval_context: Optional[List[str]] = None) -> dict:
    """
    Construct a structured GPT prompt config for the given intent and inputs.

    Args:
        intent: One of "promote", "sell" or "educate" (case-insensitive).
        user_input: Free-text request from the user; passed through unchanged.
        nano_output: Optional structured image-analysis output.
        retrieval_context: Optional list of retrieved context snippets.

    Returns:
        A dict with the keys ``system_prompt``, ``user_input``, ``nano_output``,
        ``retrieval_context`` and ``instructions``. For an unknown intent the
        ``system_prompt`` is empty and ``instructions`` carries an error note.
    """
    # Imported locally: the unknown-intent branch logs an error, and no
    # earlier cell imports logging before this function is defined.
    import logging

    intent_key = intent.lower()
    system_prompt = ""
    instructions = ""

    if intent_key == "promote":
        system_prompt = (
            "You are a microsite generation assistant. Your task is to create a short, structured promotional "
            "microsite layout for a small business. Respond *only* in valid JSON. "  # Emphasize ONLY JSON
            "Each microsite must include 5 elements: informational, benefits, optional trend (if data provided), CTA, and testimonial. "
            "Each element must have a 'type', 'title', and 'body'. Keep each body under 150 words, optimized for mobile."
        )
        instructions = (
            "Generate the JSON output now. "  # Explicitly ask for JSON now
            "If 'retrieval_context' is provided, use it to add a 'trend' element. "
            "Format all output as a JSON object with 'intent': 'Promote' and a 5-element 'elements' array. "
            "Ensure the output is just the JSON object, starting with '{'."  # Guide the format
        )

    elif intent_key == "sell":
        system_prompt = (
            "You are a microsite generator for product sales. Based on the input description and structured image analysis output, "
            "generate a short, structured JSON microsite with a maximum of 5 elements. These must include: informational, features, "
            "price, CTA, and testimonial. Keep all bodies under 150 words. Respond *only* in valid JSON."  # Emphasize ONLY JSON
        )
        instructions = (
            "Generate the JSON output now. "  # Explicitly ask for JSON now
            "Use the image description, category, tags, and price estimate to build the microsite layout. "
            "Return structured JSON only with 'intent': 'Sell' and an array of 5 elements. "
            "Ensure the output is just the JSON object, starting with '{'."  # Guide the format
        )

    elif intent_key == "educate":
        system_prompt = (
            "You are a microsite content assistant focused on education. Create a 5-element microsite based on the topic provided. "
            "Always include an informational intro, 3-point bullet list, trend (if data available), embed, and CTA. "
            "Keep all content under 150 words each, clear, and scannable. Respond *only* in valid JSON."  # Emphasize ONLY JSON
        )
        instructions = (
            "Generate the JSON output now. "  # Explicitly ask for JSON now
            "If retrieval_context is present, include it as a 'trend' element. "
            "Output must be a JSON object with 'intent': 'Educate' and 5 elements. "
            "Ensure the output is just the JSON object, starting with '{'."  # Guide the format
        )

    else:
        # Unknown intent: keep the same dict shape as the normal return, but
        # leave the system prompt empty so callers can detect the failure.
        logging.error(f"Unknown intent: {intent}")
        instructions = "Invalid intent provided."

    return {
        "system_prompt": system_prompt,
        "user_input": user_input,
        "nano_output": nano_output,
        "retrieval_context": retrieval_context,
        "instructions": instructions
    }

def generate_microsite_with_gpt(prompt_config: dict) -> tuple:
    """
    Make a gpt-4o-mini call using the OpenAI v1.x SDK and parse the JSON reply.

    Args:
        prompt_config: Prompt configuration dict with ``system_prompt`` and
            ``user_input``, and optionally ``retrieval_context``,
            ``nano_output`` and ``instructions``.

    Returns:
        A 2-tuple ``(result, response)`` on every path, so callers can always
        unpack two values:
        - success: ``(parsed_json, response)``
        - reply was not valid JSON: ``({"error": ..., "raw_content": ...}, response)``
        - the API call failed: ``({"error": ...}, None)``
        - invalid prompt configuration: ``({}, None)``
    """
    # Imported locally: no earlier cell imports logging before this function
    # is defined, and every error path below logs.
    import logging

    if not prompt_config.get("system_prompt"):
        logging.error("Prompt configuration is incomplete or invalid.")
        return {}, None

    messages = [
        {"role": "system", "content": prompt_config["system_prompt"]},
        {"role": "user", "content": prompt_config["user_input"]}
    ]

    if prompt_config.get("retrieval_context"):
        messages.append({
            "role": "user",
            "content": "Retrieved Context:\n" + "\n".join(f"- {line}" for line in prompt_config["retrieval_context"])
        })

    if prompt_config.get("nano_output"):
        messages.append({
            "role": "user",
            "content": f"Image Analysis:\n{json.dumps(prompt_config['nano_output'], indent=2)}"
        })

    # Only send the instructions turn when there is something to say; an
    # empty or missing value would otherwise add a blank user message.
    if prompt_config.get("instructions"):
        messages.append({
            "role": "user",
            "content": prompt_config["instructions"]
        })

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.5,
            top_p=0.9,
            max_tokens=400,
            response_format={"type": "json_object"}  # Explicitly request JSON output type
        )
        content = response.choices[0].message.content.strip()
    except Exception as e:
        logging.error(f"Error during OpenAI API call: {e}")
        return {"error": f"OpenAI API call failed: {e}"}, None

    try:
        return json.loads(content), response
    except json.JSONDecodeError as e:
        logging.error(f"JSON Decode Error: {e}")
        logging.error(f"Content received from API was: {content}")
        # The response is still returned so token usage can be inspected.
        return {"error": "Failed to parse JSON response from AI.", "raw_content": content}, response



In [None]:
# Build prompt for a "Promote" use case, with two hand-written context
# snippets standing in for retrieved search results
prompt_data = build_prompt(
    intent="Promote",
    user_input="Promote my home baking studio in Austin to increase Instagram cake orders this summer.",
    retrieval_context=[
        "Home-based dessert businesses peak during summer due to custom event demand. (Reddit)",
        "Instagram Reels showing behind-the-scenes baking have high engagement. (Quora)"
    ]
)

# Call GPT to generate microsite; unpack the parsed microsite and the raw
# API response object
microsite_json, response = generate_microsite_with_gpt(prompt_data)
# Short alias for the raw response; later cells read `r.usage` and
# `r.choices[0].message.content`
r=response
# Display result
print(json.dumps(microsite_json, indent=2))

{
  "intent": "Promote",
  "elements": [
    {
      "type": "informational",
      "title": "Welcome to Sweet Treats Baking Studio",
      "body": "Located in the heart of Austin, our home baking studio specializes in custom cakes for all occasions. Whether it's a birthday, wedding, or any celebration, we craft delicious and beautiful cakes tailored to your vision."
    },
    {
      "type": "benefits",
      "title": "Why Choose Us?",
      "body": "Our cakes are made from the finest ingredients, ensuring rich flavors and moist textures. We offer personalized designs, quick turnaround times, and a commitment to quality that will make your event unforgettable."
    },
    {
      "type": "trend",
      "title": "Summer Baking Boom",
      "body": "Home-based dessert businesses peak during summer due to high demand for custom cakes for events. This is the perfect time to order your cake and make your celebrations even sweeter!"
    },
    {
      "type": "CTA",
      "title": "Order Y

# **Performance & Cost Targets**

**Pricing link:**

https://openai.com/api/pricing/

In [None]:
# print(r.usage)
# Per-token cost configuration for GPT-4o mini, derived from the published
# per-million-token list prices (https://openai.com/api/pricing/).
# Re-check the pricing page before relying on these figures.
GPT4O_MINI_PRICING = {
    "prompt_token_cost": 0.15 / 1_000_000,     # $0.15 per million input tokens
    "completion_token_cost": 0.60 / 1_000_000  # $0.60 per million output tokens
}

def print_gpt4o_mini_cost(response, pricing: Optional[dict] = None):
    """
    Print token usage and the estimated cost of one chat completion.

    Args:
        response: Object exposing ``usage.prompt_tokens``,
            ``usage.completion_tokens`` and ``usage.total_tokens``.
        pricing: Optional per-token rates with the keys ``prompt_token_cost``
            and ``completion_token_cost``. Defaults to the module-level
            ``GPT4O_MINI_PRICING`` table.
    """
    rates = GPT4O_MINI_PRICING if pricing is None else pricing

    usage = response.usage
    input_tokens = usage.prompt_tokens
    output_tokens = usage.completion_tokens
    total_tokens = usage.total_tokens

    # Input and output tokens are billed at different per-token rates.
    cost = (
        input_tokens * rates["prompt_token_cost"] +
        output_tokens * rates["completion_token_cost"]
    )

    print("\n--- Token Usage & Estimated Cost (GPT-4o Mini) ---")
    print(f"Input Tokens     : {input_tokens}")
    print(f"Output Tokens    : {output_tokens}")
    print(f"Total Tokens     : {total_tokens}")
    print(f"Estimated Cost   : ${cost:.6f} USD")
print_gpt4o_mini_cost(r)



--- Token Usage & Estimated Cost (GPT-4o Mini) ---
Input Tokens     : 222
Output Tokens    : 321
Total Tokens     : 543
Estimated Cost   : $0.008640 USD


In [None]:
import json

# Assuming microsite_json is the dictionary output from generate_microsite_with_gpt
microsite_data = microsite_json

# Three display paths follow: a well-formed microsite, an error dictionary,
# and anything else.

# Check if the dictionary contains the expected structure
if isinstance(microsite_data, dict) and "intent" in microsite_data and "elements" in microsite_data:
    print(f"Microsite Intent: {microsite_data['intent']}")
    print("\nMicrosite Elements:")

    # Iterate through the list of elements
    for i, element in enumerate(microsite_data["elements"]):
        print(f"\n--- Element {i+1} ---")
        # Extract type, title, and body from each element dictionary
        element_type = element.get("type", "N/A") # Use .get() for safe access
        element_title = element.get("title", "N/A")
        element_body = element.get("body", "N/A")

        print(f"Type: {element_type}")
        print(f"Title: {element_title}")
        print(f"Body:\n{element_body}")

# Error dictionary: show the message and, when present, the raw model output
elif isinstance(microsite_data, dict) and "error" in microsite_data:
    print("Error generating microsite:")
    print(f"Error message: {microsite_data.get('error', 'Unknown error')}")
    print(f"Raw content: {microsite_data.get('raw_content', 'No raw content')}")

# Anything else: report the type and content that were received
else:
    print("microsite_json is not a valid microsite dictionary.")
    print(f"Type received: {type(microsite_json)}")
    print(f"Content received: {microsite_json}")

Microsite Intent: Promote

Microsite Elements:

--- Element 1 ---
Type: informational
Title: Welcome to Sweet Treats Baking Studio
Body:
Located in the heart of Austin, our home baking studio specializes in custom cakes for all occasions. Whether it's a birthday, wedding, or any celebration, we craft delicious and beautiful cakes tailored to your vision.

--- Element 2 ---
Type: benefits
Title: Why Choose Us?
Body:
Our cakes are made from the finest ingredients, ensuring rich flavors and moist textures. We offer personalized designs, quick turnaround times, and a commitment to quality that will make your event unforgettable.

--- Element 3 ---
Type: trend
Title: Summer Baking Boom
Body:
Home-based dessert businesses peak during summer due to high demand for custom cakes for events. This is the perfect time to order your cake and make your celebrations even sweeter!

--- Element 4 ---
Type: CTA
Title: Order Your Custom Cake Today!
Body:
Ready to make your next event special? Visit our I

# Implement layout logic engine to assemble microsite structure based on intent





In [None]:
# Ordered element types that make up the microsite for each supported intent.
LAYOUT_RULES = {
    "sell": ["informational", "features", "price", "cta", "testimonial"],
    "promote": ["informational", "benefits", "trend", "cta", "testimonial"],
    "educate": ["informational", "bullet_points", "trend", "embed", "cta"]
}


def get_layout_for_intent(intent: str) -> List[str]:
    """
    Return the ordered element types for ``intent`` (case-insensitive).

    Unknown intents fall back to a generic three-element layout. The result
    is a fresh list, so callers may modify it freely.
    """
    # Copy the list: handing out the stored list would let a caller that
    # appends to or reorders the result silently corrupt LAYOUT_RULES.
    return list(LAYOUT_RULES.get(intent.lower(), ["informational", "cta", "text"]))


In [None]:
# Layout table: intent name (lower-case) -> ordered list of element types.
LAYOUT_RULES = {
    "sell": ["informational", "features", "price", "cta", "testimonial"],
    "promote": ["informational", "benefits", "trend", "cta", "testimonial"],
    "educate": ["informational", "bullet_points", "trend", "embed", "cta"]
}


def get_layout_for_intent(intent: str) -> List[str]:
    """
    Look up the element layout for ``intent``, ignoring case.

    Returns a new list on every call; intents that are not in the table get
    the generic ``["informational", "cta", "text"]`` layout.
    """
    # list(...) makes a copy so that mutating the returned layout cannot
    # change the shared LAYOUT_RULES entry.
    return list(LAYOUT_RULES.get(intent.lower(), ["informational", "cta", "text"]))

def build_prompt(intent: str,
                 user_input: str,
                 retrieval_context: Optional[List[str]] = None) -> dict:
    """
    Assemble the prompt config, embedding the intent-specific layout in the user prompt.

    The returned dict carries the fixed system prompt, a user prompt that
    contains the intent, the user's text, any retrieved context, the numbered
    layout and the expected JSON shape, plus the untouched
    ``retrieval_context`` and an empty ``instructions`` string.
    """
    system_prompt = "".join([
        "You are a microsite generation assistant. ",
        "Return a structured JSON response for a microsite with 5 elements. ",
        "Each element must include: type, title, and body. Body must be <150 words.",
    ])

    # Numbered, human-readable layout, e.g. "1. Informational".
    numbered_layout = "\n".join(
        f"{position}. {section.replace('_', ' ').title()}"
        for position, section in enumerate(get_layout_for_intent(intent), start=1)
    )

    # The context section stays an empty string when nothing was retrieved.
    context_section = (
        "Retrieved Context:\n" + "\n".join(f"- {snippet}" for snippet in retrieval_context)
        if retrieval_context
        else ""
    )

    user_prompt = "\n".join([
        f"Intent: {intent}",
        f"User Input: {user_input}",
        "",
        context_section,
        "",
        "Microsite layout:",
        numbered_layout,
        "",
        "Respond in the following JSON format:",
        "{",
        f'  "intent": "{intent}",',
        '  "elements": [{ "type": "...", "title": "...", "body": "..." }]',
        "}",
    ])

    return {
        "system_prompt": system_prompt,
        "user_input": user_prompt,
        "retrieval_context": retrieval_context,
        "instructions": ""  # layout and format guidance live in the user prompt
    }


In [None]:
# Test the updated build_prompt function with promote intent; no API call is
# made here, the prompt config is only built and printed
test_prompt = build_prompt(
    intent="Promote",
    user_input="Promote my home-based custom cake business",
    retrieval_context=[
        "Home-based dessert businesses see increased demand in summer events. (Industry Report)",
        "Customers love personalized, handcrafted cakes. (Social Media Trends)"
    ]
)

# Print the result for inspection
print("--- SYSTEM PROMPT ---")
print(test_prompt["system_prompt"])

# The user prompt embeds the retrieved context, the numbered layout and the
# expected JSON format
print("\n--- USER PROMPT ---")
print(test_prompt["user_input"])


--- SYSTEM PROMPT ---
You are a microsite generation assistant. Return a structured JSON response for a microsite with 5 elements. Each element must include: type, title, and body. Body must be <150 words.

--- USER PROMPT ---
Intent: Promote
User Input: Promote my home-based custom cake business

Retrieved Context:
- Home-based dessert businesses see increased demand in summer events. (Industry Report)
- Customers love personalized, handcrafted cakes. (Social Media Trends)

Microsite layout:
1. Informational
2. Benefits
3. Trend
4. Cta
5. Testimonial

Respond in the following JSON format:
{
  "intent": "Promote",
  "elements": [{ "type": "...", "title": "...", "body": "..." }]
}


In [None]:
import json

def parse_and_validate_gpt_output(gpt_response_text: str, expected_intent: str, layout_types: list):
    """
    Parse GPT output JSON text and validate the microsite structure.

    Args:
        gpt_response_text: Raw JSON text returned by the model.
        expected_intent: Intent the response must declare (case-insensitive).
        layout_types: Expected element types; only its length is checked here.

    Returns:
        The parsed dict when every check passes.

    Raises:
        ValueError: On invalid JSON or on any structural problem (wrong
            top-level type, missing keys, intent mismatch, wrong element
            count, malformed element, or a body longer than 150 words).
    """
    try:
        data = json.loads(gpt_response_text)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError covers a non-string input such as None.
        raise ValueError(f"Invalid JSON: {e}") from e

    # Valid JSON can still be a list, string or number; require an object so
    # the key checks below cannot fail with a TypeError.
    if not isinstance(data, dict):
        raise ValueError("Top-level JSON value must be an object.")

    # Check top-level keys
    if "intent" not in data or "elements" not in data:
        raise ValueError("JSON missing required keys 'intent' or 'elements'.")

    # Intent match check
    if not isinstance(data["intent"], str):
        raise ValueError("'intent' must be a string.")
    if data["intent"].lower() != expected_intent.lower():
        raise ValueError(f"Intent mismatch: expected '{expected_intent}', got '{data['intent']}'.")

    elements = data["elements"]
    if not isinstance(elements, list):
        raise ValueError("'elements' should be a list.")

    # Validate elements length
    if len(elements) != len(layout_types):
        raise ValueError(f"Number of elements {len(elements)} does not match layout length {len(layout_types)}.")

    # Validate each element
    for i, el in enumerate(elements):
        if not isinstance(el, dict):
            raise ValueError(f"Element {i} is not a dictionary.")

        for key in ("type", "title", "body"):
            if key not in el:
                raise ValueError(f"Element {i} missing key '{key}'.")
            if not isinstance(el[key], str) or not el[key].strip():
                raise ValueError(f"Element {i} key '{key}' must be a non-empty string.")

        # Reject bodies longer than 150 words (exactly 150 is accepted)
        word_count = len(el["body"].split())
        if word_count > 150:
            raise ValueError(f"Element {i} body exceeds 150 words ({word_count} words).")

    return data


In [None]:
def reorder_and_fill_elements(elements: list, layout_types: list):
    """
    Arrange ``elements`` in the order given by ``layout_types``.

    Elements are matched on their lower-cased ``type``. A layout slot without
    a matching element receives a placeholder dict; elements whose type is
    not in the layout are dropped.
    """
    # Index the incoming elements by lower-cased type (the last one wins).
    by_type = {}
    for item in elements:
        by_type[item["type"].lower()] = item

    def _slot(name):
        # Use the matching element, or build a stand-in for the missing slot.
        key = name.lower()
        if key in by_type:
            return by_type[key]
        return {
            "type": key,
            "title": f"Placeholder Title for {name.title()}",
            "body": f"This is placeholder content for the {name.title()} section."
        }

    return [_slot(name) for name in layout_types]


In [None]:
# Validate and normalise the JSON text of the earlier GPT response `r`
# (`gpt_response` below is the raw JSON string taken from it)

intent = "Promote"
# Same order as the "promote" entry of LAYOUT_RULES
layout = ["informational", "benefits", "trend", "cta", "testimonial"]

try:
    gpt_response = r.choices[0].message.content


    # Parse + structural checks, then put the elements in layout order
    parsed_data = parse_and_validate_gpt_output(gpt_response, intent, layout)
    clean_elements = reorder_and_fill_elements(parsed_data["elements"], layout)
    parsed_data["elements"] = clean_elements
    print("Validated and reordered microsite elements:")
    print(parsed_data)
except ValueError as e:
    # parse_and_validate_gpt_output reports validation failures as ValueError
    print("Error validating GPT output:", e)


Validated and reordered microsite elements:
{'intent': 'Promote', 'elements': [{'type': 'informational', 'title': 'Welcome to Sweet Treats Baking Studio', 'body': "Located in the heart of Austin, our home baking studio specializes in custom cakes for all occasions. Whether it's a birthday, wedding, or any celebration, we craft delicious and beautiful cakes tailored to your vision."}, {'type': 'benefits', 'title': 'Why Choose Us?', 'body': 'Our cakes are made from the finest ingredients, ensuring rich flavors and moist textures. We offer personalized designs, quick turnaround times, and a commitment to quality that will make your event unforgettable.'}, {'type': 'trend', 'title': 'Summer Baking Boom', 'body': 'Home-based dessert businesses peak during summer due to high demand for custom cakes for events. This is the perfect time to order your cake and make your celebrations even sweeter!'}, {'type': 'CTA', 'title': 'Order Your Custom Cake Today!', 'body': 'Ready to make your next event

In [None]:
def print_microsite(microsite_data: dict):
    """
    Pretty-print a microsite dict: the intent first, then each element in turn.

    Missing keys fall back to 'N/A' for the intent and to empty strings for an
    element's type, title and body.
    """
    heading = microsite_data.get('intent', 'N/A').title()
    print(f"Microsite Intent: {heading}\n")
    print("Microsite Elements:\n")

    position = 0
    for section in microsite_data.get("elements", []):
        position += 1
        print(f"--- Element {position} ---")
        kind = section.get('type', '').capitalize()
        print(f"Type: {kind}")
        print(f"Title: {section.get('title', '')}")
        # Label, body and a trailing blank line for spacing in one call
        print("Body:", section.get('body', ''), "", sep="\n")

# Example usage:
print_microsite(parsed_data)


Microsite Intent: Promote

Microsite Elements:

--- Element 1 ---
Type: Informational
Title: Welcome to Sweet Treats Baking Studio
Body:
Located in the heart of Austin, our home baking studio specializes in custom cakes for all occasions. Whether it's a birthday, wedding, or any celebration, we craft delicious and beautiful cakes tailored to your vision.

--- Element 2 ---
Type: Benefits
Title: Why Choose Us?
Body:
Our cakes are made from the finest ingredients, ensuring rich flavors and moist textures. We offer personalized designs, quick turnaround times, and a commitment to quality that will make your event unforgettable.

--- Element 3 ---
Type: Trend
Title: Summer Baking Boom
Body:
Home-based dessert businesses peak during summer due to high demand for custom cakes for events. This is the perfect time to order your cake and make your celebrations even sweeter!

--- Element 4 ---
Type: Cta
Title: Order Your Custom Cake Today!
Body:
Ready to make your next event special? Visit our I

# Set up API orchestration logic for dynamic calls to Mini
This code cleans up, repairs, and finalizes the microsite content generated by GPT, ensuring it is valid, complete, and in the expected format. It directly fulfills the layout-engine deliverable and partially supports prompt-logic validation.



In [None]:
import json
from typing import List, Optional

def parse_and_validate_gpt_output(gpt_response_text: str, expected_intent: str, layout_types: List[str]) -> dict:
    """
    Parse GPT output JSON string and validate the structure and intent.

    Args:
        gpt_response_text: Raw JSON text returned by the model.
        expected_intent: Intent the response must declare (case-insensitive).
        layout_types: Expected element types; only its length is checked here.

    Returns:
        The parsed dict.

    Raises:
        ValueError: On invalid JSON, a non-object top level, missing keys, a
            non-string or mismatching intent, a non-list ``elements`` value,
            or an element count that differs from the layout length.
    """
    try:
        data = json.loads(gpt_response_text)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError covers a non-string input such as None.
        raise ValueError(f"Invalid JSON: {e}") from e

    # Valid JSON may still be a list, string or number.
    if not isinstance(data, dict):
        raise ValueError("Top-level JSON value must be an object.")

    # Basic validation of keys
    if "intent" not in data or "elements" not in data:
        raise ValueError("Missing required keys 'intent' or 'elements' in GPT response")

    if not isinstance(data["intent"], str):
        raise ValueError("'intent' must be a string.")
    if data["intent"].lower() != expected_intent.lower():
        raise ValueError(f"Intent mismatch: expected {expected_intent}, got {data['intent']}")

    # Check the type before the length, so a non-list value is reported as
    # such instead of producing a misleading count (or a TypeError from len).
    elements = data["elements"]
    if not isinstance(elements, list):
        raise ValueError("'elements' should be a list.")
    if len(elements) != len(layout_types):
        raise ValueError(f"Elements count mismatch: expected {len(layout_types)}, got {len(elements)}")

    return data

def reorder_and_fill_elements(elements: List[dict], layout_types: List[str]) -> List[dict]:
    """
    Return one element per layout slot, in layout order.

    Matching is done on the lower-cased ``type``. Slots without a matching
    element are filled with a "Content coming soon." placeholder.
    """
    # Later elements with the same (case-insensitive) type replace earlier ones.
    by_kind = {}
    for entry in elements:
        by_kind[entry['type'].lower()] = entry

    ordered = []
    for wanted in layout_types:
        match = by_kind.get(wanted.lower())
        ordered.append(
            match if match is not None else {
                "type": wanted,
                "title": f"Placeholder for {wanted.title()}",
                "body": "Content coming soon."
            }
        )
    return ordered

def orchestrate(intent: str,
                user_input: str,
                gpt_raw_response,
                layout: List[str],
                retrieval_context: Optional[List[str]] = None):
    """
    Turn an already-obtained GPT response into a validated, ordered microsite.

    Parameters:
    - intent: The microsite intent string (e.g., "Promote").
    - user_input: The user's initial input prompt (not used by this function).
    - gpt_raw_response: The raw GPT API response object; its first choice's
      message content is read as the JSON text.
    - layout: The list of element types defining the microsite layout.
    - retrieval_context: Optional list of retrieved context strings (not used
      by this function).

    Returns:
    - The parsed and validated microsite dictionary, with its elements
      reordered to follow ``layout`` and missing slots filled by placeholders.

    Raises:
    - ValueError: If the response has no choices or no text content, or if
      validation of the parsed content fails.
    """
    # A response without choices or with empty/None content is reported as
    # ValueError, the same error type validation uses, so callers need a
    # single except clause.
    choices = getattr(gpt_raw_response, "choices", None)
    if not choices:
        raise ValueError("GPT response contains no choices.")

    content = choices[0].message.content
    if not isinstance(content, str) or not content.strip():
        raise ValueError("GPT response contains no message content.")

    # Parse and validate the GPT output JSON string
    parsed_data = parse_and_validate_gpt_output(content.strip(), expected_intent=intent, layout_types=layout)

    # Reorder elements according to the layout and fill missing ones with placeholders
    parsed_data["elements"] = reorder_and_fill_elements(parsed_data["elements"], layout)

    return parsed_data


# -----------------------------
# Example run: reuse the GPT response `r` obtained earlier, so no new API
# call is made here.
LAYOUT_PROMOTE = ["informational", "benefits", "trend", "cta", "testimonial"]

final_output = orchestrate(
    intent="Promote",
    user_input="Promote my home-based custom cake business",
    gpt_raw_response=r,  # Your GPT raw response stored earlier
    layout=LAYOUT_PROMOTE,
    retrieval_context=[
        "Home-based dessert businesses see increased demand in summer events. (Industry Report)",
        "Customers love personalized, handcrafted cakes. (Social Media Trends)"
    ]
)

# You can then print or further process final_output as needed:
print("Microsite Intent:", final_output["intent"])
print("Microsite Elements:\n")
for idx, el in enumerate(final_output["elements"], start=1):
    print(f"--- Element {idx} ---")
    # .title() is display-only; the stored type keeps its original casing
    print(f"Type: {el['type'].title()}")
    print(f"Title: {el['title']}")
    print(f"Body:\n{el['body']}\n")


Microsite Intent: Promote
Microsite Elements:

--- Element 1 ---
Type: Informational
Title: Welcome to Sweet Treats Baking Studio
Body:
Located in the heart of Austin, our home baking studio specializes in custom cakes for all occasions. Whether it's a birthday, wedding, or any celebration, we craft delicious and beautiful cakes tailored to your vision.

--- Element 2 ---
Type: Benefits
Title: Why Choose Us?
Body:
Our cakes are made from the finest ingredients, ensuring rich flavors and moist textures. We offer personalized designs, quick turnaround times, and a commitment to quality that will make your event unforgettable.

--- Element 3 ---
Type: Trend
Title: Summer Baking Boom
Body:
Home-based dessert businesses peak during summer due to high demand for custom cakes for events. This is the perfect time to order your cake and make your celebrations even sweeter!

--- Element 4 ---
Type: Cta
Title: Order Your Custom Cake Today!
Body:
Ready to make your next event special? Visit our In

# Build caching middleware for prompt

In [None]:
import json
from typing import List, Optional
import logging
import hashlib
import os
import time # Import time for retries

# Configure root logging at INFO level with a timestamped format
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


# The OpenAI `client` used below is assumed to be defined elsewhere in the
# notebook, along the lines of:
# from openai import OpenAI
# client = OpenAI(...)

# Directory holding one JSON file per cached prompt; created on first run
CACHE_DIR = "./cache_microsite"
os.makedirs(CACHE_DIR, exist_ok=True)


# Redefine utility functions if they are not available in the current cell block
def get_cache_key(prompt_config: dict) -> str:
    """
    Derive a SHA-256 hex digest that identifies a prompt configuration.

    Only the five prompt fields below contribute to the key; absent fields
    are replaced by their empty default before hashing.
    """
    field_defaults = (
        ("system_prompt", ""),
        ("user_input", ""),
        ("retrieval_context", []),
        ("nano_output", {}),
        ("instructions", ""),
    )
    fingerprint = {
        field: prompt_config.get(field, fallback)
        for field, fallback in field_defaults
    }
    # sort_keys keeps the serialization independent of dict ordering.
    serialized = json.dumps(fingerprint, sort_keys=True)
    return hashlib.sha256(serialized.encode('utf-8')).hexdigest()

def load_from_cache(cache_key: str):
    """
    Return the cached JSON payload stored under ``cache_key``, or None.

    None is returned when no cache file exists for the key, or when the file
    cannot be read or parsed (a warning is logged in that case).
    """
    location = os.path.join(CACHE_DIR, f"{cache_key}.json")
    if not os.path.isfile(location):
        return None

    try:
        with open(location, "r") as handle:
            payload = json.load(handle)
    except Exception as exc:
        # An unreadable entry is treated as a cache miss.
        logging.warning(f"Failed to load cache file {location}: {exc}")
        return None
    return payload

def save_to_cache(cache_key: str, data: dict):
    """
    Write ``data`` as indented JSON to the cache file for ``cache_key``.

    Failures are logged as warnings and otherwise ignored, so a broken cache
    never interrupts the caller.
    """
    target = os.path.join(CACHE_DIR, f"{cache_key}.json")
    try:
        with open(target, "w") as sink:
            json.dump(data, sink, indent=2)
    except Exception as exc:
        logging.warning(f"Failed to save cache file {target}: {exc}")

def generate_microsite_with_gpt(prompt_config: dict, max_tokens: int = 500):
    """
    Generate a microsite with gpt-4o-mini, consulting the file cache first.

    Returns the parsed JSON dictionary on success (fresh or cached). On
    failure it returns a dictionary with an ``error`` key; when the reply was
    not valid JSON, the dictionary also carries the text under ``raw_content``.
    """
    if not prompt_config.get("system_prompt"):
        logging.error("Prompt configuration is incomplete or invalid.")
        return {"error": "Prompt configuration invalid."}

    # A previously stored, non-empty result short-circuits the API call.
    cache_key = get_cache_key(prompt_config)
    cached = load_from_cache(cache_key)
    if cached:
        logging.info("Using cached GPT response.")
        return cached

    conversation = [
        {"role": "system", "content": prompt_config["system_prompt"]},
        {"role": "user", "content": prompt_config["user_input"]},
    ]

    # Optional extra user turns: retrieved context, image analysis, instructions.
    context_lines = prompt_config.get("retrieval_context")
    if context_lines:
        bullets = "\n".join(f"- {line}" for line in context_lines)
        conversation.append({"role": "user", "content": "Retrieved Context:\n" + bullets})

    image_analysis = prompt_config.get("nano_output")
    if image_analysis:
        conversation.append({
            "role": "user",
            "content": f"Image Analysis:\n{json.dumps(image_analysis, indent=2)}",
        })

    extra_instructions = prompt_config.get("instructions")
    if extra_instructions:
        conversation.append({"role": "user", "content": extra_instructions})

    try:
        logging.info("Calling OpenAI API...")
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=conversation,
            temperature=0.7,
            top_p=0.9,
            max_tokens=max_tokens,
            response_format={"type": "json_object"},
        )
        content = response.choices[0].message.content.strip()

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            logging.error(f"JSON Decode Error: {e}")
            logging.error(f"Content received from API was:\n{content}")
            return {"error": "Failed to parse JSON response from AI.", "raw_content": content}
        else:
            # Only successfully parsed replies are written to the cache.
            save_to_cache(cache_key, parsed)
            return parsed

    except Exception as e:
        logging.error(f"Error during OpenAI API call: {e}")
        return {"error": f"OpenAI API call failed: {e}"}


def parse_and_validate_gpt_output(gpt_data: dict, expected_intent: str, layout_types: List[str]) -> dict:
    """
    Validate the structure and intent of a *parsed* GPT output dictionary.

    Structural problems raise ValueError. Per-element content problems
    (missing/blank 'type' or 'title', non-string 'body', body longer than
    150 words) are only logged as warnings, so the response is still returned.

    Args:
        gpt_data: Parsed GPT response; must be a dict with 'intent' and 'elements'.
        expected_intent: Intent the response must declare (compared case-insensitively).
        layout_types: Not used by the checks in this function; kept so the
            signature stays compatible with existing callers.

    Returns:
        The same gpt_data object, unmodified.

    Raises:
        ValueError: If gpt_data is not a dict, a required key is missing, the
            intent is not a string or does not match expected_intent,
            'elements' is not a list, or any element is not a dict.
    """
    max_body_words = 150

    # Reject non-dict payloads up front so callers only ever have to catch
    # ValueError (a bare `in` / `.get` on None or an int would raise TypeError).
    if not isinstance(gpt_data, dict):
        raise ValueError(f"GPT response should be a dictionary, got {type(gpt_data).__name__}.")

    if "intent" not in gpt_data or "elements" not in gpt_data:
        raise ValueError("Missing required keys 'intent' or 'elements' in GPT response dictionary")

    # A present-but-non-string intent (e.g. null) is a mismatch, not a crash.
    actual_intent = gpt_data["intent"]
    if not isinstance(actual_intent, str) or actual_intent.lower() != expected_intent.lower():
        raise ValueError(f"Intent mismatch: expected '{expected_intent}', got '{actual_intent}'.")

    elements = gpt_data["elements"]
    if not isinstance(elements, list):
        raise ValueError("'elements' should be a list.")

    for i, el in enumerate(elements):
        if not isinstance(el, dict):
            raise ValueError(f"Element {i} is not a dictionary.")

        el_type = el.get("type")
        el_title = el.get("title")
        el_body = el.get("body")

        # The checks below are deliberately soft: they log and continue.
        if not isinstance(el_type, str) or not el_type.strip():
            logging.warning(f"Element {i} missing or invalid 'type'. Got: {el_type}")

        if not isinstance(el_title, str) or not el_title.strip():
            logging.warning(f"Element {i} missing or invalid 'title'. Got: {el_title}")

        if not isinstance(el_body, str):
            logging.warning(f"Element {i} missing or invalid 'body'. Expected string, got: {type(el_body)}")
            continue

        # Warn when the body is longer than the 150-word limit.
        word_count = len(el_body.split())
        if word_count > max_body_words:
            logging.warning(f"Element {i} body exceeds 150 words ({word_count} words).")

    return gpt_data


def reorder_and_fill_elements(elements: List[dict], layout_types: List[str]) -> List[dict]:
    """
    Reorder elements according to layout_types and fill any missing with placeholders.

    Input elements are matched to layout entries by their stripped, lowercased
    'type'. Items that are not dicts, or whose 'type' is missing/blank/not a
    string, are skipped with a warning. If several elements share a type, the
    last one wins. Elements whose type is not in layout_types are dropped.

    Args:
        elements: Elements from the GPT output; may be incomplete, unordered,
            contain non-dict items, or be None.
        layout_types: Desired element types, in output order.

    Returns:
        A new list with exactly one element per entry of layout_types. Matched
        elements are returned as normalised copies (lowercase 'type', stripped
        string 'title' and 'body'); the input dicts are not modified.
    """
    elements_by_type = {}
    for el in elements or []:
        if not isinstance(el, dict):
            logging.warning(f"Skipping non-dictionary item in elements list: {el}")
            continue
        el_type = el.get('type')
        if not isinstance(el_type, str) or not el_type.strip():
            logging.warning(f"Skipping element in reordering due to missing or invalid 'type': {el}")
            continue
        elements_by_type[el_type.strip().lower()] = el

    reordered = []
    for lt in layout_types:
        lt_lower = lt.lower()
        source = elements_by_type.get(lt_lower)
        if source is None:
            # Nothing usable for this slot: emit a placeholder.
            reordered.append({
                "type": lt_lower,  # Use lowercase for consistency
                "title": f"Placeholder for {lt.title()}",
                "body": "Content coming soon."
            })
            continue

        # Work on a shallow copy so the caller's dicts are left untouched.
        cleaned = dict(source)
        # The lookup key is the stripped/lowercased type, so it equals lt_lower.
        cleaned['type'] = lt_lower
        for field in ('title', 'body'):
            value = source.get(field)
            # An explicit None must become '' rather than the text "None".
            cleaned[field] = '' if value is None else str(value).strip()
        reordered.append(cleaned)
    return reordered


def orchestrate(prompt_config: dict, intent: str, layout: List[str], retries: int = 3, delay: int = 5):
    """
    Orchestrate microsite generation workflow with retries.

    Each attempt calls generate_microsite_with_gpt, validates the returned
    dictionary with parse_and_validate_gpt_output, then reorders/fills its
    elements with reorder_and_fill_elements. A failed attempt sleeps for
    `delay` seconds and tries again, unless it was the last attempt.

    Args:
        prompt_config: Prompt configuration passed through to generate_microsite_with_gpt.
        intent: Intent the generated microsite is expected to declare.
        layout: Element types, in the order they should appear in the result.
        retries: Maximum number of attempts.
        delay: Seconds to wait between attempts.

    Returns:
        On success, the validated microsite dict with 'elements' reordered to
        match `layout`. On failure, a dict with an 'error' key: the generator's
        own error dict (possibly with 'raw_content'), or a validation error
        with the offending payload under 'raw_data'.
    """
    # NOTE(review): the generator saves each successfully parsed response to its
    # cache before validation happens here. If that cache is also consulted on
    # every call (not visible from this function), a retry after a validation
    # failure may simply replay the same payload — confirm.
    for attempt in range(retries):
        is_last_attempt = attempt >= retries - 1
        logging.info(f"Attempt {attempt + 1}/{retries} to generate microsite.")

        # Call GPT with caching enabled - this function returns a dictionary
        microsite_data = generate_microsite_with_gpt(prompt_config)

        # Anything other than a dict cannot be a microsite; fold it into the
        # regular error path instead of failing on the `in` / `.get` below.
        if not isinstance(microsite_data, dict):
            microsite_data = {
                "error": "GPT call returned a non-dictionary payload.",
                "raw_content": microsite_data,
            }

        if "error" in microsite_data:
            logging.error(f"Attempt {attempt + 1} failed: GPT microsite generation failed or returned invalid JSON string.")
            logging.error(f"Error details: {microsite_data.get('error', 'Unknown Error')}")
            if "raw_content" in microsite_data:
                logging.error(f"Raw content from API:\n{microsite_data['raw_content']}")

            if not is_last_attempt:
                logging.info(f"Retrying in {delay} seconds...")
                time.sleep(delay)
                continue

            logging.error("Max retries reached. Failed to generate valid microsite data.")
            return microsite_data  # Return the final error dictionary

        try:
            validated_data = parse_and_validate_gpt_output(microsite_data, intent, layout)
            clean_elements = reorder_and_fill_elements(validated_data.get("elements", []), layout)
            validated_data["elements"] = clean_elements
        except (ValueError, TypeError, AttributeError) as e:
            # TypeError/AttributeError cover malformed payloads (e.g. a
            # non-string 'intent') that would otherwise abort the retry loop.
            logging.error(f"Attempt {attempt + 1} failed: Validation or processing error: {e}")
            logging.error(f"Data causing validation error:\n{json.dumps(microsite_data, indent=2)}")

            if not is_last_attempt:
                logging.info(f"Retrying in {delay} seconds...")
                time.sleep(delay)
                continue

            logging.error("Max retries reached. Failed to validate generated microsite data.")
            return {"error": f"Microsite data validation failed after retries: {e}", "raw_data": microsite_data}
        else:
            logging.info("Microsite data generated and validated successfully.")
            return validated_data

    # Only reached when the loop body never runs (retries <= 0).
    return {"error": "Orchestration failed after retries."}


# Element types for the "Promote" layout, in the order they should be shown.
LAYOUT_PROMOTE = ["informational", "benefits", "trend", "cta", "testimonial"]

intent, layout = "Promote", LAYOUT_PROMOTE

# Retrieved snippets and the user's request that together drive the prompt.
retrieval_context = [
    "Home-based dessert businesses peak during summer due to custom event demand. (Reddit)",
    "Instagram Reels showing behind-the-scenes baking have high engagement. (Quora)"
]
user_input = "Promote my home-based custom cake business in Austin to increase Instagram cake orders this summer."

# Build the prompt configuration for this request.
prompt_config = build_prompt(intent=intent, user_input=user_input, retrieval_context=retrieval_context)

# Requires `client`, `logging` and the cache directory to have been set up by
# earlier cells of the notebook.

# Generate the microsite: up to 5 attempts, 10 seconds apart.
result = orchestrate(prompt_config, intent, layout, retries=5, delay=10)

# Report the outcome: the error details on failure, otherwise each element.
if "error" in result:
    print("Error orchestrating microsite:")
    print(f"Error message: {result.get('error', 'Unknown error')}")
    # Show whichever raw payload the error dictionary carries, if any.
    if "raw_data" in result:
        print("\nRaw Data (if available):")
        print(json.dumps(result["raw_data"], indent=2))
    elif "raw_content" in result:
        print("\nRaw Content (if available):")
        print(result["raw_content"])
else:
    print(f"Microsite Intent: {result.get('intent', 'N/A')}\n")
    print("Microsite Elements:\n")
    # Defaults keep the printout working even if a key is missing.
    for idx, el in enumerate(result.get("elements", []), start=1):
        print(f"--- Element {idx} ---")
        print(f"Type: {el.get('type', 'N/A').capitalize()}")
        print(f"Title: {el.get('title', 'N/A')}")
        print("Body:")
        print(el.get('body', 'N/A'))
        print("\n" + "-" * 20)  # Separator between elements

Microsite Intent: Promote

Microsite Elements:

--- Element 1 ---
Type: Informational
Title: Welcome to Your Custom Cake Destination
Body:
At our home-based cake business in Austin, we specialize in creating custom cakes for every occasion. Whether it's a birthday, wedding, or a summer gathering, our cakes are made from the finest ingredients and tailored to your unique vision. Explore our delicious offerings and let us help you celebrate life's special moments with a sweet touch!

--------------------
--- Element 2 ---
Type: Benefits
Title: Why Choose Our Custom Cakes?
Body:
Choosing our custom cakes means opting for quality, personalization, and creativity. We offer a wide variety of flavors and designs, ensuring that your cake is as unique as your event. Our home-based approach allows for fresh, made-to-order cakes that cater to dietary needs, ensuring everyone can enjoy a slice of happiness this summer!

--------------------
--- Element 3 ---
Type: Trend
Title: Summer Baking Trends