In [1]:
# -*- coding: utf-8 -*-
"""
Colab Notebook: AI Imagery Pipeline - Phase 2: Creative Expert Agent & Image Generation

This notebook implements Steps 1-4 of the Phase 2 development plan:
1. Define Pydantic Model for Image Prompt (Structured Output)
2. Implement the "Creative Expert" Agent Function (`generate_image_prompt_for_strategy`)
   - Includes input refinement, reasoning, and handling missing branding/image reference.
3. Implement Prompt Assembly function (`assemble_final_prompt`)
   - Refined adaptive logic based on image reference/instructions.
4. Implement Image Generation/Editing function (`generate_image`) using the specified "gpt-image-1" model via OpenAI API.
   - Handles reference image input using client.images.edit.
   - Handles additional aspect ratios (2:3, 3:4) by mapping to closest supported size.
   - Uses a separate client/API key for image generation.
   - Fixed invalid parameter for image edit API call.
   - Added handling for b64_json response from edit API.
   - **Saves outputs (images and metadata JSON) to a timestamped run directory.**

It loads the JSON output from the previous phase, generates structured concepts,
assembles final text prompts, calls the image generation/editing API, and stores/displays results.
"""

'\nColab Notebook: AI Imagery Pipeline - Phase 2: Creative Expert Agent & Image Generation\n\nThis notebook implements Steps 1-4 of the Phase 2 development plan:\n1. Define Pydantic Model for Image Prompt (Structured Output)\n2. Implement the "Creative Expert" Agent Function (`generate_image_prompt_for_strategy`)\n   - Includes input refinement, reasoning, and handling missing branding/image reference.\n3. Implement Prompt Assembly function (`assemble_final_prompt`)\n   - Refined adaptive logic based on image reference/instructions.\n4. Implement Image Generation/Editing function (`generate_image`) using the specified "gpt-image-1" model via OpenAI API.\n   - Handles reference image input using client.images.edit.\n   - Handles additional aspect ratios (2:3, 3:4) by mapping to closest supported size.\n   - Uses a separate client/API key for image generation.\n   - Fixed invalid parameter for image edit API call.\n   - Added handling for b64_json response from edit API.\n   - **Saves ou

In [3]:
# @title Setup: Install Libraries
# !pip install openai pydantic instructor python-dotenv tenacity requests -q

In [4]:
# @title Setup: Import Libraries
import json
import os
import time
import traceback
from typing import List, Optional, Dict, Any, Tuple # Added Tuple
import base64 # Added for decoding b64_json
import requests # ** Added for downloading images from URL **

# Import necessary libraries for Colab, file handling, and API calls
# from google.colab import drive
from dotenv import load_dotenv
from pydantic import BaseModel, Field, field_validator # Import Pydantic
import instructor # Import instructor
from IPython.display import Image as IPImage, display # For displaying images

# Import OpenAI specific classes
try:
    from openai import OpenAI, APIConnectionError, RateLimitError, APIStatusError
    from openai.types.chat import ChatCompletion # To access usage attribute type hints if needed
    from openai.types.images_response import ImagesResponse # Type hint for image generation response
except ImportError:
    print("ERROR: openai library not found or old version. Please install with `pip install -U openai`")
    OpenAI = None # Set to None if import fails
    ChatCompletion = None # Set to None if import fails
    ImagesResponse = None # Set to None if import fails

print("Libraries imported.")
if not OpenAI:
    print("WARNING: OpenAI library import failed. API calls will not work.")
if not instructor:
    print("WARNING: instructor library import failed. Structured LLM calls will not work.")
if not BaseModel:
    print("WARNING: pydantic library import failed. Pydantic models cannot be defined.")


Libraries imported.


In [5]:
try:

    # --- Define Base Path on Google Drive (primarily for .env file) ---
    # ** IMPORTANT: Update this path if your .env file is elsewhere **
    DRIVE_BASE_PATH = r'D:\Self-Project\LLM\agent_ads' # <--- UPDATE IF NEEDED for .env

    # --- Define Path for Pipeline Outputs ---
    # Directory containing outputs from previous steps (JSON with strategies, reference image)
    PIPELINE_UPSTREAM_OUTPUT_DIR = r'D:\Self-Project\LLM\agent_ads\pipeline_upstream_outputs' # <--- Specific output directory from Phase 1/Step 2
    PIPELINE_DOWNSTREAM_DIR = r'D:\Self-Project\LLM\agent_ads\pipeline_donwstream_outputs' # <--- Base directory for final outputs

    # Define path for the input JSON from the previous phase
    # ** CORRECTED: Use the JSON file containing strategies from upstream **
    INPUT_JSON_FILENAME_PHASE1 = 'output_20250504_142856.json' # <--- JSON file from Phase 1 (contains strategies)
    INPUT_JSON_PATH_PHASE1 = os.path.join(PIPELINE_UPSTREAM_OUTPUT_DIR, INPUT_JSON_FILENAME_PHASE1)

    # Define path for the input JSON from the previous phase
    # This JSON contains the generated structured prompts from Step 2 run
    INPUT_JSON_FILENAME_WITH_PROMPTS = 'output_with_prompts_20250504_144134.json' # <-- Specific JSON filename from previous cell run
    INPUT_JSON_PATH_WITH_PROMPTS = os.path.join(PIPELINE_UPSTREAM_OUTPUT_DIR, INPUT_JSON_FILENAME_WITH_PROMPTS)

    # Define path for the reference image folder (where original ref image is stored)
    IMAGE_INPUT_DIR = PIPELINE_UPSTREAM_OUTPUT_DIR # Reference image is saved in the same upstream output directory

    # --- Directory Checks ---
    path_error = False
    # Check upstream dir
    if not os.path.isdir(PIPELINE_UPSTREAM_OUTPUT_DIR):
        print(f"❌ ERROR: Upstream output directory not found: {PIPELINE_UPSTREAM_OUTPUT_DIR}")
        PIPELINE_UPSTREAM_OUTPUT_DIR = None
        INPUT_JSON_PATH_WITH_PROMPTS = None
        IMAGE_INPUT_DIR = None
        path_error = True
    else:
        print(f"✅ Upstream output directory found: {PIPELINE_UPSTREAM_OUTPUT_DIR}")
        if INPUT_JSON_PATH_WITH_PROMPTS:
             print(f"   Expecting input JSON (with prompts) at: {INPUT_JSON_PATH_WITH_PROMPTS}")
             if not os.path.exists(INPUT_JSON_PATH_WITH_PROMPTS):
                  print(f"⚠️ Warning: Input JSON file with prompts not found at {INPUT_JSON_PATH_WITH_PROMPTS}.")
             else:
                  print(f"✅ Input JSON file with prompts found.")
        if IMAGE_INPUT_DIR:
             print(f"   Expecting reference images (if any) in: {IMAGE_INPUT_DIR}")

    # Check/Create downstream dir
    if not os.path.isdir(PIPELINE_DOWNSTREAM_DIR):
        print(f"⚠️ Downstream output directory not found: {PIPELINE_DOWNSTREAM_DIR}. Attempting to create.")
        try:
            os.makedirs(PIPELINE_DOWNSTREAM_DIR, exist_ok=True)
            print(f"✅ Created downstream output directory: {PIPELINE_DOWNSTREAM_DIR}")
        except Exception as mkdir_e:
            print(f"❌ Error creating downstream directory {PIPELINE_DOWNSTREAM_DIR}: {mkdir_e}")
            PIPELINE_DOWNSTREAM_DIR = None
            path_error = True
    else:
         print(f"✅ Downstream output directory found: {PIPELINE_DOWNSTREAM_DIR}")

    if path_error:
        print("ERROR: Halting due to missing critical directories.")
        # Optional: raise an exception here to stop execution
        # raise FileNotFoundError("Critical directories missing, cannot proceed.")


except Exception as e:
    print(f"❌ An error occurred during Google Drive mounting or path setting: {e}")
    DRIVE_BASE_PATH = None # Still needed for .env logic below
    PIPELINE_UPSTREAM_OUTPUT_DIR = None
    PIPELINE_DOWNSTREAM_DIR = None
    INPUT_JSON_PATH_WITH_PROMPTS = None
    IMAGE_INPUT_DIR = None

✅ Upstream output directory found: D:\Self-Project\LLM\agent_ads\pipeline_upstream_outputs
   Expecting input JSON (with prompts) at: D:\Self-Project\LLM\agent_ads\pipeline_upstream_outputs\output_with_prompts_20250504_144134.json
   Expecting reference images (if any) in: D:\Self-Project\LLM\agent_ads\pipeline_upstream_outputs
✅ Downstream output directory found: D:\Self-Project\LLM\agent_ads\pipeline_donwstream_outputs


#### Load Dotenv

In [6]:
# @title Setup: Load API Keys and Configure LLM Client
# --- Load API Keys ---
# Place a .env file in your DRIVE_BASE_PATH or a subfolder (e.g., 'secrets')
# The .env file should contain:
# OPENAI_API_KEY=your_openai_api_key_for_text_llm_here
# IMAGE_GEN_API_KEY=your_openai_api_key_for_image_gen_here # <-- ADD THIS KEY FOR IMAGE MODEL
# OPENROUTER_API_KEY=your_openrouter_api_key_here # Optional, if using OpenRouter

# Look for .env in DRIVE_BASE_PATH (can be adjusted if needed)
dotenv_path = os.path.join(DRIVE_BASE_PATH, ".env_colab") if DRIVE_BASE_PATH else None

if dotenv_path and os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path)
    print(f"✅ Loaded .env file from path: {dotenv_path}")
elif DRIVE_BASE_PATH: # Only warn if base path was set correctly
    print(f"⚠️ Warning: .env file not found at {dotenv_path} (relative to DRIVE_BASE_PATH). API keys should be set as environment variables.")
else: # Drive mount or base path failed
    print(f"⚠️ Warning: Cannot check for .env file as DRIVE_BASE_PATH is not set. API keys should be set as environment variables.")

✅ Loaded .env file from path: D:\Self-Project\LLM\agent_ads\.env_colab


#### Constants

In [9]:
model_pricing = {
    "x-ai/grok-3-beta": {"input": 3.00, "output": 15.00},
    "gemini-2.5-pro-exp-03-25": {"input": 0.00, "output": 0.00},
    "google/gemini-2.5-pro-preview-03-25": {"input": 1.25, "output": 10.00},
    "google/gemini-2.5-flash-preview": {"input": 0.15, "output": 0.60},
    "openai/o4-mini": {"input": 1.10, "output": 4.40},
    "openai/gpt-4.1-mini": {"input": 0.40, "output": 1.60},
    "gpt-image-1": {"input_text": 5.00, "input_img": 10.00, "output": 40.00}
    }


#### Configure LLM Client

In [10]:
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY_1")
IMAGE_GEN_API_KEY = os.getenv("OPENAI_API_KEY")

# --- Configure LLM Client (Instructor + OpenAI) ---
# We need an LLM client for the Creative Expert agent (Step 2)
# We will use the OpenAI client patched with Instructor
# Separate client for image generation (can use the same API key)
instructor_client = None
image_client = None

# Set max retries for LLM calls (e.g., 3 attempts)
MAX_LLM_RETRIES = 0 

# Define the model to use for the Creative Expert agent
# GPT-4o is recommended for its strong reasoning and potential vision capabilities if needed later
# Select LLM service provider
LLM_SERVICE_PROVIDER = "OpenRouter" # or "Gemini" or "openai" or "OpenRouter"

CREATIVE_EXPERT_MODEL = "openai/o4-mini" # Or "gemini-2.5-pro-exp-03-25" "openai/gpt-4.1-mini" "google/gemini-2.5-flash-preview" "google/gemini-2.5-pro-preview-03-25" "x-ai/grok-3-beta" "openai/o4-mini"

# ** MODIFIED: Define the Image Generation Model based on user documentation **
IMAGE_GENERATION_MODEL = "gpt-image-1" # Using identifier provided by user documentation


if OpenAI and instructor: # Check for packages
  if OPENROUTER_API_KEY or GEMINI_API_KEY:
    if LLM_SERVICE_PROVIDER == "OpenRouter":
      BASE_API_URL = "https://openrouter.ai/api/v1"
      BASE_API_KEY = OPENROUTER_API_KEY
    elif LLM_SERVICE_PROVIDER == "Gemini":
      BASE_API_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
      BASE_API_KEY = GEMINI_API_KEY
    try:
        # Initialize the base OpenAI client
        # Configure with retries using Tenacity settings integrated into the client
        text_base_client = OpenAI(
            api_key=BASE_API_KEY,
            base_url=BASE_API_URL,
            max_retries=MAX_LLM_RETRIES, # Configure retries directly in the client
        )
        # Patch the client with instructor
        instructor_client = instructor.patch(text_base_client)
        print(f"✅ Instructor client configured.")
        print(f"   - Creative Expert Model: {CREATIVE_EXPERT_MODEL}")

    except Exception as e:
        print(f"❌ Error initializing Instructor client: {e}")
        instructor_client = None

elif not OPENROUTER_API_KEY:
      print("⚠️ OpenRouter API Key not found.")
      # OPENROUTER_API_KEY = input("Enter your OpenRouter API Key: ")
elif not GEMINI_API_KEY:
      print("⚠️ Gemini API Key not found.")
      # GEMINI_API_KEY = input("Enter your Gemini API Key: ")
else: # Libraries missing
     print("⚠️ OpenAI/Instructor library not available for Instructor client.")


if OpenAI and IMAGE_GEN_API_KEY:
    try:
        # Initialize a separate OpenAI client instance for image generation
        image_client = OpenAI(
            api_key=IMAGE_GEN_API_KEY,
             # Retries can also be configured here if needed, separate from text client
            max_retries=MAX_LLM_RETRIES
        )
        print(f"✅ Image Generation client configured (using IMAGE_GEN_API_KEY).")
        print(f"   - Image Generation Model: {IMAGE_GENERATION_MODEL}")
    except Exception as e:
        print(f"❌ Error initializing Image Generation client: {e}")
        image_client = None
elif not IMAGE_GEN_API_KEY:
     print("⚠️ IMAGE_GEN_API_KEY not found for Image Generation client.")
else: # OpenAI library missing
     print("⚠️ OpenAI library not available for Image Generation client.")

# Final Check
if not instructor_client:
    print("   LLM-dependent steps will fail.")
if not image_client:
     print("   Image generation steps will fail.")

✅ Instructor client configured.
   - Creative Expert Model: openai/o4-mini
✅ Image Generation client configured (using IMAGE_GEN_API_KEY).
   - Image Generation Model: gpt-image-1


#### Pydantic Models

In [12]:

if BaseModel:
    # Define a structured format for the visual concept
    class VisualConceptDetails(BaseModel):
        """Detailed breakdown of the visual concept."""
        main_subject: str = Field(..., description="Detailed description of the primary subject(s) and their interaction within the scene.")
        composition_and_framing: str = Field(..., description="Description of the composition, camera angle, shot type (e.g., close-up, wide shot), and framing.")
        background_environment: str = Field(..., description="Description of the background, setting, or environment.")
        foreground_elements: Optional[str] = Field(None, description="Description of any significant foreground elements.")
        lighting_and_mood: str = Field(..., description="Description of the lighting style (e.g., natural, studio, dramatic) and the overall mood or atmosphere.")
        color_palette: str = Field(..., description="Description of the key colors, color harmony (e.g., analogous, complementary), and overall color tone.")
        visual_style: str = Field(..., description="Description of the artistic or visual style (e.g., photorealistic, illustration, graphic design, vintage). This should include key style descriptors.")
        # Dedicated fields for text/branding visuals
        promotional_text_visuals: Optional[str] = Field(None, description="Description of how promotional text (from task_description) should be visualized, including content, style, font characteristics, and placement suggestions.")
        branding_visuals: Optional[str] = Field(None, description="Description of how branding elements (logo placeholders, taglines, specific brand fonts/colors mentioned in branding_elements input) should be visually incorporated.")
        texture_and_details: Optional[str] = Field(None, description="Specific notes on textures, materials, or fine details.")
        negative_elements: Optional[str] = Field(None, description="Specific elements or concepts to actively avoid in the image.")
        # Field for creative reasoning
        creative_reasoning: Optional[str] = Field(None, description="Brief explanation connecting the key visual choices (style, mood, composition, subject focus) back to the marketing strategy (audience, niche, objective, voice) and user inputs.")


    # Main output model now contains the structured concept
    class ImageGenerationPrompt(BaseModel):
        """
        Structured prompt details generated by the Creative Expert agent,
        containing a breakdown of the visual concept. This structure will be
        processed later to create the final prompt for the text-to-image model.
        """
        visual_concept: VisualConceptDetails = Field(..., description="The detailed, structured breakdown of the visual concept.")
        aspect_ratio: str = Field(..., description="The target aspect ratio string (e.g., '1:1', '9:16', '16:9').")
        source_strategy_index: Optional[int] = Field(None, description="Index linking back to the source marketing strategy in the input JSON.")

        @field_validator('aspect_ratio')
        def check_aspect_ratio(cls, v):
            # ** MODIFIED: Added 2:3 and 3:4 to validation **
            valid_ratios = ['1:1', '9:16', '16:9', '2:3', '3:4']
            if v not in valid_ratios:
                # Allow flexibility but warn if not standard image generation ratio
                print(f"Warning: Aspect ratio '{v}' is not one of the expected values {valid_ratios}. Mapping to size might fail or use default.")
            return v

    # Define the structure for a single marketing strategy (input to Creative Expert)
    # This should match the output structure from the previous phase.
    class MarketingGoalSetFinal(BaseModel):
        """Represents a complete set of marketing goals for a creative direction."""
        target_audience: str
        target_niche: str
        target_objective: str
        target_voice: str

    print("✅ Pydantic models 'VisualConceptDetails', 'ImageGenerationPrompt', and 'MarketingGoalSetFinal' defined.")

else:
    VisualConceptDetails = None
    ImageGenerationPrompt = None
    MarketingGoalSetFinal = None
    print("⚠️ Pydantic models cannot be defined because the library is not available.")


✅ Pydantic models 'VisualConceptDetails', 'ImageGenerationPrompt', and 'MarketingGoalSetFinal' defined.


In [13]:
# @title Step 2: Implement the "Creative Expert" Agent Function

# Define the core function to generate the image prompt
def generate_image_prompt_for_strategy(
    generated_json: Dict[str, Any],
    strategy: Dict[str, Any],
    strategy_index: int,
    aspect_ratio: str,
    llm_client: instructor.Instructor # Expecting the patched client
) -> Optional[Tuple[Dict[str, Any], Optional[Dict[str, int]]]]: # Return type includes usage info
    """
    Generates a structured visual concept for a specific marketing strategy
    using the Creative Expert LLM agent. Includes input refinement and reasoning.

    Args:
        generated_json: The full JSON dictionary output from the previous phase.
        strategy: A dictionary representing one marketing strategy
                  (matching MarketingGoalSetFinal structure).
        strategy_index: The index of this strategy in the original list.
        aspect_ratio: The target aspect ratio string (e.g., "1:1").
        llm_client: The initialized and patched Instructor client.

    Returns:
        A tuple containing:
          - A dictionary representing the generated ImageGenerationPrompt object, or None if an error occurs.
          - A dictionary containing token usage info (`prompt_tokens`, `completion_tokens`), or None if unavailable/error.
    """
    # Check for Pydantic models as well
    if not llm_client or not ImageGenerationPrompt or not VisualConceptDetails:
        print("Error: LLM client or Pydantic models not available for Creative Expert.")
        return None, None # Return None for both prompt data and usage

    # --- Extract Context from Input JSON ---
    request_details = generated_json.get("request_details", {})
    user_inputs = generated_json.get("user_inputs", {})
    processing_context = generated_json.get("processing_context", {})

    task_type = request_details.get("task_type", "N/A")
    platform_name = request_details.get("target_platform", {}).get("name", "N/A")

    user_prompt_original = user_inputs.get("prompt")
    image_reference = user_inputs.get("image_reference") # Dict or None
    branding_elements = user_inputs.get("branding_elements")
    task_description = user_inputs.get("task_description") # Contains potential text for graphics

    image_analysis = processing_context.get("image_analysis_result") # Dict or None

    # --- Handle Image Reference Context ---
    has_image_reference = image_reference is not None
    image_instruction = image_reference.get("instruction") if has_image_reference else None
    # Get saved image path if reference exists
    saved_image_filename = image_reference.get("saved_image_path") if has_image_reference else None
    image_subject_from_analysis = None
    image_style_from_analysis = None # Placeholder for potential future analysis field

    if has_image_reference and isinstance(image_analysis, dict):
        image_subject_from_analysis = image_analysis.get("main_subject")
        # Example: if image_analysis included style:
        # image_style_from_analysis = image_analysis.get("style_mood")

    # --- Construct Prompt for Creative Expert LLM ---

    # REFINED System prompt with reasoning instruction
    system_prompt = """
    You are an expert Creative Director and Digital Marketing Strategist specializing in F&B social media visuals.
    Your task is to generate a highly detailed, creative, and effective *structured visual concept* based on the provided marketing strategy and context.
    This structured concept will later be used to generate a prompt for a text-to-image generation model.

    **Input Refinement:** Critically review the user's inputs (Original User Prompt Hint, Specific Task Content/Description, Branding Guidelines, Image Instruction). If any input is brief, vague, contains grammatical errors, or seems misaligned with the core strategy/task, interpret the user's likely intent, refine it, expand upon it creatively, and clearly explain your refined interpretation within the relevant structured output fields. Ensure the final concept is coherent and aligns with the marketing strategy and task type.

    **Core Task:** Embody creativity, imagination, and a deep understanding of visual design principles (color, composition, lighting, typography), F&B marketing trends, and image generation capabilities. Generate diverse, high-impact visual concepts tailored to the specific task type and marketing goals. Fill in all the fields of the requested Pydantic JSON output format (`ImageGenerationPrompt` containing `VisualConceptDetails`). Be specific, descriptive, and justify design choices implicitly through the descriptions. The `main_subject` field should describe all key subjects and their interaction clearly. The `visual_style` field should comprehensively describe the overall aesthetic, including relevant style descriptors.

    **Handling Image Reference (CRITICAL):**
    - If an image reference IS provided AND a specific user `instruction` IS given: Interpret the instruction and apply it when describing the concept (e.g., describe the style in `visual_style`, describe the subject in `main_subject`, describe a new background in `background_environment`).
    - If an image reference IS provided BUT NO specific user `instruction` is given: The **primary subject** of the visual concept MUST be the analyzed subject from the reference image. Your main creative task is to design the *context* around this subject (composition, background, lighting, style, etc.) aligned with the marketing strategy. The `main_subject` field in your output MUST start with a detailed description of this reference subject, followed by its interaction with the scene you design.
    - If NO image reference is provided: Generate the entire visual concept based on the marketing strategy and other inputs.

    **Text & Branding:**
    - If the task requires text (e.g., promotions, recipes, tips) or text is mentioned in the `task_description`, populate the `promotional_text_visuals` field. Describe the refined text content, its visual style (e.g., headline, caption), suggested font characteristics (e.g., bold sans-serif, elegant script), placement hierarchy, and integration with the overall visual.
    - Analyze the `Branding Guidelines` input. If provided, describe how these elements (logo placeholders, taglines, specific brand fonts/colors) should be visually incorporated in the `branding_visuals` field. Refine branding input if necessary for clarity.
    - **If `Branding Guidelines` are NOT provided, state this clearly in the `branding_visuals` field (e.g., "No specific branding guidelines provided; visual style derived from strategy and task.") and derive the visual style, colors, etc., primarily from the marketing strategy and task type.**

    **Creative Reasoning:** After defining the visual concept, provide a brief explanation in the `creative_reasoning` field, connecting the key visual choices (style, mood, composition, subject focus, color palette) back to the core marketing strategy (audience, niche, objective, voice) and any significant user inputs or refinements made, especially noting how the image reference was handled.

    Adhere strictly to the requested Pydantic JSON output format. Ensure all descriptions are detailed enough to guide image generation effectively.
    """

    # User prompt asks for structured output
    user_prompt_parts = [
        f"Generate a structured visual concept for an image targeting the '{platform_name}' platform ({aspect_ratio}).",
        f"The core marketing strategy for this image is:",
        f"- Target Audience: {strategy.get('target_audience', 'N/A')}",
        f"- Target Niche: {strategy.get('target_niche', 'N/A')}",
        f"- Target Objective: {strategy.get('target_objective', 'N/A')}",
        f"- Target Voice: {strategy.get('target_voice', 'N/A')}",
        f"\nConsider the overall task context:",
        f"- Task Type: {task_type}",
    ]

    # Add notes about potential refinement
    if user_prompt_original:
        user_prompt_parts.append(f"- Original User Prompt Hint: '{user_prompt_original}' (Interpret and refine this hint if it's brief or unclear, integrating its essence into the concept).")
    if task_description:
        user_prompt_parts.append(f"- Specific Task Content/Description: '{task_description}' (Interpret and refine this content if brief or unclear. If it includes text to be displayed, describe its visualization in `promotional_text_visuals`).")
    if branding_elements:
        user_prompt_parts.append(f"- Branding Guidelines: '{branding_elements}' (Interpret and refine these guidelines if brief or unclear. Describe their visualization in `branding_visuals`).")
    else:
        # Explicitly note if branding is missing for the LLM's context
        user_prompt_parts.append("- Branding Guidelines: Not Provided (Derive style from strategy/task).")


    # Image Reference Logic Integration into Prompt Instructions
    user_prompt_parts.append("\nImage Reference Context:")
    if has_image_reference:
        user_prompt_parts.append(f"- An image reference was provided (Filename: {saved_image_filename}).") # Mention filename for context
        if image_subject_from_analysis:
             user_prompt_parts.append(f"- Analysis identified the main subject as: '{image_subject_from_analysis}'.")
        # Add more analysis details if available (style, setting etc.)
        # if image_style_from_analysis:
        #     user_prompt_parts.append(f"- Analysis suggested style/mood: '{image_style_from_analysis}'.")

        if image_instruction:
            user_prompt_parts.append(f"- User Instruction for reference image: '{image_instruction}' (Interpret and refine this instruction if brief or unclear). Apply the refined instruction carefully when describing the visual concept fields for the *new* image. For example, if asked to 'use the style', describe that style in the `visual_style` field. If asked to 'use the subject', describe that subject in the `main_subject` field. If asked to 'replace background', describe the original subject in `main_subject` but describe a new background in `background_environment`.")
        else:
            # ** REFINED: Default behavior instruction strengthened **
            user_prompt_parts.append(f"- No specific instruction provided for the reference image. **Default behavior: The primary subject MUST be the analyzed subject ('{image_subject_from_analysis or 'Unknown'}').** Describe this subject in detail within the `main_subject` field, then focus your creative effort on designing the surrounding context (composition, background, lighting, style, etc.) to align with the marketing strategy.")
    else:
        user_prompt_parts.append("- No image reference was provided.")

    # Final instruction asks for structured fields including creative reasoning
    user_prompt_parts.append(f"""
\nBased on ALL the above context and your expertise (refining user inputs as needed), generate the `ImageGenerationPrompt` JSON object, ensuring the nested `VisualConceptDetails` object is fully populated with rich, descriptive details suitable for guiding a text-to-image model.
- Describe the `main_subject` clearly (following image reference logic if applicable). This field should encompass all key subjects in the scene and their interactions.
- Detail the `composition_and_framing`, including camera angle and shot type.
- Describe the `background_environment` or setting.
- Mention any important `foreground_elements`.
- Specify the `lighting_and_mood`.
- Define the `color_palette` and color harmony.
- Articulate the `visual_style` (e.g., photorealistic, illustration, graphic design). Ensure this field comprehensively describes the desired aesthetic.
- **Describe any required promotional text visualization in `promotional_text_visuals`.**
- **Describe any required branding visualization (logos, taglines, brand fonts/colors) in `branding_visuals`. Handle the case where no branding guidelines were provided.**
- Add notes on `texture_and_details` if relevant.
- List any `negative_elements` to avoid.
- **Provide a brief `creative_reasoning` explaining how the main visual choices connect to the marketing strategy and user inputs.**

Ensure the overall visual concept aligns strongly with the marketing strategy, task type '{task_type}', and incorporates the image reference context as instructed above. Set `aspect_ratio` to '{aspect_ratio}'.
""")

    final_user_prompt = "\n".join(user_prompt_parts)
    print(f"Final User Prompt : {final_user_prompt}")
    
    # --- Make LLM Call ---
    usage_info = None # Initialize usage info
    try:
        print(f"\n--- Generating structured prompt for Strategy {strategy_index} ---")
        # print(f"DEBUG: Sending User Prompt to Creative Expert:\n{final_user_prompt}") # Uncomment for debugging prompts

        # Use the globally configured CREATIVE_EXPERT_MODEL
        completion = llm_client.chat.completions.create(
            model=CREATIVE_EXPERT_MODEL,
            response_model=ImageGenerationPrompt, # Request the main model
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": final_user_prompt}
            ],
            temperature=0.7, # Allow for creativity
            max_tokens=2300, # Increase tokens slightly more for reasoning field
        )

        # Add the strategy index to the result before returning
        prompt_data = completion.model_dump()
        prompt_data['source_strategy_index'] = strategy_index

        print(f"✅ Successfully generated structured prompt object for Strategy {strategy_index}.")
        # print(f"   Prompt Details: {prompt_data}") # Uncomment for detailed output

        # ** ADDED: Extract token usage **
        try:
            # Access the raw response potentially wrapped by instructor
            raw_response = getattr(completion, '_raw_response', None)
            if raw_response and hasattr(raw_response, 'usage') and raw_response.usage:
                usage_data = raw_response.usage
                usage_info = {
                    "prompt_tokens": usage_data.prompt_tokens,
                    "completion_tokens": usage_data.completion_tokens,
                    "total_tokens": usage_data.total_tokens,
                }
                print(f"  Token Usage (Strategy {strategy_index}): Input={usage_info['prompt_tokens']}, Output={usage_info['completion_tokens']}, Total={usage_info['total_tokens']}")
            else:
                 print("  Token usage data not available in response object.")
        except Exception as usage_ex:
            print(f"  Warning: Could not extract token usage - {usage_ex}")


        return prompt_data, usage_info # Return both prompt and usage

    except (APIConnectionError, RateLimitError, APIStatusError) as api_error:
        print(f"❌ ERROR: API call failed for Strategy {strategy_index}: {api_error}")
        return None, None # Return None for both
    except Exception as e:
        # Catch potential Pydantic validation errors or other issues
        print(f"❌ ERROR: Failed to generate or validate prompt for Strategy {strategy_index}: {e}")
        print(traceback.format_exc())
        return None, None # Return None for both

print("✅ 'Creative Expert' agent function 'generate_image_prompt_for_strategy' defined.")



✅ 'Creative Expert' agent function 'generate_image_prompt_for_strategy' defined.


In [14]:
# @title Example Usage: Load JSON and Generate Prompts

# Ensure Pydantic models and function are defined
if ImageGenerationPrompt and MarketingGoalSetFinal and 'generate_image_prompt_for_strategy' in globals():

    # Load the input JSON data
    pipeline_data_phase1 = None # Use a distinct variable name
    generated_prompts = [] # List to store successfully generated prompt dicts
    total_prompt_tokens = 0 # Initialize token counters
    total_completion_tokens = 0
    total_tokens = 0

    # Check if INPUT_JSON_PATH was set correctly
    if INPUT_JSON_PATH_PHASE1 and os.path.exists(INPUT_JSON_PATH_PHASE1):
        try:
            with open(INPUT_JSON_PATH_PHASE1, 'r') as f:
                pipeline_data_phase1 = json.load(f)
            print(f"✅ Successfully loaded Phase 1 input JSON from: {INPUT_JSON_PATH_PHASE1}")
        except Exception as e:
            print(f"❌ Error loading JSON file '{INPUT_JSON_PATH_PHASE1}': {e}")
            pipeline_data_phase1 = None
    elif INPUT_JSON_PATH_PHASE1: # Path was defined but file doesn't exist
        print(f"❌ Phase 1 Input JSON file not found at '{INPUT_JSON_PATH_PHASE1}'. Cannot proceed with example.")
    else: # Path wasn't even defined (e.g., Drive mount failed or PIPELINE_OUTPUT_DIR check failed)
         print(f"❌ Phase 1 Input JSON path not defined or accessible. Cannot proceed with example.")


    # Proceed only if JSON loaded and LLM client is ready
    if pipeline_data_phase1 and instructor_client:
        # Ensure 'processing_context' exists before trying to access it
        if "processing_context" not in pipeline_data_phase1:
            pipeline_data_phase1["processing_context"] = {}
            print("⚠️ Initialized empty 'processing_context' in loaded data.")

        strategies = pipeline_data_phase1.get("processing_context", {}).get("suggested_marketing_strategies", [])
        target_platform_info = pipeline_data_phase1.get("request_details", {}).get("target_platform", {})
        aspect_ratio = target_platform_info.get("resolution", {}).get("aspect_ratio", "1:1") # Default to 1:1 if missing

        if not strategies:
            print("⚠️ No suggested marketing strategies found in the input JSON.")
        else:
            print(f"\nFound {len(strategies)} marketing strategies. Generating structured prompts...")

            # Iterate through each strategy and generate a prompt
            for index, strategy_dict in enumerate(strategies):
                # Validate the strategy dict structure (optional but good practice)
                try:
                    validated_strategy = MarketingGoalSetFinal(**strategy_dict)
                except Exception as val_err:
                    print(f"⚠️ Warning: Skipping strategy {index} due to invalid format: {val_err}")
                    continue

                # ** MODIFIED: Capture prompt data and usage info **
                prompt_result_dict, usage_info = generate_image_prompt_for_strategy(
                    generated_json=pipeline_data_phase1, # Use Phase 1 data
                    strategy=validated_strategy.model_dump(), # Pass strategy as dict
                    strategy_index=index,
                    aspect_ratio=aspect_ratio,
                    llm_client=instructor_client
                )
                time.sleep(1)

                if prompt_result_dict:
                    generated_prompts.append(prompt_result_dict)
                    # ** ADDED: Aggregate token usage **
                    if usage_info:
                        total_prompt_tokens += usage_info.get("prompt_tokens", 0)
                        total_completion_tokens += usage_info.get("completion_tokens", 0)
                        total_tokens += usage_info.get("total_tokens", 0)
                else:
                    # Log failure for this specific strategy
                    print(f"--- Failed to generate structured prompt for Strategy {index} ---")

            print("\n--- Prompt Generation Complete ---")
            print(f"Successfully generated {len(generated_prompts)} structured prompts.")

            # Store generated prompts back into the main data structure
            pipeline_data_phase1["processing_context"]["generated_image_prompts"] = generated_prompts
            print(f"✅ Stored generated prompts in pipeline_data_phase1['processing_context']['generated_image_prompts']")

            # ** ADDED: Display token usage and estimated cost **
            print("\n--- Token Usage & Cost Estimation (Creative Expert Stage) ---")
            print(f"Model Used: {CREATIVE_EXPERT_MODEL}")
            print(f"Total Input Tokens: {total_prompt_tokens}")
            print(f"Total Output Tokens: {total_completion_tokens}")
            print(f"Total Tokens: {total_tokens}")

            # --- Pricing (Example for GPT-4o as of May 2024 - CHECK CURRENT PRICING) ---
            # Input: $5.00 / 1M tokens
            # Output: $15.00 / 1M tokens
            # NOTE: Pricing can change frequently. Verify current rates on OpenAI's website.

            try:
                price_per_million_input = model_pricing[CREATIVE_EXPERT_MODEL]["input"] if CREATIVE_EXPERT_MODEL in model_pricing else 0.0
                price_per_million_output = model_pricing[CREATIVE_EXPERT_MODEL]["output"] if CREATIVE_EXPERT_MODEL in model_pricing else 0.0
                estimated_cost = ((total_prompt_tokens / 1_000_000) * price_per_million_input) + \
                                 ((total_completion_tokens / 1_000_000) * price_per_million_output)
                print(f"Estimated Cost: ${estimated_cost:.6f}")
                if CREATIVE_EXPERT_MODEL in model_pricing:
                    print(f"(Based on {CREATIVE_EXPERT_MODEL} pricing: ${price_per_million_input}/1M input, ${price_per_million_output}1M output. Please verify current pricing.)")
                else:
                    print(f"(No pricing information available for {CREATIVE_EXPERT_MODEL}).")
            except Exception as cost_e:
                print(f"Could not calculate cost: {cost_e}")
            # --- End Cost Estimation ---


            # Display the generated prompts (optional)
            if generated_prompts:
                 print("\nGenerated Structured Prompt Objects (showing first one as example):")
                 if generated_prompts: # Check again in case generation failed for all
                     print(f"\n--- Structured Prompt for Strategy {generated_prompts[0].get('source_strategy_index', 0)} ---")
                     # Pretty print the dictionary
                     print(json.dumps(generated_prompts[0], indent=2))
                     if len(generated_prompts) > 1:
                         print(f"\n...(plus {len(generated_prompts) - 1} more)")
                 else:
                     print("No prompts were successfully generated.")


            # Option to save the updated JSON data
            save_output = True # Set to False to disable saving
            if save_output and PIPELINE_UPSTREAM_OUTPUT_DIR: # Save to upstream dir for next step
                try:
                    # Create a new filename for the output with prompts
                    output_filename = f"output_with_prompts_{time.strftime('%Y%m%d_%H%M%S')}.json"
                    output_path = os.path.join(PIPELINE_UPSTREAM_OUTPUT_DIR, output_filename)
                    with open(output_path, 'w') as f:
                        json.dump(pipeline_data_phase1, f, indent=2) # Save the updated phase 1 data
                    print(f"\n✅ Successfully saved updated pipeline data with prompts to: {output_path}")
                    # ** Set this path for the next cell **
                    INPUT_JSON_PATH_WITH_PROMPTS = output_path
                except Exception as save_e:
                    print(f"\n❌ Error saving updated JSON file: {save_e}")


    elif not pipeline_data_phase1:
        # Message already printed above if file not found/path undefined
        pass
    else: # instructor_client is None
         print("Example usage skipped because the LLM client is not configured.")

else:
    print("⚠️ Example usage skipped because Pydantic models or the generation function are not defined (check setup steps).")



✅ Successfully loaded Phase 1 input JSON from: D:\Self-Project\LLM\agent_ads\pipeline_upstream_outputs\output_20250504_142856.json

Found 5 marketing strategies. Generating structured prompts...

--- Generating structured prompt for Strategy 0 ---
✅ Successfully generated structured prompt object for Strategy 0.
  Token Usage (Strategy 0): Input=1903, Output=1445, Total=3348

--- Generating structured prompt for Strategy 1 ---
✅ Successfully generated structured prompt object for Strategy 1.
  Token Usage (Strategy 1): Input=1910, Output=1714, Total=3624

--- Generating structured prompt for Strategy 2 ---
✅ Successfully generated structured prompt object for Strategy 2.
  Token Usage (Strategy 2): Input=1905, Output=994, Total=2899

--- Generating structured prompt for Strategy 3 ---
✅ Successfully generated structured prompt object for Strategy 3.
  Token Usage (Strategy 3): Input=1904, Output=1073, Total=2977

--- Generating structured prompt for Strategy 4 ---
✅ Successfully genera

In [20]:
# @title Step 3 & 4: Prompt Assembly and Image Generation Functions

# Added user_inputs parameter and refined logic
def assemble_final_prompt(structured_prompt_data: Dict[str, Any], user_inputs: Dict[str, Any]) -> str:
    """
    Assembles the final text prompt string from the structured visual concept details,
    adapting based on original user inputs for image reference.

    Args:
        structured_prompt_data: A dictionary representing the ImageGenerationPrompt object.
        user_inputs: The user_inputs dictionary from the main pipeline data.

    Returns:
        A string suitable for input to the image generation API.
    """
    if not structured_prompt_data or "visual_concept" not in structured_prompt_data:
        return "Error: Invalid structured prompt data."

    vc = structured_prompt_data["visual_concept"]
    aspect_ratio = structured_prompt_data.get("aspect_ratio", "1:1") # Get aspect ratio for context

    # Check original user inputs for image reference context
    image_reference = user_inputs.get("image_reference")
    has_reference = image_reference is not None
    has_instruction = has_reference and image_reference.get("instruction")
    instruction_text = image_reference.get("instruction", "") if has_instruction else ""

    # --- Assemble Core Description ---
    # Combine the detailed descriptions generated by the LLM
    core_description_parts = [
        vc.get("main_subject"),
        vc.get("composition_and_framing"),
        f"Background: {vc.get('background_environment')}",
        f"Foreground elements: {vc.get('foreground_elements')}" if vc.get("foreground_elements") else None,
        f"Lighting & Mood: {vc.get('lighting_and_mood')}",
        f"Color Palette: {vc.get('color_palette')}",
        f"Visual Style: {vc.get('visual_style')}",
        f"Textures & Details: {vc.get('texture_and_details')}" if vc.get("texture_and_details") else None,
        f"Text Visualization: {vc.get('promotional_text_visuals')}" if vc.get("promotional_text_visuals") else None,
        f"Branding Visualization: {vc.get('branding_visuals')}" if vc.get("branding_visuals") else None,
        f"Avoid the following elements: {vc.get('negative_elements')}" if vc.get("negative_elements") else None,
    ]
    core_description = ". ".join(filter(None, core_description_parts))

    # --- Add Contextual Prefix based on Reference/Instruction ---
    # This prefix provides clearer intent to the image model when editing vs generating
    prefix = ""
    if has_reference:
        if has_instruction:
            # If editing based on specific instruction
             prefix = f"Based on the provided reference image, modify it according to the user instruction '{instruction_text}' to achieve the following visual concept: "
        else:
            # If editing using reference subject as primary (default)
             prefix = f"Using the primary subject from the provided reference image, create a new image with the following visual concept: "
    # else: No prefix needed for pure generation

    # --- Combine Prefix and Core Description ---
    final_prompt = f"{prefix}{core_description} Ensure the image strictly adheres to a {aspect_ratio} aspect ratio."

    # print(f"DEBUG: Assembled Prompt:\n{final_prompt}\n") # Uncomment for debugging
    return final_prompt


def map_aspect_ratio_to_size(aspect_ratio: str) -> Optional[str]:
    """
    Maps aspect ratio string to size parameter supported by OpenAI Images API
    (e.g., DALL-E 3 sizes). Returns None if ratio is unsupported by standard sizes.
    """
    # Added 2:3 and 3:4 mapping
    if aspect_ratio == "9:16":
        return "1024x1792"
    elif aspect_ratio == "16:9":
        return "1792x1024"
    elif aspect_ratio == "1:1":
        return "1024x1024"
    elif aspect_ratio == "2:3":
        print(f"Warning: Mapping aspect ratio '2:3' to closest supported vertical size '1024x1792' (approx 9:16).")
        return "1024x1792"
    elif aspect_ratio == "3:4":
        print(f"Warning: Mapping aspect ratio '3:4' to closest supported vertical size '1024x1792' (approx 9:16).")
        return "1024x1792"
    else:
        print(f"Warning: Unsupported aspect ratio '{aspect_ratio}' for standard sizes. Cannot map to size.")
        return None # Indicate unsupported ratio

# @retry(stop=stop_after_attempt(MAX_LLM_RETRIES), wait=wait_exponential(multiplier=1, min=4, max=10)) # Optional: Add tenacity retry decorator for image gen
# ** MODIFIED: Added run_directory and strategy_index args **
def generate_image(
    final_prompt: str,
    aspect_ratio: str,
    client: OpenAI, # Expecting the base OpenAI client
    run_directory: str, # Target directory for saving edited images
    strategy_index: int, # Index for naming saved images
    reference_image_path: Optional[str] = None
) -> Optional[Tuple[str, Optional[str]]]: # Returns (status, url_or_filepath)
    """
    Generates or edits an image using the OpenAI Images API (model specified by IMAGE_GENERATION_MODEL)
    via the specified client. Uses client.images.edit if reference_image_path is provided,
    otherwise uses client.images.generate. Saves edited images to the run_directory.

    Args:
        final_prompt: The assembled text prompt string describing the desired outcome or edit.
        aspect_ratio: The aspect ratio string ('1:1', '9:16', '16:9', '2:3', '3:4').
        client: The initialized OpenAI client.
        run_directory: The path to the directory where outputs for this run are saved.
        strategy_index: The index of the current strategy (for filename).
        reference_image_path: Optional path to the reference image for editing.

    Returns:
        A tuple containing:
          - status: "success" or "error"
          - url_or_filepath: The image URL (for generation) or local file path (for edits)
                             if successful, or an error message string.
    """
    if not client:
        return "error", "Image generation client not available."
    if not final_prompt or final_prompt.startswith("Error:"):
         return "error", f"Invalid final prompt provided: {final_prompt}"
    if not run_directory or not os.path.isdir(run_directory):
         return "error", f"Invalid run_directory provided: {run_directory}"

    try:
        # Map aspect ratio to size, handle potential None
        image_size = map_aspect_ratio_to_size(aspect_ratio)
        if not image_size:
            return "error", f"Unsupported aspect ratio '{aspect_ratio}' for image generation/editing."

        response: Optional[ImagesResponse] = None # Initialize response
        operation_type = "generation" # Default

        # Conditional logic for generate vs edit
        if reference_image_path and os.path.exists(reference_image_path):
            operation_type = "editing"
            print(f"--- Calling Image Editing API ({IMAGE_GENERATION_MODEL}) with reference image ---")
            print(f"   Reference Image: {reference_image_path}")
            # print(f"   Edit Prompt: {final_prompt[:200]}...") # Print start of prompt for debug
            try:
                with open(reference_image_path, "rb") as image_file:
                    # ** FIXED: Removed response_format from edit call **
                    # ** NOTE: Edit API defaults to b64_json response format **
                    response = client.images.edit(
                        model=IMAGE_GENERATION_MODEL, # Use the model specified in setup
                        image=image_file,
                        prompt=final_prompt, # Prompt describes the desired edit/final state
                        n=1,
                        size=image_size # Use mapped size
                    )
            except FileNotFoundError:
                 return "error", f"Reference image not found at path: {reference_image_path}"
            except Exception as file_err:
                 return "error", f"Error opening reference image: {file_err}"

        else:
            operation_type = "generation"
            if reference_image_path: # Path provided but file doesn't exist
                 print(f"⚠️ Warning: Reference image path provided but file not found: {reference_image_path}. Falling back to generation.")
            print(f"--- Calling Image Generation API ({IMAGE_GENERATION_MODEL}) ---")
            # print(f"   Generation Prompt: {final_prompt[:200]}...") # Print start of prompt for debug

            response = client.images.generate(
                model=IMAGE_GENERATION_MODEL, # Use the model specified in setup
                prompt=final_prompt,
                size=image_size, # Use mapped size
                quality="high",  # or "hd" - check if supported by gpt-image-1 if different from DALL-E
                n=1,
                # style="vivid" # 'style' parameter is specific to DALL-E 3, may not apply to gpt-image-1
                # response_format="url" # Request URL for generation
            )

        # --- Process Response (Handles different formats) ---
        if response and response.data and len(response.data) > 0:
            image_data = response.data[0]
            if operation_type == "generation" and image_data.url:
                image_url = image_data.url
                print(f"✅ Image generation successful.")
                return "success", image_url # Return URL for generated images
            elif operation_type == "editing" and image_data.b64_json:
                # Handle b64_json response from edit
                print(f"✅ Image editing successful (received base64 data).")
                try:
                    image_bytes = base64.b64decode(image_data.b64_json)
                    # ** MODIFIED: Save edited image to the run_directory **
                    timestamp = time.strftime('%Y%m%d_%H%M%S')
                    local_filename = f"edited_image_strategy_{strategy_index}_{timestamp}.png"
                    local_filepath = os.path.join(run_directory, local_filename) # Save in run dir
                    with open(local_filepath, "wb") as f:
                        f.write(image_bytes)
                    print(f"   Saved edited image to: {local_filepath}")
                    return "success", local_filepath # Return the persistent local path
                except Exception as decode_save_err:
                    print(f"❌ Error decoding/saving base64 image: {decode_save_err}")
                    return "error", f"Error processing base64 response: {decode_save_err}"
            else:
                # Handle unexpected response format
                error_msg = f"Image API response format mismatch for {operation_type}."
                print(f"❌ {error_msg}")
                revised_prompt = image_data.revised_prompt if hasattr(image_data, 'revised_prompt') and image_data.revised_prompt else "N/A"
                error_msg += f" Revised prompt (if available): {revised_prompt}"
                return "error", error_msg
        else:
            error_msg = "Image API response did not contain expected data structure."
            print(f"❌ {error_msg}")
            return "error", error_msg

    except APIConnectionError as e:
        print(f"❌ ERROR: Image API connection error: {e}")
        return "error", f"Connection error: {e}"
    except RateLimitError as e:
        print(f"❌ ERROR: Image API rate limit exceeded: {e}")
        return "error", f"Rate limit error: {e}"
    except APIStatusError as e:
        print(f"❌ ERROR: Image API status error: {e.status_code} - {e.response}")
        # Extract error message if possible
        error_message = f"API status error {e.status_code}"
        try:
            error_details = e.response.json()
            if 'error' in error_details and 'message' in error_details['error']:
                error_message += f": {error_details['error']['message']}"
        except:
            pass # Ignore if response parsing fails
        return "error", error_message
    except Exception as e:
        print(f"❌ ERROR: Unexpected error during image operation: {e}")
        print(traceback.format_exc())
        return "error", f"Unexpected error: {e}"

print("✅ Prompt assembly and Image generation/editing functions defined.")


✅ Prompt assembly and Image generation/editing functions defined.


In [21]:
# @title Step 5 & 6: Update Pipeline - Generate Images and Display Results

# ** MODIFIED: Explicitly load the JSON containing generated prompts **
pipeline_data = None

# Make sure this filename matches the output from the previous cell run
# Example: prompts_json_path = "/content/drive/MyDrive/AI Imagery Marketing Tool/Colab Notebook/pipeline_upstream_outputs/output_with_prompts_20250503_032100.json"
# ** You might need to manually update this path based on the actual output filename **
if 'output_path' in globals() and os.path.exists(output_path): # Check if path from previous cell exists
    prompts_json_path = output_path
    print(f"Output Path of Stratefy with Prompts exists in {output_path}")
else:
    # # Fallback to the user-provided path if the variable doesn't exist (e.g., running cells separately)
    # prompts_json_dir = "/content/drive/MyDrive/AI Imagery Marketing Tool/Colab Notebook/pipeline_upstream_outputs" # <-- Path to JSON with prompts
    # prompts_json_filename = "output_with_prompts_20250504_144134.json"
    # prompts_json_path = os.path.join(prompts_json_dir, prompts_json_filename)
    raise LookupError("Output of Strategy with Prompts Json File Not Found !!!")

print(f"--- Loading pipeline data with generated prompts from: {prompts_json_path} ---")
try:
    with open(prompts_json_path, 'r') as f:
        pipeline_data = json.load(f)
    print(f"✅ Successfully loaded data.")
except Exception as e:
    print(f"❌ Error loading JSON file '{prompts_json_path}': {e}")
    pipeline_data = None


# Ensure functions are defined and clients/data are ready
if ('assemble_final_prompt' in globals() and
    'generate_image' in globals() and
    pipeline_data and # Check if data was loaded successfully above
    image_client and   # Check if image client is configured
    PIPELINE_DOWNSTREAM_DIR): # Check if downstream dir is set

    # --- Create Run-Specific Output Directory ---
    run_timestamp = time.strftime('%Y%m%d_%H%M%S')
    current_run_dir = os.path.join(PIPELINE_DOWNSTREAM_DIR, f"run_{run_timestamp}")
    try:
        os.makedirs(current_run_dir, exist_ok=True)
        print(f"\n✅ Created run output directory: {current_run_dir}")
    except Exception as run_dir_e:
        print(f"❌ ERROR: Could not create run directory '{current_run_dir}': {run_dir_e}")
        raise SystemExit("Halting execution due to output directory creation failure.") from run_dir_e


    # Get the structured prompts generated in the previous cell
    structured_prompts = pipeline_data.get("processing_context", {}).get("generated_image_prompts", [])
    image_generation_results = [] # List to store results (url or error)
    final_assembled_prompts = [] # List to store the assembled prompts used

    # Get reference image info from the main JSON
    user_inputs = pipeline_data.get("user_inputs", {}) # Get user inputs dict
    reference_image_info = user_inputs.get("image_reference")
    base_reference_image_path = None
    if reference_image_info and IMAGE_INPUT_DIR: # Use IMAGE_INPUT_DIR for source
        saved_image_filename = reference_image_info.get("saved_image_path")
        if saved_image_filename:
            # Construct path assuming image is in the UPSTREAM output dir
            base_reference_image_path = os.path.join(IMAGE_INPUT_DIR, saved_image_filename)
            if not os.path.exists(base_reference_image_path):
                 print(f"⚠️ Warning: Base reference image file not found at calculated path: {base_reference_image_path}")
                 base_reference_image_path = None # Reset if not found
            else:
                 print(f"✅ Found reference image: {base_reference_image_path}")

    if not structured_prompts:
        print("⚠️ No structured prompts found in pipeline_data to generate images from.")
    else:
        print(f"\nFound {len(structured_prompts)} structured prompts. Assembling final prompts and generating/editing images...")

        # Iterate through structured prompts
        for structured_prompt_dict in structured_prompts:
            strategy_index = structured_prompt_dict.get("source_strategy_index", "N/A")
            print(f"\n--- Processing Strategy {strategy_index} ---")

            # Pass user_inputs to assemble_final_prompt
            final_prompt_str = assemble_final_prompt(structured_prompt_dict, user_inputs)
            final_assembled_prompts.append({"index": strategy_index, "prompt": final_prompt_str}) # Store assembled prompt

            if final_prompt_str.startswith("Error:"):
                print(f"   Skipping image operation due to prompt assembly error: {final_prompt_str}")
                image_generation_results.append({"index": strategy_index, "status": "error", "result": final_prompt_str, "saved_path": None})
                continue

            # Step 4: Generate or Edit the image
            aspect_ratio = structured_prompt_dict.get("aspect_ratio", "1:1")
            # Pass reference image path and RUN DIRECTORY to generate_image
            status, result_data = generate_image(
                final_prompt=final_prompt_str,
                aspect_ratio=aspect_ratio,
                client=image_client, # Use the separate image client
                run_directory=current_run_dir, # Pass the specific run directory
                strategy_index=strategy_index, # Pass index for filename
                reference_image_path=base_reference_image_path # Pass the path
            )

            # ** MODIFIED: Handle URL download and saving **
            saved_image_path = None # Initialize path for this iteration
            if status == "success":
                if result_data.startswith("http"): # It's a URL from generation
                    try:
                        print(f"   Downloading generated image from URL...")
                        image_response = requests.get(result_data, stream=True)
                        image_response.raise_for_status() # Raise an exception for bad status codes
                        # Create filename
                        img_filename = f"generated_image_strategy_{strategy_index}_{run_timestamp}.png" # Or use original extension if detectable
                        saved_image_path = os.path.join(current_run_dir, img_filename)
                        with open(saved_image_path, "wb") as f:
                            for chunk in image_response.iter_content(chunk_size=8192):
                                f.write(chunk)
                        print(f"   Saved generated image to: {saved_image_path}")
                        result_data = saved_image_path # Update result to be the local path
                    except requests.exceptions.RequestException as req_err:
                         print(f"❌ Error downloading image URL {result_data}: {req_err}")
                         status = "error"
                         result_data = f"Download error: {req_err}"
                    except IOError as io_err:
                         print(f"❌ Error saving downloaded image to {saved_image_path}: {io_err}")
                         status = "error"
                         result_data = f"File save error: {io_err}"
                    except Exception as download_err:
                         print(f"❌ Unexpected error during image download/save: {download_err}")
                         status = "error"
                         result_data = f"Unexpected download/save error: {download_err}"
                elif os.path.exists(result_data): # It's already a local path (from edit)
                    saved_image_path = result_data # Already saved in generate_image
                else: # Should not happen if status is success, but handle defensively
                    print(f"⚠️ Success status but invalid result data: {result_data}")
                    status = "error"
                    result_data = "Invalid success result data"

            image_generation_results.append({"index": strategy_index, "status": status, "result_path": saved_image_path, "original_result": result_data}) # Store path

        print("\n--- Image Generation/Editing/Saving Complete ---")

        # Store results back into the main data structure (now with local paths)
        pipeline_data["processing_context"]["final_assembled_prompts"] = final_assembled_prompts
        pipeline_data["processing_context"]["generated_image_results"] = image_generation_results
        print(f"✅ Stored assembled prompts and image generation results (with paths) in pipeline_data.")

        # --- Display Results ---
        print("\n--- Generated Image Results ---")
        num_success = 0
        if image_generation_results:
            for result_info in image_generation_results:
                idx = result_info.get("index", "N/A")
                status = result_info.get("status")
                saved_path = result_info.get("result_path")
                original_result = result_info.get("original_result")
                print(f"\nStrategy {idx}:")
                if status == "success" and saved_path and os.path.exists(saved_path):
                    num_success += 1
                    print(f"  Status: Success")
                    print(f"  Saved Path: {saved_path}")
                    # Display the image using IPython.display from the saved path
                    try:
                        display(IPImage(filename=saved_path, width=256)) # Display smaller image
                    except Exception as display_e:
                        print(f"  Error displaying image from path {saved_path}: {display_e}")
                elif status == "success": # Success reported but path invalid
                     print(f"  Status: Error (Post-processing)")
                     print(f"  Message: Success reported but saved path is invalid: {saved_path}")
                     print(f"  Original Result Data: {original_result}")
                else: # Status was 'error'
                    print(f"  Status: Error")
                    print(f"  Message: {original_result}") # Show original error message
            print(f"\nSuccessfully generated and saved {num_success} out of {len(image_generation_results)} images.")
        else:
            print("No image generation attempts were made.")
        # --- End Display ---


        # ** MODIFIED: Save the final metadata JSON inside the run directory **
        save_output = True # Set to False to disable saving
        if save_output and current_run_dir: # Check if run dir was created
            try:
                # Save the metadata JSON within the run-specific directory
                metadata_filename = f"run_metadata_{run_timestamp}.json"
                output_path = os.path.join(current_run_dir, metadata_filename)
                with open(output_path, 'w') as f:
                    json.dump(pipeline_data, f, indent=2)
                print(f"\n✅ Successfully saved final pipeline metadata to: {output_path}")
            except Exception as save_e:
                print(f"\n❌ Error saving final metadata JSON file: {save_e}")


elif not pipeline_data:
    print("⚠️ Cannot proceed: Pipeline data not loaded from the specified file.")
elif not image_client:
    print("⚠️ Cannot proceed: Image generation client (OpenAI client) is not configured.")
elif not PIPELINE_DOWNSTREAM_DIR:
     print("⚠️ Cannot proceed: Downstream output directory path is not valid.")
else:
    print("⚠️ Cannot proceed: Required functions ('assemble_final_prompt', 'generate_image') not defined.")



Output Path of Stratefy with Prompts exists in D:\Self-Project\LLM\agent_ads\pipeline_donwstream_outputs\run_20250506_220147\run_metadata_20250506_220147.json
--- Loading pipeline data with generated prompts from: D:\Self-Project\LLM\agent_ads\pipeline_donwstream_outputs\run_20250506_220147\run_metadata_20250506_220147.json ---
✅ Successfully loaded data.

✅ Created run output directory: D:\Self-Project\LLM\agent_ads\pipeline_donwstream_outputs\run_20250506_220227

Found 5 structured prompts. Assembling final prompts and generating/editing images...

--- Processing Strategy 0 ---
--- Calling Image Generation API (gpt-image-1) ---
❌ Image API response format mismatch for generation.

--- Processing Strategy 1 ---
--- Calling Image Generation API (gpt-image-1) ---
❌ Image API response format mismatch for generation.

--- Processing Strategy 2 ---
--- Calling Image Generation API (gpt-image-1) ---
❌ Image API response format mismatch for generation.

--- Processing Strategy 3 ---
--- Call