In [1]:
import google.generativeai as genai
# import requests # No longer needed for Ollama
import json
import os
import base64
from docx import Document
# from docx.shared import Inches
from PIL import Image, UnidentifiedImageError
import io
import re
# from dotenv import load_dotenv # Optional

# --- Configuration ---
# load_dotenv() # Optional
# OLLAMA specific config removed
# GOOGLE_API_KEY is fetched and checked in main execution block

# --- Load AEM Block Definitions ---
# Block names offered to the model. The default list below is replaced by the
# keys of the top-level "blocks" object in codebase_representation.json when
# that file can be read and contains at least one block.
available_blocks = ["hero", "columns", "cards", "fragment", "header", "footer"]  # Default fallback
# Relative path: resolved against the current working directory.
# Defined outside the try block so every handler below can safely report it.
codebase_json_path = 'codebase_representation.json'
try:
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open(codebase_json_path, 'r', encoding='utf-8') as f:
        aem_blocks_data = json.load(f)
    loaded_blocks = list(aem_blocks_data.get('blocks', {}).keys())
    if loaded_blocks:
        available_blocks = loaded_blocks
        print(f"Successfully loaded AEM block data from {codebase_json_path}. Available blocks: {', '.join(available_blocks)}")
    else:
        print(f"Warning: No blocks found in {codebase_json_path}. Using default fallback: {', '.join(available_blocks)}")

except FileNotFoundError:
    print(f"Warning: Could not find {codebase_json_path}. Using default fallback block list: {', '.join(available_blocks)}")
except json.JSONDecodeError:
    print(f"Warning: Could not parse {codebase_json_path}. Check JSON validity. Using default fallback block list: {', '.join(available_blocks)}")
except Exception as e:
    # Best-effort load: any other problem (unexpected JSON shape, permissions,
    # decoding errors) must not stop the script, so fall back to the defaults.
    print(f"An unexpected error occurred loading block data: {e}. Using default fallback block list: {', '.join(available_blocks)}")

# --- Helper Functions ---

def analyze_image_with_gemini(image_path, api_key):
    """Describe a webpage design image using a Gemini multimodal model.

    Args:
        image_path: Path of the image file to analyze.
        api_key: Google AI API key passed to ``genai.configure``.

    Returns:
        The model's description as a stripped string. This function does not
        raise: every failure (configuration, unreadable image, blocked or
        empty response, API error) is reported as a string that starts with
        ``"Error:"``, which callers test for.
    """
    print(f"Analyzing image: {image_path} using Gemini Vision...")

    # --- Configure Gemini API ---
    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        print(f"Error configuring Gemini API for vision: {e}")
        return f"Error: Could not configure Gemini API ({e})."

    # --- Prepare Image Data ---
    try:
        # Image.open is lazy and keeps the file open. Loading the pixel data
        # inside the context manager lets the file handle be released while
        # the in-memory image stays usable for the request below.
        with Image.open(image_path) as img:
            img.load()
    except FileNotFoundError:
        print(f"Error: Image file not found at {image_path}")
        return "Error: Image file not found."
    except UnidentifiedImageError:
        print(f"Error: Cannot identify image file. Is {image_path} a valid image?")
        return "Error: Invalid image file."
    except Exception as e:
        print(f"Error opening image {image_path}: {e}")
        return f"Error: Could not open image ({e})."

    # --- Prepare Prompt ---
    prompt_parts = [
        "Analyze this webpage design cue image.",
        "Describe the layout and content in detail.",
        "Identify distinct visual sections and their components (like headers, text paragraphs, images, buttons, cards, columns).",
        "Be specific about the arrangement and appearance.",
        "\n",  # Separator
        img,  # The image object itself
    ]

    try:
        # --- Generate Content ---
        # The model must accept multimodal (image + text) input.
        model = genai.GenerativeModel('gemini-1.5-pro-latest')

        # Lower temperature: this step should describe, not invent.
        generation_config = genai.types.GenerationConfig(temperature=0.4)
        safety_settings = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        ]

        response = model.generate_content(
            prompt_parts,
            generation_config=generation_config,
            safety_settings=safety_settings,
        )

        # --- Process Response ---
        if not response.candidates:
            print("Error: Gemini vision response has no candidates. Content might be blocked.")
            if hasattr(response, 'prompt_feedback'):
                print(f"Prompt Feedback: {response.prompt_feedback}")
            return "Error: Could not get description from Gemini Vision (No candidates)."

        candidate = response.candidates[0]
        parts = getattr(candidate.content, 'parts', None)
        if parts:
            # A candidate may carry its text in several parts; join them all
            # instead of silently dropping everything after the first one.
            description = "".join(getattr(part, 'text', '') or '' for part in parts).strip()
            print("Gemini vision analysis successful.")
            return description

        print("Error: Gemini vision response candidate does not contain expected text part.")
        print(f"Finish Reason: {candidate.finish_reason}")
        if hasattr(candidate, 'safety_ratings'):
            print(f"Safety Ratings: {candidate.safety_ratings}")
        return "Error: Could not get description from Gemini Vision (Candidate missing text)."

    except Exception as e:
        # Top-level boundary for the API call: report instead of crashing.
        print(f"Error during Gemini vision analysis: {e}")
        return f"Error: An unexpected error occurred with Gemini Vision ({e})."


def _clean_structure_text(raw_text):
    """Strip an echoed "Begin Output:" header and markdown code fences from model text."""
    # The header may be echoed plain or in bold ("**Begin Output:**").
    header_pattern = re.compile(r'^\s*\**\s*Begin Output:\s*\**\s*\n', re.IGNORECASE)

    cleaned = raw_text.strip()
    # Remove the header first so a fence that follows it sits at the very
    # start of the text and is matched by the opening-fence pattern.
    cleaned = header_pattern.sub('', cleaned)
    cleaned = re.sub(r'^\s*```[a-zA-Z]*\n?', '', cleaned)
    cleaned = re.sub(r'\n?```\s*$', '', cleaned)
    # Second pass covers the opposite order (fence first, header inside it).
    cleaned = header_pattern.sub('', cleaned)
    return cleaned.strip()


def generate_word_structure_with_gemini(image_description, blocks_list, api_key):
    """Ask Gemini (text prompt only) for an AEM Word document structure.

    Args:
        image_description: Text description of the design, interpolated into
            the prompt.
        blocks_list: Iterable of AEM block names the model may choose from;
            joined with ", " into the prompt.
        api_key: Google AI API key passed to ``genai.configure``.

    Returns:
        The generated structure text with any echoed "Begin Output:" header
        and surrounding markdown code fences removed. This function does not
        raise: every failure is reported as a string that starts with
        ``"Error:"``, which callers test for.
    """
    print("Generating Word structure with Gemini (Text)...")

    # --- Configure Gemini API (can be redundant but safe) ---
    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        print(f"Error configuring Gemini API for text generation: {e}")
        return f"Error: Could not configure Gemini API ({e})."

    # --- Prepare Prompt ---
    # The example in the prompt defines the line-oriented format
    # ("Table Start:", "Row: | ... |", "Table End", "---") that the document
    # builder later parses.
    prompt = f"""
    Analyze the following description of a webpage design cue and generate the corresponding content structure for an Adobe Experience Manager (AEM) Word document.

    **Webpage Design Description:**
    {image_description}

    **AEM Authoring Conventions:**
    * Use standard paragraphs for regular text content.
    * Represent AEM blocks using Word tables.
    * A block table's first row, first cell MUST contain the block name (case-insensitive, choose from the available list below). Other cells in the first row are usually empty.
    * Subsequent rows in the table contain the block's content (text, image placeholders, list items, etc.), distributed across columns as appropriate for the block type.
    * Use placeholder text like "[Placeholder Image: describe image if possible]" or "[Placeholder Text: describe content type]" where specific text/images aren't detailed in the description.
    * Separate distinct blocks or major content sections with a line containing only '---' (this represents a horizontal rule in Word).
    * Available AEM block names for use: {', '.join(blocks_list)}

    **Task:**
    Generate the sequence of paragraphs, AEM block tables, and separators ('---') that accurately represent the described design in the AEM Word document format. Output ONLY this structure. Ensure table formats are clear.

    **Output Format for Tables (Example based on user files):**
    ```text
    ---
    Table Start: cards
    Row: | cards | | |
    Row: | [Placeholder Image: Card 1 image] | Card 1 Title | Card 1 Body Text [Placeholder Text] |
    Row: | [Placeholder Image: Card 2 image] | Card 2 Title | Card 2 Body Text [Placeholder Text] |
    Table End
    ---
    This is a regular paragraph following the block.
    ---
    Table Start: columns
    Row: | columns | |
    Row: | Left column content [Placeholder Text] | [Placeholder Image: Right column image] |
    Table End
    ---
    ```

    **Begin Output:**
    """

    try:
        # --- Generate Content ---
        model = genai.GenerativeModel('gemini-1.5-pro-latest')
        # Higher temperature than the description step: this step generates content.
        generation_config = genai.types.GenerationConfig(temperature=0.7)
        safety_settings = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        ]

        response = model.generate_content(
            prompt,
            generation_config=generation_config,
            safety_settings=safety_settings,
        )

        # --- Robust Response Cleaning ---
        if not response.candidates:
            print("Error: Gemini text response has no candidates. Content might be blocked.")
            if hasattr(response, 'prompt_feedback'):
                print(f"Prompt Feedback: {response.prompt_feedback}")
            return "Error: Could not generate Word structure from Gemini (No candidates)."

        candidate = response.candidates[0]
        parts = getattr(candidate.content, 'parts', None)
        if parts:
            # Join every text part; reading only the first would truncate a
            # response that the API split across several parts.
            full_text = "".join(getattr(part, 'text', '') or '' for part in parts)
            cleaned_response = _clean_structure_text(full_text)
            print("Gemini Word structure generation successful.")
            return cleaned_response

        print("Error: Gemini text response candidate does not contain expected text part.")
        print(f"Finish Reason: {candidate.finish_reason}")
        if hasattr(candidate, 'safety_ratings'):
            print(f"Safety Ratings: {candidate.safety_ratings}")
        return "Error: Could not generate Word structure from Gemini (Candidate missing text)."

    except Exception as e:
        # Top-level boundary for the API call: report instead of crashing.
        print(f"Error during Gemini text generation: {e}")
        return f"Error: An unexpected error occurred with Gemini text generation ({e})."

# --- Word document rendering ---
# create_word_doc parses the structured text returned by the model;
# add_table_to_doc renders each parsed block table into the document.

def _finalize_table(doc, table_rows_data, error_context, empty_message):
    """Render the buffered rows as a table, or print *empty_message* if there are none."""
    if not table_rows_data:
        print(empty_message)
        return
    try:
        add_table_to_doc(doc, table_rows_data)
    except Exception as e:
        # One malformed table must not abort the rest of the document.
        print(f"  Error adding {error_context}: {e}")


def create_word_doc(structure, output_filename="authored_page.docx"):
    """Create a Word document from the Gemini-generated structure text.

    The structure is parsed line by line (blank lines are ignored):

    * ``---`` is written as a paragraph containing ``---`` and also closes
      any table that is still open.
    * ``Table Start: <block>`` opens a table, ``Row: | a | b |`` adds a row
      to it and ``Table End`` renders it. These markers are matched
      case-insensitively.
    * Any other line outside a table becomes a paragraph; inside a table it
      becomes a single-cell row.

    Args:
        structure: The structured text to parse.
        output_filename: Path the .docx file is saved to.

    Returns:
        None. Problems while rendering a table or saving the file are printed,
        not raised.
    """
    print(f"Creating Word document: {output_filename}")
    doc = Document()

    in_table = False
    table_rows_data = []  # Lists of cell strings for the table being parsed

    for raw_line in structure.strip().split('\n'):
        line = raw_line.strip()
        if not line:  # Skip empty lines
            continue

        # --- Handle Separators ---
        if line == '---':
            if in_table:
                print("Warning: Found '---' separator while inside a table definition. Finalizing table before separator.")
                _finalize_table(
                    doc, table_rows_data,
                    "table to doc before separator",
                    "  Table definition ended by separator, but no rows were parsed.",
                )
                in_table = False
                table_rows_data = []
            doc.add_paragraph('---')
            continue

        # --- Handle Table Start ---
        match_start = re.match(r'Table Start:\s*(.*)', line, re.IGNORECASE)
        if match_start:
            if in_table:
                print("Warning: Found 'Table Start' while already inside a table. Finalizing previous table.")
                _finalize_table(
                    doc, table_rows_data,
                    "previous table to doc",
                    "  Previous table definition ended by new table start, but no rows parsed.",
                )
            # Rows of a previous table never carry over, whether or not a new
            # table actually starts here.
            table_rows_data = []
            block_name = match_start.group(1).strip()
            if not block_name:
                print("Warning: Detected 'Table Start:' but couldn't parse block name. Skipping.")
                in_table = False
            else:
                print(f"Starting table for block: {block_name}")
                in_table = True
            continue

        # --- Handle Table End ---
        # The word boundary keeps ordinary text such as "Table Endpoints ..."
        # from being mistaken for the marker, while still accepting
        # variants like "Table End: cards".
        if re.match(r'Table End\b', line, re.IGNORECASE):
            if in_table:
                print("Ending table.")
                _finalize_table(
                    doc, table_rows_data,
                    "table to doc at Table End",
                    "  Table definition ended, but no rows were parsed.",
                )
                in_table = False
                table_rows_data = []
            else:
                print("Warning: Found 'Table End' but wasn't processing a table.")
            continue

        # --- Handle Table Rows (if inside a table) ---
        if in_table:
            match_row = re.match(r'Row:\s*\|(.*)', line, re.IGNORECASE)
            if match_row:
                cells_str = match_row.group(1)
                # The closing pipe is optional. Requiring it would silently
                # drop the last cell of a row written as "Row: | a | b".
                if cells_str.endswith('|'):
                    cells_str = cells_str[:-1]
                table_rows_data.append([cell.strip() for cell in cells_str.split('|')])
            else:
                print(f"  Warning: Line '{line}' inside table doesn't match 'Row: |...|' format. Treating as single cell row.")
                table_rows_data.append([line])
            continue

        # --- Handle Regular Paragraphs ---
        # Placeholder lines are written as plain paragraphs like any other text.
        doc.add_paragraph(line)

    # --- Final Check: End of loop ---
    if in_table:
        print("Warning: Reached end of input while still processing a table. Attempting to finalize last table.")
        _finalize_table(
            doc, table_rows_data,
            "final table to doc",
            "  Final table definition had no rows parsed.",
        )

    # --- Save Document ---
    try:
        doc.save(output_filename)
        print(f"Document saved successfully: {output_filename}")
    except PermissionError:
        print(f"Error: Permission denied when trying to save '{output_filename}'. Is the file open or directory write-protected?")
    except Exception as e:
        print(f"Error saving Word document '{output_filename}': {e}")

def add_table_to_doc(doc, table_data):
    """Add a 'Table Grid' styled table holding *table_data* to *doc*.

    Args:
        doc: Document object; must provide ``add_table(rows=..., cols=...)``.
        table_data: List of rows, each a list of cell values. Rows may have
            different lengths; the table is as wide as the longest row and
            shorter rows are padded with empty cells.

    Returns:
        None. Nothing is added when *table_data* is empty or every row is
        empty; a note is printed instead.
    """
    if not table_data:
        print("  Attempted to add table with no data.")
        return

    max_cols = max(len(row) for row in table_data)
    if max_cols == 0:
        print("  Table data found, but no columns detected.")
        return

    # The table is created with its first row; further rows are appended.
    table = doc.add_table(rows=1, cols=max_cols)
    table.style = 'Table Grid'

    for row_index, row_data in enumerate(table_data):
        cells = table.rows[0].cells if row_index == 0 else table.add_row().cells
        # max_cols is the longest row length, so no row can overflow the
        # table; only padding of shorter rows is needed.
        for col_index in range(max_cols):
            if col_index < len(row_data):
                cells[col_index].text = str(row_data[col_index])
            else:
                cells[col_index].text = ''
    print(f"  Successfully added table with {len(table_data)} rows and {max_cols} columns.")


# --- Main Execution ---
def _run_pipeline(image_file_path, api_key):
    """Run image analysis, structure generation and document creation in order."""
    if not os.path.exists(image_file_path):
        print(f"\nError: File not found at '{image_file_path}'")
        return
    if not os.path.isfile(image_file_path):
        print(f"\nError: Path '{image_file_path}' is not a file.")
        return

    # 1. Analyze image with Gemini Vision
    print("\n--- Step 1: Analyzing Image with Gemini Vision ---")
    description = analyze_image_with_gemini(image_file_path, api_key)
    print("\n--- Gemini Vision Image Description ---")
    print(description)
    print("-------------------------------------\n")

    # The helper functions report failures as strings starting with "Error:".
    if description.startswith("Error:"):
        print("Exiting due to error during Gemini Vision analysis.")
        return

    # 2. Generate Word structure with Gemini Text
    print("--- Step 2: Generating Word Structure with Gemini ---")
    word_content_structure = generate_word_structure_with_gemini(description, available_blocks, api_key)
    print("\n--- Gemini Generated Structure ---")
    print(word_content_structure)
    print("------------------------------\n")

    if word_content_structure.startswith("Error:"):
        print("Exiting due to error during Gemini Word structure generation.")
        return

    # 3. Create the Word document, named after the image file
    print("--- Step 3: Creating Word Document ---")
    base_name = os.path.splitext(os.path.basename(image_file_path))[0]
    output_doc_name = f"authored_{base_name}.docx"
    create_word_doc(word_content_structure, output_filename=output_doc_name)


def main(image_file_path="/Users/deepankar/FranklinAutomation/download.png"):
    """Script entry point: turn a design cue image into an AEM Word document.

    Args:
        image_file_path: Path of the cue image. The default is a
            machine-specific path; pass another path to process a different
            image.

    Raises:
        SystemExit: With exit code 1 when the GOOGLE_API_KEY environment
            variable is missing or empty.
    """
    import sys  # Local import: only the entry point needs it.

    # --- Get API Key ---
    # Credentials must never be hard-coded in source; the key is read from the
    # environment, which is also what the error message below tells the user.
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        print("\nERROR: GOOGLE_API_KEY environment variable not set.")
        print("Please set the GOOGLE_API_KEY environment variable before running.")
        sys.exit(1)
    print("Google AI API Key found.")

    _run_pipeline(image_file_path, api_key)

    print("\nScript finished.")


# --- Main Execution ---
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


Successfully loaded AEM block data from codebase_representation.json. Available blocks: footer, cards, fragment, columns, hero, header
Google AI API Key found.

--- Step 1: Analyzing Image with Gemini Vision ---
Analyzing image: /Users/deepankar/FranklinAutomation/download.png using Gemini Vision...
Gemini vision analysis successful.

--- Gemini Vision Image Description ---
The webpage follows a predominantly single-column layout with several distinct sections:

**1. Header:**
* A simple header bar at the top contains a "hamburger" menu icon (three horizontal lines) on the left, the text "Boilerplate" in a plain font, and a magnifying glass icon (search) on the right.

**2. Hero Section:**
* This section features a large, dark blue background with a stylized, glowing teal DNA double helix graphic.
* Overlaid on the image is the congratulatory headline "Congrats, you are ready to go!" in a bold, white sans-serif font.

**3. Introductory Text:**
* Below the hero section is a block of exp