In [8]:
import os 
from google import genai
from google.genai import types
from google.genai.types import Part
from openai import OpenAI
import base64
from io import BytesIO
from PIL import Image as PILImage 
from dotenv import load_dotenv

## Configuration
- Loads API keys from .env
- Sets OpenAI and Gemini clients
- Defines which model to use first (primary_service) and which to use if it fails (fallback_service).
- Keeps output file name and model names as variables for flexibility. 

In [9]:
# Read the API keys from the .env file / process environment.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
gemini_api_key = os.getenv("GEMINI_API_KEY")

# Export to environment.
# os.environ only accepts string values, so assigning a missing key (None)
# would raise TypeError. Export only the keys that are set and warn about
# the ones that are not.
if openai_api_key:
    os.environ["OPENAI_API_KEY"] = openai_api_key
else:
    print("Warning: OPENAI_API_KEY is not set; add it to your .env file.")

if gemini_api_key:
    os.environ["GEMINI_API_KEY"] = gemini_api_key
else:
    print("Warning: GEMINI_API_KEY is not set; add it to your .env file.")

In [10]:
# Initialize clients
# Both clients are created once at module level from the API keys loaded
# above; the generation functions below use them as globals.
openai_client = OpenAI(api_key=openai_api_key)
gemini_client = genai.Client(api_key=gemini_api_key)

In [22]:
# Configuration
# Service names must match the keys of the `services` dict in
# generate_image_with_fallback: "openai" or "imagen".
primary_service = "openai"  # tried first; or "imagen"
fallback_service = "imagen" # tried when the primary service raises; or "openai"
output_file = "comic_image.png"  # default path used by save_image when no filename is given
openai_model = "gpt-image-1"  # model passed to openai_client.images.generate
imagen_model = "imagen-4.0-generate-001"  # alternative kept for reference: "imagen-4.0-generate-preview-06-06"
gemini_model = "gemini-2.0-flash"  # model used for video analysis and prompt enhancement

## Video Analysis and Prompt Enhancement
- Takes a video URL and user's input description.
- Sends both to Gemini, asking it to enhance and rewrite the input as a single structured prompt for a comic strip.
- Returns the final enhanced prompt. 

In [28]:
def extract_comic_prompt_and_enhance(video_url, user_input):
    """Ask Gemini to turn a video and the user's description into one image prompt.

    Args:
        video_url: URL of the video; it is embedded in the instructions and
            also sent to Gemini as a ``file_data`` part.
        user_input: The user's free-text description of the comic.

    Returns:
        The text of Gemini's response (the enhanced image-generation prompt).

    Raises:
        ValueError: If the response carries no text, so callers never receive
            ``None`` or a blank prompt and then fail later with an unrelated
            error.
    """
    prompt = f"""
    You are an expert comic strip prompt creator. Your task is to generate a single, detailed ready-to-use prompt for an image generation AI. 
    You will use the user-provided panel descriptions and a video to create a complete prompt.

    ### User Inputs:
    {user_input}
    
    ### Your Task & Instructions:

    1. **Analyze and Enhance:** Watch the video at the provided URL: {video_url}. Based on the video's context, add relevant details to each of the four panel descriptions. Enhance the descriptions to make them more vivid and engaging.

    2. **Generate a Single Prompt:** Your entire output must be one single, cohesive prompt for the image generator.

    3. **Specify Format, Style, and Composition in the Final Prompt:** The final prompt you generate must explicitly instruct the image AI to create a comic strip with the following specifications:
       - **Layout:** A **4-panel** layout, arranged in a **2x2 grid**. Each panel must be **clearly framed and equally spaced**.
       - **Art Style:** A distinct **comic book art style only**. Strictly instruct for **no photorealism**.
       - **Text Elements:** Appropriate **dialogue in speech bubbles** and descriptive **caption text** for each panel.
       - **Compositional Integrity:** Ensure that **no part of the characters, speech bubbles, or dialogue is cropped or cut off** by the panel borders. All text inside speech bubbles must be **fully visible and legible**.

    4. **Set the Tone:** The comic must be **humorous**. Instruct the image AI to capture **exaggerated timing and over-the-top reactions**, in the style of classic internet memes.  

    5. **Content Moderation:**
       * You must strictly filter the final prompt to **remove any harmful or inappropriate content**.
       * **Handle Copyright-adjacent Material:**
            * If the user's input mentions a movie title, you **must remove the movie title** from your generated prompt. 
            * However, if a character name is mentioned, **keep the character name**. Your prompt should instruct the image AI to create a character that *resembles* the mentioned character, but is not an exact replica.
            * Replace any specific brands or logos with generic equivalents (e.g., "a soda can" instead of "Coca-Cola can").
            
    6. **Final Output:** Return only the final, complete prompt for the image AI without any of your own commentary, introductions, or extra text.
    """

    # The video is attached as a file_data part (by URI) alongside the text
    # instructions, rather than only being mentioned inside the prompt text.
    response = gemini_client.models.generate_content(
        model=gemini_model,
        contents=types.Content(
            parts=[
                types.Part(text=prompt),
                types.Part(
                    file_data=types.FileData(file_uri=video_url)
                )
            ]
        )
    )

    # A response without text would otherwise be reported as a success and
    # break the image-generation step with a confusing error.
    enhanced_prompt = response.text
    if not enhanced_prompt or not enhanced_prompt.strip():
        raise ValueError("Gemini returned no text for the enhanced prompt.")

    return enhanced_prompt

## Image Generation Functions
- Uses OpenAI's gpt-image-1 model to generate an image from the prompt.
- Converts base64-encoded response into raw image bytes.
- Uses Google Imagen to generate a single comic image. 
- Returns the image object. 

In [33]:
# OpenAI Image Generation
def generate_with_openai(prompt):
    """Render `prompt` with the configured OpenAI image model and return raw image bytes."""
    print(f"Calling OpenAI with prompt length: {len(prompt)} chars")

    request = {
        "model": openai_model,
        "prompt": prompt,
        "size": "1024x1024",
    }
    first_image = openai_client.images.generate(**request).data[0]

    # The image payload is read from the b64_json field; decode it to bytes.
    return base64.b64decode(first_image.b64_json)

# Google Imagen Generation
def generate_with_imagen(prompt):
    """Generate a single 1:1 image with the configured Google Imagen model.

    Args:
        prompt: Text prompt sent to the image model.

    Returns:
        The ``image`` attribute of the first generated image in the response.

    Raises:
        RuntimeError: If the response contains no generated image. This
            replaces the opaque IndexError/TypeError that a bare ``[0]``
            lookup would raise, and prevents returning ``None`` as an image.
    """
    print(f"Calling Imagen with prompt length: {len(prompt)} chars")
    response = gemini_client.models.generate_images(
        model=imagen_model,
        prompt=prompt,
        config=types.GenerateImagesConfig(
            number_of_images=1,
            aspect_ratio="1:1"
        )
    )

    generated = response.generated_images
    if not generated or generated[0].image is None:
        raise RuntimeError(
            "Imagen returned no image (the response contained no generated images)."
        )

    return generated[0].image


## Main Generation Pipeline 
- Tries generating an image using primary service (e.g. OpenAI)
- If it fails, switches to fallback service (e.g. Imagen).
- Ensures program doesn't crash if one model is unavailable. 

In [34]:
def generate_image_with_fallback(prompt):
    """Generate an image with the primary service, falling back to the secondary.

    The services named by the module-level ``primary_service`` and
    ``fallback_service`` settings are tried in that order. The fallback is
    only attempted when it differs from the primary.

    Args:
        prompt: Text prompt handed to the selected generator function.

    Returns:
        Whatever the first successful generator returns, or ``None`` when
        every attempted service failed or no configured name is known.
    """
    services = {
        "openai": generate_with_openai,
        "imagen": generate_with_imagen
    }

    attempts = [("primary", primary_service)]
    if fallback_service != primary_service:
        attempts.append(("fallback", fallback_service))

    for position, (role, service_name) in enumerate(attempts):
        generator = services.get(service_name)
        if generator is None:
            # A misspelled service name would otherwise be skipped silently.
            print(f"❌ Unknown {role} service '{service_name}'; expected one of {sorted(services)}.")
            continue

        print(f"Attempting {role} service: {service_name}")
        try:
            return generator(prompt)
        except Exception as e:
            # Deliberately broad: any failure of one service should lead to
            # the next attempt instead of aborting the pipeline.
            if role == "primary":
                print(f"❌ Primary service ({service_name}) failed with error:")
            else:
                print(f"❌ Fallback service ({service_name}) also failed with error:")
            print(f"   Error type: {type(e).__name__}")
            print(f"   Error message: {str(e)}")
            # Only announce a fallback when another attempt will follow.
            if position + 1 < len(attempts):
                print("   Trying fallback service...")

    return None

def save_image(image_data, filename=None):
    """Write the generated image to disk and return the path it was saved to.

    `image_data` may be raw bytes (decoded through PIL) or any object that
    provides its own ``save(path)`` method. When `filename` is falsy, the
    module-level `output_file` is used.
    """
    destination = filename if filename else output_file

    # Raw bytes are opened as a PIL image; other objects are saved as they are.
    image = PILImage.open(BytesIO(image_data)) if isinstance(image_data, bytes) else image_data

    image.save(destination)
    print(f"✅ Comic saved as '{destination}'")
    return destination

## Complete Comic Generation 

In [35]:
def generate_comic(video_url, user_input):
    """Run the whole pipeline: enhance the prompt, generate the image, save it.

    Returns the path of the saved image, or None when prompt enhancement
    raises or no image could be generated.
    """

    # Stage 1: build the enhanced prompt from the video and the user's text
    print("Analyzing video and enhancing prompt...")
    try:
        comic_prompt = extract_comic_prompt_and_enhance(video_url, user_input)
        print("✅ Prompt enhancement complete.")
        print(f"\n📝 Enhanced Prompt:\n{comic_prompt}\n")
    except Exception as err:
        print(f"❌ Failed to analyze video:")
        print(f"   Error type: {type(err).__name__}")
        print(f"   Error message: {str(err)}")
        return None

    # Stage 2: produce the image (primary service first, then fallback)
    print("Generating image...")
    generated = generate_image_with_fallback(comic_prompt)

    # Stage 3: persist the image when there is one
    if generated is not None:
        print("Saving the generated comic image...")
        return save_image(generated)

    print("❌ Failed to generate image.")
    return None


### Usage Example

In [36]:
# Example usage
video_url = "https://www.youtube.com/shorts/_q_NIQwwClc"
user_input = """  
Make the Girl a princess and the boy a knight.
"""

# Run the full pipeline; generate_comic returns None when a step fails.
output_path = generate_comic(video_url, user_input)

if not output_path:
    print("Comic generation failed.")
else:
    print(f"Comic generated and saved to: {output_path}")

    # Load the saved file and show it in the default image viewer.
    img = PILImage.open(output_path)
    img.show()

Analyzing video and enhancing prompt...
✅ Prompt enhancement complete.

📝 Enhanced Prompt:
A four-panel comic strip in a vibrant comic book art style, not photorealistic, arranged in a 2x2 grid with clearly framed panels and equal spacing. Exaggerated timing and over-the-top reactions in the style of classic internet memes.

Panel 1: Caption: "Before the Quest." A beautiful, joyful princess with long flowing hair, resembling no one in particular, stands in a sunlit garden, her flowing gown sparkling with magical light. She holds a bouquet of flowers and smiles radiantly. Dialogue bubble: "Oh, what a lovely day for a stroll!"

Panel 2: Caption: "The Call to Adventure." The knight, in shining but slightly dented armor, resembling no one in particular, bursts through the garden gate, his face panicked. Dialogue bubble: "Princess! The kingdom is in peril! We must leave at once!"

Panel 3: Caption: "The Transformation." The princess, now with a determined look in her eyes, swaps her beautif

### Interactive Usage

In [None]:
# Interactive input version
video_url = input("Enter the video URL: ").strip()
user_input = input("Enter your comic description: ").strip()

if video_url and user_input:
    output_path = generate_comic(video_url, user_input)

    if output_path:
        print(f"Comic generated and saved to: {output_path}")

        # Display the generated comic
        img = PILImage.open(output_path)
        img.show()  # Opens the image in default image viewer
    else:
        # The pipeline ran but returned no path.
        print("Comic generation failed.")
else:
    # At least one of the two inputs was left empty, so nothing was generated.
    print("Please provide both video URL and description.")