<a href="https://colab.research.google.com/github/KaifAhmad1/deepfake/blob/main/Product_Marketing_lmage_Generation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Product Marketing AI System


## Overview
This system helps create high-quality marketing images automatically. It takes in photos and optional audio or video, then processes, refines, and enhances them to produce beautiful marketing visuals for many different industries.

## Key Features

- **Easy Input:** Upload main and supplementary images, plus optional multimedia for extra context.
- **Smart Processing:** The system automatically cuts out key parts, improves image details, and boosts overall clarity.
- **Creative Prompts:** Custom prompts are generated to guide the image creation process, making it tailored to your needs.
- **Fast Generation:** Uses multiple AI models working together to generate and improve images quickly.
- **Quality Check:** Compares final images to the originals and provides simple quality feedback.
- **Simple Reports:** Automatically produces a brief report with the final prompt and quality scores.

## Benefits
- Saves time by automating the creation of professional marketing images.
- Provides consistent and attractive visuals optimized for your business.
- Easy to use with straightforward input and clear feedback.

Enjoy a seamless experience in making your marketing visuals stand out!

In [6]:
!pip install -q opencv-python numpy matplotlib pillow torch diffusers scikit-image tenacity langchain-google-genai openai groq accelerate pipecat-ai aiohttp python-dotenv

In [7]:
import os
import cv2
import numpy as np
import base64
import json
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter, ImageEnhance
import io
import sys
import torch
from skimage.metrics import structural_similarity as ssim, peak_signal_noise_ratio as psnr
from tenacity import retry, stop_after_attempt, wait_exponential, RetryError
import time
import warnings
import random
import asyncio
import aiohttp
import traceback
from dotenv import load_dotenv

# Langchain/LLM Imports
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.messages import HumanMessage
from groq import Groq, GroqError
from openai import OpenAI, OpenAIError

# Pipecat Imports
# Pipecat is an optional dependency. Its service modules may raise a plain
# Exception (not only ImportError) at import time when their optional extras
# are missing, so any import-time failure is treated as "Pipecat unavailable"
# instead of aborting the whole cell. The error is still reported below.
try:
    from pipecat.services.moondream.vision import MoondreamService
    from pipecat.services.google.image import GoogleImageGenService
    from pipecat.frames.frames import VisionImageRawFrame, TextFrame, URLImageRawFrame, ErrorFrame
    PIPECAT_AVAILABLE = True
    print("Pipecat AI library loaded successfully.")
except Exception as e:
    print(f"Warning: Pipecat AI library not found or failed to import ({e}). Moondream and Google Imagen (via Pipecat) will be unavailable.")
    PIPECAT_AVAILABLE = False
    # Bind every optional name to None so later references do not raise NameError.
    MoondreamService, GoogleImageGenService = None, None
    VisionImageRawFrame, TextFrame, URLImageRawFrame, ErrorFrame = None, None, None, None

# --- Global Settings and API Keys ---
# Silence torchvision UserWarnings and all FutureWarnings to keep cell output readable.
warnings.filterwarnings("ignore", category=UserWarning, module='torchvision')
warnings.filterwarnings("ignore", category=FutureWarning)

# Load variables from a .env file (if one exists) into the process environment.
# With its default arguments load_dotenv() does NOT override variables that are
# already set, so real environment variables take precedence over .env values.
load_dotenv()

# Fetch keys. Effective precedence: environment variable, then .env entry, then placeholder.
# The "YOUR_..._HERE" placeholders only mark a key as "not configured".
# Provide real keys via environment variables or a .env file; do not paste them into the notebook.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "YOUR_GROQ_API_KEY_HERE")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY_HERE")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY_HERE")

# --- Directories ---
# Both paths are relative to the current working directory and are created on
# cell execution if missing (exist_ok=True makes re-running the cell safe).
OUTPUT_DIR = "outputs"
UPLOAD_DIR = "uploads"
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(UPLOAD_DIR, exist_ok=True)

# --- Constants ---
# Default model identifiers passed to the respective provider clients.
DEFAULT_FAST_LLM_MODEL = "llama3-8b-8192" # For strategy definition, parsing
DEFAULT_CAPABLE_LLM_MODEL_GROQ = "llama3-70b-8192"
DEFAULT_CAPABLE_LLM_MODEL_GEMINI = "gemini-1.5-pro-latest"
DEFAULT_ANALYZER_MODEL_GEMINI = "gemini-1.5-flash-latest"
DEFAULT_IMAGE_GEN_MODEL_GEMINI = "imagen-3.0-generate-002" # Using Google Imagen via Pipecat preferably
DEFAULT_IMAGE_GEN_MODEL_OPENAI = "dall-e-3"

# --- Environment Detection ---
# Classify the runtime as "colab", "kaggle", "jupyter" or "standalone".
# A successful google.colab import identifies Colab (and binds `files`).
try:
    from google.colab import files
except ImportError:
    if 'IPython' not in sys.modules or 'google.colab' in sys.modules:
        # No IPython loaded in this process: treat it as a plain Python run.
        ENV = "standalone"
        print("Running in standalone environment.")
    elif os.path.exists("/kaggle/input"):
        # Presence of the /kaggle/input directory is used as the Kaggle marker.
        ENV = "kaggle"
        print("Running in Kaggle environment.")
    else:
        ENV = "jupyter"
        print("Running in Jupyter environment.")
else:
    ENV = "colab"
    print("Running in Colab environment.")

# --- Device Setup ---
# Pick the torch device string: CUDA first, otherwise CPU as the baseline.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Intel XPU: only considered when the Intel extension can be imported.
try:
    import intel_extension_for_pytorch as ipex
    if torch.xpu.is_available():
        device = "xpu"
except ImportError:
    pass

# Apple MPS: only tried when nothing better than CPU was found above.
if device == "cpu":
    if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
        device = "mps"

print(f"Using device: {device}")

# Release cached CUDA memory before any model is loaded.
if device == "cuda":
    torch.cuda.empty_cache()

Running in Colab environment.
Using device: cuda


In [8]:
# --- Model Initialization ---
def _api_key_looks_valid(api_key, min_length):
    """Return True when ``api_key`` passes the basic sanity checks.

    A key is accepted only if it is non-empty, does not contain the ``YOUR_``
    placeholder marker and is at least ``min_length`` characters long.
    """
    return bool(api_key) and "YOUR_" not in api_key and len(api_key) >= min_length


def _first_available(candidates):
    """Return the client paired with the first truthy flag, or None.

    Args:
        candidates: Iterable of ``(flag, client)`` pairs in priority order.
    """
    for usable, client in candidates:
        if usable:
            return client
    return None


def initialize_models():
    """Initialize all AI clients/services and assign them to pipeline tasks.

    Reads the module-level API keys, model-name constants, ``device`` and
    ``PIPECAT_AVAILABLE``. Progress and warnings are printed as a side effect.

    Returns:
        dict: Status registry with
            * one entry per client/service (``groq_client``, ``gemini_analyzer``,
              ``gemini_capable_llm``, ``openai_client``, ``moondream_service``,
              ``google_image_gen_service``, ...), ``None`` when not initialized;
            * the configured model names (``groq_fast_llm``, ``groq_capable_llm``,
              ``gemini_direct_image_gen_model``, ``openai_image_gen_model``);
            * the per-task clients (``parsing_llm_client``, ``strategy_llm_client``,
              ``prompt_llm_client``, ``feedback_llm_client``);
            * ``available``: a dict of booleans keyed by provider name.

    Raises:
        SystemExit: With code 1 when image analysis, prompt generation or
            image generation has no usable backend.
    """
    print("\n--- Initializing AI Models ---")

    # `available` is shared with model_status, so updates below are visible in both.
    available = {"groq": False, "gemini": False, "openai": False, "moondream": False, "google_imagen": False}
    model_status = {
        "groq_client": None,
        "groq_fast_llm": DEFAULT_FAST_LLM_MODEL,
        "groq_capable_llm": DEFAULT_CAPABLE_LLM_MODEL_GROQ,
        "gemini_analyzer": None,
        "gemini_capable_llm": None,
        "gemini_direct_image_gen": None,
        "gemini_direct_image_gen_model": DEFAULT_IMAGE_GEN_MODEL_GEMINI,
        "openai_client": None,
        "openai_image_gen_model": DEFAULT_IMAGE_GEN_MODEL_OPENAI,
        "moondream_service": None,
        "google_image_gen_service": None,
        "parsing_llm_client": None,
        "strategy_llm_client": None,
        "prompt_llm_client": None,
        "feedback_llm_client": None,
        "available": available,
    }

    # --- API key sanity checks (placeholder / minimum-length heuristics) ---
    api_key_warnings = []
    if _api_key_looks_valid(GROQ_API_KEY, 50):
        available["groq"] = True
    else:
        api_key_warnings.append("Groq API Key missing/invalid.")

    if _api_key_looks_valid(GOOGLE_API_KEY, 30):
        # One Google key gates both the Gemini models and Google Imagen.
        available["gemini"] = True
        available["google_imagen"] = True
    else:
        api_key_warnings.append("Google API Key missing/invalid (needed for Gemini & Google Imagen).")

    if _api_key_looks_valid(OPENAI_API_KEY, 50):
        available["openai"] = True
    else:
        api_key_warnings.append("OpenAI API Key missing/invalid.")

    if api_key_warnings:
        print("\n--- ⚠️ API KEY WARNINGS ---")
        for warning in api_key_warnings:
            print(f"- {warning}")
        print("--- Functionality will be limited. Please set keys in .env file or environment variables. ---\n")

    # --- Groq client ---
    if available["groq"]:
        try:
            model_status["groq_client"] = Groq(api_key=GROQ_API_KEY, timeout=120.0)
            print("✅ Groq client initialized.")
        except Exception as e:
            print(f"❌ Error initializing Groq: {e}. Disabling Groq.")
            available["groq"] = False
            model_status["groq_client"] = None

    # --- Google Gemini chat models ---
    if available["gemini"]:
        try:
            # Analyzer (Flash)
            model_status["gemini_analyzer"] = ChatGoogleGenerativeAI(
                model=DEFAULT_ANALYZER_MODEL_GEMINI,
                temperature=0.3,
                max_retries=2,
                google_api_key=GOOGLE_API_KEY,
                request_timeout=120,
            )
            # Capable LLM (Pro)
            model_status["gemini_capable_llm"] = ChatGoogleGenerativeAI(
                model=DEFAULT_CAPABLE_LLM_MODEL_GEMINI,
                temperature=0.6,
                max_retries=2,
                google_api_key=GOOGLE_API_KEY,
                request_timeout=180,
            )
            print(f"✅ Gemini models initialized (Analyzer: {DEFAULT_ANALYZER_MODEL_GEMINI}, Capable: {DEFAULT_CAPABLE_LLM_MODEL_GEMINI}).")
        except Exception as e:
            print(f"❌ Error initializing Gemini models: {e}. Disabling Gemini.")
            # Treat Gemini as all-or-nothing: drop both models if either failed.
            available["gemini"] = False
            model_status["gemini_analyzer"] = None
            model_status["gemini_capable_llm"] = None
            model_status["gemini_direct_image_gen"] = None

    # --- OpenAI client ---
    if available["openai"]:
        try:
            model_status["openai_client"] = OpenAI(api_key=OPENAI_API_KEY, timeout=120.0)
            print(f"✅ OpenAI client initialized (Model: {DEFAULT_IMAGE_GEN_MODEL_OPENAI}).")
        except Exception as e:
            print(f"❌ Error initializing OpenAI: {e}. Disabling OpenAI.")
            available["openai"] = False
            model_status["openai_client"] = None

    # --- Pipecat services (only when the library imported successfully) ---
    if PIPECAT_AVAILABLE:
        # Moondream (Local Vision Analysis)
        try:
            force_cpu = (device == "cpu")
            model_status["moondream_service"] = MoondreamService(use_cpu=force_cpu)
            print(f"✅ Pipecat MoondreamService initialized (will use {device if not force_cpu else 'CPU'}). Model download may occur on first use.")
            available["moondream"] = True
        except Exception as e:
            print(f"❌ Error initializing Pipecat MoondreamService: {e}. Moondream unavailable.")
            model_status["moondream_service"] = None

        # Google Imagen (via Pipecat - Preferred Image Gen if available)
        if available["google_imagen"]:
            try:
                model_status["google_image_gen_service"] = GoogleImageGenService(api_key=GOOGLE_API_KEY)
                print("✅ Pipecat GoogleImageGenService initialized.")
            except Exception as e:
                print(f"❌ Error initializing Pipecat GoogleImageGenService: {e}. Google Imagen (Pipecat) unavailable.")
                model_status["google_image_gen_service"] = None
                available["google_imagen"] = False
        else:
            print("Skipping Pipecat Google Imagen (Google API key invalid or unavailable).")
    else:
        print("Pipecat AI library not installed/imported. Moondream and Google Imagen (Pipecat) skipped.")
        available["moondream"] = False
        available["google_imagen"] = False

    # --- Assign LLM clients to specific tasks (first available wins) ---
    groq_client = model_status["groq_client"]
    gemini_flash = model_status["gemini_analyzer"]
    gemini_pro = model_status["gemini_capable_llm"]
    gemini_pro_ready = available["gemini"] and gemini_pro

    # Parsing LLM (needs to be fast): Groq -> Gemini Flash
    parsing_client = _first_available([
        (available["groq"], groq_client),
        (available["gemini"], gemini_flash),
    ])
    model_status["parsing_llm_client"] = parsing_client
    if parsing_client:
        print(f"Selected Parsing LLM: {'Groq' if isinstance(parsing_client, Groq) else 'Gemini Flash'}")
    else:
        print("⚠️ No parsing LLM available - Moondream analysis will be disabled.")
        available["moondream"] = False

    # Strategy LLM (needs to be fast and decent): Groq -> Gemini Flash
    strategy_client = _first_available([
        (available["groq"], groq_client),
        (available["gemini"], gemini_flash),
    ])
    model_status["strategy_llm_client"] = strategy_client
    if strategy_client:
        print(f"Selected Strategy LLM: {'Groq' if isinstance(strategy_client, Groq) else 'Gemini Flash'}")
    else:
        print("⚠️ No strategy LLM available - Prompting might be less adaptive.")

    # Main prompt LLM (needs to be capable): Gemini Pro -> Groq
    prompt_client = _first_available([
        (gemini_pro_ready, gemini_pro),
        (available["groq"], groq_client),
    ])
    model_status["prompt_llm_client"] = prompt_client
    if prompt_client:
        prompt_is_gemini_pro = isinstance(prompt_client, ChatGoogleGenerativeAI) and 'pro' in prompt_client.model
        print(f"Selected Main Prompt LLM: {'Gemini Pro' if prompt_is_gemini_pro else 'Groq 70b'}")
    else:
        print("⚠️ CRITICAL: No capable LLM available for main prompt generation.")

    # Feedback LLM (needs to be capable): Gemini Pro -> Groq -> Gemini Flash
    feedback_client = _first_available([
        (gemini_pro_ready, gemini_pro),
        (available["groq"], groq_client),
        (available["gemini"], gemini_flash),
    ])
    model_status["feedback_llm_client"] = feedback_client
    if feedback_client:
        if isinstance(feedback_client, ChatGoogleGenerativeAI) and 'pro' in feedback_client.model:
            feedback_label = 'Gemini Pro'
        elif isinstance(feedback_client, Groq):
            feedback_label = 'Groq 70b'
        else:
            feedback_label = 'Gemini Flash'
        print(f"Selected Feedback LLM: {feedback_label}")
    else:
        print("⚠️ No LLM available for AI feedback/evaluation.")

    print("--- Model Initialization Complete ---")

    # --- Overall readiness: every stage needs at least one backend ---
    can_analyze = available["moondream"] or available["gemini"]
    can_gen_prompt = model_status["prompt_llm_client"] is not None
    can_gen_image = available["openai"] or available["google_imagen"]

    if not (can_analyze and can_gen_prompt and can_gen_image):
        print("\n--- ⚠️ WARNING: Core functionality missing! ---")
        if not can_analyze:
            print("- Cannot analyze images.")
        if not can_gen_prompt:
            print("- Cannot generate detailed prompts.")
        if not can_gen_image:
            print("- Cannot generate images.")
        print("--- Please check API keys and model initialization logs. Exiting. ---")
        # Raises SystemExit(1).
        sys.exit(1)

    return model_status

# Run the initialization when this cell executes; `models` is the status dict returned by initialize_models().
models = initialize_models()


--- Initializing AI Models ---

- Groq API Key missing/invalid.
- Google API Key missing/invalid (needed for Gemini & Google Imagen).
- OpenAI API Key missing/invalid.
--- Functionality will be limited. Please set keys in .env file or environment variables. ---

Pipecat AI library not installed/imported. Moondream and Google Imagen (Pipecat) skipped.
⚠️ No parsing LLM available - Moondream analysis will be disabled.
⚠️ No strategy LLM available - Prompting might be less adaptive.
⚠️ CRITICAL: No capable LLM available for main prompt generation.
⚠️ No LLM available for AI feedback/evaluation.
--- Model Initialization Complete ---

- Cannot analyze images.
- Cannot generate detailed prompts.
- Cannot generate images.
--- Please check API keys and model initialization logs. Exiting. ---


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
