<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Product_Marketing_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Sample Data for the AI Product Marketing Image Generation System
# Product Marketing AI System

**Overview:**

The Product Marketing AI System automatically creates high-quality marketing visuals by processing, enhancing, and refining input images.

**Key Steps:**
- **Input & Preprocessing:**  
  Users provide a base image (e.g., a background or scene) and a secondary image (e.g., a product or model). The system enhances these images, adjusts resolution and contrast, and generates segmentation masks.

- **Context & Prompt Generation:**  
  It analyzes the images to extract contextual details and automatically generates a detailed, structured marketing prompt tailored to the product and target audience.

- **Image Generation & Refinement:**  
  Advanced AI models use the prompt to generate photorealistic marketing images. The system then refines the results based on user feedback to ensure the output meets professional quality standards.

- **Quality Evaluation:**  
  Final images are evaluated using metrics (such as SSIM, PSNR, and color histograms) to compare them with the original inputs. Additionally, AI-driven feedback helps further improve image quality.

**Applications:**
- **Digital Advertising & E-Commerce:**  
  Generate stunning visuals for online stores, social media campaigns, and ad banners.
- **Branding & Marketing:**  
  Enhance and standardize visual branding materials across various industries such as fashion, tech, automotive, and real estate.
- **Content Creation:**  
  Streamline production of professional digital content for websites, promotions, and digital signage.

This system provides a seamless, automated solution for creating visually appealing marketing assets, reducing manual effort and ensuring consistency and high quality across all outputs.

In [1]:
!pip install serpapi google-search-results



In [3]:
import os
import requests
import http.client
import json
import uuid
import csv
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
from PIL import Image
import numpy as np
import cv2
from serpapi import GoogleSearch

# Directories for images: everything the crawler writes lives under BASE_DIR.
BASE_DIR = "crawl4ai_images"
BASE_IMAGES_FOLDER = os.path.join(BASE_DIR, "base_images")
SECONDARY_IMAGES_FOLDER = os.path.join(BASE_DIR, "secondary_images")
METADATA_FILE = os.path.join(BASE_DIR, "image_metadata.csv")
# Creating the two sub-folders also creates BASE_DIR, so the metadata CSV
# and the summary file can be written there later.
os.makedirs(BASE_IMAGES_FOLDER, exist_ok=True)
os.makedirs(SECONDARY_IMAGES_FOLDER, exist_ok=True)

# SerpAPI key: read from the environment so the credential is never committed
# with the notebook. Set it before running, e.g.
#   os.environ["SERPAPI_KEY"] = "<your key>"
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "")
if not SERPAPI_KEY:
    print("Warning: SERPAPI_KEY is not set; Google image searches will fail.")

In [4]:
# Industry templates for reference (to guide relevant image sourcing).
# Each row is (industry, base prompt, modifiers). The base prompt carries
# str.format-style placeholders such as {product}, {setting} and {lighting};
# the modifiers are adjectives associated with that industry.
_INDUSTRY_TEMPLATE_ROWS = (
    (
        "fashion",
        "High-end {product} featured in {setting}, {style} aesthetic, {lighting} lighting, emphasizing texture and detail, aspirational lifestyle",
        ["luxury", "trendy", "sophisticated", "vibrant", "elegant"],
    ),
    (
        "food",
        "Appetizing {product} in {setting}, rich colors, {lighting} lighting, showing texture and ingredients, steam and freshness indicators",
        ["delicious", "fresh", "gourmet", "homemade", "artisanal"],
    ),
    (
        "tech",
        "Modern {product} in {setting}, sleek design, {lighting} lighting, highlighting features, minimal and clean composition",
        ["innovative", "futuristic", "powerful", "sleek", "premium"],
    ),
    (
        "beauty",
        "{product} with model in {setting}, glowing skin effect, {lighting} lighting, focused on transformation and results",
        ["radiant", "flawless", "natural", "luxurious", "rejuvenating"],
    ),
    (
        "automotive",
        "Dynamic {product} in {setting}, dramatic angle, {lighting} lighting, highlighting curves and features, sense of motion",
        ["powerful", "luxurious", "rugged", "sleek", "innovative"],
    ),
    (
        "real_estate",
        "Welcoming {product} in {setting}, spacious feeling, {lighting} lighting, showcasing architectural features and lifestyle potential",
        ["spacious", "elegant", "modern", "cozy", "luxurious"],
    ),
)

# Public mapping: industry name -> {"base": prompt, "modifiers": [adjectives]}.
INDUSTRY_TEMPLATES = {
    industry: {"base": base_prompt, "modifiers": modifiers}
    for industry, base_prompt, modifiers in _INDUSTRY_TEMPLATE_ROWS
}

In [5]:
# Initialize metadata CSV
def init_metadata():
    """Create (or overwrite) the metadata CSV so it holds only the header row."""
    header = ["image_path", "category", "source_url", "industry", "type"]
    with open(METADATA_FILE, "w", newline="") as csv_file:
        csv.writer(csv_file).writerow(header)

# Utility functions
def is_valid(url):
    """Return True when *url* has both a scheme and a network location."""
    parts = urlparse(url)
    if not parts.netloc:
        return False
    return bool(parts.scheme)

def extract_images(html, base_url):
    """Collect absolute URLs of JPEG/PNG images referenced by ``<img src>`` tags.

    Args:
        html: HTML markup of the page.
        base_url: URL the page was fetched from; relative ``src`` values are
            resolved against it.

    Returns:
        List of absolute image URLs in document order, without duplicates.
    """
    allowed_exts = (".jpg", ".jpeg", ".png")
    soup = BeautifulSoup(html, "html.parser")
    seen = set()
    image_urls = []
    for img in soup.find_all("img"):
        src = img.get("src")
        if not src:
            continue
        img_url = urljoin(base_url, src.strip())
        if img_url in seen or not is_valid(img_url):
            continue
        # Test the extension on the URL path only, so a query string or
        # fragment (e.g. "photo.jpg?w=800") does not hide it.
        if urlparse(img_url).path.lower().endswith(allowed_exts):
            seen.add(img_url)
            image_urls.append(img_url)
    return image_urls

def _discard_file(path):
    """Best-effort removal of *path*; never raises."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"Could not remove {path}: {e}")


def download_image(url, folder, category, industry, image_type,
                   min_variance=250, min_laplacian=150, min_side=400):
    """Download one image, keep it only if it passes the quality checks.

    The image is saved under a random UUID-based name in *folder*. It is then
    opened and rejected (file deleted) when its grayscale pixel variance, the
    variance of its Laplacian, or either of its dimensions falls below the
    given thresholds. Accepted images get a row appended to ``METADATA_FILE``.

    Args:
        url: Image URL to download.
        folder: Existing directory to save the file into.
        category: Category label recorded in the metadata row.
        industry: Industry label recorded in the metadata row.
        image_type: Image type label recorded in the metadata row.
        min_variance: Minimum grayscale pixel variance.
        min_laplacian: Minimum variance of the Laplacian of the grayscale image.
        min_side: Minimum width and height in pixels.

    Returns:
        The saved file path, or None when the download failed, the file was
        not a readable image, it was rejected as low quality, or its metadata
        row could not be written. Errors are printed, never raised.
    """
    # Take the extension from the URL path only, so the host name, query
    # string and fragment can never be mistaken for a file extension.
    ext = os.path.splitext(urlparse(url).path)[1]
    if not ext:
        ext = ".jpg"
    # Generate a unique name using uuid and preserve the extension.
    save_path = os.path.join(folder, f"{uuid.uuid4()}{ext}")

    try:
        # The context manager releases the streamed connection in every case.
        with requests.get(url, stream=True, timeout=10) as response:
            response.raise_for_status()
            with open(save_path, "wb") as f:
                for chunk in response.iter_content(8192):
                    f.write(chunk)
    except Exception as e:
        print(f"Error downloading {url}: {e}")
        # Do not leave a partially written file behind.
        _discard_file(save_path)
        return None

    # Quality check - measure while the file is open, delete only after it
    # has been closed (an open file cannot be removed on Windows).
    try:
        with Image.open(save_path) as img:
            size = img.size
            gray = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2GRAY)
        variance = np.var(gray)
        laplacian = cv2.Laplacian(gray, cv2.CV_64F).var()
    except Exception as e:
        _discard_file(save_path)
        print(f"Removed {save_path}: Invalid image ({e})")
        return None

    if variance < min_variance or laplacian < min_laplacian or min(size) < min_side:
        _discard_file(save_path)
        print(f"Removed {save_path}: Low quality (variance={variance}, Laplacian={laplacian}, size={size})")
        return None

    # Log metadata; an image without a metadata row is not kept.
    try:
        with open(METADATA_FILE, "a", newline="") as f:
            csv.writer(f).writerow([save_path, category, url, industry, image_type])
    except Exception as e:
        _discard_file(save_path)
        print(f"Removed {save_path}: Could not write metadata ({e})")
        return None
    return save_path

def fetch_page(url):
    """Fetch *url* with a browser-like User-Agent.

    Returns the response body as text, or None (after printing the error)
    when the request fails or the server answers with an error status.
    """
    browser_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        page = requests.get(url, headers=browser_headers, timeout=10)
        page.raise_for_status()
        body = page.text
    except Exception as err:
        print(f"Error fetching {url}: {err}")
        return None
    return body

def google_image_search(query, limit=10):
    """Search Google Images through SerpAPI and return original image URLs.

    Args:
        query: Search query text.
        limit: Maximum number of URLs to return; values <= 0 yield an empty list.

    Returns:
        Up to *limit* URLs taken from the ``original`` field of the first
        results page. An empty list is returned (and the problem printed)
        when the request raises or the response carries an ``error`` field.
    """
    try:
        params = {
            "q": query,
            "tbm": "isch",  # image search
            "ijn": "0",     # first results page
            "api_key": SERPAPI_KEY
        }
        results = GoogleSearch(params).get_dict()
        # Failures such as an invalid key arrive as an "error" field in the
        # response rather than as an exception; report them instead of
        # silently returning nothing.
        api_error = results.get("error")
        if api_error:
            print(f"Error fetching Google Images for {query}: {api_error}")
            return []
        image_urls = [img["original"] for img in results.get("images_results", []) if img.get("original")]
        # Clamp so a negative limit cannot slice from the end of the list.
        return image_urls[:max(limit, 0)]
    except Exception as e:
        print(f"Error fetching Google Images for {query}: {e}")
        return []

def crawl_and_download(url, folder, limit=5, category="unknown", industry="unknown", image_type="unknown"):
    """Crawl one page and download up to *limit* accepted images from it.

    The page is fetched, its image URLs are extracted, and each one is passed
    to ``download_image`` until *limit* paths have been collected or the
    candidates run out. Returns the list of saved paths (empty when the page
    could not be fetched).
    """
    print(f"\nCrawling: {url}")
    html = fetch_page(url)
    if not html:
        print(f"Skipping {url} due to fetch error.")
        return []

    saved = []
    exhausted = object()  # sentinel marking the end of the candidate list
    candidates = iter(extract_images(html, url))
    while len(saved) < limit:
        candidate = next(candidates, exhausted)
        if candidate is exhausted:
            break
        saved_path = download_image(candidate, folder, category, industry, image_type)
        if saved_path:
            saved.append(saved_path)
    return saved

In [6]:
def _collect_images(sources, folder, image_type, per_source_limit, max_total=30):
    """Download images from *sources* into *folder*, at most *max_total* overall.

    Args:
        sources: Iterable of ``(source_type, source, category, industry)``.
            For ``source_type == "google"`` the source is a search query;
            for any other type it is a page URL to crawl.
        folder: Destination directory.
        image_type: Label recorded in the metadata ("base" or "secondary").
        per_source_limit: Callable mapping a category to that source's quota.
        max_total: Overall cap on collected images.

    Returns:
        List of saved image paths, never longer than *max_total*.
    """
    collected = []
    for source_type, source, category, industry in sources:
        remaining = max_total - len(collected)
        if remaining <= 0:
            # Cap reached: stop before spending further searches or downloads.
            break
        limit = per_source_limit(category)
        if source_type == "google":
            for url in google_image_search(source, limit=limit):
                if len(collected) >= max_total:
                    break
                path = download_image(url, folder, category, industry, image_type)
                if path:
                    collected.append(path)
        else:
            # Hand the crawler only the remaining budget so no file (or
            # metadata row) is written beyond the overall cap.
            collected.extend(
                crawl_and_download(
                    source, folder, limit=min(limit, remaining),
                    category=category, industry=industry, image_type=image_type
                )
            )
    return collected


def main():
    """Build the sample dataset: base images, secondary images, and a summary.

    Resets the metadata CSV, downloads up to 30 base and up to 30 secondary
    images from the configured sources, then writes ``download_summary.json``
    into ``BASE_DIR`` and prints the same summary.
    """
    # Initialize metadata file
    init_metadata()

    # Sources for base images (settings for all industries, including human-centric).
    # Each entry: (source type, search query or page URL, category, industry).
    base_sources = [
        ("google", "nature landscape photography high quality", "nature", "fashion"),
        ("google", "modern office interior high resolution", "office", "tech"),
        ("google", "urban cityscape photography", "urban", "automotive"),
        ("google", "people in nature photography high quality", "human_nature", "beauty"),
        ("google", "people in professional setting photography", "human_office", "tech"),
        ("flickr", "https://www.flickr.com/search/?text=nature+landscape", "nature", "real_estate"),
        ("flickr", "https://www.flickr.com/search/?text=people+in+urban+setting", "human_urban", "fashion"),
        ("adobe", "https://stock.adobe.com/search?k=modern+kitchen+background", "kitchen", "food"),
        ("adobe", "https://stock.adobe.com/search?k=luxury+home+interior", "home_interior", "real_estate")
    ]
    # Sources for secondary images (products, models, vehicles, food, accessories)
    secondary_sources = [
        ("google", "tech product photography high quality", "tech_product", "tech"),
        ("google", "fashion apparel photography studio", "fashion_apparel", "fashion"),
        ("google", "male model fashion photography", "male_model", "beauty"),
        ("google", "female model fashion photography", "female_model", "beauty"),
        ("google", "luxury watch photography high quality", "watch", "fashion"),
        ("google", "high-end headphones photography", "headphones", "tech"),
        ("google", "luxury purse photography", "purse", "fashion"),
        ("google", "designer sunglasses photography", "sunglasses", "fashion"),
        ("google", "designer shoes photography", "shoes", "fashion"),
        ("flickr", "https://www.flickr.com/search/?text=luxury+car", "vehicle", "automotive"),
        ("flickr", "https://www.flickr.com/search/?text=gourmet+food", "food", "food"),
        ("adobe", "https://stock.adobe.com/search?k=beauty+product", "beauty_product", "beauty"),
        ("adobe", "https://stock.adobe.com/search?k=luxury+accessories", "accessories", "fashion")
    ]

    def base_limit(category):
        # Balance human-centric with other settings
        return 4 if "human" in category else 3

    def secondary_limit(category):
        # Balance categories
        return 3 if "model" in category or "food" in category or "product" in category else 2

    # Download up to 30 base images
    base_image_paths = _collect_images(base_sources, BASE_IMAGES_FOLDER, "base", base_limit)

    # Download up to 30 secondary images
    secondary_image_paths = _collect_images(
        secondary_sources, SECONDARY_IMAGES_FOLDER, "secondary", secondary_limit
    )

    # Generate and save a summary file with the download information
    summary = {
        "base_images_downloaded": len(base_image_paths),
        "secondary_images_downloaded": len(secondary_image_paths),
        "base_image_paths": base_image_paths,
        "secondary_image_paths": secondary_image_paths
    }
    with open(os.path.join(BASE_DIR, "download_summary.json"), "w") as f:
        json.dump(summary, f, indent=4)
    print("Download complete. Summary:")
    print(json.dumps(summary, indent=4))

# Run the download pipeline only when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()


Crawling: https://www.flickr.com/search/?text=nature+landscape
Removed crawl4ai_images/base_images/3a4d5d19-6ae0-44ba-aa3e-4cb1b9b92a96.jpg: Low quality (variance=2391.214127296576, Laplacian=4293.555032172815, size=(320, 213))
Removed crawl4ai_images/base_images/822a7670-62aa-4853-b8ac-51522284806b.jpg: Low quality (variance=2949.2293323231384, Laplacian=1231.785146619021, size=(319, 213))
Removed crawl4ai_images/base_images/634a5caf-5e45-4c2a-8d6c-8fa2e1d40872.jpg: Low quality (variance=1550.937160175186, Laplacian=845.9608903766639, size=(319, 213))
Removed crawl4ai_images/base_images/4b798405-707f-4dec-be13-fff74093c802.jpg: Low quality (variance=5259.459327958623, Laplacian=2870.970928610312, size=(319, 213))
Removed crawl4ai_images/base_images/3c6b7f91-c9e9-49a1-8d09-d382a258f1a8.jpg: Low quality (variance=2261.5516684457384, Laplacian=2671.97341908184, size=(159, 239))
Removed crawl4ai_images/base_images/bf436a61-ce80-48e2-8f0b-0aa0742288f8.jpg: Low quality (variance=2050.46845