In [2]:
import os
import requests
from google.cloud import vision
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from PIL import Image
from io import BytesIO

# Root directory for all downloaded images; created up front if it is missing.
IMAGE_DIR = "GEM_CLARITY"
os.makedirs(IMAGE_DIR, exist_ok=True)

# Clarity grades to fetch images for. Each entry is used both as the search
# term and as the name of its subfolder under IMAGE_DIR.
gem_clarity = [
    "FL", "IF",
    "VVS1 - VVs2", "VVS1", "VVS2",
    "VS", "VS1", "VS2",
    "SI", "SI1", "SI2", "SI3",
    "I1", "I2", "I3",
]


def search_image(query, api_key, cse_id):
    """Search for images using the Google Custom Search JSON API.

    The search string is ``query`` followed by the fixed keywords
    "wedding band diamond".

    Args:
        query: Term to search for (the callers in this file pass a clarity
            grade such as "VS1").
        api_key: Google API key passed to the client as ``developerKey``.
        cse_id: Custom Search Engine ID passed as ``cx``.

    Returns:
        A list with the ``link`` of every returned item, or an empty list
        when the search yields nothing or the API raises ``HttpError``.
    """
    # Hyphens and dots are stripped so that a stand-alone "-" (as in
    # "VVS1 - VVs2") is not sent as a search token of its own.
    cleaned = query.replace("-", " ").replace(".", "")
    # Words are separated by plain spaces: the client library URL-encodes
    # query parameters itself, so pre-encoding with "+" would send literal
    # plus signs. split()/join() also collapses repeated whitespace and
    # guarantees a separator between the query and the fixed keywords.
    search_terms = " ".join(f"{cleaned} wedding band diamond".split())

    try:
        service = build("customsearch", "v1", developerKey=api_key)
        res = service.cse().list(
            q=search_terms,
            cx=cse_id,
            searchType="image",
            num=10,  # the API returns at most 10 results per request
        ).execute()
    except HttpError as e:
        print(f"Google Search API Error: {e}")
        return []

    items = res.get("items")
    if not items:
        print(f"No images found for query: {query}")
        return []
    return [item["link"] for item in items]

def analyze_image(image_url):
    """Run Google Cloud Vision label detection on a remote image.

    Args:
        image_url: URL of the image; it is handed to the Vision API as
            ``image.source.image_uri`` so the service fetches it itself.

    Returns:
        A list of lower-cased label descriptions, or an empty list when the
        API reports an error for the image or the call raises.
    """
    try:
        client = vision.ImageAnnotatorClient()
        image = vision.Image()
        image.source.image_uri = image_url
        response = client.label_detection(image=image)

        # Per-image failures (e.g. the service could not fetch the URL) are
        # reported in response.error instead of being raised, so they must be
        # checked explicitly or they look like "no labels found".
        if response.error.message:
            print(f"Vision API Error: {response.error.message}")
            return []

        return [label.description.lower() for label in response.label_annotations]
    except Exception as e:
        print(f"Vision API Error: {e}")
        return []

def download_image(image_url, save_path):
    """Fetch an image over HTTP, validate it, and store it as a JPEG.

    Args:
        image_url: URL to download (10 second timeout).
        save_path: Destination file path for the converted JPEG.

    Returns:
        ``save_path`` when the image was downloaded, verified and written;
        ``None`` on a non-200 response or on any exception.
    """
    try:
        reply = requests.get(image_url, timeout=10)
        if reply.status_code != 200:
            print(f"Failed to download image: {image_url} (Status Code: {reply.status_code})")
            return None

        # verify() leaves the Image object unusable afterwards, so the check
        # runs on a throwaway instance and the data is opened a second time
        # for the actual conversion.
        Image.open(BytesIO(reply.content)).verify()
        Image.open(BytesIO(reply.content)).convert("RGB").save(save_path, format="JPEG")
        return save_path
    except Exception as e:
        print(f"Error downloading or verifying image: {e} - URL: {image_url}")
        return None

def create_folder_structure():
    """Ensure IMAGE_DIR contains one subfolder per entry of gem_clarity."""
    subfolders = [os.path.join(IMAGE_DIR, grade) for grade in gem_clarity]
    for subfolder in subfolders:
        # exist_ok keeps repeated runs from failing on existing folders.
        os.makedirs(subfolder, exist_ok=True)

def fetch_and_save_images(api_key, cse_id):
    """Fetch, save and label images for every entry of gem_clarity.

    For each clarity grade this searches for image URLs with search_image(),
    downloads each result into ``IMAGE_DIR/<grade>/`` via download_image(),
    and prints the Vision API labels returned by analyze_image() for every
    image that was saved successfully.

    Args:
        api_key: Google API key forwarded to search_image().
        cse_id: Custom Search Engine ID forwarded to search_image().
    """
    max_images = 20  # upper bound on saved images per clarity grade

    for cat in gem_clarity:
        print(f"Fetching images for {cat}...")

        # Create the target folder here as well, so this function does not
        # depend on create_folder_structure() having been called first
        # (a missing folder would otherwise make every save fail).
        color_folder = os.path.join(IMAGE_DIR, cat)
        os.makedirs(color_folder, exist_ok=True)

        image_urls = search_image(cat, api_key, cse_id)
        if not image_urls:
            print(f"No images found for {cat}")
            continue

        downloaded_images = 0  # counts successful downloads only
        for image_url in image_urls:
            if downloaded_images >= max_images:
                break

            # File names are numbered by successful downloads, so a failed
            # URL does not leave a gap in the sequence.
            image_filename = f"{cat.replace(' ', '_')}_{downloaded_images + 1}.jpg"
            image_path = os.path.join(color_folder, image_filename)

            saved_image = download_image(image_url, image_path)
            if not saved_image:
                continue

            print(f"Image saved: {saved_image}")

            # Labels are requested for the original URL, not the saved file.
            labels = analyze_image(image_url)
            print(f"Labels for {image_filename}: {', '.join(labels)}")

            downloaded_images += 1

# Google API credentials. The environment variables GOOGLE_API_KEY and
# GOOGLE_CSE_ID take precedence so secrets can be kept out of the source;
# the literals are only a fallback that preserves the previous behavior.
# NOTE(review): a credential committed to source should be treated as
# compromised - rotate this key and remove the literal fallback.
API_KEY = os.environ.get("GOOGLE_API_KEY", "AIzaSyAo5M5HXAXSkVxBzHRG0bUMRJf3RltEhJw")
CSE_ID = os.environ.get("GOOGLE_CSE_ID", "d63ff1141028241de")

# Service-account file used by the Vision client. setdefault() keeps a value
# that is already configured in the environment instead of overwriting it
# with this machine-specific path.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    r"C:/Users/TechAir\Desktop/GEM_CLARITY/jewelry-449211-b1561756ae8d.json",
)

# Only run the download when executed as a script or notebook cell, so that
# importing this module does not trigger network calls.
if __name__ == "__main__":
    # Create one folder per clarity grade, then fetch and save the images.
    create_folder_structure()
    fetch_and_save_images(API_KEY, CSE_ID)

Fetching images for FL...
Error downloading or verifying image: cannot identify image file <_io.BytesIO object at 0x0000029720111DB0> - URL: https://lookaside.instagram.com/seo/google_widget/crawler/?media_id=3337348134885943948
Image saved: GEM_CLARITY\FL\FL_1.jpg
Labels for FL_1.jpg: engagement ring, jewellery, body jewelry, ring, wedding ring, wedding ceremony supply, pre-engagement ring, diamond, gemstone, natural material
Image saved: GEM_CLARITY\FL\FL_2.jpg
Labels for FL_2.jpg: engagement ring, ring, jewellery, body jewelry, wedding ring, gemstone, diamond, pre-engagement ring, silver, metal
Image saved: GEM_CLARITY\FL\FL_3.jpg
Labels for FL_3.jpg: body jewelry, jewellery, gemstone, diamond, ring, pre-engagement ring, natural material, jewelry making, wedding ceremony supply, bangle
Image saved: GEM_CLARITY\FL\FL_4.jpg
Labels for FL_4.jpg: engagement ring, ring, jewellery, wedding ring, body jewelry, wedding ceremony supply, pre-engagement ring, gemstone, diamond, natural materia