In [1]:
!pip install --upgrade --quiet langchain-google-genai

In [2]:
from google.colab import drive
# Mount Google Drive at /content/drive, forcing a remount if it is already mounted.
drive.mount(
    "/content/drive",
    force_remount=True,
)

Mounted at /content/drive


In [11]:
!pip install --upgrade openai


Collecting openai
  Downloading openai-2.14.0-py3-none-any.whl.metadata (29 kB)
Downloading openai-2.14.0-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 2.12.0
    Uninstalling openai-2.12.0:
      Successfully uninstalled openai-2.12.0
Successfully installed openai-2.14.0


In [39]:
import os
from google.colab import userdata
from openai import OpenAI

# Copy the OpenRouter key from Colab Secrets into the process environment
os.environ["OPENROUTER_API_KEY"] = userdata.get("RITAM'S_KEY")

# OpenAI SDK client pointed at the OpenRouter endpoint
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
)

# NOTE(review): the prompt asks the model to analyze an image, but the request
# built below carries this text only; no image part is attached to the message.
cluster_prompt = """
You are an expert Computer Vision and Machine Learning analyst specializing in unsupervised image clustering.

Analyze the given image (or image embedding) and assign it to an unsupervised cluster.
Explain the visual reasoning briefly.

Respond strictly in JSON:

{
  "cluster_id": "<integer>",
  "reasoning": "<short explanation>",
  "confidence": "<percentage from 0-100>"
}
"""

# Single user turn containing the prompt text
cluster_messages = [{"role": "user", "content": cluster_prompt}]

response = client.chat.completions.create(
    model="openai/gpt-4.1-mini",  # cheaper + sufficient
    messages=cluster_messages,
    temperature=0.2,
    max_tokens=300,
)

# Show the text of the first returned choice
first_choice = response.choices[0]
print(first_choice.message.content)


{
  "cluster_id": 3,
  "reasoning": "The image contains mostly text with dense formatting and tables, indicating it belongs to a cluster of document images focused on structured information.",
  "confidence": "85"
}


In [20]:
# Prompt text passed as `prompt_text` to generate_search_metadata alongside each image.
# It asks the model for a raw JSON object with five fields (see "Output JSON format").
# NOTE(review): the Rules section refers to "Description", "Text content" and "Keywords",
# while the JSON schema names the fields detailed_description, text_content and
# search_keywords; main_subject and visual_elements have no rule of their own, and the
# blurry/ambiguous rule does not say which field should carry that remark.
image_description_prompt = """
You are a computer vision annotation system for unlabeled image clustering.

Task:
Analyze the image and return concise, objective metadata that helps group visually similar images.

Rules:
- Description: 1 sentence. State main subject, indoor/outdoor setting, and visible action or pose.
- Text content: If no readable text, return null. Otherwise transcribe exactly.
- Keywords: Provide 8–10 visual keywords focused on similarity (e.g., person, pet, indoor, outdoor, group, close-up).
- If the image is blurry, low-resolution, or ambiguous, mention it.
- Output RAW JSON only. No markdown or explanations.

Output JSON format:
{
  "main_subject": "string",
  "detailed_description": "string",
  "visual_elements": ["string"],
  "text_content": "string or null",
  "search_keywords": ["string"]
}
"""



In [40]:
import os
import base64
import mimetypes
import json
from google.colab import userdata
from openai import OpenAI

# Copy the OpenRouter key from Colab Secrets into the process environment
os.environ["OPENROUTER_API_KEY"] = userdata.get("RITAM'S_KEY")

# Endpoint the OpenAI SDK client should talk to
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

# OpenAI SDK client configured for OpenRouter
client = OpenAI(
    base_url=OPENROUTER_BASE_URL,
    api_key=os.environ["OPENROUTER_API_KEY"],
)
def encode_image(image_path):
    """Read the file at ``image_path`` in binary mode and return its base64 text.

    Returns:
        str: The base64 encoding of the file's bytes, decoded as UTF-8.

    Raises:
        FileNotFoundError: If no file exists at ``image_path``.
    """
    with open(image_path, mode="rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")
def generate_search_metadata(
    image_path,
    client,
    prompt_text,
    model="openai/gpt-4o-mini",  # vision + cheap
    temperature=0.2,
    max_tokens=300,
):
    """
    Send one local image plus a text prompt to a chat-completions vision model.

    The image is base64-encoded and embedded in the request as a ``data:`` URL.

    Args:
        image_path: Path of the image file to send.
        client: Object exposing ``chat.completions.create(...)`` (the notebook
            passes an OpenAI SDK client configured for OpenRouter).
        prompt_text: Instruction text sent as the text part of the user message.
        model: Model identifier forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API. Raise it if
            responses come back cut off.

    Returns:
        The first choice's message text with surrounding whitespace stripped,
        or ``None`` if the file is missing, the request fails, or the model
        returns no text content. Failures are printed, not raised.
    """
    try:
        # Guess the MIME type from the file name; fall back to JPEG when unknown
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type:
            mime_type = "image/jpeg"

        # Encode image
        img_base64 = encode_image(image_path)

        # One user message with two parts: the prompt text and the inline image
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt_text},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{img_base64}"
                            }
                        }
                    ]
                }
            ],
            temperature=temperature,
            max_tokens=max_tokens
        )

        content = response.choices[0].message.content
        if content is None:
            # Report a missing text body explicitly instead of letting .strip()
            # fail with an opaque AttributeError message.
            print(f"Generation error: model returned no text content for {image_path}")
            return None

        return content.strip()

    except FileNotFoundError:
        print(f"Error: File not found -> {image_path}")
        return None
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller move on
        print(f"Generation error: {e}")
        return None


In [41]:
# Try the metadata generator on one image before running it over the folder.
image_path = "/content/drive/MyDrive/Datasets/photos_no_class/asparagus-g4c4164115_640.jpg"

metadata = generate_search_metadata(image_path, client, image_description_prompt)

# The helper returns None on failure, so only print a non-empty result.
if metadata:
    print(metadata)


{
  "main_subject": "food",
  "detailed_description": "A plate of sliced steak accompanied by asparagus and cherry tomatoes, set indoors.",
  "visual_elements": ["steak", "asparagus", "cherry tomatoes", "plate", "indoor setting", "cooked food", "garnish", "sliced"],
  "text_content": null,
  "search_keywords": ["food", "indoor", "cooked", "steak", "vegetables", "garnish", "meal", "plated"]
}


In [24]:
import os
import json

source_folder = "/content/drive/MyDrive/Datasets/photos_no_class"
output_folder = "/content/drive/MyDrive/Datasets/photos_json"

# Upper bound on how many images one run handles; also reported in the final message.
MAX_IMAGES = 20

os.makedirs(output_folder, exist_ok=True)

# Supported image extensions
valid_extensions = ('.jpg', '.jpeg', '.png', '.webp', '.heic')

print(f"Starting processing for images in: {source_folder}\n")

# os.listdir() returns names in arbitrary order, so sort before truncating;
# otherwise "the first MAX_IMAGES images" can differ from run to run.
files = sorted(
    f for f in os.listdir(source_folder)
    if f.lower().endswith(valid_extensions)
)[:MAX_IMAGES]

total_files = len(files)
processed_count = 0

# processed_count is the 1-based position of the current file; it ends at total_files.
for processed_count, filename in enumerate(files, start=1):
    image_path = os.path.join(source_folder, filename)

    # NOTE: images sharing a stem (e.g. a.jpg and a.png) map to the same JSON name,
    # so the later one is reported as already existing.
    json_filename = f"{os.path.splitext(filename)[0]}.json"
    json_path = os.path.join(output_folder, json_filename)

    # Existing JSON acts as a cache, so re-runs only call the API for new images
    if os.path.exists(json_path):
        print(f"[{processed_count}/{total_files}] Skipping: {filename} (JSON already exists)")
        continue

    print(f"[{processed_count}/{total_files}] Generating metadata for: {filename}...")

    try:
        response_text = generate_search_metadata(
            image_path=image_path,
            client=client,
            prompt_text=image_description_prompt
        )

        # The helper returns None on failure (it prints the reason itself)
        if not response_text:
            print(f"   -> Skipped {filename} (Empty response)")
            continue

        try:
            description = json.loads(response_text)
        except json.JSONDecodeError:
            # Show the raw reply so the malformed output can be inspected
            print(f"   -> Invalid JSON for {filename}")
            print(response_text)
            continue

        data = {
            "file_path": image_path,
            "description": description
        }

        with open(json_path, "w", encoding="utf-8") as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)

        print(f"   -> Saved to {json_filename}")

    except Exception as e:
        # Best-effort batch: report the failure and continue with the next image
        print(f"   -> Error processing {filename}: {e}")

print(f"\n--- Processing Complete (First {MAX_IMAGES} Images Only) ---")


Starting processing for images in: /content/drive/MyDrive/Datasets/photos_no_class

[1/20] Skipping: asparagus-g4c4164115_640.jpg (JSON already exists)
[2/20] Skipping: beanie-g4c423e47b_640.jpg (JSON already exists)
[3/20] Skipping: bibimbap-gf29abdbf1_640.jpg (JSON already exists)
[4/20] Skipping: cat-g0052cc4e9_640.jpg (JSON already exists)
[5/20] Skipping: cat-g0fcd844a4_640.jpg (JSON already exists)
[6/20] Skipping: cat-g11b1f4535_640.jpg (JSON already exists)
[7/20] Skipping: cat-g4ae5d18aa_640.jpg (JSON already exists)
[8/20] Skipping: cat-g4fe5d8c20_640.jpg (JSON already exists)
[9/20] Skipping: cat-g6052b543b_640.jpg (JSON already exists)
[10/20] Skipping: cat-ga3a48da6e_640.jpg (JSON already exists)
[11/20] Skipping: cat-gaf654b3a3_640.jpg (JSON already exists)
[12/20] Skipping: cat-gf324dae69_640.jpg (JSON already exists)
[13/20] Generating metadata for: cave-g68bd31d20_640.jpg...
   -> Saved to cave-g68bd31d20_640.json
[14/20] Skipping: champon-g31fa88e14_640.jpg (JSON alre

In [34]:
!pip install -U -q openai langchain chromadb langchain-openai langchain-community

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/2.5 MB[0m [31m11.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m38.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m56.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━