In [1]:
!pip install faiss-cpu





In [2]:
import torch
import pandas as pd
import requests
from PIL import Image
import numpy as np
import os
import clip
from ultralytics import YOLO
import faiss
import json
from tqdm.notebook import tqdm # For a nice progress bar!

print("Libraries imported.")

# Pick the compute device: Apple's Metal backend (MPS) when PyTorch reports it
# as available, otherwise the CPU.
mps_available = torch.backends.mps.is_available()
device = torch.device("mps" if mps_available else "cpu")
print("Using MPS (Apple Silicon GPU)" if mps_available else "MPS not found, using CPU")

Libraries imported.
Using MPS (Apple Silicon GPU)


In [3]:
# Configure HTTPS certificate verification, then load the YOLO detector and the CLIP encoder.
import ssl
import certifi
import urllib.request
import clip

# Route every urllib HTTPS request through an SSL context backed by certifi's
# CA bundle by installing a process-wide opener built on that context.
ssl_context = ssl.create_default_context(cafile=certifi.where())
https_handler = urllib.request.HTTPSHandler(context=ssl_context)
certifi_opener = urllib.request.build_opener(https_handler)
urllib.request.install_opener(certifi_opener)

# Detector: YOLO weights read from the local models directory, moved to `device`.
YOLO_MODEL_PATH = '../models/best.pt'
yolo_model = YOLO(YOLO_MODEL_PATH).to(device)
print("YOLOv8 model loaded.")

# Encoder: CLIP ViT-L/14 together with its matching image preprocessing transform.
clip_model, preprocess = clip.load("ViT-L/14", device=device)
print("CLIP model loaded.")

YOLOv8 model loaded.
CLIP model loaded.


In [4]:
CATALOG_PATH = '../data/catalog_full.csv'

# Development switch: set SAMPLE_SIZE to an integer (e.g. 500) to process only a
# reproducible random subset of the catalog. Leave it as None for the full run.
SAMPLE_SIZE = None
SAMPLE_SEED = 42

df_catalog_full = pd.read_csv(CATALOG_PATH)

# Fail fast if the columns the embedding loop reads are absent; otherwise every
# row would fail later with a KeyError and the run would yield nothing.
missing_columns = {'id', 'image_url'} - set(df_catalog_full.columns)
if missing_columns:
    raise KeyError(f"Catalog is missing required column(s): {sorted(missing_columns)}")

if SAMPLE_SIZE is None or SAMPLE_SIZE >= len(df_catalog_full):
    # Full run: downstream cells see the complete, unmodified catalog.
    df_catalog = df_catalog_full
else:
    # Fixed seed so repeated development runs process the same rows.
    df_catalog = df_catalog_full.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)

print(f"Loaded catalog with {len(df_catalog_full)} images.")
print(f"We will process a sample of {len(df_catalog)} images for this test run.")

Loaded catalog with 9172 images.
We will process a sample of 9172 images for this test run.


In [5]:
pip install ipywidgets


You should consider upgrading via the '/Users/ravinder/Desktop/flickd-hackathon/venv/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [6]:
def _download_image(url, timeout=10):
    """Fetch `url` over HTTP and return the picture as an RGB PIL image.

    Raises whatever `requests` / PIL raise on network errors, non-2xx status
    codes, or undecodable image data.
    """
    # The context manager releases the streamed connection even on failure.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        # The raw stream is not content-decoded (gzip/deflate) unless asked to be.
        response.raw.decode_content = True
        # convert() forces a full read, so the pixels are loaded before the
        # connection is closed on leaving the `with` block.
        return Image.open(response.raw).convert("RGB")


def _largest_box(boxes):
    """Return (x1, y1, x2, y2) as ints for the largest-area box, or None.

    None is returned when there are no detections, when the largest box has
    non-positive area, or when it collapses to an empty crop after truncating
    the coordinates to integers.
    """
    if boxes is None or len(boxes) == 0:
        return None
    xyxy = boxes.xyxy.detach().cpu().float()
    areas = (xyxy[:, 2] - xyxy[:, 0]) * (xyxy[:, 3] - xyxy[:, 1])
    best = int(torch.argmax(areas))
    if float(areas[best]) <= 0:
        return None
    x1, y1, x2, y2 = (int(v) for v in xyxy[best].tolist())
    if x2 <= x1 or y2 <= y1:
        return None
    return x1, y1, x2, y2


def _embed_image(image):
    """Return the unit-length CLIP embedding of `image` as a (1, d) float32 array.

    Raises ValueError if the embedding contains NaN or infinite values.
    """
    image_input = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        # Cast to float32 before normalising: FAISS expects float32, and the
        # encoder may return a lower-precision dtype depending on the device.
        features = clip_model.encode_image(image_input).float()
        features = features / features.norm(dim=-1, keepdim=True)
    embedding = features.cpu().numpy().astype(np.float32, copy=False)
    if not np.isfinite(embedding).all():
        raise ValueError("CLIP produced a non-finite embedding")
    return embedding


# Results. Position i in both lists always refers to the same catalog image.
all_embeddings = []
product_id_map = []  # Maps the row index of an embedding to its product ID

# Bookkeeping, so dropped rows are reported instead of disappearing silently.
skipped_no_detection = 0
failed_rows = []  # (catalog row index, "ErrorType: message")

# tqdm wraps the dataframe iterator to show a progress bar.
for row_idx, row in tqdm(df_catalog.iterrows(), total=df_catalog.shape[0], desc="Processing Images"):
    try:
        # Parse the ID first: if it is invalid the row is rejected before
        # anything is stored, so the two result lists cannot drift apart.
        product_id = int(row['id'])

        # 1. Download image
        image = _download_image(row['image_url'])

        # 2. Detect with YOLO and keep the largest detection
        results = yolo_model(image, verbose=False)
        crop_box = _largest_box(results[0].boxes)
        if crop_box is None:
            skipped_no_detection += 1
            continue

        # 3. Crop to the detection and generate the CLIP embedding
        embedding_np = _embed_image(image.crop(crop_box))
    except Exception as e:
        # Best-effort batch job: record the failure and move on to the next row.
        failed_rows.append((row_idx, f"{type(e).__name__}: {e}"))
        continue

    # 4. Store the results together, only after every step has succeeded
    all_embeddings.append(embedding_np)
    product_id_map.append(product_id)

# Convert the list of embeddings into a single large NumPy array
if all_embeddings:
    embeddings_matrix = np.vstack(all_embeddings)
    print(f"\nProcessing complete.")
    print(f"Successfully generated {embeddings_matrix.shape[0]} embeddings.")
    print(f"Shape of the final matrix: {embeddings_matrix.shape}")
else:
    # Overwrite any matrix left in the kernel by an earlier run so later cells
    # cannot pick up stale data.
    embeddings_matrix = np.empty((0, 0), dtype=np.float32)
    print("\nProcessing complete. No embeddings were generated.")

print(f"Skipped (no usable detection): {skipped_no_detection}")
print(f"Failed (download/processing errors): {len(failed_rows)}")
for failed_idx, reason in failed_rows[:5]:
    print(f"  row {failed_idx}: {reason}")

Processing Images:   0%|          | 0/9172 [00:00<?, ?it/s]


Processing complete.
Successfully generated 7922 embeddings.
Shape of the final matrix: (7922, 768)


In [7]:
if 'embeddings_matrix' in locals() and embeddings_matrix.shape[0] > 0:
    # Row i of the index is looked up as product_id_map[i]; refuse to save
    # artefacts whose rows and IDs do not line up.
    if len(product_id_map) != embeddings_matrix.shape[0]:
        raise ValueError(
            f"product_id_map has {len(product_id_map)} entries but the embeddings "
            f"matrix has {embeddings_matrix.shape[0]} rows; refusing to save a misaligned index."
        )

    # FAISS expects C-contiguous float32 input. This is a no-op when the matrix
    # already has that layout, and it leaves `embeddings_matrix` untouched.
    vectors = np.ascontiguousarray(embeddings_matrix, dtype=np.float32)

    # Embedding dimension, taken from the data rather than assumed
    # (768 in the run recorded in this notebook).
    d = vectors.shape[1]

    # Exact (brute-force) L2 index. The stored vectors are unit-normalised, so
    # L2 ranking gives the same order as cosine-similarity ranking.
    index = faiss.IndexFlatL2(d)

    # Add our embeddings matrix to the index
    index.add(vectors)

    print(f"\nFAISS index created.")
    print(f"Total embeddings in index: {index.ntotal}")

    # --- Save the files ---
    OUTPUT_DIR = '../models/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    index_path = os.path.join(OUTPUT_DIR, "catalog_index_large.faiss")
    id_map_path = os.path.join(OUTPUT_DIR, "product_id_map_large.json")

    # 1. Save the FAISS index
    faiss.write_index(index, index_path)
    print(f"FAISS index saved to: {index_path}")

    # 2. Save the product ID mapping (list position == row in the index)
    with open(id_map_path, 'w') as f:
        json.dump(product_id_map, f)
    print(f"Product ID map saved to: {id_map_path}")

else:
    print("No embeddings were generated, skipping index creation.")


FAISS index created.
Total embeddings in index: 7922
FAISS index saved to: ../models/catalog_index_large.faiss
Product ID map saved to: ../models/product_id_map_large.json


In [8]:
# Final sanity check: report the embedding matrix dimensions, but only when the
# matrix exists in this kernel session.
matrix_is_defined = 'embeddings_matrix' in locals()
if matrix_is_defined:
    rows_by_dims = embeddings_matrix.shape
    print(f"The shape of the embeddings matrix is: {rows_by_dims}")

The shape of the embeddings matrix is: (7922, 768)
