### 1. Install Dependencies

In [None]:
# Install the notebook's dependencies into the kernel's environment.
# NOTE(review): confirm that the `clip-by-openai` distribution provides the
# `clip` module imported below and is compatible with the torch version
# that gets installed alongside it.
%pip install torch torchvision clip-by-openai pillow

### 2. Load CLIP Model

In [None]:
import os

import clip
import torch
from PIL import Image

# Choose the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ViT-B/32 CLIP model onto that device; `clip.load` also returns the
# matching `preprocess` callable that is applied to images before encoding.
model, preprocess = clip.load("ViT-B/32", device=device)

### 3. Extract Image Features

In [None]:
def extract_image_features(image_path):
    """Encode one image file into an L2-normalized CLIP embedding.

    Args:
        image_path: Location of the image, passed straight to ``Image.open``.

    Returns:
        The result of ``model.encode_image`` for a batch holding this single
        image, divided by its norm along the last dimension.
    """
    # Open the file in a context manager so its handle is released as soon as
    # preprocessing has produced the input tensor, rather than staying open
    # until garbage collection (which matters when embedding many files).
    with Image.open(image_path) as img:
        # unsqueeze(0) adds the leading batch dimension before the device move.
        image = preprocess(img).unsqueeze(0).to(device)
    with torch.no_grad():
        image_features = model.encode_image(image)
    return image_features / image_features.norm(dim=-1, keepdim=True)  # Normalize

In [None]:
# Load images and compute embeddings
image_folder = "path/to/your/images"
# Extensions carry their leading dot so that a name which merely ends in these
# letters (e.g. "notes_jpg") is not handed to the image decoder.
image_extensions = (".png", ".jpg", ".jpeg")
image_features_dict = {}

# os.listdir returns entries in arbitrary order; sorting makes the dict order
# (and therefore the saved file and any tie-breaking later on) reproducible.
for filename in sorted(os.listdir(image_folder)):
    # Lower-case the name first so ".PNG" / ".JPG" files are picked up too.
    if filename.lower().endswith(image_extensions):
        image_path = os.path.join(image_folder, filename)
        image_features_dict[filename] = extract_image_features(image_path)

# Save image embeddings to a file
torch.save(image_features_dict, "image_embeddings.pt")
print("Image embeddings saved to 'image_embeddings.pt'")

### 4. Extract Text Features

In [None]:
def extract_text_features(text):
    """Encode a text prompt into an L2-normalized CLIP embedding.

    Args:
        text: The prompt string; it is tokenized as a one-element batch.

    Returns:
        The result of ``model.encode_text`` for that batch, divided by its
        norm along the last dimension.
    """
    tokens = clip.tokenize([text])
    tokens = tokens.to(device)

    # Gradients are not needed for feature extraction.
    with torch.no_grad():
        embedding = model.encode_text(tokens)

    # Scale to unit length along the feature axis.
    length = embedding.norm(dim=-1, keepdim=True)
    return embedding / length

In [None]:
# Prompts to embed
text_prompts = [
    "fisheye perspective, black hair",
    "fantasy landscape",
    "cyberpunk city",
]

# Build a prompt -> embedding mapping (one encoder call per prompt, in list
# order) and write it to disk.
text_features_dict = dict(
    (prompt_text, extract_text_features(prompt_text))
    for prompt_text in text_prompts
)
torch.save(text_features_dict, "text_embeddings.pt")
print("Text embeddings saved to 'text_embeddings.pt'")

In [None]:
# Reload the embedding dictionaries written by the earlier cells.
#
# map_location=device remaps the stored tensors onto the device chosen when
# the model was loaded. Without it, a file written from CUDA tensors fails to
# load on a CPU-only machine, and cached tensors could end up on a different
# device from freshly computed ones.
#
# NOTE: torch.load unpickles the file contents, so only load embedding files
# that you produced yourself or otherwise trust.

# Load image embeddings
image_features_dict = torch.load("image_embeddings.pt", map_location=device)

# Load text embeddings
text_features_dict = torch.load("text_embeddings.pt", map_location=device)

### 5. Compare Images and Text (Run Code)

In [None]:
import os

# Load images and compute embeddings
image_folder = "path/to/your/images"
# Extensions carry their leading dot so that a name which merely ends in these
# letters (e.g. "notes_jpg") is not handed to the image decoder.
image_extensions = (".png", ".jpg", ".jpeg")
image_features_dict = {}

# os.listdir returns entries in arbitrary order; sorting makes the dict order
# (and therefore tie-breaking in the ranking below) reproducible.
for filename in sorted(os.listdir(image_folder)):
    # Lower-case the name first so ".PNG" / ".JPG" files are picked up too.
    if filename.lower().endswith(image_extensions):
        image_path = os.path.join(image_folder, filename)
        image_features_dict[filename] = extract_image_features(image_path)

# Search for images matching a text prompt
query = "fisheye perspective, black hair"
text_features = extract_text_features(query)

# Compute cosine similarity between the query embedding and every image
# embedding; .item() turns each one-element result into a plain float.
similarities = {
    img_name: torch.cosine_similarity(text_features, img_features, dim=-1).item()
    for img_name, img_features in image_features_dict.items()
}

# Sort results by similarity, best match first
sorted_images = sorted(similarities.items(), key=lambda x: x[1], reverse=True)

# Display top matches (at most five; fewer if the folder has fewer images)
for img, score in sorted_images[:5]:
    print(f"{img}: {score:.4f}")


### 5b. Compare Images and Text (Using Saved Embeddings)

In [None]:
# Prompt to search for
query = "fisheye perspective, black hair"

# Reuse the stored embedding for this prompt when there is one; otherwise
# encode the prompt now.
try:
    text_features = text_features_dict[query]
except KeyError:
    text_features = extract_text_features(query)

# Score every image against the prompt with cosine similarity; .item() turns
# each one-element result into a plain float.
similarities = {
    name: torch.cosine_similarity(text_features, features, dim=-1).item()
    for name, features in image_features_dict.items()
}

# Rank (name, score) pairs from highest to lowest score
sorted_images = list(similarities.items())
sorted_images.sort(key=lambda pair: pair[1], reverse=True)

# Show the five best matches
top_matches = sorted_images[:5]
for img, score in top_matches:
    print(f"{img}: {score:.4f}")