In [1]:
import torch
import numpy as np
from transformers import AutoProcessor, AutoModel
from PIL import Image
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Install einops into the running kernel's environment (%pip targets the kernel, unlike !pip).
# NOTE(review): presumably required by the remote model code loaded in a later cell — confirm,
# and consider pinning a version so the notebook stays reproducible.
%pip install einops


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
from transformers import AutoModel
from transformers import AutoProcessor
# Load Nomic's Vision Embedding model
def load_nomic_vision_model(model_name="nomic-ai/nomic-embed-vision-v1.5"):
    """Load a vision embedding model together with its matching processor.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both the model
            and the processor, so the two can never drift apart. Defaults to
            the Nomic vision checkpoint this notebook was written against.

    Returns:
        tuple: ``(model, processor)`` as returned by ``AutoModel`` and
        ``AutoProcessor`` respectively.
    """
    # trust_remote_code=True runs Python code shipped with the checkpoint;
    # only pass identifiers from sources you trust.
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
    return model, processor

# Load the model and its processor once at notebook scope; the classification
# cell further down passes these two objects into the helper functions.
model, processor = load_nomic_vision_model()


A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- configuration_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- modeling_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


In [4]:
def extract_features(image_path, model, processor):
    """Embed a single image file as a 1-D NumPy feature vector.

    The image is converted to RGB, preprocessed by ``processor``, run through
    ``model`` without gradient tracking, and the ``last_hidden_state`` is
    averaged over dimension 1.

    Args:
        image_path: Path (or file object) accepted by ``Image.open``.
        model: A ``torch.nn.Module`` whose output exposes ``last_hidden_state``.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            that returns a mapping of keyword arguments for ``model``.

    Returns:
        numpy.ndarray: The pooled features with the batch dimension removed.
    """
    # The context manager guarantees the file handle is released even if
    # decoding fails; convert() returns an independent in-memory copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    inputs = processor(images=image, return_tensors="pt")

    # Move the inputs to wherever the model lives so a GPU-resident model does
    # not fail with a device mismatch (the original only handled CPU models).
    first_param = next(model.parameters(), None)
    if first_param is not None:
        inputs = {
            name: value.to(first_param.device) if hasattr(value, "to") else value
            for name, value in inputs.items()
        }

    with torch.no_grad():
        # NOTE(review): this mean-pools over dimension 1 of last_hidden_state;
        # the checkpoint's model card appears to use the first (CLS) token
        # instead — confirm mean pooling is intended.
        features = model(**inputs).last_hidden_state.mean(dim=1)

    # Drop only the batch axis; a bare squeeze() would also collapse any other
    # size-1 dimension.
    return features.squeeze(0).cpu().numpy()

In [5]:
def get_average_feature_vectors(support_set, model, processor):
    """Build one averaged feature vector per class from its support images.

    Args:
        support_set: Mapping of class label -> iterable of image paths.
        model: Model forwarded unchanged to ``extract_features``.
        processor: Processor forwarded unchanged to ``extract_features``.

    Returns:
        dict: Class label -> element-wise mean (``np.mean(..., axis=0)``) of
        the feature vectors extracted from that class's images.
    """
    return {
        label: np.mean(
            [extract_features(path, model, processor) for path in image_paths],
            axis=0,
        )
        for label, image_paths in support_set.items()
    }

In [6]:
def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Args:
        a: First vector (array-like).
        b: Second vector (array-like), same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either vector has
        zero norm.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # A zero vector has no direction. Report "no similarity" instead of
        # dividing by zero and leaking NaN into the ranking, where it would
        # make the sort order unreliable.
        return 0.0
    return np.dot(a, b) / denominator

def rank_classes(query_image_path, class_features, model, processor):
    """Score every class against a query image and order them best-first.

    Args:
        query_image_path: Path of the image to classify.
        class_features: Mapping of class label -> reference feature vector.
        model: Model forwarded to ``extract_features``.
        processor: Processor forwarded to ``extract_features``.

    Returns:
        tuple: ``(ranked_classes, scores)`` where ``ranked_classes`` lists the
        labels by descending cosine similarity and ``scores`` maps each label
        to its similarity with the query.
    """
    query_vector = extract_features(query_image_path, model, processor)

    scores = {
        label: cosine_similarity(query_vector, prototype)
        for label, prototype in class_features.items()
    }

    # Highest similarity first.
    ranked_classes = sorted(scores, key=lambda label: scores[label], reverse=True)
    return ranked_classes, scores

In [7]:
def zero_shot_classification(support_set, query_image_path, model, processor):
    """Predict the class of a query image from a labelled support set.

    Computes one averaged feature vector per class with
    ``get_average_feature_vectors``, ranks the classes against the query with
    ``rank_classes``, and reports the top-ranked label.

    Args:
        support_set: Mapping of class label -> list of reference image paths.
        query_image_path: Path of the image to classify.
        model: Model forwarded to the helper functions.
        processor: Processor forwarded to the helper functions.

    Returns:
        tuple: ``(best_label, scores)`` — the highest-ranked label and the
        per-class similarity scores.
    """
    prototypes = get_average_feature_vectors(support_set, model, processor)
    ranking, scores = rank_classes(query_image_path, prototypes, model, processor)

    # The ranking is ordered best-first, so its head is the prediction.
    best_label = ranking[0]
    return best_label, scores

In [10]:
# Example support set: each class label maps to a few reference image paths
# (here "class_1" is given cat images and "class_2" dog images).
support_set = {
    "class_1": ["cat1.jpg", "cat2.jpg"],
    "class_2": ["dog1.jpg", "dog2.jpg"],
    # Add more classes as needed
}

# Path to the image that should be classified
query_image_path = "cattest.jpeg"

# Classify the query against the support set; returns the best-matching label
# and the per-class similarity scores.
predicted_class, similarity_scores = zero_shot_classification(support_set, query_image_path, model, processor)

print(f"Predicted class: {predicted_class}")
print(f"Similarity scores: {similarity_scores}")

Predicted class: class_1
Similarity scores: {'class_1': 0.85881895, 'class_2': 0.65683764}


In [11]:
# Re-display the predicted class and per-class similarity scores computed in the
# previous cell (no recomputation happens here).
print(f"Predicted class: {predicted_class}")
print(f"Similarity scores: {similarity_scores}")

Predicted class: class_1
Similarity scores: {'class_1': 0.85881895, 'class_2': 0.65683764}
