In [1]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
from PIL import Image
import requests

processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
vision_model = AutoModel.from_pretrained(
    "nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True
)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
images = [
    Image.open(requests.get(url, stream=True).raw),
    Image.open("../data/cat1.png"),
    Image.open("../data/cat2.png"),
    Image.open("../data/img1.png"),
    Image.open("../data/img2.png"),
    Image.open("../data/2458276043_ec7b8d20c6_o.jpg"),
]

inputs = processor(images, return_tensors="pt")

img_emb = vision_model(**inputs).last_hidden_state
print(img_emb.shape)
img_embeddings = F.normalize(img_emb[:, 0], p=2, dim=1)

img_embeddings = img_embeddings.detach().numpy()
img_embeddings.shape

  from .autonotebook import tqdm as notebook_tqdm


torch.Size([6, 197, 768])


(6, 768)

In [2]:
a, b = img_embeddings[0], img_embeddings[-1]

In [4]:
import numpy as np


def cosine_similarity(a, b) -> float:
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def scaled_dot(a, b):
    return (a @ b) / a.shape[0]


def eucliad(a, b):
    return ((a - b) ** 2).sum()


def manh(a, b):
    return np.abs(a - b).sum()

In [5]:
scaled_dot(a, a)

0.0013020836437741916

In [6]:
scaled_dot(a, b), cosine_similarity(a, b)

(0.0008872523903846741, 0.6814098)

In [7]:
for i in range(img_embeddings.shape[0]):
    for j in range(img_embeddings.shape[0]):
        qi, qj = img_embeddings[i], img_embeddings[j]
        print(
            i,
            j,
            f"{scaled_dot(qi, qj):.6f}, {cosine_similarity(qi,qj):.6f}, {eucliad(qi, qj):.6f}, {manh(qi, qj):6f}",
        )

0 0 0.001302, 1.000000, 0.000000, 0.000000
0 1 0.001216, 0.933559, 0.132881, 8.168940
0 2 0.001180, 0.906399, 0.187203, 9.665936
0 3 0.000842, 0.647037, 0.705927, 18.488314
0 4 0.000873, 0.670283, 0.659434, 17.922909
0 5 0.000887, 0.681410, 0.637181, 17.321896
1 0 0.001216, 0.933559, 0.132881, 8.168940
1 1 0.001302, 1.000000, 0.000000, 0.000000
1 2 0.001216, 0.933744, 0.132512, 7.950055
1 3 0.000827, 0.634808, 0.730384, 19.027008
1 4 0.000856, 0.657391, 0.685217, 18.346832
1 5 0.000885, 0.679330, 0.641341, 17.422548
2 0 0.001180, 0.906399, 0.187203, 9.665936
2 1 0.001216, 0.933744, 0.132512, 7.950055
2 2 0.001302, 1.000000, 0.000000, 0.000000
2 3 0.000853, 0.655093, 0.689814, 18.402540
2 4 0.000911, 0.699558, 0.600885, 17.213663
2 5 0.000913, 0.700995, 0.598010, 16.776394
3 0 0.000842, 0.647037, 0.705927, 18.488314
3 1 0.000827, 0.634808, 0.730384, 19.027008
3 2 0.000853, 0.655093, 0.689814, 18.402540
3 3 0.001302, 1.000000, 0.000000, 0.000000
3 4 0.001227, 0.942416, 0.115168, 7.529159

In [8]:
from qdrant_client import QdrantClient

#https://python-client.qdrant.tech/quickstart#Qdrant-without-fastembed

client = QdrantClient(host="0.0.0.0", port=6333)

collection_name = "my_collection"

In [9]:
from qdrant_client.models import VectorParams, Distance


if not client.collection_exists(collection_name=collection_name):
    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=768, distance=Distance.EUCLID),
    )

In [10]:
from qdrant_client.models import PointStruct
from torch import embedding

vectors = img_embeddings
client.upsert(
    collection_name="my_collection",
    points=[
        PointStruct(
            id=idx,
            vector=vector.tolist(),
            payload={"color": "red", "idx": idx,"value": 0},
        )
        for idx, vector in enumerate(vectors)
    ],
)

UpdateResult(operation_id=7, status=<UpdateStatus.COMPLETED: 'completed'>)

In [11]:
from qdrant_client.http.models import Filter, FieldCondition, MatchValue

In [12]:
query_vector = img_embeddings[0]
hits = client.search(
    collection_name="my_collection",
    query_vector=query_vector,
    limit=5,
    with_vectors=True
)
hits

[ScoredPoint(id=0, version=7, score=0.0, payload={'color': 'red', 'idx': 0, 'value': 0}, vector=[0.0047529037, -0.025439693, 0.0066818185, -0.029589424, -0.043529812, -0.01226674, 0.0024384656, -0.03605475, -0.07034518, -0.018580481, -0.044000436, -0.017478108, -0.013517745, -0.062596835, -0.06425236, -0.025551684, 0.0052608815, -0.04055552, -0.018450487, 0.001620729, -0.057390146, -0.011386606, -0.027567793, -0.014647286, 0.03718588, -0.059439473, -0.04885769, -0.02706275, -0.047969926, -0.029087596, -0.043004144, -0.053141832, -0.016649954, -0.027782356, -0.035605066, 0.0106917275, -0.036190957, -0.025946444, -0.060530763, -0.020660784, -0.03434099, -0.031145306, -0.0351838, 0.011066715, -0.046757933, 0.0026560386, -0.020195963, -0.05664619, -0.044104617, -0.039693292, -0.030171966, -0.047596965, -0.018873528, -0.015511736, -0.058378443, -0.013858716, -0.090495676, -0.02728211, -0.020452017, 0.008528679, -0.020871257, -0.032452196, -0.023346754, -0.01619308, -0.042959854, 0.035032954