## Import libraries

In [1]:
import os
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http import models
from app.utils import get_video_pack_files, get_keyframe_data
from app.PATH import CLIP_FEATURES_PATH, MAP_KEYFRAMES_PATH



## Create Qdrant client

In [2]:
client = QdrantClient(host="localhost", port=6333)
client

<qdrant_client.qdrant_client.QdrantClient at 0x1dbc0b384d0>

## Constant

In [3]:
VIDEO_PACK_LIST = [pack for pack in range(21, 31)]
COLLECTION_NAME = "my_collection"

## Create colletion

In [6]:
if COLLECTION_NAME not in [c.name for c in client.get_collections().collections]:
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=models.VectorParams(size=512, distance=models.Distance.COSINE),
        optimizers_config=models.OptimizersConfigDiff(
            indexing_threshold=0,
        ),
    )
    print(f"Collection '{COLLECTION_NAME}' created.")
else:
    print(f"Collection '{COLLECTION_NAME}' already exists.")

Collection 'my_collection' created.


## Upsert data automatically

In [7]:
start_id = 0
process_file = 0
if not os.path.exists(CLIP_FEATURES_PATH):
    print("The folder does not exist")
else:
    start_id = 0
    print(os.listdir(CLIP_FEATURES_PATH)[:5])
    for pack in VIDEO_PACK_LIST:
        video_pack = f"L{pack}"
        print(f"Process {video_pack}")
        files_per_video_pack = get_video_pack_files(CLIP_FEATURES_PATH, video_pack)
        # A list of .npy files, each file contains a list of 512 features vectors, 
        # each vector is embedded from a keyframe of the video that has the same name as the file

        if len(files_per_video_pack) == 0:
            print(f"No files found for video pack {video_pack}")
            continue

        for file in files_per_video_pack:
            # Process each video features in the pack
            feature = np.load(os.path.join(CLIP_FEATURES_PATH, file))
            if feature.shape[1] != 512:
                print(f"File {file} does not have 512 features, skipping")
                continue

            # Insert the feature into the collection
            file_name = file[:-4]
            num_frames = feature.shape[0]
            df = get_keyframe_data(MAP_KEYFRAMES_PATH, file_name)
            payloads = [{
                "origin": file_name[:3], 
                "video": file_name[4:],
                "frame_index": df.iloc[i]["frame_idx"], 
                "frame": f"00{i + 1}.jpg" if i + 1 < 10 else f"0{i + 1}.jpg" if i + 1 < 100 else f"{i + 1}.jpg"
                }
                for i in range(num_frames)
            ]

            client.upsert(
                collection_name=COLLECTION_NAME,
                points=models.Batch(
                    ids=range(start_id, start_id + feature.shape[0]),
                    vectors=feature.tolist(),
                    payloads=payloads
                )
            )
            start_id += feature.shape[0]

['L21_V001.npy', 'L21_V002.npy', 'L21_V003.npy', 'L21_V005.npy', 'L21_V006.npy']
Process L21
Process L22
Process L23
Process L24
Process L25
Process L26
Process L27
Process L28
Process L29
Process L30


## Upsert data manually (1 pack at a time)

In [None]:
# start_id = 0

In [None]:
# video_pack = "L30" # manually iterate from this list ["L21", "L22", "L23", ..., "L30"]

# files_per_video_pack = get_video_pack_files(CLIP_FEATURES_PATH, video_pack)
# # A list of .npy files, each file contains a list of 512 features vectors,
# # each vector is embedded from a keyframe of the video that has the same name as the file

# if len(files_per_video_pack) == 0:
#     print(f"No files found for video pack {video_pack}")
# else:
#     for file_name in files_per_video_pack:
#         feature = np.load(os.path.join(CLIP_FEATURES_PATH, file_name))
#         if feature.shape[1] != 512:
#             print(f"File {file_name} does not have 512 features, skipping")
#             continue
#         print(f"Processing file {file_name} with shape {feature.shape}")

#         # Insert the feature into the collection
#         num_frames = feature.shape[0]
#         payloads = [
#             {"origin": file_name[:-4], "frame_id": i + 1}
#             for i in range(num_frames)
#         ]

#         client.upsert(
#             collection_name=COLLECTION_NAME,
#             points=models.Batch(
#                 ids=range(start_id, start_id + feature.shape[0]),
#                 vectors=feature.tolist(),
#                 payloads=payloads
#             )
#         )
#         start_id += feature.shape[0]

## Check collection status

In [10]:
print(client.get_collection(COLLECTION_NAME))

status=<CollectionStatus.GREEN: 'green'> optimizer_status=<OptimizersStatusOneOf.OK: 'ok'> vectors_count=None indexed_vectors_count=177321 points_count=177321 segments_count=5 config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=512, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_con

In [13]:
def count_points_without_vectors(client, collection_name):
    offset = None
    count = 0
    while True:
        points, offset = client.scroll(
            collection_name=collection_name,
            with_vectors=True,
            offset=offset,
            limit=1000  # batch size
        )
        for point in points:
            if point.vector is None:
                count += 1
        if offset is None:
            break
    return count

no_vector_count = count_points_without_vectors(client, COLLECTION_NAME)
print(f"Points with no vectors: {no_vector_count}")

Points with no vectors: 0


## Enable indexing

In [9]:
client.update_collection(
    collection_name=COLLECTION_NAME,
    optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000),
)

True

## Delete collection

In [5]:
client.delete_collection(collection_name=COLLECTION_NAME)

True

## Delete all points

In [None]:
# client.delete(
#     collection_name=COLLECTION_NAME,
#     points_selector=models.FilterSelector(
#         filter=models.Filter(  # match all points
#             must=[]
#         )
#     )
# )

UpdateResult(operation_id=873, status=<UpdateStatus.COMPLETED: 'completed'>)

## Count all points

In [None]:
client.count(
    collection_name=COLLECTION_NAME,
    count_filter=models.Filter(
        must=[]
    ),
    exact=True,
)
# 177321

CountResult(count=177321)

## Retrive a list of points

In [11]:
client.retrieve(
    collection_name=COLLECTION_NAME,
    ids=[0, 9, 99],
)

[Record(id=0, payload={'origin': 'L21', 'video': 'V001', 'frame_index': 0.0, 'frame': '001.jpg'}, vector=None, shard_key=None, order_value=None),
 Record(id=9, payload={'origin': 'L21', 'video': 'V001', 'frame_index': 1131.0, 'frame': '010.jpg'}, vector=None, shard_key=None, order_value=None),
 Record(id=99, payload={'origin': 'L21', 'video': 'V001', 'frame_index': 12450.0, 'frame': '100.jpg'}, vector=None, shard_key=None, order_value=None)]

In [None]:
client.search(
    collection_name=COLLECTION_NAME,
    query_vector=query_vector, # assuming query_vector is defined, a python list of 512 features (float)
    with_payload=True,
    with_vectors=True,
    limit=20,
)

## Verify Section

In [None]:
import torch
from transformers import AutoProcessor, AutoTokenizer, CLIPModel

In [None]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")

In [None]:
image = Image.open(os.path.join(os.getcwd(), "keyframes", "L23_V001", "001.jpg"))
image = image.convert("RGB")  # Ensure the image is in RGB format

In [10]:
inputs = processor(images=image, text=["a group of bikers", "a man holding a sign of number 2", "a cow and a woman are waving"], return_tensors="pt", padding=True)
outputs = model(**inputs)
logits_per_image = outputs.logits_per_image
probs = logits_per_image.softmax(dim=1)
print(probs)

tensor([[0.0011, 0.0084, 0.9905]], grad_fn=<SoftmaxBackward0>)


In [None]:
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")
inputs = tokenizer(["a group of bikers", "a man holding a sign of number 2", "a cow and a woman are waving"], return_tensors="pt", padding=True)
text_features = model.get_text_features(**inputs)
print(text_features.shape)

processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
inputs = processor(images=image, return_tensors="pt")
image_features = model.get_image_features(**inputs)
print(image_features.shape)

tensor([[ 0.1417,  0.1083,  0.1551,  ...,  0.1857, -0.1058, -0.1027],
        [ 0.2465, -0.0720, -0.2029,  ..., -0.8686,  0.1537, -0.3400],
        [ 0.3818,  0.2169, -0.2657,  ..., -0.1489,  0.2350, -0.2732]],
       grad_fn=<MmBackward0>)
torch.Size([3, 512])
torch.Size([1, 512])


Recreate embedding feature from L23_V001/001.jpg

In [None]:
# query_filter = models.Filter(
#     must=[
#         models.FieldCondition(
#             key="origin",
#             match=models.MatchValue(value="L23_V001")
#         )
#     ]
# )

In [None]:
# points, next_page = client.scroll(
#     COLLECTION_NAME,
#     limit=10,
#     with_payload=True,
#     with_vectors=True,
#     scroll_filter=query_filter,    
# )
# print(points[0].payload)

In [61]:
image = Image.open(os.path.join(os.getcwd(), "keyframes", "L23_V001", "001.jpg"))
image = image.convert("RGB")  # Ensure the image is in RGB format
image.show()

In [None]:
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)   # should be (1, 3, 224, 224)
print(inputs["pixel_values"][0, :, :5, :5])  # peek at the first 5×5 patch

torch.Size([1, 3, 224, 224])
tensor([[[ 0.5581, -0.1280, -0.3032, -0.2594, -0.2594],
         [ 1.1128,  0.5143,  0.3537,  0.3245,  0.2953],
         [ 0.5727,  0.5581,  0.3975,  0.3391,  0.2661],
         [ 0.5581,  0.4413,  0.2953,  0.2953,  0.2807],
         [ 0.5289,  0.3391,  0.2077,  0.2369,  0.2077]],

        [[-0.1613, -0.5365, -0.5515, -0.5665, -0.5665],
         [ 0.2439, -0.0262,  0.0338,  0.0338,  0.0038],
         [-0.3714, -0.1012,  0.0338,  0.0789,  0.0338],
         [-0.4014, -0.2363, -0.0862,  0.0038,  0.0488],
         [-0.3714, -0.2663, -0.1613, -0.0712, -0.0112]],

        [[-0.0013, -0.4137, -0.4564, -0.4279, -0.4564],
         [ 0.4679,  0.0840,  0.0555,  0.0271, -0.0298],
         [-0.0867,  0.0555,  0.0129, -0.0440, -0.1293],
         [-0.1009, -0.1009, -0.1293, -0.1009, -0.1435],
         [-0.0867, -0.1578, -0.2431, -0.1862, -0.2004]]])


In [None]:
image_features = model.get_image_features(**inputs)
print(image_features.shape)

In [68]:
ref = np.load(os.path.join(os.getcwd(), "clip-features-32", "L23_V001.npy"))  # given embedding
ref = ref[0]

In [None]:
with torch.no_grad():
    new_emb = model.get_image_features(**inputs).cpu().numpy()

# Normalize both (in case ref was normalized)
ref = ref / np.linalg.norm(ref)
new_emb = new_emb / np.linalg.norm(new_emb)

# Compare
cosine_sim = np.dot(ref, new_emb.T).squeeze()
print("Cosine similarity:", cosine_sim)

Cosine similarity: 1.0000551
