In [1]:
from min_vec import MinVectorDB

In [2]:
# Open the database at the given URL (presumably a running MinVectorDB
# server on localhost -- TODO confirm).
my_db = MinVectorDB("http://localhost:7637")
# Alternatively, pass a path instead of a URL:
# my_db = MinVectorDB("my_vec")

In [3]:
# Obtain the 128-dimensional 'test_vec' collection used by the rest of the
# notebook. NOTE(review): drop_if_exists=True presumably discards any previous
# 'test_vec' data so each run starts from scratch -- confirm against the API.
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,
    index_mode='FLAT',
    drop_if_exists=True,
    warm_up=True,
)

In [4]:
from tqdm import trange

import numpy as np

with collection.insert_session():
    # The very first random draw is kept as `query` and stored as item 0;
    # every other item gets its own fresh random 128-dimensional vector.
    query = np.random.random(128)
    vectors = []
    for i in range(100000):
        vec = query if i == 0 else np.random.random(128)
        # Each entry is (vector, id, fields); ids are grouped in runs of 1000
        # that share one "test" field value ("test_0", "test_1", ...).
        vectors.append((vec, i, {"test": f"test_{i // 1000}"}))
    # Hand the whole list over in a single bulk call.
    collection.bulk_add_items(vectors)

Adding items: 100%|██████████| 100/100 [00:01<00:00, 55.82batch/s]


## Run a query

In [5]:
# Look up the 12 best matches for a fresh random vector; the cell displays the
# returned pair of arrays (result IDs, similarity scores).
collection.query(vector=np.random.random(128), k=12)

(array([51928, 35231, 10788,  9778, 51419, 41854, 12926, 50371, 92764,
        94265, 78287, 83319]),
 array([0.85514951, 0.84728801, 0.84626621, 0.8415463 , 0.84127688,
        0.84093714, 0.83911288, 0.83817422, 0.83700967, 0.83690906,
        0.83659285, 0.83584338]))

## Print the most recent query report

In [6]:
# Show the report of the most recent query; print() is used so the report's
# multi-line text layout is rendered as-is.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.09467 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [51928 35231 10788  9778 51419 41854 12926 50371 92764 94265 78287 83319]
| - Top 12 Results Similarity: [0.85514951 0.84728801 0.84626621 0.8415463  0.84127688 0.84093714
 0.83911288 0.83817422 0.83700967 0.83690906 0.83659285 0.83584338]
* - END OF REPORT -



## Using multiple threads to speed up queries

In [7]:
# Request 'test_vec' again, this time with n_threads=12, and rebind
# `collection` to the returned handle.
collection = my_db.require_collection(
    collection='test_vec',
    warm_up=True,
    n_threads=12,
)

In [8]:
import numpy as np

# Same kind of lookup as before (12 best matches for a fresh random vector),
# now issued through the collection handle created with n_threads=12.
collection.query(vector=np.random.random(128), k=12)

(array([94266, 99764, 52370, 18625, 83560, 37216, 88531, 92751, 36865,
        82313,  5831, 24125]),
 array([0.84063417, 0.83860576, 0.8363837 , 0.83500779, 0.83492041,
        0.83388281, 0.8333267 , 0.83215773, 0.83001536, 0.82889783,
        0.82689053, 0.82600659]))

In [9]:
# Report for the query just run with n_threads=12; compare its "Query Time"
# line with the report printed for the earlier query.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.01483 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [94266 99764 52370 18625 83560 37216 88531 92751 36865 82313  5831 24125]
| - Top 12 Results Similarity: [0.84063417 0.83860576 0.8363837  0.83500779 0.83492041 0.83388281
 0.8333267  0.83215773 0.83001536 0.82889783 0.82689053 0.82600659]
* - END OF REPORT -



## Using a Filter to narrow down the search range

In [10]:
import operator

from min_vec.core_components.filter import Filter, FieldCondition, MatchField, IDCondition, MatchID


# Filtered search: up to 10 results are requested, but the filter below leaves
# only five candidate items, so five results come back.
collection.query(
    np.random.random(128),
    k=10,
    query_filter=Filter(
        must=[
            # Field condition: the item's 'test' field has to match 'test_0'.
            FieldCondition(key='test', matcher=MatchField('test_0')),
            # ID condition: the item's ID has to be one of 1..5.
            IDCondition(MatchID(list(range(1, 6)))),
        ],
        # Illustrative `any` clause (left disabled). Note that the items
        # inserted earlier only carry a 'test' field, so the 'field' and
        # 'order' keys below would need matching data to be useful.
        # any=[
        #     FieldCondition(key='field', matcher=MatchField('test_1')),
        #     FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
        #     IDCondition(MatchID([1, 2, 3, 4, 5])),
        # ]
    ),
)

(array([1, 5, 2, 4, 3]),
 array([0.80589008, 0.78945273, 0.74684119, 0.73874426, 0.71410728]))