In [1]:
from min_vec import MinVectorDB

In [2]:
# Connect to a MinVectorDB server running locally.
my_db = MinVectorDB("http://localhost:7637")
# Alternative: the server is running in Docker.
# my_db = MinVectorDB("http://localhost:5403")
# Alternative: pass a path instead of a URL
# (presumably opens a local on-disk database — TODO confirm).
# my_db = MinVectorDB("my_vec")

In [5]:
# Obtain the 'test_vec' collection, configured for 128-dimensional vectors
# with index_mode='FLAT'.
# NOTE(review): drop_if_exists=True presumably discards any previous
# collection of the same name, so re-running this cell starts from empty — confirm.
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,
    index_mode='FLAT',
    drop_if_exists=True,
    warm_up=True,
)

In [6]:
from tqdm import trange

import numpy as np

# Fill the collection with 100,000 random 128-dimensional vectors.
# Everything is built inside one insert session and submitted with a
# single bulk_add_items call.
with collection.insert_session():
    # One (vector, integer id, fields dict) tuple per item. The "test" field
    # groups consecutive ids in runs of 1000: "test_0", "test_1", ...
    vectors = [
        (np.random.random(128), idx, {"test": f"test_{idx // 1000}"})
        for idx in range(100000)
    ]
    # Keep a reference to the vector stored under id 0.
    query = vectors[0][0]
    collection.bulk_add_items(vectors)

## Run a query

In [7]:
# Ask for the 12 best matches (k=12) to a freshly drawn random 128-dimensional vector.
# The recorded output below is a pair of arrays: the matched IDs and their similarity scores.
collection.query(np.random.random(128), k=12)

(array([73697, 77702, 17807, 42889, 99631, 46444, 30706, 49453, 45300,
        67957, 10258,  3366]),
 array([0.85500115, 0.8475976 , 0.84583485, 0.84516144, 0.84397185,
        0.84379965, 0.84291363, 0.84267998, 0.84207964, 0.84137881,
        0.84128839, 0.83975387]))

## Print the most recent query report

In [8]:
# query_report_ is a text summary of the most recent query: collection shape,
# query time, distance, k, and the returned IDs with their similarities.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.13376 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [73697 77702 17807 42889 99631 46444 30706 49453 45300 67957 10258  3366]
| - Top 12 Results Similarity: [0.85500115 0.8475976  0.84583485 0.84516144 0.84397185 0.84379965
 0.84291363 0.84267998 0.84207964 0.84137881 0.84128839 0.83975387]
* - END OF REPORT -



## Using multiple threads to speed up queries

In [9]:
# Request the 'test_vec' collection again, this time with n_threads=12,
# and rebind `collection` to the returned handle.
collection = my_db.require_collection(
    'test_vec',
    warm_up=True,
    n_threads=12,
)

In [10]:
import numpy as np

# Same kind of query as before (random 128-dimensional vector, k=12), now issued
# through the collection handle that was requested with n_threads=12.
collection.query(np.random.random(128), k=12)

(array([81941, 62327, 72022, 59189, 21910, 79938, 69486, 43378, 74343,
        96460, 92080, 63112]),
 array([0.84530234, 0.84270543, 0.84203595, 0.84162509, 0.8370254 ,
        0.83698463, 0.83687979, 0.83664346, 0.8365944 , 0.83587211,
        0.83585978, 0.83529031]))

In [11]:
# Print the report for the query above; its "Query Time" line can be compared
# with the one in the earlier report.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.01851 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [81941 62327 72022 59189 21910 79938 69486 43378 74343 96460 92080 63112]
| - Top 12 Results Similarity: [0.84530234 0.84270543 0.84203595 0.84162509 0.8370254  0.83698463
 0.83687979 0.83664346 0.8365944  0.83587211 0.83585978 0.83529031]
* - END OF REPORT -



## Using a Filter to narrow down the search range

In [12]:
import operator

from min_vec.structures.filter import Filter, FieldCondition, MatchField, IDCondition, MatchID


# Filtered search: draw the random query vector first, then describe the
# conditions a result has to satisfy, then run the query with k=10.
probe_vector = np.random.random(128)

field_condition = FieldCondition(key='test', matcher=MatchField('test_0'))  # Support for filtering fields
id_condition = IDCondition(MatchID([1, 2, 3, 4, 5]))  # Support for filtering IDs

# NOTE(review): the recorded output below lists IDs outside [1..5] (all are
# below 1000, which matches the 'test_0' field) — confirm how `must` combines
# the two conditions, or re-run the cell to refresh the output.
search_filter = Filter(
    must=[field_condition, id_condition],
    # any=[
    #     FieldCondition(key='field', matcher=MatchField('test_1')),
    #     FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
    #     IDCondition(MatchID([1, 2, 3, 4, 5])),
    # ]
)

collection.query(vector=probe_vector, k=10, query_filter=search_filter)

(array([409, 300, 364, 654, 762, 767, 204, 568, 590, 370]),
 array([0.82831156, 0.81029993, 0.8094672 , 0.80870515, 0.80779004,
        0.80695492, 0.80680823, 0.80619109, 0.80531538, 0.8032192 ]))