In [1]:
from min_vec import MinVectorDB

In [2]:
# Connect to a MinVectorDB server on this machine (port 7637).
my_db = MinVectorDB("http://localhost:7637")
# Alternative: if the server runs in Docker, connect on port 5403 instead.
# my_db = MinVectorDB("http://localhost:5403")
# Alternative: pass a path instead of a URL
# (presumably opens a database stored at that path — TODO confirm).
# my_db = MinVectorDB("my_vec")

In [3]:
# Obtain the 'test_vec' collection that the rest of this notebook works on.
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,              # same length as the vectors generated with np.random.random(128)
    index_mode='FLAT',
    drop_if_exists=True,  # presumably discards any earlier 'test_vec' so each run starts empty — TODO confirm
    warm_up=True,
)

In [4]:
from tqdm import trange

import numpy as np

# Fill the collection with 100000 random 128-dimensional vectors.
# Each item is a (vector, id, fields) tuple: the id is the loop index and the
# 'test' field groups ids in blocks of 1000 ("test_0" ... "test_99").
with collection.insert_session():
    vectors = []
    for i in range(100000):
        vec = np.random.random(128)
        if i == 0:
            # Keep a handle on the very first vector under the name `query`.
            query = vec
        vectors.append((vec, i, {"test": f"test_{i // 1000}"}))
    # Hand the whole batch to the collection in a single call.
    collection.bulk_add_items(vectors)

## Run a query

In [5]:
# Query with a fresh random 128-dimensional vector and ask for the top 12 matches.
# The recorded output is a pair of arrays: the matching ids and their similarities.
collection.query(np.random.random(128), k=12)

(array([ 7679, 90229, 24612, 68119, 21638, 58263, 46543,  6583, 27181,
        21306, 39178, 99607]),
 array([0.85250688, 0.85047561, 0.85037601, 0.85004568, 0.84859741,
        0.8456679 , 0.84561676, 0.84544003, 0.84468186, 0.84423292,
        0.84214365, 0.84154063]))

## Print the most recent query report

In [6]:
# Show the report stored on the collection for the most recent query.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.15326 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [ 7679 90229 24612 68119 21638 58263 46543  6583 27181 21306 39178 99607]
| - Top 12 Results Similarity: [0.85250688 0.85047561 0.85037601 0.85004568 0.84859741 0.8456679
 0.84561676 0.84544003 0.84468186 0.84423292 0.84214365 0.84154063]
* - END OF REPORT -



## Using multiple threads to speed up queries

In [7]:
# Request the 'test_vec' collection again, this time with n_threads=12.
# No drop_if_exists is passed here; the report printed afterwards still shows
# the (100000, 128) shape, so the previously inserted data is reused.
collection = my_db.require_collection(
    'test_vec',
    warm_up=True,
    n_threads=12,
)

In [8]:
import numpy as np

# Same kind of query as before (random vector, top 12), now issued through the
# collection handle that was requested with n_threads=12.
collection.query(np.random.random(128), k=12)

(array([24281, 90331, 11688, 10765, 68975, 42201, 70065, 80765, 96349,
        29354, 91688,  8939]),
 array([0.85525101, 0.85452056, 0.852422  , 0.84794348, 0.84703207,
        0.84698379, 0.84631741, 0.84555084, 0.84501308, 0.84477574,
        0.84126043, 0.84109133]))

In [9]:
# Show the report for the threaded query. In the saved run the reported
# query time is 0.02677 s, versus 0.15326 s for the earlier query.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.02677 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [24281 90331 11688 10765 68975 42201 70065 80765 96349 29354 91688  8939]
| - Top 12 Results Similarity: [0.85525101 0.85452056 0.852422   0.84794348 0.84703207 0.84698379
 0.84631741 0.84555084 0.84501308 0.84477574 0.84126043 0.84109133]
* - END OF REPORT -



## Using a Filter to narrow down the search range

In [10]:
import operator

from min_vec.structures.filter import Filter, FieldCondition, MatchField, IDCondition, MatchID


# Query with a random vector, restricting candidates through a Filter.
# The `must` list combines a field condition (items whose 'test' field is
# 'test_0', i.e. ids 0-999 as inserted above) with an id condition.
# NOTE(review): the saved output of this cell lists ids such as 625 and 746,
# which are outside MatchID([1, 2, 3, 4, 5]), and returns 10 rows although only
# 5 ids are allowed. Either the output is stale or `must` does not intersect
# the conditions — confirm against the library and re-run.
collection.query(
    vector=np.random.random(128), 
    k=10, 
    query_filter=Filter(
        must=[
            FieldCondition(key='test', matcher=MatchField('test_0')),  # Support for filtering fields
            IDCondition(MatchID([1, 2, 3, 4, 5]))  # Support for filtering IDs
        ], 
        # Example of an `any` clause (left disabled); it is the only place the
        # `operator` import of this cell is used.
        # any=[
        #     FieldCondition(key='field', matcher=MatchField('test_1')),
        #     FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
        #     IDCondition(MatchID([1, 2, 3, 4, 5])),
        # ]
    )
)

(array([625, 746, 954, 922, 767, 358, 545, 259, 180, 477]),
 array([0.81069911, 0.80934203, 0.8072319 , 0.80611736, 0.8055706 ,
        0.80454683, 0.80333745, 0.79985464, 0.79888523, 0.79844809]))