In [1]:
from min_vec import MinVectorDB

In [2]:
# Create the database handle. The argument is the database root path; the
# initialization log shows it resolved under the current working directory.
my_db = MinVectorDB('my_vec_db')

MinVectorDB - INFO - Successful initialization of MinVectorDB in root_path: /Users/guobingming/projects/MinVectorDB/tutorials/my_vec_db


In [3]:
# Obtain the 128-dimensional 'test_vec' collection used throughout this
# tutorial. With drop_if_exists=True an already existing 'test_vec' is dropped
# first (the log reports "already exists. Dropped.").
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,
    index_mode='FLAT',
    drop_if_exists=True,
    warm_up=True,
)

MinVectorDB - INFO - Creating collection test_vec with: 
//    dim=128, collection='test_vec', 
//    n_clusters=16, chunk_size=100000,
//    distance='cosine', index_mode='FLAT', 
//    dtypes='float32', use_cache=True, 
//    scaler_bits=8, n_threads=10
MinVectorDB - INFO - Collection 'test_vec' already exists. Dropped.


In [4]:
from tqdm import trange

import numpy as np

# Append 10,000 random 128-dimensional vectors, continuing from the highest ID
# already stored in the collection.
max_id = collection.get_max_id()
first_id = max_id + 1

# All add_item calls are issued inside a single insert session.
with collection.insert_session():
    for i in trange(first_id, first_id + 10000, unit="vectors"):
        vec = np.random.random(128)
        if i == first_id:
            # Keep a reference to the very first inserted vector as `query`.
            query = vec
        # Each item carries a "test" field that groups IDs in blocks of 1000
        # ("test_0", "test_1", ...).
        collection.add_item(vec, id=i, field={"test": f"test_{i // 1000}"})

100%|██████████| 10000/10000 [00:00<00:00, 182838.01vectors/s]


## Run a query

In [5]:
# Search with a freshly drawn random 128-dimensional vector and ask for the
# top 12 matches. The displayed result is a pair: matching IDs, then their
# similarity scores.
probe_vec = np.random.random(128)
collection.query(probe_vec, k=12)

(array([9289,  812, 8066, 3266, 2831, 6155, 7262, 3198, 3366, 1613,  237,
        9027]),
 Array([0.8408177 , 0.83601075, 0.8340273 , 0.83262867, 0.8311916 ,
        0.8307878 , 0.8287518 , 0.82785505, 0.8275203 , 0.8269049 ,
        0.8258995 , 0.82415515], dtype=float32))

## Print the most recent query report

In [6]:
# Show the report for the most recent query: database shape, query time,
# distance metric, k, and the top result IDs with their similarities.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Database Shape: (10000, 128)
| - Query Time: 0.09373 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [9289  812 8066 3266 2831 6155 7262 3198 3366 1613  237 9027]
| - Top 12 Results Similarity: [0.840818 0.836011 0.834027 0.832629 0.831192 0.830788 0.828752 0.827855
 0.82752  0.826905 0.8259   0.824155]
* - END OF REPORT -



## Using multiple threads to speed up queries

In [7]:
# Re-open the existing 'test_vec' collection (the log reports "already exists.
# Loaded."), this time requesting 12 threads instead of the 10 shown in the
# creation log.
# NOTE(review): the log for this call prints index_mode='IVF-FLAT' and dim=None,
# whereas the collection was created with index_mode='FLAT' and dim=128 --
# confirm whether a loaded collection keeps its original settings.
collection = my_db.require_collection(
    collection='test_vec',
    warm_up=True,
    n_threads=12,
)

MinVectorDB - INFO - Creating collection test_vec with: 
//    dim=None, collection='test_vec', 
//    n_clusters=16, chunk_size=100000,
//    distance='cosine', index_mode='IVF-FLAT', 
//    dtypes='float32', use_cache=True, 
//    scaler_bits=8, n_threads=12
MinVectorDB - INFO - Collection 'test_vec' already exists. Loaded.


In [8]:
import numpy as np

# Repeat the top-12 search against the re-opened collection, again using a
# newly drawn random 128-dimensional vector (so the hits differ from the
# earlier query).
probe_vec = np.random.random(128)
collection.query(probe_vec, k=12)

(array([5556,  327, 5476, 6571, 4005, 1635, 3167, 2342, 9562, 5703, 5164,
        8478]),
 Array([0.87152505, 0.86520463, 0.8459563 , 0.8448862 , 0.8424814 ,
        0.83888644, 0.83759296, 0.8366169 , 0.83652925, 0.83631235,
        0.8362526 , 0.8348547 ], dtype=float32))

In [9]:
# Print the report for the query just run on the re-opened collection; its
# "Query Time" line can be compared with the one from the earlier report.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Database Shape: (10000, 128)
| - Query Time: 0.00199 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [5556  327 5476 6571 4005 1635 3167 2342 9562 5703 5164 8478]
| - Top 12 Results Similarity: [0.871525 0.865205 0.845956 0.844886 0.842481 0.838886 0.837593 0.836617
 0.836529 0.836312 0.836253 0.834855]
* - END OF REPORT -



## Using a Filter to narrow the search scope

In [10]:
import operator

from min_vec.structures.filter import Filter, FieldCondition, MatchField, IDCondition, MatchID


# Random 128-dimensional probe for the filtered search.
probe_vec = np.random.random(128)

# Conditions listed under `must` restrict which items may be returned:
#   * the "test" field has to match 'test_0' (field filtering), and
#   * the ID has to be one of 1-5 (ID filtering).
restricting_filter = Filter(
    must=[
        FieldCondition(key='test', matcher=MatchField('test_0')),
        IDCondition(MatchID([1, 2, 3, 4, 5])),
    ],
    # The `must` and `any` parameters cannot be specified at the same time.
    # Example of an `any` clause (kept for reference, not executed):
    # any=[
    #     FieldCondition(key='field', matcher=MatchField('test_1')),
    #     FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
    #     IDCondition(MatchID([1, 2, 3, 4, 5])),
    # ]
)

# Ask for up to 10 matches among the items that pass the filter.
collection.query(vector=probe_vec, k=10, query_filter=restricting_filter)

(array([1, 2, 4, 5, 3]),
 Array([0.80172694, 0.74257493, 0.738181  , 0.7129409 , 0.70257354],      dtype=float32))