In [1]:
from min_vec import MinVectorDB

In [2]:
# Open the database by URL (assumes a MinVectorDB service is reachable on localhost:7637 — TODO confirm).
my_db = MinVectorDB("http://localhost:7637")
# Alternative: pass a path instead of a URL.
# my_db = MinVectorDB("my_vec")

In [3]:
# Obtain the 'test_vec' collection: 128-dimensional vectors, FLAT index mode.
# drop_if_exists=True presumably replaces a leftover collection of the same
# name so the notebook can be re-run from scratch — confirm against the
# MinVectorDB documentation.
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,
    index_mode='FLAT',
    drop_if_exists=True,
    warm_up=True,
)

In [4]:
from tqdm import trange

import numpy as np

with collection.insert_session():
    # Draw the first vector on its own and keep it around as `query`;
    # it is the vector placed at index 0 of the batch.
    query = np.random.random(128)

    # Each entry is (vector, integer index, {"test": label}). The label
    # buckets indices in blocks of 1000: "test_0" for 0-999, "test_1" for
    # 1000-1999, and so on.
    vectors = [
        (
            query if idx == 0 else np.random.random(128),
            idx,
            {"test":f"test_{idx // 1000}"},
        )
        for idx in range(100000)
    ]

    # Submit the whole batch in a single call.
    collection.bulk_add_items(vectors)

Adding items: 100%|██████████| 100/100 [00:01<00:00, 60.48batch/s]


## Run a query

In [5]:
# Query with a fresh random 128-d vector and ask for k=12 results.
# The cell output is a pair of arrays: the matching IDs and their similarity scores.
collection.query(np.random.random(128), k=12)

(array([35225, 61658, 56937, 22370, 76929, 82723, 39522, 83529, 75406,
        65492, 65899, 17413]),
 array([0.84384322, 0.8412385 , 0.83857441, 0.83661389, 0.83605361,
        0.83569294, 0.83545423, 0.83271849, 0.83241564, 0.83194226,
        0.8318367 , 0.83105266]))

## Print the most recent query report

In [6]:
# query_report_ is a text summary of the most recent query
# (collection shape, query time, distance, k, result IDs and similarities).
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.14791 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [35225 61658 56937 22370 76929 82723 39522 83529 75406 65492 65899 17413]
| - Top 12 Results Similarity: [0.84384322 0.8412385  0.83857441 0.83661389 0.83605361 0.83569294
 0.83545423 0.83271849 0.83241564 0.83194226 0.8318367  0.83105266]
* - END OF REPORT -



## Using multiple threads to speed up queries

In [7]:
# Request the same 'test_vec' collection again, this time with n_threads=12
# so that queries can use multiple threads. No drop_if_exists is passed here;
# the report printed further down still shows a (100000, 128) collection.
collection = my_db.require_collection(
    'test_vec',
    warm_up=True,
    n_threads=12,
)

In [8]:
import numpy as np

# Same kind of query as before (random 128-d vector, k=12), now against the
# collection handle that was opened with n_threads=12.
collection.query(np.random.random(128), k=12)

(array([93933,  2038, 84168, 78918,  6980, 82806, 92899, 75307, 35830,
        63711, 33628, 41216]),
 array([0.85993505, 0.85963386, 0.85962951, 0.8580128 , 0.85729456,
        0.8571651 , 0.85711849, 0.85669434, 0.85655409, 0.85622203,
        0.85599816, 0.85424376]))

In [9]:
# Print the report for the query above; compare its "Query Time" line with
# the report from the earlier query made without n_threads.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.03766 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [93933  2038 84168 78918  6980 82806 92899 75307 35830 63711 33628 41216]
| - Top 12 Results Similarity: [0.85993505 0.85963386 0.85962951 0.8580128  0.85729456 0.8571651
 0.85711849 0.85669434 0.85655409 0.85622203 0.85599816 0.85424376]
* - END OF REPORT -



## Using a Filter to narrow down the search range

In [10]:
import operator

from min_vec.structures.filter import Filter, FieldCondition, MatchField, IDCondition, MatchID


# Draw the query vector first, then describe which records may be returned.
query_vector = np.random.random(128)

# Conditions listed under `must`:
#   * field condition: the 'test' field matches 'test_0'
#   * ID condition:    the ID is one of 1, 2, 3, 4, 5
# NOTE(review): the recorded output of this cell lists 10 IDs, none of them in
# 1..5. If `must` requires every condition to hold, that output looks stale —
# confirm the semantics and re-run the cell.
search_filter = Filter(
    must=[
        FieldCondition(key='test', matcher=MatchField('test_0')),
        IDCondition(MatchID([1, 2, 3, 4, 5])),
    ],
    # An `any` list can be supplied as well, for example:
    # any=[
    #     FieldCondition(key='field', matcher=MatchField('test_1')),
    #     FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
    #     IDCondition(MatchID([1, 2, 3, 4, 5])),
    # ]
)

collection.query(vector=query_vector, k=10, query_filter=search_filter)

(array([762, 615, 373, 182, 377, 852,  19,  87,  72, 608]),
 array([0.83052421, 0.81398219, 0.81066859, 0.80477023, 0.80089283,
        0.7998516 , 0.79862028, 0.79467428, 0.79415911, 0.79328895]))