In [1]:
from min_vec import MinVectorDB

In [2]:
# Open the database handle that every later cell works through.
# The value below is a URL on localhost port 7637 (presumably a MinVectorDB
# server that must already be running -- TODO confirm before executing).
db_location = "http://localhost:7637"
# Alternative shown by the original author: specify a path instead of a URL,
# e.g. db_location = "my_vec"
my_db = MinVectorDB(db_location)

In [3]:
# Obtain the 'test_vec' collection that the rest of the notebook fills and
# queries. One keyword per line so each setting is easy to spot and tweak.
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,              # the vectors inserted and queried below have 128 components
    index_mode='FLAT',
    drop_if_exists=True,  # presumably discards an existing 'test_vec' first -- confirm
    warm_up=True,
)

In [4]:
from tqdm import trange

import numpy as np

# Insert 100,000 random 128-component vectors with a single bulk call.
with collection.insert_session():
    # The vector stored under ID 0 is drawn first and kept under the name
    # `query`, so it remains available after this cell has run.
    query = np.random.random(128)

    # Each item is a (vector, id, metadata) tuple. The metadata label groups
    # IDs in blocks of 1000: IDs 0-999 -> "test_0", 1000-1999 -> "test_1", ...
    vectors = [
        (
            query if idx == 0 else np.random.random(128),
            idx,
            {"test":f"test_{idx // 1000}"},
        )
        for idx in range(100000)
    ]

    collection.bulk_add_items(vectors)

Adding items: 100%|██████████| 100/100 [00:02<00:00, 39.70batch/s]


## Run a query

In [5]:
# Search with a freshly drawn random vector and ask for the 12 best matches.
# The call is the last expression of the cell, so its result (an array of IDs
# followed by an array of similarity scores) is displayed below.
probe_vector = np.random.random(128)
collection.query(probe_vector, k=12)

(array([47542,  5297, 50501, 38376, 75389, 56181, 33856, 56266, 29752,
        51642, 37435, 63887]),
 array([0.84904402, 0.84768826, 0.84427691, 0.84418142, 0.84163976,
        0.83989358, 0.83934748, 0.8393296 , 0.83839804, 0.83826619,
        0.83795744, 0.83768702]))

## Print the most recent query report

In [6]:
# `query_report_` is read from the collection and printed as plain text; the
# output below summarises the most recent query (shape, time, distance, k,
# result IDs and similarities).
latest_report = collection.query_report_
print(latest_report)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.12654 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [47542  5297 50501 38376 75389 56181 33856 56266 29752 51642 37435 63887]
| - Top 12 Results Similarity: [0.84904402 0.84768826 0.84427691 0.84418142 0.84163976 0.83989358
 0.83934748 0.8393296  0.83839804 0.83826619 0.83795744 0.83768702]
* - END OF REPORT -



## Using multiple threads to speed up queries

In [7]:
# Request the same 'test_vec' collection again, this time with n_threads=12.
# `dim`, `index_mode` and `drop_if_exists` are not passed here; the report
# printed further down still shows a (100000, 128) collection.
collection = my_db.require_collection(
    collection='test_vec',
    warm_up=True,
    n_threads=12,
)

In [8]:
import numpy as np

# Same kind of search as before (random 128-component vector, 12 results),
# now against the collection handle that was requested with n_threads=12.
threaded_probe = np.random.random(128)
collection.query(threaded_probe, k=12)

(array([69745, 55507, 63419, 40495, 11339, 23506, 13687, 20820,  4482,
         8171, 65217, 14389]),
 array([0.82578266, 0.82334852, 0.82158798, 0.82148987, 0.82130849,
        0.81564808, 0.81450498, 0.8143751 , 0.8137145 , 0.81340706,
        0.81124794, 0.81000352]))

In [9]:
# Print the report for the query that was just run, so its "Query Time" line
# can be compared with the one from the earlier report.
threaded_report = collection.query_report_
print(threaded_report)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.01921 s
| - Query Distance: cosine
| - Query K: 12
| - Top 12 Results ID: [69745 55507 63419 40495 11339 23506 13687 20820  4482  8171 65217 14389]
| - Top 12 Results Similarity: [0.82578266 0.82334852 0.82158798 0.82148987 0.82130849 0.81564808
 0.81450498 0.8143751  0.8137145  0.81340706 0.81124794 0.81000352]
* - END OF REPORT -



## Using a Filter to narrow down the search range

In [10]:
import operator

from min_vec.core_components.filter import Filter, FieldCondition, MatchField, IDCondition, MatchID


# Draw the query vector first, then describe which items may be returned.
query_vector = np.random.random(128)

# Conditions listed under `must` (presumably all of them have to hold --
# confirm against the min_vec documentation):
#   * metadata field 'test' matches 'test_0'
#   * item ID is one of 1, 2, 3, 4, 5
search_filter = Filter(
    must=[
        FieldCondition(key='test', matcher=MatchField('test_0')),  # filter on a metadata field
        IDCondition(MatchID([1, 2, 3, 4, 5])),  # filter on item IDs
    ],
    # Illustrative only (left disabled by the original author): an `any` list
    # takes the same kinds of conditions, and MatchField accepts a comparator.
    # any=[
    #     FieldCondition(key='field', matcher=MatchField('test_1')),
    #     FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
    #     IDCondition(MatchID([1, 2, 3, 4, 5])),
    # ]
)

# Last expression of the cell, so the (IDs, similarities) result is displayed.
collection.query(vector=query_vector, k=10, query_filter=search_filter)

(array([5, 1, 3, 4, 2]),
 array([0.79130077, 0.75929183, 0.75263286, 0.73578966, 0.72570997]))