In [1]:
import min_vec

In [2]:
# Create a client for the vector-database server at http://localhost:7637, then
# obtain the "my_vec_db" database handle that the later cells use via `my_db`.
# NOTE(review): drop_if_exists=False presumably keeps an existing database of that
# name instead of recreating it — confirm against the min_vec docs.
client = min_vec.VectorDBClient("http://localhost:7637")
my_db = client.create_database("my_vec_db", drop_if_exists=False)

In [3]:
# Obtain the 128-dimensional collection "test_vec" from my_db.
# NOTE(review): drop_if_exists=True presumably discards any existing collection with
# this name, so re-running the notebook starts from an empty collection; what
# warm_up=True does is not visible here — confirm both against the min_vec docs.
collection = my_db.require_collection(collection='test_vec', dim=128, drop_if_exists=True, warm_up=True)

In [4]:
from tqdm import trange

import numpy as np

# Generate 100,000 random 128-d vectors and hand them to the collection in one
# bulk call inside a single insert session.
with collection.insert_session() as session:
    vectors = []
    for i in range(100000):
        # Exactly one random draw per row; the very first row is additionally
        # remembered under the name `query`.
        vec = np.random.random(128)
        if i == 0:
            query = vec

        # Each item is (vector, integer id, field dict). The "test" field groups
        # ids per thousand: "test_0" for 0-999, "test_1" for 1000-1999, ...
        vectors.append((vec, i, {"test":f"test_{i // 1000}"}))
    session.bulk_add_items(vectors)

Adding items: 100%|██████████| 100/100 [00:01<00:00, 51.88batch/s]


## Run a query

In [5]:
# Query with a fresh random 128-d vector and request the top k=12 matches.
# The displayed result is a pair of arrays; the search report labels them as
# the result IDs and their similarities.
collection.search(np.random.random(128), k=12)

(array([89739, 67302, 86506, 35637, 53829, 90629, 16648, 79609,  9411,
        48882, 11022, 27272]),
 array([5.99303675, 5.94914007, 5.92649078, 5.87868452, 5.87307835,
        5.84338474, 5.83773851, 5.82701254, 5.82503891, 5.82236385,
        5.82101488, 5.81897736]))

## Print the most recent query report

In [6]:
# search_report_ describes the most recent search: collection shape, search time,
# distance, k, and the returned IDs and similarities.
print(collection.search_report_)


* - MOST RECENT SEARCH REPORT -
| - Collection Shape: (100000, 128)
| - Search Time: 0.15519 s
| - Search Distance: cosine
| - Search K: 12
| - Top 12 Results ID: [89739 67302 86506 35637 53829 90629 16648 79609  9411 48882 11022 27272]
| - Top 12 Results Similarity: [5.99303675 5.94914007 5.92649078 5.87868452 5.87307835 5.84338474
 5.83773851 5.82701254 5.82503891 5.82236385 5.82101488 5.81897736]



## Using multiple threads to speed up queries

In [7]:
# Re-acquire the existing "test_vec" collection, this time with n_threads=12.
# dim and drop_if_exists are not passed here; the report printed further down still
# shows the (100000, 128) shape, so the previously inserted data is kept.
# NOTE(review): n_threads presumably sets how many threads a search uses — confirm
# against the min_vec docs.
collection = my_db.require_collection('test_vec', warm_up=True, n_threads=12)

In [8]:
import numpy as np

# Same kind of query as the earlier search (new random 128-d vector, k=12), now
# issued against the collection opened with n_threads=12.
collection.search(np.random.random(128), k=12)

(array([42653, 29634, 71011, 47453, 91977, 27724, 86506, 62559, 40592,
         6039, 39087,  7352]),
 array([6.00953531, 6.00372791, 5.99135399, 5.98910761, 5.96022749,
        5.95478773, 5.95436335, 5.95208073, 5.93630934, 5.92994547,
        5.92841339, 5.92321777]))

In [9]:
# Print the report for the threaded search so its search time can be compared
# with the earlier report.
print(collection.search_report_)


* - MOST RECENT SEARCH REPORT -
| - Collection Shape: (100000, 128)
| - Search Time: 0.01304 s
| - Search Distance: cosine
| - Search K: 12
| - Top 12 Results ID: [42653 29634 71011 47453 91977 27724 86506 62559 40592  6039 39087  7352]
| - Top 12 Results Similarity: [6.00953531 6.00372791 5.99135399 5.98910761 5.96022749 5.95478773
 5.95436335 5.95208073 5.93630934 5.92994547 5.92841339 5.92321777]



## Using a Filter to narrow down the search range

In [10]:
import operator

from min_vec.field_models import Filter, FieldCondition, MatchField, MatchID


# Draw the query vector first so the random stream is consumed exactly as in a
# single inline call.
query_vector = np.random.random(128)

# Conditions passed as `must`: a field match on 'test' and an ID match.
# The displayed result contains only IDs 1-5 even though k=10 is requested.
must_conditions = [
    FieldCondition(key='test', matcher=MatchField('test_0')),  # filter on a stored field
    FieldCondition(key=":match_id:", matcher=MatchID([1, 2, 3, 4, 5])),  # filter on item IDs
]

# Conditions passed as `any`.
# NOTE(review): the items inserted earlier only carry a 'test' field, so the
# 'field' and 'order' keys are not present on them — confirm these are meant
# purely as syntax examples (MatchField with a comparator such as operator.ge).
any_conditions = [
    FieldCondition(key='field', matcher=MatchField('test_1')),
    FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
    FieldCondition(key=":match_id:", matcher=MatchID([1, 2, 3, 4, 5])),
]

narrowing_filter = Filter(must=must_conditions, any=any_conditions)

collection.search(vector=query_vector, k=10, search_filter=narrowing_filter)

(array([1, 3, 5, 4, 2]),
 array([5.25115204, 5.11612701, 4.93157673, 4.752244  , 4.59541464]))