In [1]:
from min_vec import MinVectorDB

In [2]:
# Open the database by URL (presumably a MinVectorDB server listening on localhost:7637 — confirm it is running first).
my_db = MinVectorDB("http://localhost:7637")
# Alternative shown by the author: pass a path instead of a URL.
# my_db = MinVectorDB("my_vec")

## Use FLAT index mode

FLAT mode performs a brute-force (exhaustive) search, so query time grows linearly with the size of the collection.

In [3]:
# Request the 'test_vec' collection: 128-dimensional vectors, FLAT index.
# NOTE(review): drop_if_exists=True presumably discards any existing
# 'test_vec' collection so each run starts empty — confirm.
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,
    index_mode='FLAT',
    drop_if_exists=True,
)

In [4]:

import numpy as np

# Insert 100,000 random 128-d vectors in one insert session.
with collection.insert_session():
    # Draw the query vector first: it is stored as item 0, so the query
    # cell below searches for a vector that is known to be in the collection.
    query = np.random.random(128)

    # Each item is a (vector, id, dict) tuple; the dict's "test" value
    # buckets ids by thousands (test_0 ... test_99).
    vectors = [
        (
            query if idx == 0 else np.random.random(128),
            idx,
            {"test":f"test_{idx // 1000}"},
        )
        for idx in range(100000)
    ]
    collection.bulk_add_items(vectors)

Adding items: 100%|██████████| 100/100 [00:02<00:00, 46.26batch/s]


Adding items: 100%|██████████| 100/100 [00:02<00:00, 45.67batch/s]


In [6]:
# Search for the 5 items most similar to `query`. Left as the bare last
# expression so the notebook displays the result: a tuple of
# (ids array, similarities array), as shown in the output below.
collection.query(vector=query, k=5)

(array([     0,  93766,  72221, 164287,  31034]),
 array([0.99663687, 0.83784258, 0.83720052, 0.83609754, 0.83343524]))

In [7]:
# Show the report for the most recent query (collection shape, query time,
# distance, k, top results). print() is used so the multi-line report
# renders as text rather than as an escaped repr.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (200000, 128)
| - Query Time: 0.18736 s
| - Query Distance: cosine
| - Query K: 5
| - Top 5 Results ID: [     0  93766  72221 164287  31034]
| - Top 5 Results Similarity: [0.99663687 0.83784258 0.83720052 0.83609754 0.83343524]
* - END OF REPORT -



## Use IVF-FLAT index mode

IVF-FLAT uses inverted indexes for significant speedups on large-scale data.


Note that IVF-FLAT mode will only take effect if the number of rows added to the collection has reached 100,000; below 100,000 rows, it will fall back to FLAT mode.

In [11]:
# Request the 'test_vec2' collection: 128-dimensional vectors, IVF-FLAT index,
# with chunk_size=10000.
# NOTE(review): drop_if_exists=True presumably discards any existing
# 'test_vec2' collection so each run starts empty — confirm.
collection = my_db.require_collection(
    collection='test_vec2',
    dim=128,
    index_mode='IVF-FLAT',
    drop_if_exists=True,
    chunk_size=10000,
)

In [12]:
from tqdm import trange

import numpy as np

# Insert 100,000 random 128-d vectors in one insert session.
with collection.insert_session():
    # Draw the query vector first: it is stored as item 0, so the query
    # cell below searches for a vector that is known to be in the collection.
    query = np.random.random(128)

    # Each item is a (vector, id, dict) tuple; the dict's "test" value
    # buckets ids by thousands (test_0 ... test_99).
    vectors = [
        (
            query if idx == 0 else np.random.random(128),
            idx,
            {"test":f"test_{idx // 1000}"},
        )
        for idx in range(100000)
    ]
    collection.bulk_add_items(vectors)

Adding items: 100%|██████████| 100/100 [00:01<00:00, 53.86batch/s]


In [13]:
# Search for the 10 items most similar to `query`. k is set to 10 so the
# call agrees with the recorded output and report below (10 results,
# "Query K: 10"); the previous k=5 did not match them.
# Left as the bare last expression so the notebook displays the
# (ids array, similarities array) tuple.
collection.query(vector=query, k=10)

(array([    0, 28076, 33346, 25574, 53443, 28211, 89654, 38817, 70201,
        47820]),
 array([0.99427956, 0.83509803, 0.82248974, 0.8204155 , 0.81968242,
        0.81914139, 0.81905454, 0.81847763, 0.81813222, 0.81808954]))

In [14]:
# Show the report for the most recent query (collection shape, query time,
# distance, k, top results). print() is used so the multi-line report
# renders as text rather than as an escaped repr.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.02555 s
| - Query Distance: cosine
| - Query K: 10
| - Top 10 Results ID: [    0 28076 33346 25574 53443 28211 89654 38817 70201 47820]
| - Top 10 Results Similarity: [0.99427956 0.83509803 0.82248974 0.8204155  0.81968242 0.81914139
 0.81905454 0.81847763 0.81813222 0.81808954]
* - END OF REPORT -

