In [1]:
from min_vec import MinVectorDB

In [2]:
# Open the database through a URL (presumably a MinVectorDB server on localhost:7637 — confirm).
my_db = MinVectorDB("http://localhost:7637")
# Alternatively, pass a path instead of a URL:
# my_db = MinVectorDB("my_vec")

## Use FLAT index mode

FLAT mode performs a brute-force search, so query time grows linearly with the size of the data.

In [3]:
# Obtain the 'test_vec' collection (128-dimensional vectors, FLAT index).
# drop_if_exists=True presumably discards any earlier collection of that name — confirm.
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,
    index_mode='FLAT',
    drop_if_exists=True,
)

In [4]:

import numpy as np

with collection.insert_session():
    # The first random vector is kept as `query` and stored under ID 0, so the
    # query cell below searches for a vector that is known to be in the collection.
    query = np.random.random(128)
    embeddings = [query]
    embeddings.extend(np.random.random(128) for _ in range(1, 100000))

    # Each item is (vector, id, metadata); the metadata tag changes every 1000 IDs.
    vectors = [
        (embedding, row_id, {"test": f"test_{row_id // 1000}"})
        for row_id, embedding in enumerate(embeddings)
    ]
    collection.bulk_add_items(vectors)

Adding items: 100%|██████████| 100/100 [00:02<00:00, 49.52batch/s]


In [5]:
# Ask for the 5 entries closest to `query`; the displayed result is a pair of
# arrays: the matching IDs and their similarity scores.
collection.query(vector=query, k=5)

(array([    0, 85780, 81033, 93391, 14675]),
 array([0.99681091, 0.83603102, 0.83244038, 0.82392895, 0.82269329]))

In [6]:
# Show the report of the most recent query (collection shape, query time,
# distance metric, k, and the top results).
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.01812 s
| - Query Distance: cosine
| - Query K: 5
| - Top 5 Results ID: [    0 85780 81033 93391 14675]
| - Top 5 Results Similarity: [0.99681091 0.83603102 0.83244038 0.82392895 0.82269329]
* - END OF REPORT -



## Use IVF-FLAT index mode

IVF-FLAT uses inverted indexes for significant speedups on large-scale data.


Note that IVF-FLAT mode only takes effect once the collection contains at least 100,000 rows; with fewer rows, it falls back to FLAT mode.

In [7]:
# Obtain the 'test_vec2' collection (128-dimensional vectors, IVF-FLAT index).
# drop_if_exists=True presumably discards any earlier collection of that name — confirm.
collection = my_db.require_collection(
    collection='test_vec2',
    dim=128,
    index_mode='IVF-FLAT',
    drop_if_exists=True,
    chunk_size=10000,
    use_cache=False,
)

In [8]:
from tqdm import trange

import numpy as np

with collection.insert_session():
    # The first random vector is kept as `query` and stored under ID 0, so the
    # query cell below searches for a vector that is known to be in the collection.
    query = np.random.random(128)
    embeddings = [query]
    embeddings.extend(np.random.random(128) for _ in range(1, 100000))

    # Each item is (vector, id, metadata); the metadata tag changes every 1000 IDs.
    vectors = [
        (embedding, row_id, {"test": f"test_{row_id // 1000}"})
        for row_id, embedding in enumerate(embeddings)
    ]
    collection.bulk_add_items(vectors)

Adding items: 100%|██████████| 100/100 [00:02<00:00, 49.33batch/s]


In [9]:
# Ask for the 5 entries closest to `query`; the displayed result is a pair of
# arrays: the matching IDs and their similarity scores.
collection.query(vector=query, k=5)

(array([    0, 36103, 61785, 46632, 86766]),
 array([0.99780244, 0.83608764, 0.83415651, 0.8321054 , 0.8320967 ]))

In [10]:
# Show the report of the most recent query (collection shape, query time,
# distance metric, k, and the top results).
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.01919 s
| - Query Distance: cosine
| - Query K: 5
| - Top 5 Results ID: [    0 36103 61785 46632 86766]
| - Top 5 Results Similarity: [0.99780244 0.83608764 0.83415651 0.8321054  0.8320967 ]
* - END OF REPORT -

