In [1]:
from min_vec import MinVectorDB

In [11]:
# Connect to a MinVectorDB server running on this machine.
my_db = MinVectorDB("http://localhost:7637")
# If the server runs in Docker, use this address instead:
# my_db = MinVectorDB("http://localhost:5403")
# Alternatively, pass a path instead of a server URL:
# my_db = MinVectorDB("my_vec")

## Use FLAT index mode

FLAT mode is a brute-force search mode, so query time grows linearly with the size of the collection.

In [12]:
# Request a 128-dimensional collection named 'test_vec' that uses the FLAT index.
# drop_if_exists=True presumably discards an existing collection of the same name,
# so re-running this cell starts from an empty collection -- confirm against the docs.
collection = my_db.require_collection(collection='test_vec', dim=128, index_mode='FLAT', drop_if_exists=True)

In [13]:
from tqdm import trange

import numpy as np

# Fill the collection with 100,000 random 128-dimensional vectors in one bulk call.
with collection.insert_session():
    # The query vector is drawn first and stored as item 0, so the searches
    # below have a known best match.
    query = np.random.random(128)
    vectors = []
    for i in range(100000):
        vec = query if i == 0 else np.random.random(128)
        # Each item is a (vector, id, dict) tuple; the "test" value is shared by
        # every block of 1,000 consecutive ids.
        vectors.append((vec, i, {"test": f"test_{i // 1000}"}))
    collection.bulk_add_items(vectors)

In [14]:
# Retrieve the 5 best matches for `query`; the result is a pair of arrays
# (ids, similarities). `query` was inserted as item 0, so id 0 should rank first.
collection.query(vector=query, k=5)

(array([    0, 83015, 83514, 81066, 20269]),
 array([0.9964596 , 0.84821522, 0.84683108, 0.8449266 , 0.84055191]))

In [15]:
# Print the report for the most recent query: collection shape, query time,
# distance metric, k, and the top results.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.18751 s
| - Query Distance: cosine
| - Query K: 5
| - Top 5 Results ID: [    0 83015 83514 81066 20269]
| - Top 5 Results Similarity: [0.9964596  0.84821522 0.84683108 0.8449266  0.84055191]
* - END OF REPORT -



## Use IVF-FLAT index mode

IVF-FLAT uses inverted indexes for significant speedups on large-scale data.


Note that IVF-FLAT mode only takes effect once at least 100,000 rows have been added to the collection; below 100,000 rows, it falls back to FLAT mode.

In [16]:
# Request a second 128-dimensional collection, 'test_vec2', this time with the IVF-FLAT index.
# drop_if_exists=True presumably discards an existing collection of the same name -- confirm.
# NOTE(review): chunk_size=10000 is set here but not for the FLAT collection; its
# exact effect is not shown in this notebook -- check the MinVectorDB docs.
collection = my_db.require_collection(collection='test_vec2', dim=128, index_mode='IVF-FLAT', drop_if_exists=True, chunk_size=10000)

In [17]:
from tqdm import trange

import numpy as np

# Fill the collection with 100,000 random 128-dimensional vectors in one bulk call.
with collection.insert_session():
    # The query vector is drawn first and stored as item 0, so the searches
    # below have a known best match.
    query = np.random.random(128)
    vectors = []
    for i in range(100000):
        vec = query if i == 0 else np.random.random(128)
        # Each item is a (vector, id, dict) tuple; the "test" value is shared by
        # every block of 1,000 consecutive ids.
        vectors.append((vec, i, {"test": f"test_{i // 1000}"}))
    collection.bulk_add_items(vectors)

In [18]:
# Retrieve the 5 best matches for `query`; the result is a pair of arrays
# (ids, similarities). `query` was inserted as item 0, so id 0 should rank first.
collection.query(vector=query, k=5)

(array([    0, 77621, 51787, 23614, 65492]),
 array([0.99815422, 0.83720356, 0.82392925, 0.82383007, 0.82344675]))

In [19]:
# Print the report for the most recent query: collection shape, query time,
# distance metric, k, and the top results.
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.07176 s
| - Query Distance: cosine
| - Query K: 5
| - Top 5 Results ID: [    0 77621 51787 23614 65492]
| - Top 5 Results Similarity: [0.99815422 0.83720356 0.82392925 0.82383007 0.82344675]
* - END OF REPORT -

