In [1]:
from min_vec import MinVectorDB

In [2]:
# Connect to a MinVectorDB server running locally.
my_db = MinVectorDB("http://localhost:7637")
# Alternative: the server is running in Docker (note the different port).
# my_db = MinVectorDB("http://localhost:5403")
# Alternative: pass a path instead of a server URL.
# my_db = MinVectorDB("my_vec")

## Use FLAT index mode

FLAT mode is a brute-force search mode, so query time grows linearly with the size of the collection.

In [3]:
# Get a 128-dimensional collection that uses the FLAT index.
# drop_if_exists=True is passed so a previous 'test_vec' does not carry over between runs.
collection = my_db.require_collection(
    collection='test_vec',
    dim=128,
    index_mode='FLAT',
    drop_if_exists=True,
)

In [4]:
from tqdm import trange

import numpy as np

# Insert 100000 random 128-d vectors in one bulk call.
# The very first vector drawn is also kept as `query`, so the later search
# has a known best match (id 0).
# NOTE: no random seed is set in the visible cells, so the ids and
# similarities returned by the query will differ from run to run.
with collection.insert_session():
    vectors = []
    for item_id in range(100000):
        embedding = np.random.random(128)
        if item_id == 0:
            # Reuse the first stored vector as the query vector.
            query = embedding

        # Each item is (vector, id, metadata); the metadata groups ids in blocks of 1000.
        payload = {"test":f"test_{item_id // 1000}"}
        vectors.append((embedding, item_id, payload))
    collection.bulk_add_items(vectors)

In [5]:
# Fetch the top 5 matches for `query` (the vector stored with id 0 above).
# The cell output shows two arrays: the matched ids and their similarities.
collection.query(vector=query, k=5)

(array([    0, 85838, 61597, 47268, 48315]),
 array([0.99657476, 0.843458  , 0.84294021, 0.83954662, 0.83554995]))

In [6]:
# Print the report of the most recent query (collection shape, query time, distance, top results).
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.12786 s
| - Query Distance: cosine
| - Query K: 5
| - Top 5 Results ID: [    0 85838 61597 47268 48315]
| - Top 5 Results Similarity: [0.99657476 0.843458   0.84294021 0.83954662 0.83554995]
* - END OF REPORT -



## Use IVF-FLAT index mode

IVF-FLAT uses inverted indexes for significant speedups on large-scale data.


Note that IVF-FLAT mode only takes effect once the number of rows added to the collection has reached 100,000; below 100,000 rows, the collection falls back to FLAT mode.

In [7]:
# Get a 128-dimensional collection that uses the IVF-FLAT index.
# drop_if_exists=True is passed so a previous 'test_vec2' does not carry over between runs.
collection = my_db.require_collection(
    collection='test_vec2',
    dim=128,
    index_mode='IVF-FLAT',
    drop_if_exists=True,
    chunk_size=10000,
)

In [8]:
from tqdm import trange

import numpy as np

# Insert 100000 random 128-d vectors in one bulk call.
# The very first vector drawn is also kept as `query`, so the later search
# has a known best match (id 0).
# NOTE: no random seed is set in the visible cells, so the ids and
# similarities returned by the query will differ from run to run.
with collection.insert_session():
    vectors = []
    for item_id in range(100000):
        embedding = np.random.random(128)
        if item_id == 0:
            # Reuse the first stored vector as the query vector.
            query = embedding

        # Each item is (vector, id, metadata); the metadata groups ids in blocks of 1000.
        payload = {"test":f"test_{item_id // 1000}"}
        vectors.append((embedding, item_id, payload))
    collection.bulk_add_items(vectors)

In [9]:
# Fetch the top 5 matches for `query` (the vector stored with id 0 above).
# The cell output shows two arrays: the matched ids and their similarities.
collection.query(vector=query, k=5)

(array([    0, 41509, 39325, 31824, 17708]),
 array([0.99684113, 0.82527226, 0.81985044, 0.81840128, 0.81757426]))

In [10]:
# Print the report of the most recent query (collection shape, query time, distance, top results).
print(collection.query_report_)


* - MOST RECENT QUERY REPORT -
| - Collection Shape: (100000, 128)
| - Query Time: 0.06284 s
| - Query Distance: cosine
| - Query K: 5
| - Top 5 Results ID: [    0 41509 39325 31824 17708]
| - Top 5 Results Similarity: [0.99684113 0.82527226 0.81985044 0.81840128 0.81757426]
* - END OF REPORT -

