In [1]:
import min_vec

In [2]:
client = min_vec.VectorDBClient("http://localhost:7637")
my_db = client.create_database("my_vec_db", drop_if_exists=False)

## Use FLAT index mode

FLAT mode is a brute force search mode, so performance decays linearly with increasing data size.

In [3]:
collection = my_db.require_collection(collection='test_vec', dim=128, drop_if_exists=True)

In [4]:
import numpy as np

with collection.insert_session() as session:
    vectors = []
    for i in range(1000000):
        if i == 0:
            search_vec = np.random.random(128)
            vec = search_vec
        else:
            vec = np.random.random(128)

        vectors.append((vec, i, {"test":f"test_{i // 1000}"}))
    session.bulk_add_items(vectors)

Adding items: 100%|██████████| 1000/1000 [00:22<00:00, 43.63batch/s]


In [5]:
collection.search(vector=search_vec, k=5)

(array([     0, 380618, 515925, 703707, 964683]),
 array([6.78201008, 6.1760006 , 6.14976549, 6.11931086, 6.10258436]))

In [6]:
print(collection.search_report_)


* - MOST RECENT SEARCH REPORT -
| - Collection Shape: (1000000, 128)
| - Search Time: 0.02995 s
| - Search Distance: cosine
| - Search K: 5
| - Top 5 Results ID: [     0 380618 515925 703707 964683]
| - Top 5 Results Similarity: [6.78201008 6.1760006  6.14976549 6.11931086 6.10258436]



## Use IVF-FLAT index mode

IVF-FLAT uses inverted indexes for significant speedups on large-scale data.


Note that IVF-FLAT mode will only take effect if the number of rows added to the collection has reached 100,000; below 100,000 rows, it will fall back to FLAT mode.

In [7]:
collection = my_db.require_collection(collection='test_vec2', dim=128, drop_if_exists=True, chunk_size=10000, use_cache=False)

In [8]:
from tqdm import trange

import numpy as np

with collection.insert_session() as session:
    vectors = []
    for i in range(1000000):
        if i == 0:
            query = np.random.random(128)
            vec = query
        else:
            vec = np.random.random(128)

        vectors.append((vec, i, {"test":f"test_{i // 1000}"}))
        
    session.bulk_add_items(vectors)

collection.build_index("IVF-FLAT")

Adding items: 100%|██████████| 1000/1000 [00:24<00:00, 40.33batch/s]


In [19]:
collection.search(vector=search_vec, k=5)
print(collection.search_report_)


* - MOST RECENT SEARCH REPORT -
| - Collection Shape: (1000000, 128)
| - Search Time: 0.26925 s
| - Search Distance: cosine
| - Search K: 5
| - Top 5 Results ID: [ 21671 763098 829277 893376 449564]
| - Top 5 Results Similarity: [5.97338295 5.80930614 5.79313946 5.78817749 5.76111984]

