In [1]:
import lynse

In [2]:
client = lynse.VectorDBClient("http://127.0.0.1:7637")
my_db = client.create_database("my_vec_db", drop_if_exists=True)

## Use FLAT index mode

FLAT mode is a brute force search mode, so performance decays linearly with increasing data size.

In [3]:
collection = my_db.require_collection(collection='test_vec', dim=128, drop_if_exists=True)

In [4]:
import numpy as np

with collection.insert_session() as session:
    vectors = []
    for i in range(1000000):
        if i == 0:
            search_vec = np.random.random(128)
            vec = search_vec
        else:
            vec = np.random.random(128)

        vectors.append((vec, i, {"test":f"test_{i // 1000}"}))
    session.bulk_add_items(vectors)

Adding items: 100%|██████████| 1000/1000 [00:21<00:00, 46.46batch/s]

2024-06-18 11:50:56 - LynseDB - INFO - Task status: {'status': 'Processing'}
2024-06-18 11:50:58 - LynseDB - INFO - Task status: {'result': {'collection_name': 'test_vec', 'database_name': 'my_vec_db'}, 'status': 'Success'}

In [5]:
collection.search(vector=search_vec, k=5)

(array([     0, 890676,  34931, 900812, 719672]),
 array([0.9999969 , 0.84619355, 0.84239531, 0.82766622, 0.82372534]),
 None)

In [6]:
print(collection.search_report_)


* - MOST RECENT SEARCH REPORT -
| - Collection Shape: (1000000, 128)
| - Search Time: 0.06348 s
| - Search Distance: cosine
| - Search K: 5
| - Top 5 Results ID: [     0 890676  34931 900812 719672]
| - Top 5 Results Similarity: [0.9999969  0.84619355 0.84239531 0.82766622 0.82372534]



## Use IVF-FLAT index mode

IVF-FLAT uses inverted indexes for significant speedups on large-scale data.


Note that IVF-FLAT mode will only take effect if the number of rows added to the collection has reached 100,000; below 100,000 rows, it will fall back to FLAT mode.

In [7]:
collection = my_db.require_collection(collection='test_vec2', dim=128, drop_if_exists=True, chunk_size=10000, use_cache=False)

In [8]:
from tqdm import trange

import numpy as np

with collection.insert_session() as session:
    vectors = []
    for i in range(1000000):
        if i == 0:
            query = np.random.random(128)
            vec = query
        else:
            vec = np.random.random(128)

        vectors.append((vec, i, {"test":f"test_{i // 1000}"}))
        
    session.bulk_add_items(vectors)

collection.build_index("IVF-FLAT")

Adding items: 100%|██████████| 1000/1000 [00:21<00:00, 45.65batch/s]

2024-06-18 11:51:53 - LynseDB - INFO - Task status: {'status': 'Processing'}
2024-06-18 11:51:55 - LynseDB - INFO - Task status: {'result': {'collection_name': 'test_vec2', 'database_name': 'my_vec_db'}, 'status': 'Success'}

{'status': 'success',
 'params': {'database_name': 'my_vec_db',
  'collection_name': 'test_vec2',
  'index_mode': 'IVF-FLAT'}}

In [14]:
collection.search(vector=search_vec, k=5)
print(collection.search_report_)


* - MOST RECENT SEARCH REPORT -
| - Collection Shape: (1000000, 128)
| - Search Time: 0.11089 s
| - Search Distance: cosine
| - Search K: 5
| - Top 5 Results ID: [254174 447460 186416 321505 630247]
| - Top 5 Results Similarity: [0.86221623 0.86213523 0.8573665  0.85304284 0.85047865]

