In [1]:
import lynse

In [2]:
# Point the client at the database server URL (localhost, port 7637).
client = lynse.VectorDBClient("http://127.0.0.1:7637")
# Create the "my_vec_db" database and keep a handle to it.
# NOTE(review): drop_if_exists=True presumably discards an existing database of the
# same name first, which would make this cell safe to re-run — confirm in the lynse docs.
my_db = client.create_database("my_vec_db", drop_if_exists=True)

In [3]:
# Obtain the 'test_vec' collection, declared with dim=128 to match the 128-element
# vectors inserted below.
# NOTE(review): the effect of drop_if_exists / warm_up is inferred from the parameter
# names only — confirm in the lynse docs.
collection = my_db.require_collection(collection='test_vec', dim=128, drop_if_exists=True, warm_up=True)

In [4]:
from tqdm import trange

import numpy as np

with collection.insert_session() as session:
    # Accumulate one million (vector, id, field) tuples in memory, then submit them
    # with a single bulk call.
    vectors = []
    for i in range(1000000):
        vec = np.random.random(128)
        if i == 0:
            # Keep a handle on the very first vector under the name `query`.
            query = vec

        # Integer division groups ids in runs of 1000 under one "test" value
        # (test_0, test_1, ...).
        vectors.append((vec, i, {"test": f"test_{i // 1000}"}))
    session.bulk_add_items(vectors)

Adding items: 100%|██████████| 1000/1000 [00:23<00:00, 43.02batch/s]

2024-06-18 11:54:26 - LynseDB - INFO - Task status: {'status': 'Processing'}
2024-06-18 11:54:28 - LynseDB - INFO - Task status: {'result': {'collection_name': 'test_vec', 'database_name': 'my_vec_db'}, 'status': 'Success'}

## Run a query

In [5]:
# Search with a freshly drawn random 128-element vector and request k=12 results.
# The bare expression makes the notebook display the returned tuple.
collection.search(np.random.random(128), k=12)

(array([442186, 369906, 760258, 417597, 824217, 852957, 751271, 563815,
         63866, 502490, 612428, 469497]),
 array([0.83674979, 0.82599759, 0.82425785, 0.82280737, 0.82215673,
        0.81662929, 0.81500018, 0.81495905, 0.80531317, 0.80293918,
        0.79240268, 0.7873373 ]),
 None)

## Print the most recent query report

In [6]:
# Print the collection's text report for the most recent search
# (collection shape, search time, distance metric, k, and the top results).
print(collection.search_report_)


* - MOST RECENT SEARCH REPORT -
| - Collection Shape: (1000000, 128)
| - Search Time: 0.06849 s
| - Search Distance: cosine
| - Search K: 12
| - Top 12 Results ID: [442186 369906 760258 417597 824217 852957 751271 563815  63866 502490
 612428 469497]
| - Top 12 Results Similarity: [0.83674979 0.82599759 0.82425785 0.82280737 0.82215673 0.81662929
 0.81500018 0.81495905 0.80531317 0.80293918 0.79240268 0.7873373 ]



## Using multiple threads to speed up queries

In [7]:
# Request the 'test_vec' collection again, this time passing n_threads=12.
# NOTE(review): no dim / drop_if_exists is given, so this presumably reopens the
# collection populated above rather than recreating it — confirm in the lynse docs.
collection = my_db.require_collection('test_vec', warm_up=True, n_threads=12)

In [8]:
import numpy as np

# Same kind of query as before (new random 128-element vector, k=12), now issued
# through the collection handle obtained with n_threads=12.
collection.search(np.random.random(128), k=12)

(array([ 46875, 785480, 480978,  67796, 721626, 751271, 433003, 376463,
         63866, 424603, 737830, 958699]),
 array([0.82882881, 0.82197392, 0.82114285, 0.8172878 , 0.81713367,
        0.81522357, 0.81377411, 0.81365538, 0.81216002, 0.80962861,
        0.80810547, 0.80175018]),
 None)

In [9]:
# Print the report for the search that was just run on this collection handle.
print(collection.search_report_)


* - MOST RECENT SEARCH REPORT -
| - Collection Shape: (1000000, 128)
| - Search Time: 0.10288 s
| - Search Distance: cosine
| - Search K: 12
| - Top 12 Results ID: [ 46875 785480 480978  67796 721626 751271 433003 376463  63866 424603
 737830 958699]
| - Top 12 Results Similarity: [0.82882881 0.82197392 0.82114285 0.8172878  0.81713367 0.81522357
 0.81377411 0.81365538 0.81216002 0.80962861 0.80810547 0.80175018]



## Using a Filter to narrow down the search scope

In [10]:
import operator

from lynse.field_models import Filter, FieldCondition, MatchField, MatchID


# Random 128-element probe vector for the filtered search.
probe_vector = np.random.random(128)

# Conditions passed as `must`.
# NOTE(review): presumably every one of these has to hold — confirm in the lynse docs.
must_conditions = [
    # Match on a stored field value.
    FieldCondition(key='test', matcher=MatchField('test_0')),
    # Match on item IDs via the special ":match_id:" key.
    FieldCondition(key=":match_id:", matcher=MatchID([1, 2, 3, 4, 5])),
]

# Conditions passed as `any`.
# NOTE(review): presumably at least one of these has to hold — confirm in the lynse docs.
any_conditions = [
    FieldCondition(key='field', matcher=MatchField('test_1')),
    # MatchField also accepts a comparator; here operator.ge with the value 8.
    FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
    FieldCondition(key=":match_id:", matcher=MatchID([1, 2, 3, 4, 5])),
]

# Left as the final bare expression so the notebook displays the search result.
collection.search(
    vector=probe_vector,
    k=10,
    search_filter=Filter(must=must_conditions, any=any_conditions),
)

(array([2, 5, 1, 3, 4]),
 array([0.80089688, 0.78131926, 0.75293136, 0.72805262, 0.71561301]),
 None)