In [1]:
from min_vec.field_models import VeloKV, IndexSchema, MatchField, MatchID, FieldCondition, Filter, MatchRange

In [2]:
# Instantiate the VeloKV store ("test_kv") that every later cell operates on.
# NOTE(review): as_temp_file=False presumably keeps the data in a regular file
# instead of a temporary one — confirm against the VeloKV documentation.
kv = VeloKV("test_kv", as_temp_file=False)

In [3]:
from tqdm import tqdm

# Bulk-load one million records. External ids run 0..999999; each consecutive
# run of 10,000 ids shares one "field" label ("test_0" .. "test_99"), and
# "order" is always the id plus one.
for i in tqdm(range(1_000_000)):
    record = {"field": f"test_{i // 10000}", "order": i + 1}
    kv.store(data=record, external_id=i)

# A single commit is issued once the whole loop has finished.
kv.commit()

100%|██████████| 1000000/1000000 [00:02<00:00, 456971.13it/s]


In [4]:
# Fetch one record straight from the storage layer by its external id.
# With the data loaded above, id 900061 carries field "test_90"
# (900061 // 10000 == 90) and order 900062 (id + 1).
kv.storage.retrieve_by_external_id(900061)

{900061: {'field': 'test_90', 'order': 900062}}

In [5]:
# Fetch several records in one call. As the output shows, each returned dict
# carries the record's id under the "id" key next to the stored fields.
kv.retrieve_ids([1, 1212, 12422, 204654])

[{'id': 1, 'field': 'test_0', 'order': 2},
 {'id': 1212, 'field': 'test_0', 'order': 1213},
 {'id': 12422, 'field': 'test_1', 'order': 12423},
 {'id': 204654, 'field': 'test_20', 'order': 204655}]

In [3]:
import operator

# Conditions listed under `must`.
must_conditions = [
    # Match on a stored field value.
    FieldCondition(key='field', matcher=MatchField('test_0')),
    # Range on "order"; with inclusive='left' the recorded results include
    # order 7 and stop at order 19, i.e. the upper bound 20 is excluded.
    FieldCondition(key='order', matcher=MatchRange(7, 20, inclusive='left')),
]

# Conditions listed under `any`.
any_conditions = [
    # MatchField with an explicit comparator (operator.ge against 8).
    FieldCondition(key='order', matcher=MatchField(8, comparator=operator.ge)),
    # Match on record ids instead of a field (no `key` is given).
    FieldCondition(matcher=MatchID([1, 2, 3, 4, 5, 6, 7, 8, 9])),
]

# Conditions listed under `must_not`.
must_not_conditions = [
    FieldCondition(matcher=MatchID([5])),
    FieldCondition(key='order', matcher=MatchField(21, comparator=operator.ge)),
]

# The filter object reused by every kv.query(...) call in this notebook.
query_filter = Filter(
    must=must_conditions,
    any=any_conditions,
    must_not=must_not_conditions,
)

In [5]:
# Run the filtered query through the Python profiler via the %prun line magic.
# In the recorded output below, retrieve_all and _matches_filter are each
# called about once per stored record (~1,000,000 calls).
%prun kv.query(query_filter, return_ids_only=False)

 

         22040138 function calls in 6.713 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  1000001    2.836    0.000    3.099    0.000 kv_storage.py:113(retrieve_all)
  1000000    0.814    0.000    3.312    0.000 kv_query.py:148(_matches_filter)
  2010014    0.734    0.000    1.627    0.000 filter.py:130(evaluate)
  2000013    0.583    0.000    0.807    0.000 filter.py:64(match)
  2000000    0.331    0.000    1.137    0.000 kv_query.py:153(<genexpr>)
        1    0.302    0.302    6.712    6.712 kv_query.py:181(_normal_query)
  2010000    0.293    0.000    1.114    0.000 kv_query.py:150(<genexpr>)
  2000001    0.142    0.000    0.142    0.000 {method 'read' of '_io.BufferedReader' objects}
  2000013    0.126    0.000    0.126    0.000 {built-in method builtins.isinstance}
  1000000    0.126    0.000    1.189    0.000 {built-in method builtins.all}
  1000013    0.122    0.000    1.209    0.000 {built-in method builtins.any}

In [None]:
22040138 function calls in 20.440 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  1000001   16.382    0.000   16.653    0.000 kv_storage.py:113(retrieve_all)
  1000000    0.875    0.000    3.467    0.000 kv_query.py:148(_matches_filter)
  2010014    0.740    0.000    1.666    0.000 filter.py:130(evaluate)
  2000013    0.610    0.000    0.839    0.000 filter.py:64(match)
  2000000    0.351    0.000    1.167    0.000 kv_query.py:153(<genexpr>)
        1    0.320    0.320   20.440   20.440 kv_query.py:181(_normal_query)
  2010000    0.318    0.000    1.168    0.000 kv_query.py:150(<genexpr>)
  2000001    0.146    0.000    0.146    0.000 {method 'read' of '_io.BufferedReader' objects}
  1000000    0.132    0.000    1.248    0.000 {built-in method builtins.all}
  2000013    0.129    0.000    0.129    0.000 {built-in method builtins.isinstance}
  1000013    0.125    0.000    1.245    0.000 {built-in method builtins.any}
  2010013    0.085    0.000    0.085    0.000 {method 'get' of 'dict' objects}
  1000000    0.067    0.000    0.067    0.000 {built-in method from_bytes}
  1000000    0.059    0.000    0.059    0.000 {method 'items' of 'dict' objects}
  1000000    0.051    0.000    0.051    0.000 {built-in method _operator.eq}
  1000013    0.049    0.000    0.049    0.000 {built-in method _operator.ge}
    10000    0.002    0.000    0.002    0.000 filter.py:24(match)
        1    0.000    0.000    0.000    0.000 {built-in method io.open}
        1    0.000    0.000   20.440   20.440 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {built-in method posix.stat}
       27    0.000    0.000    0.000    0.000 kv_query.py:159(<genexpr>)
        1    0.000    0.000    0.000    0.000 {method '__exit__' of '_io._IOBase' objects}
        1    0.000    0.000   20.440   20.440 <string>:1(<module>)
        1    0.000    0.000   20.440   20.440 kv_query.py:217(query)
        2    0.000    0.000    0.000    0.000 pathlib.py:546(__fspath__)
        1    0.000    0.000    0.000    0.000 <frozen genericpath>:16(exists)
        1    0.000    0.000   20.440   20.440 __init__.py:32(query)
       13    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'update' of 'set' objects}
        2    0.000    0.000    0.000    0.000 pathlib.py:536(__str__)
        1    0.000    0.000    0.000    0.000 kv_storage.py:65(auto_commit)
        1    0.000    0.000    0.000    0.000 filter.py:98(match)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}

In [9]:
# Describe an index covering only the "order" field, typed as int.
schema = IndexSchema({"order": int})

# Build the index from that schema; rebuild_if_exists=True is passed so the
# cell can be re-run when an index is already present.
# NOTE(review): exact rebuild semantics are defined by VeloKV — confirm.
kv.build_index(schema, rebuild_if_exists=True)

In [10]:
# Run the filter after the "order" index has been built. With
# return_ids_only=False the output is a list of (id, record) tuples; the
# recorded result is the 13 records with ids 6-18 (orders 7-19).
kv.query(query_filter, return_ids_only=False)

[(6, {'id': 6, 'field': 'test_0', 'order': 7}),
 (7, {'id': 7, 'field': 'test_0', 'order': 8}),
 (8, {'id': 8, 'field': 'test_0', 'order': 9}),
 (9, {'id': 9, 'field': 'test_0', 'order': 10}),
 (10, {'id': 10, 'field': 'test_0', 'order': 11}),
 (11, {'id': 11, 'field': 'test_0', 'order': 12}),
 (12, {'id': 12, 'field': 'test_0', 'order': 13}),
 (13, {'id': 13, 'field': 'test_0', 'order': 14}),
 (14, {'id': 14, 'field': 'test_0', 'order': 15}),
 (15, {'id': 15, 'field': 'test_0', 'order': 16}),
 (16, {'id': 16, 'field': 'test_0', 'order': 17}),
 (17, {'id': 17, 'field': 'test_0', 'order': 18}),
 (18, {'id': 18, 'field': 'test_0', 'order': 19})]

In [11]:
# Describe an index covering both filtered fields: "field" as str and
# "order" as int.
schema = IndexSchema({"field": str, "order": int})

# Build the index from that schema; rebuild_if_exists=True is passed so the
# cell can be re-run when an index is already present.
# NOTE(review): exact rebuild semantics are defined by VeloKV — confirm.
kv.build_index(schema, rebuild_if_exists=True)

In [12]:
# Run the same filter with both "field" and "order" indexed. The recorded
# result is again the 13 records with ids 6-18 (orders 7-19).
kv.query(query_filter, return_ids_only=False)

[(6, {'id': 6, 'field': 'test_0', 'order': 7}),
 (7, {'id': 7, 'field': 'test_0', 'order': 8}),
 (8, {'id': 8, 'field': 'test_0', 'order': 9}),
 (9, {'id': 9, 'field': 'test_0', 'order': 10}),
 (10, {'id': 10, 'field': 'test_0', 'order': 11}),
 (11, {'id': 11, 'field': 'test_0', 'order': 12}),
 (12, {'id': 12, 'field': 'test_0', 'order': 13}),
 (13, {'id': 13, 'field': 'test_0', 'order': 14}),
 (14, {'id': 14, 'field': 'test_0', 'order': 15}),
 (15, {'id': 15, 'field': 'test_0', 'order': 16}),
 (16, {'id': 16, 'field': 'test_0', 'order': 17}),
 (17, {'id': 17, 'field': 'test_0', 'order': 18}),
 (18, {'id': 18, 'field': 'test_0', 'order': 19})]

In [13]:
# Drop the index on "field" only; no call here touches the "order" index.
kv.remove_index("field")

In [14]:
# Run the same filter after the "field" index was removed. The recorded
# result is still the 13 records with ids 6-18 (orders 7-19).
kv.query(query_filter, return_ids_only=False)

[(6, {'id': 6, 'field': 'test_0', 'order': 7}),
 (7, {'id': 7, 'field': 'test_0', 'order': 8}),
 (8, {'id': 8, 'field': 'test_0', 'order': 9}),
 (9, {'id': 9, 'field': 'test_0', 'order': 10}),
 (10, {'id': 10, 'field': 'test_0', 'order': 11}),
 (11, {'id': 11, 'field': 'test_0', 'order': 12}),
 (12, {'id': 12, 'field': 'test_0', 'order': 13}),
 (13, {'id': 13, 'field': 'test_0', 'order': 14}),
 (14, {'id': 14, 'field': 'test_0', 'order': 15}),
 (15, {'id': 15, 'field': 'test_0', 'order': 16}),
 (16, {'id': 16, 'field': 'test_0', 'order': 17}),
 (17, {'id': 17, 'field': 'test_0', 'order': 18}),
 (18, {'id': 18, 'field': 'test_0', 'order': 19})]

In [15]:
# Drop the index on "order" as well.
kv.remove_index("order")

In [19]:
# Profile the filtered query again after the remove_index calls above.
# In the recorded output, retrieve_all dominates the run
# (16.382 s internal time out of 20.440 s total).
%prun kv.query(query_filter, return_ids_only=False)

 

         22040138 function calls in 20.440 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  1000001   16.382    0.000   16.653    0.000 kv_storage.py:113(retrieve_all)
  1000000    0.875    0.000    3.467    0.000 kv_query.py:148(_matches_filter)
  2010014    0.740    0.000    1.666    0.000 filter.py:130(evaluate)
  2000013    0.610    0.000    0.839    0.000 filter.py:64(match)
  2000000    0.351    0.000    1.167    0.000 kv_query.py:153(<genexpr>)
        1    0.320    0.320   20.440   20.440 kv_query.py:181(_normal_query)
  2010000    0.318    0.000    1.168    0.000 kv_query.py:150(<genexpr>)
  2000001    0.146    0.000    0.146    0.000 {method 'read' of '_io.BufferedReader' objects}
  1000000    0.132    0.000    1.248    0.000 {built-in method builtins.all}
  2000013    0.129    0.000    0.129    0.000 {built-in method builtins.isinstance}
  1000013    0.125    0.000    1.245    0.000 {built-in method builtins.any}

In [None]:
# Bare expression: shows the store object's repr as the cell output.
kv

In [16]:
# Final cleanup for the demo store.
# NOTE(review): presumably removes the data persisted for "test_kv" — confirm
# against the VeloKV documentation before reusing `kv` after this call.
kv.delete()