In [1]:
import sys
from pathlib import Path

# Append the parent of the current working directory to the import search
# path. Presumably this is what makes the local `db_src` package importable
# when the demo is run from its own sub-directory -- TODO confirm.
sys.path.append(str(Path.cwd().parent))

import numpy as np

from spinesUtils.utils import Timer
from db_src import MinVectorDB

# A single Timer instance is shared by the timing printouts of both demos.
timer = Timer()



# ===================================================================
# ========================= DEMO 1 ==================================
# ===================================================================
# Demo 1 -- Sequentially add vectors.
# Inserts 10000 random 1024-dimensional vectors one at a time, saves the
# database, then runs a top-10 query and prints the timings of both phases.
print("# ==========================================")
print("# This is the demonstration area for Demo 1 -- Sequentially add vectors.")

timer.start()
# Create a MinVectorDB instance.
db = MinVectorDB(dim=1024, database_path='test.npy', chunk_size=100)

# Fixed seed so the generated vectors are the same on every run.
np.random.seed(23)

# IDs start at 0 and grow by 1 per vector. `enumerate` supplies them, so no
# hand-maintained counter is needed and the builtin `id` is not shadowed.
for vec_id, vec in enumerate(np.random.random((10000, 1024))):
    # Vectors need to be normalized before writing to the database.
    db.add_item(vec / np.linalg.norm(vec), id=vec_id)
db.save()
print(f"\n**[Insert data] Time cost {timer.last_timestamp_diff():>.4f} s.**")
timer.middle_point()

# Fetch the query vector (first row returned by `db.head(10)`) once and reuse
# it for both the query and the printout, instead of calling `head` twice
# inside the timed section.
query_vec = db.head(10)[0]
res = db.query(query_vec, k=10)
print("  - Query vector: ", query_vec)
print("  - Database index of top 10 results: ", res[0])
print("  - Cosine similarity of top 10 results: ", res[1])
print(f"\n**[Query data] Time cost {timer.last_timestamp_diff():>.4f} s.**")
timer.middle_point()

print("# ==========================================")



# For demonstrating Demo2, the database created in Demo1 needs to be deleted, but this operation is not required in actual use.
db.delete()

# Drop the reference so Demo 2 starts from a fresh MinVectorDB object.
del db




# ===================================================================
# ========================= DEMO 2 ==================================
# ===================================================================
# Demo 2 -- Bulk add vectors.
# Builds the full list of (vector, id) pairs first, inserts them with a
# single bulk call, saves the database, then runs the same top-10 query as
# Demo 1 and prints the timings of both phases.
print("\n# ==========================================")
print("# This is the demonstration area for Demo 2 -- Bulk add vectors.")

timer.middle_point()

db = MinVectorDB(dim=1024, database_path='test.npy', chunk_size=100)

# Same seed as Demo 1, so both demos insert identical vectors.
np.random.seed(23)

# Vectors need to be normalized before writing to the database.
# IDs start at 0 and grow by 1 per vector. `enumerate` supplies them, so no
# hand-maintained counter is needed and the builtin `id` is not shadowed.
vectors = [
    (vec / np.linalg.norm(vec), vec_id)
    for vec_id, vec in enumerate(np.random.random((10000, 1024)))
]

db.bulk_add_items(vectors)
db.save()

print(f"\n**[Insert data] Time cost {timer.last_timestamp_diff():>.4f} s.**")
timer.middle_point()

# Fetch the query vector (first row returned by `db.head(10)`) once and reuse
# it for both the query and the printout, instead of calling `head` twice
# inside the timed section.
query_vec = db.head(10)[0]
res = db.query(query_vec, k=10)
print("  - Query vector: ", query_vec)
print("  - Database index of top 10 results: ", res[0])
print("  - Cosine similarity of top 10 results: ", res[1])
print(f"\n**[Query data] Time cost {timer.last_timestamp_diff():>.4f} s.**")

print("# ==========================================")


timer.end()
# This operation is not required in actual use.
db.delete()

# This is the demonstration area for Demo 1 -- Sequentially add vectors.

**[Insert data] Time cost 2.4945 s.**
  - Query vector:  [0.02898663 0.05306277 0.04289231 ... 0.0143056  0.01658325 0.04808333]
  - Database index of top 10 results:  [   0 5788  842  202 6658  396 5116 9447 1245 2393]
  - Cosine similarity of top 10 results:  [1.         0.77570757 0.77242908 0.77178528 0.77165615 0.77129891
 0.77062634 0.77019239 0.76990888 0.76983951]

**[Query data] Time cost 0.0319 s.**

# This is the demonstration area for Demo 2 -- Bulk add vectors.

**[Insert data] Time cost 2.8534 s.**
  - Query vector:  [0.02898663 0.05306277 0.04289231 ... 0.0143056  0.01658325 0.04808333]
  - Database index of top 10 results:  [   0 5788  842  202 6658  396 5116 9447 1245 2393]
  - Cosine similarity of top 10 results:  [1.         0.77570757 0.77242908 0.77178528 0.77165615 0.77129891
 0.77062634 0.77019239 0.76990888 0.76983951]

**[Query data] Time cost 0.0298 s.**
