In [2]:
import numpy as np
d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

In [3]:
from faiss_engine import FaissEngine

fe = FaissEngine(embeddings=xb)
fe.search(xq, topk=1)


[(381, 6.8155059814453125)]

In [2]:
import faiss                   # make faiss available
index = faiss.IndexFlatL2(d)   # build the index
print(index.is_trained)
index.add(xb)                  # add vectors to the index
print(index.ntotal)

True
100000


In [3]:
k = 4                          # we want to see 4 nearest neighbors
D, I = index.search(xb[:5], k) # sanity check
print(I)
print(D)
D, I = index.search(xq, k)     # actual search
print(I[:5])                   # neighbors of the 5 first queries
print(I[-5:])                  # neighbors of the 5 last queries

[[  0 393 363  78]
 [  1 555 277 364]
 [  2 304 101  13]
 [  3 173  18 182]
 [  4 288 370 531]]
[[0.        7.175174  7.2076287 7.251163 ]
 [0.        6.323565  6.684582  6.799944 ]
 [0.        5.7964087 6.3917365 7.2815127]
 [0.        7.277905  7.5279875 7.6628447]
 [0.        6.763804  7.295122  7.368814 ]]
[[ 381  207  210  477]
 [ 526  911  142   72]
 [ 838  527 1290  425]
 [ 196  184  164  359]
 [ 526  377  120  425]]
[[ 9900 10500  9309  9831]
 [11055 10895 10812 11321]
 [11353 11103 10164  9787]
 [10571 10664 10632  9638]
 [ 9628  9554 10036  9582]]


Faiis

In [1]:
from tinydb import TinyDB, Query
db = TinyDB('./db/db.json')
User = Query()

In [7]:

db.insert({'name': 'John', 'age': 22})
db.search(User.name == 'John')

[{'name': 'John', 'age': 22}]

In [3]:
db.search((User.name == 'John') & (User.age <= 30))

[{'name': 'John', 'age': 22}]

In [4]:
db.search((User.name == 'John') | (User.name == 'Bob'))

[{'name': 'John', 'age': 22}]

In [5]:
db.search((User.age.map(lambda x: x + x) == 44))

[{'name': 'John', 'age': 22}]

In [17]:
# insert documents

db.insert({'type': 'apple', 'count': 7})
db.insert({'type': 'peach', 'count': 3})


# Iterate over stored documents

for item in db:
    print(item)


# Query documents

Fruit = Query()
db.search(Fruit.type == 'peach')
# [{'count': 3, 'type': 'peach'}]
db.search(Fruit.count > 5)
# [{'count': 7, 'type': 'apple'}]

# Remove documents
db.remove(Fruit.count < 5)

In [None]:
# drop db
db.truncate()

# Tables

In [11]:
table = db.table('users')
table.insert({'user_id': 1, "username": "Aemilius" })
table.all()

[{'user_id': 1, 'username': 'Aemilius'}]

In [16]:
el = db.get(User.username == 'Aemilius')
el.doc_id

AttributeError: 'NoneType' object has no attribute 'doc_id'

In [None]:
table = db.table('users')
table.insert({'user_id': 1, "username": "Aemilius" })
table.all()

In [5]:
table = db.table('prompt_templates')
table.insert({"user": "Emile", "name": "Summarize", "text": "Summarize the following text {}"})
table.all()

[{'user_id': 1, 'username': 'Aemilius'},
 {'name': 'Summarize', 'text': 'Summarize the following text \\{\\}'},
 {'name': 'Summarize', 'text': 'Summarize the following text {}'},
 {'user': 'Emile',
  'name': 'Summarize',
  'text': 'Summarize the following text {}'}]

<!-- >>> from tinydb.storages import JSONStorage
>>> from tinydb.middlewares import CachingMiddleware
>>> db = TinyDB('/path/to/db.json', storage=CachingMiddleware(JSONStorage)) -->

In [7]:
table = db.table('prompt_templates')
prompt_template = Query()
table.update({"name": "Summarize", "text": "Summarize the following bva {}"}, prompt_template.user == 'Emile', prompt_template.name == 'Summarize' )
table.all()

TypeError: 'QueryInstance' object is not iterable

In [18]:
# initilize DB
def load_db(db_path: str = './db/db.json'):
    db = TinyDB(db_path)
    return db

def initialize_db(db_path: str = './db/db.json'):
    db = TinyDB(db_path)
    table = db.table('users')
    table.insert({"username": "Aemilius"})
    return True

[{'user_id': 1, 'username': 'Aemilius'}, {'username': 'Aemilius'}]