In [2]:
from pathlib import Path
from dotenv import load_dotenv
import openai
from rag_diary.config import Config
from rag_diary.vectore_store_chromadb import VectorStoreChromadb

# IPython keeps a directory history in the `_dh` global; its first entry is
# the directory the kernel started in (normally the notebook's own folder).
current_folder = globals()['_dh'][0]

# Load the .env file that sits one level above the notebook folder.
# Presumably Config reads its values from the environment, so this has to
# happen before Config() is built -- TODO confirm against rag_diary.config.
load_dotenv(Path(current_folder).parent / ".env")

# Build the configuration once and reuse it for the OpenAI key and for the
# later cells, instead of creating a throwaway Config() just to read the key.
config = Config()
openai.api_key = config.OPENAI_API_KEY

## Rag diary Chromadb
#### create a new rag_diary.VectorStoreChromadb

`VectorStoreChromadb` implements `rag_diary.VectorStore`. All rag_diary vector stores should work the same way.

Define the name of the collection to retrieve. If the collection does not exist, you will get a ValueError warning and the VectorStore will then create a new collection. Chromadb will then persist the collection in a SQLite db stored locally at the path provided in the .env variable rag_diary_vector_db_path.

In [3]:
# Open the "rag_diary_jupyter" collection in the vector db located at the
# path taken from the configuration.
rag_vector_store = VectorStoreChromadb(
    db_path=config.rag_diary_vector_db_path,
    collection_name="rag_diary_jupyter",
)

Collection rag_diary_jupyter does not exist.


### Let's add some test data into the persistent storage
Add a single record with the text to embed and some metadata.

In [4]:
# Store one record: the text to embed, followed by its metadata dict.
rag_vector_store.add(
    "this is an entry about dogs",
    {"name": "dogs", "id": "123"},
)

Add multiple records to the db at once.

In [5]:
# Insert several records in one call; the metadata dicts are listed in the
# same order as the documents they belong to.
rag_vector_store.add_multiple(
    documents=[
        "this is a document about cats",
        "Robots are destined to rule the world of man",
    ],
    metadatas=[
        {"name": "cats", "id": "122"},
        {"name": "robots", "id": "0101010101010101010"},
    ],
)

### Here is the fun part. 
Let us query the vector db. This implementation uses a local embedding model. Queries will return results ordered by cosine similarity in the form of Records.

In [6]:
# Query the store with a free-text string and keep the returned records.
data = rag_vector_store.query_by_str("robots are cool")
# Bare last expression so the notebook renders the result below the cell.
data

Number of requested results 10 is greater than number of elements in index 3, updating n_results = 3


[{'document': 'Robots are destined to rule the world of man',
  '_id': 'd479505e-e4c8-4647-96be-ac6fbcc1905e',
  'id': '0101010101010101010',
  'name': 'robots'},
 {'document': 'this is an entry about dogs',
  '_id': 'f0447a32-ffdd-4688-bea3-9532dbc9d631',
  'id': '123',
  'name': 'dogs'},
 {'document': 'this is a document about cats',
  '_id': '7047b6d9-56be-4830-8d88-4720486569cf',
  'id': '122',
  'name': 'cats'}]