In [2]:

from typing import Optional

import chromadb
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document

# Connect to the Chroma server that hosts the "knowledge" collection.
client = chromadb.HttpClient(
    host="localhost", port=8000
)

# LangChain wrapper around the collection; FastEmbed produces the embeddings
# used for similarity search.
vector_store = Chroma(
    client=client,
    collection_name="knowledge",
    embedding_function=FastEmbedEmbeddings(),
)

# Only the IDs are needed here. By default `get()` also returns every
# document's text and metadata; `include=[]` skips those payloads (IDs are
# always part of the result).
ids = vector_store.get(include=[])['ids']

def get_document_by_id(document_id: str) -> Optional[Document]:
    """Get a document in the collection by its ID.

    Args:
        document_id (str): ID of the document to get.

    Returns:
        Optional[Document]: The document with the given ID, or ``None`` when
            the store returns no document text for that ID.
    """
    result = vector_store.get(ids=[document_id])
    documents = (result or {}).get("documents")
    if not documents:
        return None

    # The "metadatas" entry may be missing or empty, and presumably an item can
    # be None when a document was stored without metadata (TODO confirm) --
    # fall back to an empty dict so Document always receives a mapping.
    metadatas = result.get("metadatas") or [None]
    return Document(page_content=documents[0], metadata=metadatas[0] or {})

  from .autonotebook import tqdm as notebook_tqdm
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 68089.35it/s]


In [6]:
# Tag every document whose source URL contains "docs.sui." with tag="sui" and
# write it back to the store, counting how many documents were updated.
docs_count = 0

for document_id in ids:
    doc = get_document_by_id(document_id)

    # get_document_by_id returns None when nothing is stored under the ID;
    # there is nothing to tag in that case.
    if doc is None:
        continue

    # A document may lack a "source" entry (or hold None); treat it as empty
    # instead of raising KeyError/TypeError.
    source = doc.metadata.get("source") or ""
    if "docs.sui." in source:
        doc.metadata["tag"] = "sui"
        vector_store.update_document(document_id, doc)
        docs_count += 1

docs_count
    

60231

In [29]:
def get_retreiver(input):
    """Build a retriever over the vector store for the given chain input.

    Args:
        input: Mapping passed to the chain. When it contains a "tag" key,
            retrieval is filtered to documents whose "tag" metadata equals
            that value; otherwise no filter is applied.

    Returns:
        The object returned by ``vector_store.as_retriever`` for the computed
        search kwargs.
    """
    tag_filter = {"filter": {"tag": input["tag"]}} if "tag" in input else {}
    return vector_store.as_retriever(search_kwargs=tag_filter)

In [8]:
from langchain_core.runnables import RunnablePassthrough, Runnable

In [10]:
from operator import itemgetter

In [13]:
from langchain_core.prompts import PromptTemplate

In [14]:
# Prompt for the assistant. The template exposes two variables, {context} and
# {question}. The trailing backslashes inside the triple-quoted string are line
# continuations, so the instruction sentences are joined onto a single line
# (each keeping the next line's leading indentation) in the rendered prompt.
prompt = PromptTemplate.from_template(
    """
    You are an expert in blockchain products consulting.\
    You help newcomers to go deep in crypto world.\
    You know everything about crypto projects.\
    Reply in short, simple and friendly manner.\
    Use provided context to answer user's question.\
    If you can't answer the question, tell so.\
    User doesn't know anything about context, context is your knowledge, don't mention word \"context\" in the answer, use \"knowledgebase\" instead.
        
    Context: {context}

    Question: {question}
    """
)

In [30]:


# Chain: fan the input dict out into the two prompt variables, then render the
# prompt.
chain = (
    {
        # A function that returns a Runnable has that Runnable invoked with the
        # same input, i.e. the whole dict. The retriever expects the query
        # string, so invoke it explicitly with the question text.
        "context": lambda inputs: get_retreiver(inputs).invoke(inputs["question"]),
        # RunnablePassthrough(func) only calls func as a side effect and passes
        # the whole input through unchanged, so {question} was rendered as the
        # full dict. itemgetter extracts just the question string.
        "question": itemgetter("question"),
    }
    | prompt
)

In [33]:
# Render the prompt for a sample question. Add a "tag" key (e.g. "wormhole")
# to the input to restrict retrieval to documents carrying that tag.
rendered_prompt = chain.invoke({"question": "How does it work?"})
print(rendered_prompt.text)


    You are an expert in blockchain products consulting.    You help newcomers to go deep in crypto world.    You know everything about crypto projects.    Reply in short, simple and friendly manner.    Use provided context to answer user's question.    If you can't answer the question, tell so.    User doesn't know anything about context, context is your knowledge, don't mention word "context" in the answer, use "knowledgebase" instead.
        
    Context: [Document(metadata={'description': "Objects in Sui can have different types of ownership, with two broad categories: immutable objects and mutable objects. An immutable object is an object that can't be mutated, transferred, or deleted. Immutable objects have no owner, so anyone can use them.", 'language': 'en', 'source': 'https://docs.sui.io/concepts/object-ownership/immutable', 'tag': 'sui', 'title': 'Immutable Objects | Sui Documentation'}, page_content='This test submits a transaction as `sender1`, which tries to create an\ni