### Vector Store and Retriever - ChromaDB

Chroma is an AI-native, open-source vector database focused on developer productivity and happiness.

In [1]:
# Building sample vectordb
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:

# Configure the splitter first: chunks of at most 500 characters, no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)

# Read the speech from disk, then cut it into chunks for embedding.
loader = TextLoader(file_path="speech.txt")
documents = loader.load()
docs = text_splitter.split_documents(documents=documents)

In [5]:
# Embedding function backed by the Ollama model tagged "gemma:2b".
embeddings = OllamaEmbeddings(
    model="gemma:2b",
)

In [6]:
# Embed every chunk and index it in a Chroma store (no persist directory given).
vectordb = Chroma.from_documents(
    docs,
    embedding=embeddings,
)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x15ac2730d90>

In [7]:
# Run a similarity search against the store and show the first hit's text.
# Note: this rebinds `docs` from the split chunks to the search results.
query = "What does the speaker believe is the main reason behind united states should enter the war?"
docs = vectordb.similarity_search(query)
docs[0].page_content

'democracy, for the right of those who submit to authority to have a voice in their own governments, for the rights and liberties of small nations, for a universal dominion of right by such a concert of free peoples as shall bring peace and safety to all nations and make the world itself at last free.'

In [8]:
# Display every document returned by the similarity search.
docs

[Document(id='1d299b91-c65d-44d6-be6c-1c0be7e07556', metadata={'source': 'speech.txt'}, page_content='democracy, for the right of those who submit to authority to have a voice in their own governments, for the rights and liberties of small nations, for a universal dominion of right by such a concert of free peoples as shall bring peace and safety to all nations and make the world itself at last free.'),
 Document(id='60112b3b-a691-42ed-bb83-60c0ced631f9', metadata={'source': 'speech.txt'}, page_content='government in the hour of test. They are, most of them, as true and loyal Americans as if they had never known any other fealty or allegiance. They will be prompt to stand with us in rebuking and restraining the few who may be of a different mind and purpose. If there should be disloyalty, it will be dealt with with a firm hand of stern repression; but, if it lifts its head at all, it will lift it only here and there and without countenance except from a lawless and malignant few.'),
 D

In [9]:
# Save to local
# Persist the full set of chunks to disk. `docs` must not be used here: the
# query cell rebound it to the similarity-search hits, so passing it would
# store only those few results instead of the whole speech. Re-splitting
# `documents` with the same splitter regenerates the original chunk list.
# NOTE(review): re-running this cell appears to add the chunks to the existing
# ./chroma_db collection again - confirm, and clear the directory first if
# duplicate entries matter.
all_chunks = text_splitter.split_documents(documents)
vectordb = Chroma.from_documents(
    documents=all_chunks,
    embedding=embeddings,
    persist_directory="./chroma_db",
)

In [10]:
# Re-open the persisted store from ./chroma_db; the same embedding function
# is supplied so that queries are embedded consistently with the stored data.
db2 = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db",
)

In [12]:
# Repeat the same query against the store reloaded from disk.
docs = db2.similarity_search(query=query)

In [13]:
# Text of the first result returned by the reloaded store.
docs[0].page_content

'democracy, for the right of those who submit to authority to have a voice in their own governments, for the rights and liberties of small nations, for a universal dominion of right by such a concert of free peoples as shall bring peace and safety to all nations and make the world itself at last free.'

In [15]:
# Wrap the vector store in a retriever and show the text of its first
# result for the same query.
retriever = vectordb.as_retriever()
retriever.invoke(input=query)[0].page_content

'democracy, for the right of those who submit to authority to have a voice in their own governments, for the rights and liberties of small nations, for a universal dominion of right by such a concert of free peoples as shall bring peace and safety to all nations and make the world itself at last free.'