### Chroma VectorDB

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the documents
text_loader = TextLoader('../datasets/speech.txt')
text_documents = text_loader.load()

# Split the documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
documents = text_splitter.split_documents(text_documents)
documents

[Document(metadata={'source': '../datasets/speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='indemnities for ourselves, no material compensation for the sacrifices we shall freely make. We are'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='but one of the champions of the rights of mankind. We shall be satisfied when those rights have'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='been made as secure as the faith and the freedom of nations can make them.'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='Just because we fight without rancor and without selfish object, seeking nothing for our

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
embeddings

  from .autonotebook import tqdm as notebook_tqdm


HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [3]:
from langchain_chroma import Chroma

chromadb = Chroma.from_documents(documents, embeddings)
chromadb

<langchain_chroma.vectorstores.Chroma at 0x7b14be9ec7a0>

In [4]:
query = "How does the speaker describe the desired outcome of the war?"
results = chromadb.similarity_search(query)
results 

[Document(metadata={'source': '../datasets/speech.txt'}, page_content='fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='all wars, civilization itself seeming to be in the balance. But the right is more precious than'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='principles of right and of fair play we profess to be fighting for.')]

In [5]:
results[0].page_content

'fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of'

In [6]:
# Query
query = "What does the speaker believe is the main reason the United States should enter the war?"

query_results = chromadb.similarity_search(query)
query_results

[Document(metadata={'source': '../datasets/speech.txt'}, page_content='fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='principles of right and of fair play we profess to be fighting for.'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='all wars, civilization itself seeming to be in the balance. But the right is more precious than')]

### ChromaDB as a Retriever

In [8]:
retriever = chromadb.as_retriever()
query_docs = retriever.invoke(query)
query_docs[0].page_content

'fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of'

### Based on Score

In [9]:
score_docs = chromadb.similarity_search_with_score(query=query)
score_docs

[(Document(metadata={'source': '../datasets/speech.txt'}, page_content='fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of'),
  1.1479679346084595),
 (Document(metadata={'source': '../datasets/speech.txt'}, page_content='political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
  1.1490247249603271),
 (Document(metadata={'source': '../datasets/speech.txt'}, page_content='principles of right and of fair play we profess to be fighting for.'),
  1.318261742591858),
 (Document(metadata={'source': '../datasets/speech.txt'}, page_content='all wars, civilization itself seeming to be in the balance. But the right is more precious than'),
  1.336186408996582)]

### Save and Load

In [11]:
# Save 
Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory="./chroma_database")


<langchain_chroma.vectorstores.Chroma at 0x7b14bc7ad670>

In [12]:
# Load
newdb = Chroma(persist_directory="./chroma_database", embedding_function=embeddings)
newdb

<langchain_chroma.vectorstores.Chroma at 0x7b14bc7adbb0>

In [13]:
query = "What does the speaker believe is the main reason the United States should enter the war?"

query_results = newdb.similarity_search(query)
query_results

[Document(metadata={'source': '../datasets/speech.txt'}, page_content='fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={'source': '../datasets/speech.txt'}, page_content='political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no')]

In [14]:
results[0].page_content

'fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of'