In [1]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

## Chroma

In [2]:
loader = TextLoader("../Data/test.txt")
data = loader.load()

#### Split Document

In [3]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
splits = text_splitter.split_documents(data)

#### Setup Embeddings

In [4]:
#embeddings = OllamaEmbeddings(model="nomic-embed-text:latest")
embeddings = OllamaEmbeddings(model="llama3.2:latest")
vectordb = Chroma.from_documents(documents=splits,embedding=embeddings, persist_directory="../Embeddings/test_text_chroma_db")

#### Query Vector Store

In [5]:
#query = "What does the speaker believe is the main reason the United States should enter the war?"
#query = "find all sentences with text containing the name of a person"
query = "Find sentences with words similar to 'shining' and 'brightly'."
docs = vectordb.similarity_search(query)
docs[0].page_content

'This is a test sentence.\nAnother sentence to test the model.\nSome more words: cat, dog, house, car.\nA short paragraph:\nThe sun was shining brightly in the clear blue sky. The birds were singing their sweet melodies. It was a\nbeautiful day outside.\n\nA sentence with punctuation: Hello, world! How are you?\nA quote: "Be the change you wish to see in the world." - Mahatma Gandhi\n\nSome numbers: 1, 2, 3, 4, 5\nA list of words:\napple, banana, cherry, date, elderberry'

In [6]:
db2  = Chroma(persist_directory="../Embeddings/test_text_chroma_db", embedding_function=embeddings)
docs=db2.similarity_search_with_score(query)

docs

[(Document(metadata={'source': '../Data/test.txt'}, page_content='This is a test sentence.\nAnother sentence to test the model.\nSome more words: cat, dog, house, car.\nA short paragraph:\nThe sun was shining brightly in the clear blue sky. The birds were singing their sweet melodies. It was a\nbeautiful day outside.\n\nA sentence with punctuation: Hello, world! How are you?\nA quote: "Be the change you wish to see in the world." - Mahatma Gandhi\n\nSome numbers: 1, 2, 3, 4, 5\nA list of words:\napple, banana, cherry, date, elderberry'),
  10105.213544844262),
 (Document(metadata={'source': '../Data/test.txt'}, page_content='This is a test sentence.\nAnother sentence to test the model.\nSome more words: cat, dog, house, car.\nA short paragraph:\nThe sun was shining brightly in the clear blue sky. The birds were singing their sweet melodies. It was a\nbeautiful day outside.\n\nA sentence with punctuation: Hello, world! How are you?\nA quote: "Be the change you wish to see in the world."

In [8]:
retriever = vectordb.as_retriever()
retriever.invoke(query)[0].page_content

'This is a test sentence.\nAnother sentence to test the model.\nSome more words: cat, dog, house, car.\nA short paragraph:\nThe sun was shining brightly in the clear blue sky. The birds were singing their sweet melodies. It was a\nbeautiful day outside.\n\nA sentence with punctuation: Hello, world! How are you?\nA quote: "Be the change you wish to see in the world." - Mahatma Gandhi\n\nSome numbers: 1, 2, 3, 4, 5\nA list of words:\napple, banana, cherry, date, elderberry'