In [2]:
# Chroma DB
from langchain_chroma import Chroma

In [3]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings

In [4]:
# Load the documents
loader = TextLoader("speech.txt")
documents = loader.load()

# Split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=10)
texts = text_splitter.split_documents(documents)
texts

[Document(metadata={'source': 'speech.txt'}, page_content='THE MARVELLOUS THING IS THAT IT’S painless," he said. "That\'s how you know when it starts."'),
 Document(metadata={'source': 'speech.txt'}, page_content='"Is it really?"\n\n"Absolutely. I\'m awfully sorry about the odor though. That must bother you."'),
 Document(metadata={'source': 'speech.txt'}, page_content='"Don\'t! Please don\'t."'),
 Document(metadata={'source': 'speech.txt'}, page_content='"Look at them," he said. "Now is it sight or is it scent that brings them like that?"'),
 Document(metadata={'source': 'speech.txt'}, page_content='The cot the man lay on was in the wide shade of a mimosa tree and as he looked out past the shade onto the glare of the plain there were three of the big birds squatted obscenely, while in the sky a dozen more sailed, making quick-moving shadows as they passed.')]

In [5]:
embeddings = (
    OllamaEmbeddings(model="nomic-embed-text")
)

embeddings

  OllamaEmbeddings(model="nomic-embed-text")


OllamaEmbeddings(base_url='http://localhost:11434', model='nomic-embed-text', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

In [None]:
db = Chroma.from_documents(texts, embeddings)

In [7]:
db

<langchain_chroma.vectorstores.Chroma at 0x23749d042c0>

In [9]:
# Now we can query the database
query = "What is marvelous thing?"
results = db.similarity_search(query)
print(results[0].page_content)

THE MARVELLOUS THING IS THAT IT’S painless," he said. "That's how you know when it starts."


In [10]:
# Saving to the disk
db = Chroma.from_documents(texts, embeddings, persist_directory="chroma_db")

In [11]:
# load from disk

chromadb = Chroma(persist_directory="chroma_db", embedding_function=embeddings)

results1 = chromadb.similarity_search("What is marvelous thing?")

In [14]:
print(results1[0].page_content)

THE MARVELLOUS THING IS THAT IT’S painless," he said. "That's how you know when it starts."


In [15]:
retriever = chromadb.as_retriever()
retriever.invoke("What is marvelous thing?")[0].page_content

'THE MARVELLOUS THING IS THAT IT’S painless," he said. "That\'s how you know when it starts."'