In [1]:
## building a sample vectordb

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma

In [2]:
loader = TextLoader('info.txt')
documents = loader.load()
documents

[Document(metadata={'source': 'info.txt'}, page_content="My name is Shubham. I am a Software Engineer. I love to code and explore new technologies. Building applications that solve real-world problems gives me immense satisfaction. I enjoy working with both front-end and back-end systems. Debugging and optimizing code are challenges I genuinely look forward to. I'm always curious about how things work under the hood. Learning never stops for me — I constantly explore new programming languages, frameworks, and tools. I also like collaborating with other developers and sharing knowledge. Coding is not just a job for me; it’s something I truly enjoy doing every day.")]

In [29]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=40,chunk_overlap=5)
docs = text_splitter.split_documents(documents)
docs

[Document(metadata={'source': 'info.txt'}, page_content='My name is Shubham. I am a Software'),
 Document(metadata={'source': 'info.txt'}, page_content='Engineer. I love to code and explore'),
 Document(metadata={'source': 'info.txt'}, page_content='new technologies. Building applications'),
 Document(metadata={'source': 'info.txt'}, page_content='that solve real-world problems gives me'),
 Document(metadata={'source': 'info.txt'}, page_content='me immense satisfaction. I enjoy'),
 Document(metadata={'source': 'info.txt'}, page_content='working with both front-end and'),
 Document(metadata={'source': 'info.txt'}, page_content='and back-end systems. Debugging and'),
 Document(metadata={'source': 'info.txt'}, page_content='and optimizing code are challenges I'),
 Document(metadata={'source': 'info.txt'}, page_content="I genuinely look forward to. I'm always"),
 Document(metadata={'source': 'info.txt'}, page_content='curious about how things work under the'),
 Document(metadata={'source':

In [30]:
embeddings = OllamaEmbeddings(model="nomic-embed-text")
db = Chroma.from_documents(docs, embeddings)
db

<langchain_chroma.vectorstores.Chroma at 0x210ffda1450>

In [31]:
query = "Collaborating"

query_search = db.similarity_search_with_relevance_scores(query)
query_search

[(Document(id='8f93bc82-37cc-49f5-84c3-b6c1fe779a35', metadata={'source': 'info.txt'}, page_content='also like collaborating with'),
  0.8471572039571081),
 (Document(id='028a1bf8-7bad-4f9c-b8e1-e2d40b8adcda', metadata={'source': 'info.txt'}, page_content='also like collaborating with'),
  0.8471572039571081),
 (Document(id='bb82fa02-5169-41f6-8ef0-ee6454a5f111', metadata={'source': 'info.txt'}, page_content='collaborating with other developers and sharing'),
  0.7306219159310472),
 (Document(id='5c441bbe-9a02-4307-89cf-6b9f9a6a3fce', metadata={'source': 'info.txt'}, page_content='and tools. I also like collaborating'),
  0.7066234160952471)]

In [32]:
## Saving to the disk

vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory='./chroma_db')

In [34]:
## load from disk

db2 = Chroma(persist_directory='./chroma_db',embedding_function=embeddings)
docs = db2.similarity_search("Who am I")
docs[0].page_content

'am a Software Engineer. I'

In [36]:
## Retriever option
retriever = vectordb.as_retriever()
retriever.invoke(query)[0].page_content

'also like collaborating with'