ChromaDB


In [1]:
## building sample vector db 
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter 


In [8]:
loader = TextLoader(r"D:\LangChain Project\VectorStore\speech.txt")
data = loader.load()
data

[Document(metadata={'source': 'D:\\LangChain Project\\VectorStore\\speech.txt'}, page_content="Welcome to Bharti Local Market.\nHow can I help you today?\n\nPlease wait while I connect you to an expert.\nThank you for choosing our service.\n\nYour request has been received.\nWe will get back to you shortly.\n\nHave a great day!\n\nSorry, I didn't understand that.\nCould you please repeat?\n\nYour service is scheduled for tomorrow.\nThe technician is on the way.\n\nPress one for support.\nPress two for billing.\n\nPlease confirm your address.\nWe appreciate your feedback.\n\nGoodbye and take care!\n\nYour payment was successful.\nThe invoice has been sent to your email.\n\nDo you want to reschedule your service?\nLet me check that for you.\n\nYour service has been rescheduled.\nThank you for your patience.\n\nDo you want to reschedule your service?\nLet me check that for you.\n\nYour service has been rescheduled.\nThank you for your patience.\n\nPress five to talk to a customer service 

In [9]:
#split
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap=0)
splits = text_splitter.split_documents(data)

In [13]:
# Specify a valid model name or pull the required model
embedding = OllamaEmbeddings(model="gemma:2b")  # Ensure "llama2" is pulled or replace with a valid model name
vectordb = Chroma.from_documents(splits, embedding=embedding)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x2dadb31df70>

In [16]:
## qurey it 
query = "Do you want to reschedule your service?"
docs=vectordb.similarity_search(query)
docs[0]

Document(id='44d46940-9f1b-4da7-b61a-766b6b06fce2', metadata={'source': 'D:\\LangChain Project\\VectorStore\\speech.txt'}, page_content="Is there anything else I can assist you with?\nThank you for choosing Bharti Local Market.\n\nWe're experiencing a high volume of requests, please bear with us for a moment.\nWe are currently processing your request.\nPlease hold while we retrieve your information.\n\nWe’ve sent you a confirmation email with your service details.\nYou will receive a reminder message 24 hours before your service appointment.")

In [22]:
## saving to the disk 

vectordb = Chroma.from_documents(documents=splits, embedding=embedding, persist_directory="D:\\LangChain Project\\VectorStore\\speech_vectorstore")

In [30]:
## load from disk

db2 = Chroma(persist_directory="d:\\LangChain Project\\VectorStore\\Chroma", embedding_function=embedding)
docs = db2.similarity_search(query)
docs



[]

In [None]:
## similarity score
## based on manhatten distance we get the score
docs = vectordb.similarity_search_with_score(query)
docs

[(Document(id='dddd2b77-e625-49e7-8ffa-cb1d73878a52', metadata={'source': 'D:\\LangChain Project\\VectorStore\\speech.txt'}, page_content="Is there anything else I can assist you with?\nThank you for choosing Bharti Local Market.\n\nWe're experiencing a high volume of requests, please bear with us for a moment.\nWe are currently processing your request.\nPlease hold while we retrieve your information.\n\nWe’ve sent you a confirmation email with your service details.\nYou will receive a reminder message 24 hours before your service appointment."),
  2282.3773266976023),
 (Document(id='9339fe84-d0d1-4937-94c9-d7c9db4cd7f5', metadata={'source': 'D:\\LangChain Project\\VectorStore\\speech.txt'}, page_content='Your payment was successful.\nThe invoice has been sent to your email.\n\nDo you want to reschedule your service?\nLet me check that for you.\n\nYour service has been rescheduled.\nThank you for your patience.\n\nDo you want to reschedule your service?\nLet me check that for you.\n\nY

In [35]:
## retriver option
retriever = vectordb.as_retriever()
retriever.invoke(query)[0]

Document(id='dddd2b77-e625-49e7-8ffa-cb1d73878a52', metadata={'source': 'D:\\LangChain Project\\VectorStore\\speech.txt'}, page_content="Is there anything else I can assist you with?\nThank you for choosing Bharti Local Market.\n\nWe're experiencing a high volume of requests, please bear with us for a moment.\nWe are currently processing your request.\nPlease hold while we retrieve your information.\n\nWe’ve sent you a confirmation email with your service details.\nYou will receive a reminder message 24 hours before your service appointment.")