#### Chroma
Chroma is an AI-native, open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0.

https://python.langchain.com/v0.2/docs/integrations/vectorstores/

In [2]:
## building a sample vectordb
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [5]:
# Load the doctor records from MongoDB as LangChain documents.
import os

# nest_asyncio is applied so the loader can run inside the notebook's
# already-running event loop.
import nest_asyncio

nest_asyncio.apply()

from langchain_community.document_loaders.mongodb import MongodbLoader

# SECURITY: never commit database credentials in a notebook. The connection
# string (including user and password) is read from the environment instead.
# Any credential that was previously stored in this cell should be rotated.
MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection "
        "string before running this cell."
    )

loader = MongodbLoader(
    connection_string=MONGODB_URI,
    db_name="meditouch",
    collection_name="doctors",
    # Only these fields are pulled into each document's page_content.
    field_names=["name", "email", "phone", "speciality", "all_timeslots"],
)

docs = loader.load()

# NOTE(review): this prints real contact details (name, e-mail, phone);
# clear the cell output before sharing the notebook.
print(docs[0])

page_content=' Dr. Arnob connect.arnob@gmail.com 01738439423 Medicine [{'date': '2024-12-10', 'intervals': [{'start': '08:00', 'end': '10:00', '_id': ObjectId('6755f64a492b8447dbc94edf')}, {'start': '07:00', 'end': '11:00', '_id': ObjectId('6755f64a492b8447dbc94ee0')}, {'start': '04:00', 'end': '12:00', '_id': ObjectId('6755f64a492b8447dbc94ee1')}], 'timePerInterval': 60, '_id': ObjectId('6755f82511985b3589e5aaa6')}, {'date': '2024-15-10', 'intervals': [{'start': '09:00', 'end': '10:00', '_id': ObjectId('6755f89211985b3589e5aab2')}, {'start': '11:00', 'end': '12:00', '_id': ObjectId('6755f89211985b3589e5aab4')}], 'timePerInterval': 15, '_id': ObjectId('6755f89211985b3589e5aab1')}, {'date': '2024-19-10', 'intervals': [{'start': '09:00', 'end': '10:00', '_id': ObjectId('6755f89711985b3589e5aac1')}, {'start': '10:00', 'end': '11:00', '_id': ObjectId('6755f89711985b3589e5aac2')}, {'start': '11:00', 'end': '12:00', '_id': ObjectId('6755f89711985b3589e5aac3')}], 'timePerInterval': 15, '_id':

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# The splitter is used with its library defaults; an earlier draft passed
# chunk_size=10000 and chunk_overlap=20 explicitly. The records are short
# JSON-like strings, so the intent is to keep each one as a single chunk.
# NOTE(review): a default-configured splitter presumably still splits very
# long documents -- confirm the default chunk size is large enough.
text_splitter = RecursiveCharacterTextSplitter()
output = text_splitter.split_documents(documents=docs)
output

[Document(metadata={'database': 'meditouch', 'collection': 'doctors'}, page_content="Dr. Arnob connect.arnob@gmail.com 01738439423 Medicine [{'date': '2024-12-10', 'intervals': [{'start': '08:00', 'end': '10:00', '_id': ObjectId('6755f64a492b8447dbc94edf')}, {'start': '07:00', 'end': '11:00', '_id': ObjectId('6755f64a492b8447dbc94ee0')}, {'start': '04:00', 'end': '12:00', '_id': ObjectId('6755f64a492b8447dbc94ee1')}], 'timePerInterval': 60, '_id': ObjectId('6755f82511985b3589e5aaa6')}, {'date': '2024-15-10', 'intervals': [{'start': '09:00', 'end': '10:00', '_id': ObjectId('6755f89211985b3589e5aab2')}, {'start': '11:00', 'end': '12:00', '_id': ObjectId('6755f89211985b3589e5aab4')}], 'timePerInterval': 15, '_id': ObjectId('6755f89211985b3589e5aab1')}, {'date': '2024-19-10', 'intervals': [{'start': '09:00', 'end': '10:00', '_id': ObjectId('6755f89711985b3589e5aac1')}, {'start': '10:00', 'end': '11:00', '_id': ObjectId('6755f89711985b3589e5aac2')}, {'start': '11:00', 'end': '12:00', '_id':

In [8]:
# Embed the split documents with the Ollama "mistral" model and index them in
# a Chroma vector store (no persist_directory is passed at this point).
embedding = OllamaEmbeddings(model="mistral")
vectordb = Chroma.from_documents(
    documents=output,
    embedding=embedding,
)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x25efd6d78e0>

In [14]:
## query it
# Run a similarity search for the query string and show the text of the
# first hit. Note that this rebinds `docs`, which previously held the
# documents returned by the MongoDB loader.
query = "all data"
docs = vectordb.similarity_search(query=query)
docs[0].page_content

'Dr.  kona connect.arnob+2@gmail.com 01738439423 Medicine []'

In [11]:
## Saving to the disk
# Persist the full set of split documents (`output`), not `docs`: the query
# cell rebinds `docs` to its similarity-search hits, so on a top-to-bottom run
# persisting `docs` would store only those few hits instead of the whole corpus.
# NOTE(review): re-running this cell against an existing ./chroma_db presumably
# adds the same documents again -- confirm, and clear the directory if needed.
vectordb = Chroma.from_documents(
    documents=output,
    embedding=embedding,
    persist_directory="./chroma_db",
)


In [15]:
# load from disk
# Re-open the store saved under ./chroma_db with the same embedding function
# and run the same query against it.
db2 = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embedding,
)
docs = db2.similarity_search(query=query)
print(docs[0].page_content)

Dr.  kona connect.arnob+2@gmail.com 01738439423 Medicine []


In [16]:
## Similarity search with score
# Same query as before, but each hit comes back as a (Document, score) pair,
# as the saved output of this cell shows. `docs` is rebound to that list.
docs = vectordb.similarity_search_with_score(query=query)
docs

[(Document(metadata={'collection': 'doctors', 'database': 'meditouch'}, page_content='Dr.  kona connect.arnob+2@gmail.com 01738439423 Medicine []'),
  200273.80618257725),
 (Document(metadata={'collection': 'doctors', 'database': 'meditouch'}, page_content='Dr.  masud connect.arnob1@gmail.com 01738439423 Medicine []'),
  208514.4339877967),
 (Document(metadata={'collection': 'doctors', 'database': 'meditouch'}, page_content="Dr. Arnob connect.arnob@gmail.com 01738439423 Medicine [{'date': '2024-12-10', 'intervals': [{'start': '08:00', 'end': '10:00', '_id': ObjectId('6755f64a492b8447dbc94edf')}, {'start': '07:00', 'end': '11:00', '_id': ObjectId('6755f64a492b8447dbc94ee0')}, {'start': '04:00', 'end': '12:00', '_id': ObjectId('6755f64a492b8447dbc94ee1')}], 'timePerInterval': 60, '_id': ObjectId('6755f82511985b3589e5aaa6')}, {'date': '2024-15-10', 'intervals': [{'start': '09:00', 'end': '10:00', '_id': ObjectId('6755f89211985b3589e5aab2')}, {'start': '11:00', 'end': '12:00', '_id': Objec

In [12]:
### Retriever option
# Wrap the vector store in a retriever and show the text of the first document
# it returns for the query.
# NOTE(review): the saved output of this cell does not look like doctor data;
# it appears to come from an earlier run -- re-execute before sharing.
retriever = vectordb.as_retriever()
retriever.invoke(query)[0].page_content

'To such a task we can dedicate our lives and our fortunes, everything that we are and everything that we have, with the pride of those who know that the day has come when America is privileged to spend her blood and her might for the principles that gave her birth and happiness and the peace which she has treasured. God helping her, she can do no other.'