## **Chroma**

Chroma is an AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0.


In [2]:
## sample vector db

from langchain_chroma.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [3]:
loader = TextLoader("/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt")
data = loader.load()
data

[Document(metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content="\n**Title: Embracing Challenges: Our Path to Growth**\n\nLadies and gentlemen, esteemed guests,\n\nToday, I stand before you to discuss a powerful theme that resonates with each of us: overcoming challenges.\n\nLife is a series of peaks and valleys, and it’s during the valleys that true character is revealed.\n\nEvery challenge we face is an opportunity to grow stronger, wiser, and more resilient.\n\nThink of the obstacles you’ve encountered—each one has shaped you in ways you may not fully realize.\n\nIt’s easy to feel daunted by difficulties, but remember: discomfort breeds progress.\n\nWhen we push past our limits, we discover the incredible potential within us.\n\nConsider the story of Thomas Edison, who famously said, “I have not failed; I’ve just found 10,000 ways that won’t work.”\n\nHis relentless pursuit of innovation exemplifies how setbacks can lead to success.\n\nEach s

In [6]:
text_splitter = CharacterTextSplitter(chunk_size=100,chunk_overlap=30) 
splits = text_splitter.split_documents(data)
splits

Created a chunk of size 108, which is longer than the specified 100
Created a chunk of size 121, which is longer than the specified 100


[Document(metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='**Title: Embracing Challenges: Our Path to Growth**\n\nLadies and gentlemen, esteemed guests,'),
 Document(metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Today, I stand before you to discuss a powerful theme that resonates with each of us: overcoming challenges.'),
 Document(metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Life is a series of peaks and valleys, and it’s during the valleys that true character is revealed.'),
 Document(metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Every challenge we face is an opportunity to grow stronger, wiser, and more resilient.'),
 Document(metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Think of the obstacles you’ve encountered—each one has shaped you in

In [8]:
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(documents=splits,embedding=embeddings)

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm
2025-01-21 17:21:46.712422: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737460306.725450   46044 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737460306.729288   46044 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-21 17:21:46.743777: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
vectordb

<langchain_chroma.vectorstores.Chroma at 0x7e7ee2930210>

In [11]:
### querying vector
query = "the journey of Nelson Mandela, who faced what?"
docs = vectordb.similarity_search(query)
docs[0].page_content


'Take, for example, the journey of Nelson Mandela, who faced unimaginable challenges.'

In [13]:
docs_and_scores = vectordb.similarity_search_with_score(query)
docs_and_scores

[(Document(id='5ed19fe6-7c1c-4618-9545-b48d1a8011e3', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Take, for example, the journey of Nelson Mandela, who faced unimaginable challenges.'),
  0.6133413910865784),
 (Document(id='4e7410bb-9898-4a2b-a077-3f8b381eeb66', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Despite 27 years of imprisonment, he emerged not with vengeance but with a vision for unity.'),
  1.267770528793335),
 (Document(id='e49411f4-fe77-463e-b1a0-0989e72d9711', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='His story is a testament to the strength of the human spirit.'),
  1.315242052078247),
 (Document(id='533abf84-1c85-453e-b89b-7c6aff46b6cb', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content="Every great leader has faced adversity; it's how they responded that defined their l

In [14]:
## saving to disk

vectordb = Chroma.from_documents(documents=splits,embedding=embeddings,persist_directory="/media/shubham/shubham SSD2/Generative-AI/Vector_store/chroma_db")

In [19]:
db2 = Chroma(persist_directory="/media/shubham/shubham SSD2/Generative-AI/Vector_store/chroma_db",embedding_function=embeddings)
docs = db2.similarity_search_with_score(query)
docs

[(Document(id='80fe1a4c-cdc9-4784-b6e7-01d829454119', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Take, for example, the journey of Nelson Mandela, who faced unimaginable challenges.'),
  0.6133413869201049),
 (Document(id='f75095c7-89b2-4217-963d-86da45b4df41', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Despite 27 years of imprisonment, he emerged not with vengeance but with a vision for unity.'),
  1.2677704548373365),
 (Document(id='8dc22735-5cec-43f3-91d2-af2147a3ad05', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='His story is a testament to the strength of the human spirit.'),
  1.3152419282035877),
 (Document(id='a1d16f0f-6ca6-489d-bcab-4d34e258b282', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content="Every great leader has faced adversity; it's how they responded that defined their

In [20]:
print(docs[0])

(Document(id='80fe1a4c-cdc9-4784-b6e7-01d829454119', metadata={'source': '/media/shubham/shubham SSD2/Generative-AI/Data/speech.txt'}, page_content='Take, for example, the journey of Nelson Mandela, who faced unimaginable challenges.'), 0.6133413869201049)
