### Chroma 

- Chroma is an AI-native, open-source vector database focused on developer productivity and happiness.

In [1]:
# Building a sample vectordb

from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings

In [2]:
# Load the speech transcript from disk as a list of LangChain documents.
# The path is a raw string: in a normal literal the backslashes form invalid
# escape sequences (\C, \L, \s), which Python warns about and plans to reject.
# The resulting path value is unchanged.
loader = TextLoader(r"F:\Complete Generative AI\LANGCHAIN\speech.txt")
data = loader.load()
data

[Document(metadata={'source': 'F:\\Complete Generative AI\\LANGCHAIN\\speech.txt'}, page_content='Hello , My name is  Ashis Kumar Mishra .\n\nI am born and brought up in Odisha .\n\nCurrently I am in my pre final year of my Btech from NIT Rourkela .\n\nMy habit includes coding , playing football and gaming .\n\nFlight Pattern is a one-act contemporary ballet performed in 30 minutes.\n\n[1] The music inspired the structure of the choreography, with a long and slow crescendo that transitions to a single voice. \n\nCrystal Pite, the choreographer of this piece, mimicked this structure in the creative process.\n\nShe focused first on the large scale of the crisis, then on a singular story. \n\nPite felt that an emotional connection with a single story would be more impactful to the audience than many dancers on stage.\n\nThe piece begins with 36 dancers arranged in three equal rows, standing in profile to the audience and staring at a light while rocking in packed rows.\n\nThe dancers then

In [None]:
# Split the loaded documents into smaller, overlapping chunks
# (chunk_size=100 with chunk_overlap=20 between neighbouring chunks).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
splits = text_splitter.split_documents(data)

In [6]:
# Embed the chunks with an Ollama-served model and index them in Chroma.
# No persist_directory is passed in this cell.
# NOTE(review): "gemma:2b" looks like a general text-generation model rather
# than a dedicated embedding model; the similarity results further down are
# unrelated to the query — confirm the model choice.

embedding = OllamaEmbeddings(model="gemma:2b")
vectordb = Chroma.from_documents(splits, embedding)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x1ba824ccbb0>

In [7]:
## Querying the vector store: run a similarity search and show the best hit

query = " What is the name ? "
cs = vectordb.similarity_search(query=query)

best_match = cs[0]
best_match.page_content

'She focused first on the large scale of the crisis, then on a singular story.'

In [10]:
# Persist the index to disk by passing persist_directory
# (the store has no save / save_local method).
# The path is a raw string: in a normal literal the backslashes form invalid
# escape sequences (\C, \L, \_), which Python warns about and plans to reject.
# The resulting path value is unchanged.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted collection again — confirm, and clear the directory if duplicates matter.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=r"F:\Complete Generative AI\LANGCHAIN\_1_Langchain\_6_Vector_store\_6_Vector_chroma_db",
)


In [11]:
# Re-open the persisted Chroma store from disk and repeat the same query.
# The path is a raw string: in a normal literal the backslashes form invalid
# escape sequences (\C, \L, \_), which Python warns about and plans to reject.
# The resulting path value is unchanged.
# The same embedding function is passed so queries are embedded like the stored chunks.
db2 = Chroma(
    persist_directory=r"F:\Complete Generative AI\LANGCHAIN\_1_Langchain\_6_Vector_store\_6_Vector_chroma_db",
    embedding_function=embedding,
)

docs = db2.similarity_search(query)
docs[0].page_content

'She focused first on the large scale of the crisis, then on a singular story.'

In [13]:
# Same search, but each hit is returned as a (Document, score) pair.
docs = vectordb.similarity_search_with_score(query=query)
docs


[(Document(id='db21ff7f-bbe2-484f-8b8b-46b078d6c278', metadata={'source': 'F:\\Complete Generative AI\\LANGCHAIN\\speech.txt'}, page_content='She focused first on the large scale of the crisis, then on a singular story.'),
  3793.6706360723247),
 (Document(id='79ec100c-b253-48f1-af07-feb47bc1def8', metadata={'source': 'F:\\Complete Generative AI\\LANGCHAIN\\speech.txt'}, page_content='to the audience than many dancers on stage.'),
  3895.299862736715),
 (Document(id='069ff70a-843b-4f5c-ac45-554ed906ad40', metadata={'source': 'F:\\Complete Generative AI\\LANGCHAIN\\speech.txt'}, page_content='to the audience and staring at a light while rocking in packed rows.'),
  3906.984936337337),
 (Document(id='97e2050f-19ba-4071-9420-b0c47044396d', metadata={'source': 'F:\\Complete Generative AI\\LANGCHAIN\\speech.txt'}, page_content='slow crescendo that transitions to a single voice.'),
  3909.1418448452846)]

In [None]:
# Retriever option: wrap the vector store in a retriever,
# invoke it with the query, and show the text of the first returned document.

retriever = vectordb.as_retriever()
retrieved = retriever.invoke(query)
retrieved[0].page_content

'She focused first on the large scale of the crisis, then on a singular story.'