## Chroma
Chroma is a AI-native open source vector database focused on developer productivity and happiness. Chroma is licensed under Apache2.0

In [2]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
loader = TextLoader("./../4-Embedding/Speech1.txt")
documents = loader.load()
documents

[Document(metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='Mumbai – The City of Dreams\n\nGood morning everyone,\n\nToday, I want to talk about Mumbai, a city that never sleeps and is often called the "City of Dreams." Located on the western coast of India, Mumbai is not just a city; it’s an emotion for millions of people.\n\nMumbai is the financial capital of India, home to the Bombay Stock Exchange, Reserve Bank of India, and many multinational corporations. It is also the heart of India’s film industry, Bollywood, which has gained global recognition.\n\nThis vibrant city is known for its rich history, iconic landmarks like the Gateway of India, Marine Drive, and Chhatrapati Shivaji Maharaj Terminus, a UNESCO World Heritage Site. The bustling streets, local trains, and the delicious street food reflect the spirit of the city – fast-paced yet welcoming.\n\nMumbai is a melting pot of cultures, with people from all over the country living here. It celebrates diversity 

In [10]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
docs = text_splitter.split_documents(documents)

In [11]:
embeddings = OllamaEmbeddings(model="gemma:2b")
db = Chroma.from_documents(docs,embeddings)

In [12]:
query = "Which city is 'City of Dreams'?"
results = db.similarity_search(query)
results

[Document(id='5c641a29-3a99-4043-a64e-2c41135c8a3e', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='which has gained global recognition.'),
 Document(id='18b457ec-b50c-4556-a8bd-d83908012432', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='here. It celebrates diversity through festivals like Ganesh Chaturthi, Eid, Christmas, and more.'),
 Document(id='508dbbcd-0fef-43ee-9992-9047b3b7a325', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='industry, Bollywood, which has gained global recognition.'),
 Document(id='9f50f472-bcf5-4328-a7c0-a8683276dd85', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='many. As the saying goes, “If you can survive in Mumbai, you can survive anywhere in the world.”')]

In [13]:
## saving the db to disk
vectordb = Chroma.from_documents(documents=docs,embedding=embeddings,persist_directory="./chroma_db")

In [14]:
load_vectordb = Chroma(persist_directory="./chroma_db",embedding_function=embeddings)

In [17]:
docs = load_vectordb.similarity_search(query)
docs

[Document(id='a10e560b-533c-4bf9-a44d-7bd7525c5a1b', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='here. It celebrates diversity through festivals like Ganesh Chaturthi, Eid, Christmas, and more.'),
 Document(id='d40f985f-c48a-4062-8e5c-fae17128f440', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='industry, Bollywood, which has gained global recognition.'),
 Document(id='83413fa9-6b1f-4ea8-8a52-87203e72f34f', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='many. As the saying goes, “If you can survive in Mumbai, you can survive anywhere in the world.”'),
 Document(id='4608207b-bc7d-4583-8910-37ba7d04bfb8', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content="Mumbai's people is truly remarkable.")]

In [18]:
docs = load_vectordb.similarity_search_with_score(query)
docs

[(Document(id='a10e560b-533c-4bf9-a44d-7bd7525c5a1b', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='here. It celebrates diversity through festivals like Ganesh Chaturthi, Eid, Christmas, and more.'),
  3386.170577692207),
 (Document(id='d40f985f-c48a-4062-8e5c-fae17128f440', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='industry, Bollywood, which has gained global recognition.'),
  3390.410173522467),
 (Document(id='83413fa9-6b1f-4ea8-8a52-87203e72f34f', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='many. As the saying goes, “If you can survive in Mumbai, you can survive anywhere in the world.”'),
  3416.641086481131),
 (Document(id='4608207b-bc7d-4583-8910-37ba7d04bfb8', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content="Mumbai's people is truly remarkable."),
  3999.363238453671)]

## As a Retriver

In [21]:
retriver = load_vectordb.as_retriever()
retriver.invoke(query)

[Document(id='a10e560b-533c-4bf9-a44d-7bd7525c5a1b', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='here. It celebrates diversity through festivals like Ganesh Chaturthi, Eid, Christmas, and more.'),
 Document(id='d40f985f-c48a-4062-8e5c-fae17128f440', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='industry, Bollywood, which has gained global recognition.'),
 Document(id='83413fa9-6b1f-4ea8-8a52-87203e72f34f', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content='many. As the saying goes, “If you can survive in Mumbai, you can survive anywhere in the world.”'),
 Document(id='4608207b-bc7d-4583-8910-37ba7d04bfb8', metadata={'source': './../4-Embedding/Speech1.txt'}, page_content="Mumbai's people is truly remarkable.")]