#### Chroma
chroma is a AI-native opensource vector database focus on developer productivity and happiness.
Chroma is licensed under Apache 2.0.

In [2]:
## Sample vectorDB using chroma

from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os
from dotenv import load_dotenv
load_dotenv()  ## load all the environment variables

True

In [4]:
## Creating embedding model

os.environ['GOOGLE_API_KEY']=os.getenv("GOOGLE_API_KEY")
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embedding = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
embedding

GoogleGenerativeAIEmbeddings(client=<google.genai.client.Client object at 0x00000226ABC6F250>, model='gemini-embedding-001', task_type=None, google_api_key=SecretStr('**********'), credentials=None, vertexai=None, project=None, location=None, base_url=None, additional_headers=None, client_args=None, request_options=None, output_dimensionality=None)

In [5]:
## Text Loading 

loader = TextLoader('sample.txt')
data = loader.load()
data

[Document(metadata={'source': 'sample.txt'}, page_content='1. Artificial Intelligence (AI)\nThe Concept: The broadest umbrella term for any technique that enables computers to mimic human intelligence.\nThe Approach: It doesn\'t always require "learning." Early AI used "Expert Systems" or "If-Then" logicâ€”hard-coded rules written by humans to help a machine make decisions (e.g., a chess program that follows a specific set of programmed moves).\nThe Goal: To create a system that can execute tasks that would normally require a human brain, such as reasoning, planning, or understanding language.\n2. Machine Learning (ML)\nThe Concept: A subset of AI defined by the ability to learn from data rather than following rigid instructions.\nThe Approach: Instead of a human writing code for every possibility, we provide the machine with examples (data). The machine uses algorithms (like Linear Regression, Decision Trees, or Random Forests) to find patterns. For instance, to identify a "spam" emai

In [6]:
## Splitting text document

txt_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=30)
splited_docs = txt_splitter.split_documents(data)
splited_docs

[Document(metadata={'source': 'sample.txt'}, page_content='1. Artificial Intelligence (AI)\nThe Concept: The broadest umbrella term for any technique that enables computers to mimic human intelligence.'),
 Document(metadata={'source': 'sample.txt'}, page_content='The Approach: It doesn\'t always require "learning." Early AI used "Expert Systems" or "If-Then" logicâ€”hard-coded rules written by humans to help a machine make decisions (e.g., a chess program that'),
 Document(metadata={'source': 'sample.txt'}, page_content='(e.g., a chess program that follows a specific set of programmed moves).'),
 Document(metadata={'source': 'sample.txt'}, page_content='The Goal: To create a system that can execute tasks that would normally require a human brain, such as reasoning, planning, or understanding language.\n2. Machine Learning (ML)'),
 Document(metadata={'source': 'sample.txt'}, page_content='2. Machine Learning (ML)\nThe Concept: A subset of AI defined by the ability to learn from data rat

In [7]:
## Embedding
vectordb = Chroma.from_documents(splited_docs, embedding=embedding)
vectordb


<langchain_chroma.vectorstores.Chroma at 0x226fdb4db10>

In [8]:
query = "What is Deep Learning?"
result = vectordb.similarity_search(query)
result

[Document(id='e3093b23-8f30-439c-a1f8-9d77a6c561ae', metadata={'source': 'sample.txt'}, page_content="The Approach: It mimics the structure of the human brain. Unlike standard ML, Deep Learning does Automatic Feature Extraction. You don't tell the machine what to look for; you give it 10,000 photos"),
 Document(id='519db3df-76c8-439a-b6d6-a4a28edacbc9', metadata={'source': 'sample.txt'}, page_content='3. Deep Learning (DL)\nThe Concept: A specialized subset of ML that uses Artificial Neural Networks with many layers (hence the "Deep") to solve highly complex problems.'),
 Document(id='ccc9c252-e377-470a-8134-d26a9164520a', metadata={'source': 'sample.txt'}, page_content='The Requirements: DL is data-hungry and power-hungry. It requires Big Data (millions of data points) and high-performance hardware like GPUs to function effectively.'),
 Document(id='0b0bbac4-6fdd-440d-b2de-64989203bff6', metadata={'source': 'sample.txt'}, page_content='you give it 10,000 photos of cats, and the layers

In [9]:
## Save to the disk

vectordb=Chroma.from_documents(documents=splited_docs, embedding=embedding, persist_directory="./chroma_db")

In [12]:
## Load from disk

db2 = Chroma(persist_directory="./chroma_db", embedding_function=embedding)
docs = db2.similarity_search(query)
print(docs)

[Document(id='39124922-9364-4dcd-87f0-685e9e7320a1', metadata={'source': 'sample.txt'}, page_content="The Approach: It mimics the structure of the human brain. Unlike standard ML, Deep Learning does Automatic Feature Extraction. You don't tell the machine what to look for; you give it 10,000 photos"), Document(id='08caf5fb-0b53-4aff-ba71-70f320e1e847', metadata={'source': 'sample.txt'}, page_content="The Approach: It mimics the structure of the human brain. Unlike standard ML, Deep Learning does Automatic Feature Extraction. You don't tell the machine what to look for; you give it 10,000 photos"), Document(id='f9aac3f2-8a1e-46e4-88ef-4d1f4bcc1feb', metadata={'source': 'sample.txt'}, page_content='3. Deep Learning (DL)\nThe Concept: A specialized subset of ML that uses Artificial Neural Networks with many layers (hence the "Deep") to solve highly complex problems.'), Document(id='e23f5fa3-638a-4ba0-a859-eca7b91781cc', metadata={'source': 'sample.txt'}, page_content='3. Deep Learning (DL

In [10]:
## Retriever

retriever = vectordb.as_retriever()
retriever.invoke(query)

[Document(id='39124922-9364-4dcd-87f0-685e9e7320a1', metadata={'source': 'sample.txt'}, page_content="The Approach: It mimics the structure of the human brain. Unlike standard ML, Deep Learning does Automatic Feature Extraction. You don't tell the machine what to look for; you give it 10,000 photos"),
 Document(id='08caf5fb-0b53-4aff-ba71-70f320e1e847', metadata={'source': 'sample.txt'}, page_content="The Approach: It mimics the structure of the human brain. Unlike standard ML, Deep Learning does Automatic Feature Extraction. You don't tell the machine what to look for; you give it 10,000 photos"),
 Document(id='f9aac3f2-8a1e-46e4-88ef-4d1f4bcc1feb', metadata={'source': 'sample.txt'}, page_content='3. Deep Learning (DL)\nThe Concept: A specialized subset of ML that uses Artificial Neural Networks with many layers (hence the "Deep") to solve highly complex problems.'),
 Document(id='e23f5fa3-638a-4ba0-a859-eca7b91781cc', metadata={'source': 'sample.txt'}, page_content='3. Deep Learning 