In [None]:
!pip install langchain_chroma
!pip install langchain_huggingface
!pip install python-dotenv

Collecting langchain_chroma
  Downloading langchain_chroma-0.2.4-py3-none-any.whl.metadata (1.1 kB)
Collecting chromadb>=1.0.9 (from langchain_chroma)
  Downloading chromadb-1.0.12-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting fastapi==0.115.9 (from chromadb>=1.0.9->langchain_chroma)
  Downloading fastapi-0.115.9-py3-none-any.whl.metadata (27 kB)
Collecting posthog>=2.4.0 (from chromadb>=1.0.9->langchain_chroma)
  Downloading posthog-4.2.0-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb>=1.0.9->langchain_chroma)
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb>=1.0.9->langchain_chroma)
  Downloading opentelemetry_api-1.34.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb>=1.0.9->langchain_chroma)
  Downloading opentelemetry_exporter_otlp_pro

# Vector Store-Based Retriever

In [None]:
from langchain_chroma import Chroma
from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings
from langchain_core.documents import Document
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Hugging Face token taken from the environment; None when the variable is unset.
# NOTE(review): nothing in this cell reads hf_key — the embedding model below is
# selected by name only.
hf_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")


# Step 1: Your source documents
documents = [
    Document(page_content="LangChain helps developers build LLM applications easily."),
    Document(page_content="Chroma is a vector database optimized for LLM-based search."),
    Document(page_content="Embeddings convert text into high-dimensional vectors."),
    Document(page_content="OpenAI provides powerful embedding models."),
]

# Step 2: Initialize embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Step 3: Create Chroma vector store in memory
# persist_directory must be the None object: the string 'None' is a valid path and
# makes Chroma persist the collection to an on-disk folder literally named "None".
vector_store = Chroma(
    embedding_function=embeddings,
    collection_name='my_collection',
    persist_directory=None,
)

# Stable ids so that re-running this cell overwrites the same four entries rather
# than appending duplicates to 'my_collection'.
vector_store.add_documents(
    documents=documents,
    ids=[f"doc-{n}" for n in range(len(documents))],
)

# Step 4: Convert vectorstore into a retriever
# The keyword is `search_kwargs`; a plain `kwargs=` argument does not set k, and the
# retriever falls back to its default number of results.
retriever = vector_store.as_retriever(search_kwargs={'k': 2})

query = "What is Chroma used for?"

result = retriever.invoke(query)

# Longest content preview printed per result; longer texts are cut and marked "...".
PREVIEW_CHARS = 200

for i, doc in enumerate(result):
    text = doc.page_content
    preview = text if len(text) <= PREVIEW_CHARS else text[:PREVIEW_CHARS] + "..."
    print(f"\n--- Result {i+1} ---")
    print(f"Content:\n{preview}")


--- Result 1 ---
Content:
Chroma is a vector database optimized for LLM-based search....

--- Result 2 ---
Content:
LangChain helps developers build LLM applications easily....

--- Result 3 ---
Content:
Embeddings convert text into high-dimensional vectors....

--- Result 4 ---
Content:
OpenAI provides powerful embedding models....


# MMR (Maximal Marginal Relevance) Retriever


In [None]:
!pip install langchain-community --quiet
!pip install faiss-cpu --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m34.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings
from langchain_core.documents import Document
from dotenv import load_dotenv
import os

# Pull settings from a .env file (when one exists) into os.environ.
load_dotenv()

# Hugging Face token from the environment (None if unset).
hf_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Demo corpus: several sentences overlap on LangChain, which gives MMR
# near-duplicates to choose between.
docs = [
    Document(page_content=text)
    for text in (
        "LangChain makes it easy to work with LLMs.",
        "LangChain is used to build LLM based applications.",
        "Chroma is used to store and search document embeddings.",
        "Embeddings are vector representations of text.",
        "MMR helps you get diverse results when doing similarity search.",
        "LangChain supports Chroma, FAISS, Pinecone, and more.",
    )
]

# Sentence-transformers model used to embed both the corpus and the query.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build the FAISS index directly from the documents.
vector_store = FAISS.from_documents(documents=docs, embedding=embeddings)

# MMR search returning 3 documents; lambda_mult=0.5 is passed through to the
# search as the relevance/diversity trade-off setting.
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "lambda_mult": 0.5},
)

query = "What is Langchain?"
result = retriever.invoke(query)

# One header line plus the document text per hit.
for i, doc in enumerate(result):
    print(f"\n--- Result {i+1} ---\n{doc.page_content}")


--- Result 1 ---
LangChain supports Chroma, FAISS, Pinecone, and more.

--- Result 2 ---
LangChain is used to build LLM based applications.

--- Result 3 ---
Embeddings are vector representations of text.
