### Vector Store Retriever

In [1]:
from langchain_core.documents import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

# Build a small in-memory corpus: two programming-language sentences and
# three animal sentences, each tagged with a numeric "id" in its metadata.
docs = [
    Document(page_content="Python is a programming language", metadata={"id": 1}),
    Document(page_content="Java is also a programming language", metadata={"id": 2}),
    Document(page_content="Cats sleeps often during the day", metadata={"id": 3}),
    Document(page_content="Dogs barks at strangers", metadata={"id": 4}),
    Document(page_content="Birds fly at the sky", metadata={"id": 5}),
]

# Embedding model used to vectorise both the documents and the query.
# NOTE(review): presumably picks up the Google API key from the environment — confirm.
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

# Embed the documents and index them in a FAISS vector store.
vector_store = FAISS.from_documents(docs, embeddings)

# Wrap the store in a retriever that returns the 2 best matches.
# With no explicit search_type the retriever should use plain similarity
# search, i.e. the same lookup as vector_store.similarity_search(query, k=2).
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

# Run the query through the retriever interface.
query = "Which animals are domestic pets?"
out_docs = retriever.invoke(query)

print("Retriever results:")

for doc in out_docs:
    print(f"ID: {doc.metadata.get('id')}, Content: {doc.page_content}")


Retriever results:
ID: 3, Content: Cats sleeps often during the day
ID: 4, Content: Dogs barks at strangers


##### Using MMR (Maximal Marginal Relevance)

In [None]:
from langchain_core.documents import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

# Same five-sentence corpus as before, written as (id, text) pairs and
# expanded into Document objects with the id stored in the metadata.
docs = [
    Document(page_content=text, metadata={"id": doc_id})
    for doc_id, text in (
        (1, "Python is a programming language"),
        (2, "Java is also a programming language"),
        (3, "Cats sleeps often during the day"),
        (4, "Dogs barks at strangers"),
        (5, "Birds fly at the sky"),
    )
]

# Embedding model shared by the documents and the query.
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

# Embed and index the corpus in FAISS.
vector_store = FAISS.from_documents(docs, embeddings)

# MMR retriever: return k=2 documents chosen from fetch_k=5 candidates.
# Per the LangChain docs, lambda_mult trades relevance against diversity
# (1 = minimum diversity, 0 = maximum diversity); 0.5 sits in the middle.
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 2, "fetch_k": 5, "lambda_mult": 0.5},
)

# Ask the question and collect the selected documents.
query = "Which animals are domestic pets?"
out_docs = retriever.invoke(query)

print("Retriever results:")
for doc in out_docs:
    print(f"ID: {doc.metadata.get('id')}, Content: {doc.page_content}")


Retriever results:
ID: 3, Content: Cats sleeps often during the day
ID: 1, Content: Python is a programming language


### BM25 Retriever

In [14]:
from langchain_core.documents import Document
from langchain_community.retrievers import BM25Retriever

# Toy corpus for keyword (BM25) search. The trailing comment on each sentence
# lists the query terms ("sky", "blue", "animal") it is meant to share with
# the query below.
# NOTE(review): the default BM25Retriever tokenizer looks like a plain
# whitespace split, so the capitalised "Blue" may not match "blue" — confirm.
docs = [
    Document(page_content=text, metadata={"id": doc_id})
    for doc_id, text in enumerate(
        [
            "The sky is blue during the day",  # sky, blue
            "At night stars light up the sky",  # sky
            "Blue whales are the largest animal on Earth",  # blue, animal
            "Birds can fly high up in the sky",  # sky
            "Deep sea creatures live in the darkness",  # no matching
        ],
        start=1,
    )
]

# Keyword-based retriever over the corpus; k=3 asks for the top three hits.
bm_25_retriever = BM25Retriever.from_documents(docs, k=3)

# Score every document against the query terms and fetch the best matches.
query = "sky blue animal"
out_docs = bm_25_retriever.invoke(query)

print("Retriever results:")
for doc in out_docs:
    print(f"ID: {doc.metadata.get('id')}, Content: {doc.page_content}")

Retriever results:
ID: 1, Content: The sky is blue during the day
ID: 3, Content: Blue whales are the largest animal on Earth
ID: 2, Content: At night stars light up the sky


### Ensemble / Hybrid Retriever

In [None]:
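# TODO: add the ensemble / hybrid retriever example here.
# 6:28:26  (NOTE(review): looks like a tutorial-video timestamp bookmark — confirm)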