In [82]:
from dotenv import load_dotenv
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.schema import Document
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Google API key the Gemini clients
# below rely on — confirm against the project's .env.
load_dotenv()

True

In [83]:
# Tiny in-memory corpus: one short bio per cricketer, each paired with
# structured metadata (name, role, era, format). Every (text, metadata) pair
# below becomes one Document, in the order listed.
documents = [
    Document(page_content=bio, metadata=tags)
    for bio, tags in (
        (
            "Virat Kohli is a former captain of the Indian cricket team and one of the greatest modern batsmen. He is known for his aggressive leadership and consistency across formats.",
            {"name": "Virat Kohli", "role": "Batsman", "era": "Modern", "format": "All"},
        ),
        (
            "Sachin Tendulkar, also known as the Little Master, is a legendary Indian cricketer with 100 international centuries. He played international cricket for over 24 years.",
            {"name": "Sachin Tendulkar", "role": "Batsman", "era": "Classic", "format": "All"},
        ),
        (
            "MS Dhoni is a former Indian captain famous for his calm demeanor and finishing ability. He led India to victories in the 2007 T20 World Cup, 2011 ODI World Cup, and 2013 Champions Trophy.",
            {"name": "MS Dhoni", "role": "Wicketkeeper-Batsman", "era": "Modern", "format": "Limited Overs"},
        ),
        (
            "Rohit Sharma is the current Indian captain in limited-overs formats. He holds the record for the highest individual score in ODIs with 264 runs.",
            {"name": "Rohit Sharma", "role": "Batsman", "era": "Modern", "format": "ODI/T20"},
        ),
        (
            "Jasprit Bumrah is an Indian fast bowler known for his unorthodox action and deadly yorkers. He is a key player across all formats, especially in death overs.",
            {"name": "Jasprit Bumrah", "role": "Bowler", "era": "Modern", "format": "All"},
        ),
    )
]

In [84]:
# Embed every document with the Gemini embedding model and index the vectors
# in an in-memory FAISS store.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
vector_store = FAISS.from_documents(
    documents=documents,
    embedding=embedding_model,
)

In [101]:
# Expose the FAISS store as a retriever using maximal marginal relevance (MMR).
#   k           -> number of documents returned per query
#   lambda_mult -> relevance/diversity trade-off used by MMR (per the LangChain
#                  docs: 1 = least diverse, 0 = most diverse)
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "lambda_mult": 0.5},
)

In [102]:
# Question used for the plain MMR retrieval.
query = "Best captain of India on the basis of runs"

In [103]:
# Run the query through the MMR retriever and keep the returned documents.
results = retriever.invoke(query)

In [104]:
# Show the documents returned by the MMR retriever.
results

[Document(id='79081b8c-97b7-494b-92e5-1286dd693a15', metadata={'name': 'Virat Kohli', 'role': 'Batsman', 'era': 'Modern', 'format': 'All'}, page_content='Virat Kohli is a former captain of the Indian cricket team and one of the greatest modern batsmen. He is known for his aggressive leadership and consistency across formats.'),
 Document(id='f496d193-cb49-4a86-a873-e4dcd69b4912', metadata={'name': 'Rohit Sharma', 'role': 'Batsman', 'era': 'Modern', 'format': 'ODI/T20'}, page_content='Rohit Sharma is the current Indian captain in limited-overs formats. He holds the record for the highest individual score in ODIs with 264 runs.'),
 Document(id='efaafe60-e967-49a3-8e3e-2fdae7331e02', metadata={'name': 'Sachin Tendulkar', 'role': 'Batsman', 'era': 'Classic', 'format': 'All'}, page_content='Sachin Tendulkar, also known as the Little Master, is a legendary Indian cricketer with 100 international centuries. He played international cricket for over 24 years.')]

In [105]:
# Build the document compressor. The chat model powers LLMChainExtractor, which
# is handed to the ContextualCompressionRetriever as its `base_compressor`.
# (ContextualCompressionRetriever is imported with the other dependencies in
# the notebook's top import cell, so this cell carries no import of its own.)
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite")
compressor = LLMChainExtractor.from_llm(llm)

In [106]:
# Wrap the MMR retriever with the LLM-based compressor: documents are fetched
# by `retriever` first, then passed through `compressor` before being returned.
compressor_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [109]:
# Second question, this time sent through the compression retriever.
# NOTE: this rebinds `query` and `results`, replacing the values produced by
# the plain MMR retrieval earlier in the notebook.
query = "How to be a best captain?"

results = compressor_retriever.invoke(query)

In [110]:
# Show what the compression retriever returned for the second query.
results

[Document(metadata={'name': 'Virat Kohli', 'role': 'Batsman', 'era': 'Modern', 'format': 'All'}, page_content='Virat Kohli is a former captain of the Indian cricket team and one of the greatest modern batsmen. He is known for his aggressive leadership and consistency across formats.')]