In [3]:
!pip install -U \
  langchain \
  langchain-core \
  langchain-community \
  langchain-openai \
  faiss-cpu \
  openai \
  tiktoken


Collecting langchain
  Downloading langchain-1.2.7-py3-none-any.whl.metadata (4.9 kB)
Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-openai
  Downloading langchain_openai-1.1.7-py3-none-any.whl.metadata (2.6 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting langgraph<1.1.0,>=1.0.7 (from langchain)
  Downloading langgraph-1.0.7-py3-none-any.whl.metadata (7.4 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.1-py3-none-any.whl.metadata (4.2 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclas

In [4]:
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# LangChain 1.x moved the legacy retriever stack (including contextual
# compression) out of the `langchain` package and into `langchain_classic`,
# which is pulled in as a dependency of langchain-community. Try the new
# location first and fall back to the pre-1.0 path so the notebook imports
# cleanly on either major version.
try:
    from langchain_classic.retrievers.contextual_compression import (
        ContextualCompressionRetriever,
    )
    from langchain_classic.retrievers.document_compressors import LLMChainExtractor
except ImportError:  # langchain < 1.0 still ships these under `langchain.retrievers`
    from langchain.retrievers.contextual_compression import (
        ContextualCompressionRetriever,
    )
    from langchain.retrievers.document_compressors import LLMChainExtractor

In [None]:
from langchain_core.documents import Document


def _as_paragraph(raw_text: str) -> str:
    """Collapse every run of whitespace in ``raw_text`` into a single space.

    The passages below are written as indented triple-quoted literals for
    readability. Without this step the leading newline and the source-code
    indentation would become part of ``page_content`` and be embedded and
    sent to the LLM along with the actual text.
    """
    return " ".join(raw_text.split())


# (source id, passage text) pairs, in the order they are indexed.
_RAW_PASSAGES = [
    (
        "doc1",
        """
        Contextual compression is a technique used in retrieval-augmented
        generation systems. While many documents contain useful information,
        they often also include background explanations, historical notes,
        or marketing language. ContextualCompressionRetriever uses an LLM
        to extract only the parts of a document that are relevant to a user query.
        """,
    ),
    (
        "doc2",
        """
        FAISS is a popular vector database developed by Facebook AI Research.
        It supports efficient similarity search and clustering of dense vectors.
        In practical RAG pipelines, FAISS is often paired with retrievers.
        However, the retrieved documents may still contain unnecessary sections,
        which is where contextual compression becomes useful.
        """,
    ),
    (
        "doc3",
        """
        Traditional retrievers return full documents or chunks even if only
        a single sentence is relevant. Contextual compression solves this by
        selectively extracting relevant passages before sending them to the LLM.
        This helps reduce token usage and improve answer quality.
        """,
    ),
    (
        "doc4",
        """
        In many enterprise knowledge bases, documents are written by different
        teams and include inconsistent formatting, redundant explanations,
        and off-topic sections. ContextualCompressionRetriever helps clean
        these documents dynamically at query time.
        """,
    ),
    (
        "doc5",
        """
        Large language models can act as intelligent filters. When used as
        document compressors, they analyze retrieved text and keep only
        information that directly answers the user question.
        The rest of the content is discarded.
        """,
    ),
    (
        "doc6",
        """
        Compression retrievers are especially helpful when documents are long
        or when chunking alone is not sufficient. Instead of relying only on
        embedding similarity, an LLM evaluates semantic relevance in context.
        """,
    ),
    (
        "doc7",
        """
        Contextual compression introduces additional LLM calls, which can
        increase latency and cost. For this reason, it is typically used
        after an initial retrieval step rather than as a standalone retriever.
        """,
    ),
    (
        "doc8",
        """
        When users ask focused questions, such as asking for a definition
        or a specific mechanism, contextual compression can remove
        introductory sections and examples that are not directly relevant.
        """,
    ),
    (
        "doc9",
        """
        In contrast to reranking, which orders documents by relevance,
        contextual compression modifies document content itself by
        extracting only the most useful spans of text.
        """,
    ),
    (
        "doc10",
        """
        A common pattern in RAG systems is: retrieve broadly, then compress.
        This allows the system to maintain high recall while still providing
        concise, high-signal context to the language model.
        """,
    ),
]

# Toy corpus for the demo: one Document per passage, tagged with its source id.
documents = [
    Document(page_content=_as_paragraph(passage), metadata={"source": source_id})
    for source_id, passage in _RAW_PASSAGES
]


In [None]:
# Embed the corpus with the default OpenAIEmbeddings configuration and
# index the resulting vectors in an in-memory FAISS store.
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embedding_model,
)


In [None]:
# First-stage retriever over the FAISS index; `k` is passed through as a
# search keyword argument with the value 5.
base_retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))


In [None]:
# Build the LLM-backed document compressor. temperature=0 is passed to the
# chat model; no model name is given, so the library default is used.
llm = ChatOpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm=llm)

In [None]:
# Chain the two stages: the base retriever supplies candidate documents and
# the compressor is applied to them.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)

In [None]:
# Send one query through the compression retriever and keep the result.
# NOTE(review): this question is unrelated to the indexed passages (they are
# all about contextual compression / RAG) — presumably a deliberate check of
# how the compressor handles irrelevant context; confirm.
query = "What is photosynthesis?"
compressed_docs = compression_retriever.invoke(query)