In [11]:
# 1. Imports
import os
from dotenv import load_dotenv

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.schema.document import Document
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read variables from a local .env file into the process environment, then
# keep a handle on the Google key. The constructors below are not given
# API_KEY explicitly.
load_dotenv()
API_KEY = os.environ.get("GOOGLE_API_KEY")

# Chat model shared by the document compressor and the question-answering chain.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.2,
)

# Embedding model handle.
# NOTE(review): nothing visible in this cell uses `embedding` — confirm it is needed.
embedding = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
)


# Sample corpus as (id, text) pairs; each pair becomes one Document below.
_SAMPLE_TEXTS = (
    ("1", "LangChain helps build LLM-powered apps with memory and agents."),
    ("2", "Agents in LangChain use tools to answer questions."),
)
documents = [
    Document(page_content=text, metadata={"id": doc_id})
    for doc_id, text in _SAMPLE_TEXTS
]

# Recursive character splitter configured with chunk_size=100 and chunk_overlap=20.
# NOTE(review): no statement visible in this cell uses `splitter` — confirm it is still needed.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=100)


# Pass the sample documents through an LLM-backed extractor and list what comes back.
# The extractor receives its own query string, separate from the QA question used later.
compression_query = "Summarize the key points"
compressor = LLMChainExtractor.from_llm(llm)
compressed_docs = compressor.compress_documents(documents, query=compression_query)

print("Compressed Summary:")
# One bullet per returned document; prints nothing if the extractor returned none.
for doc in compressed_docs:
    print(f"- {doc.page_content}")

# Ask the same question twice through one prompt + LLM chain: first with the
# original documents as context, then with the compressor's output as context.
qa_prompt = PromptTemplate.from_template(
    "Given the context below, answer the question:\n\n"
    "Context:\n{context}\n\n"
    "Question: {question}"
)
qa_chain = LLMChain(prompt=qa_prompt, llm=llm)

query = "What is the main idea of the document?"

# Each context is the documents' text joined with newlines, one document per line.
original_context = "\n".join(doc.page_content for doc in documents)
compressed_context = "\n".join(doc.page_content for doc in compressed_docs)

# Baseline: answer from the uncompressed documents.
response_without = qa_chain.invoke({"context": original_context, "question": query})

# Comparison: answer from the compressed documents.
response_with = qa_chain.invoke({"context": compressed_context, "question": query})


def _print_answer_report(heading, rule, response, length_label, token_label):
    """Print one answer section: heading, rule, answer text, then its size.

    The sizes reported are those of the *answer* text in ``response["text"]``:
    its character count and its token count as given by ``llm.get_num_tokens``.
    """
    print(heading)
    print(rule)
    answer = response["text"]
    print(answer)
    print(length_label, len(answer))
    print(token_label, llm.get_num_tokens(answer))


_print_answer_report(
    "\n ANSWER WITH OUT COMPRESSION",
    "==============================",
    response_without,
    "Length before compression",
    "Token count without compression",
)


_print_answer_report(
    "\n ANSWER WITH COMPRESSION",
    "============================",
    response_with,
    "Length after compression",
    "Token count after compression",
)

Compressed Summary:
- LangChain helps build LLM-powered apps with memory and agents.
- Agents in LangChain use tools to answer questions.

 ANSWER WITH OUT COMPRESSION
The main idea of the document is that LangChain is a framework for building applications powered by Large Language Models (LLMs), and it provides features like memory and agents that can use tools to answer questions.
Length before compression 217
Token count without compression 41

 ANSWER WITH COMPRESSION
The main idea of the document is that LangChain is a framework for building applications powered by Large Language Models (LLMs), and it achieves this by providing tools like memory and agents, where agents use tools to answer questions.
Length after compression 237
Token count after compression 45


Why Did the Answer Length and Token Count Change Even After Compression?
************************************************************************

Important:

LLMChainExtractor compresses the documents (the input context passed to the LLM), NOT the final answer.

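The answer may become longer even if the input context is shorter, because the LLM decides how fully to phrase its explanation independently of the context size. In this run the extractor returned both documents unchanged, so the two contexts were identical; the difference between the answers (217 vs. 237 characters, 41 vs. 45 tokens) is just normal variation between two LLM generations.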

So compression reduces the input text sent to the LLM, not necessarily the length of the output answer.