In [1]:
# Helper function for printing docs


def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Each entry is rendered as ``Document <n>:`` followed by a blank line and
    the document's ``page_content``; consecutive entries are separated by a
    line of 100 dashes. Everything is emitted with a single ``print`` call.

    Args:
        docs: Iterable of objects exposing a string ``page_content`` attribute.
    """
    separator = "\n" + "-" * 100 + "\n"
    sections = []
    for position, document in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + document.page_content)
    print(separator.join(sections))

In [3]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the play from disk as LangChain documents.
documents = TextLoader("../data/input/romeo_and_juliet.txt").load()

# Split the text into overlapping chunks (chunk_size=500, chunk_overlap=100).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
texts = text_splitter.split_documents(documents)

# Embed the chunks, index them in FAISS, and expose the index as a retriever
# configured with k=20 results per query.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
retriever = FAISS.from_documents(
    texts,
    embedding,
).as_retriever(
    search_kwargs={"k": 20},
)

# Baseline run: query the plain retriever and print what comes back.
query = "How did Romeo die?"
docs = retriever.invoke(query)
pretty_print_docs(docs)

Document 1:

Of stout Mercutio, and then Tybalt fled.
But by and by comes back to Romeo,
Who had but newly entertain’d revenge,
And to’t they go like lightning; for, ere I
Could draw to part them was stout Tybalt slain;
And as he fell did Romeo turn and fly.
This is the truth, or let Benvolio die.
----------------------------------------------------------------------------------------------------
Document 2:

The form of death. Meantime I writ to Romeo
That he should hither come as this dire night
To help to take her from her borrow’d grave,
Being the time the potion’s force should cease.
But he which bore my letter, Friar John,
Was stay’d by accident; and yesternight
Return’d my letter back. Then all alone
At the prefixed hour of her waking
Came I to take her from her kindred’s vault,
Meaning to keep her closely at my cell
Till I conveniently could send to Romeo.
----------------------------------------------------------------------------------------------------
Document 3:

ROMEO.
Nu

In [24]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_compressors import LLMLinguaCompressor
from langchain_openai import ChatOpenAI
from langchain.retrievers.document_compressors import (
    CrossEncoderReranker,
    DocumentCompressorPipeline,
    EmbeddingsFilter,
    LLMChainExtractor,
)
from flashrank import Ranker
from langchain_community.document_transformers import EmbeddingsRedundantFilter
import os
import logging
from langchain_community.document_compressors import FlashrankRerank
from flashrank import Ranker

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FlashRank ranker; model files are cached under ~/.cache/flashrank
ranker = Ranker(model_name="ms-marco-MiniLM-L-12-v2", cache_dir=os.path.expanduser("~/.cache/flashrank"))
# Chat model reached through the OpenRouter endpoint; the API key is read from
# the OPENROUTER_API_KEY environment variable (os.getenv yields None if unset)
llm = ChatOpenAI(model="meta-llama/llama-3.3-70b-instruct",
                api_key=os.getenv("OPENROUTER_API_KEY"),
                openai_api_base="https://openrouter.ai/api/v1",
                temperature=0,
                streaming=False)

# LLMLingua compressor using the GPT-2 checkpoint, placed on CPU
lingua_compressor = LLMLinguaCompressor(
    model_name="openai-community/gpt2",
    device_map="cpu"
)

# Document compression pipeline. The stages below are numbered in the order
# they appear in the list.
# NOTE(review): presumably the pipeline applies them in list order — confirm.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[

        # 1. Remove redundant documents (embedding similarity threshold 0.95)
        EmbeddingsRedundantFilter(
            embeddings=embedding, similarity_threshold=0.95
        ),
        # 2. Filter documents by embedding similarity (threshold 0.6)
        EmbeddingsFilter(
            embeddings=embedding, similarity_threshold=0.6
        ),

        # 3. Rerank the remaining documents with the FlashRank ranker
        FlashrankRerank(client=ranker),

        # 4. LLM-based extraction (listed before the LLMLingua step in this cell)
        LLMChainExtractor.from_llm(llm),
        # 5. LLMLingua compression, listed last.
        #    NOTE(review): the previous numbering in this cell labelled compression
        #    as step 4 and extraction as step 5, i.e. the opposite of the list order
        #    here — confirm which order is intended.
        lingua_compressor,
    ]
)

# Wrap the base retriever so that retrieved documents pass through the pipeline
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)

# Run one query through the compression retriever and print the result
logger.info("Starting document compression and retrieval")
compressed_docs = compression_retriever.invoke("Who is Paris to Juliet?")
logger.info(f"Retrieved {len(compressed_docs)} compressed documents")

pretty_print_docs(compressed_docs)

INFO:__main__:Starting document compression and retrieval
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:Retrieved 3 compressed documents


Document 1:

Paris is Juliet's fiancé, as indicated by the line "I hear thou must, and nothing may prorogue it, On Thursday next be married to this County" which refers to Paris, although he is referred to as "this County" in this context, his name is mentioned earlier as "PARIS". 
Extracted relevant parts: 
PARIS.
...
I hear thou must, and nothing may prorogue it,
On Thursday next be married to this County.
----------------------------------------------------------------------------------------------------
Document 2:

He told me Paris should have married Juliet.
----------------------------------------------------------------------------------------------------
Document 3:

PARIS.
Do not deny to him that you love me.
JULIET.
I will confess to you that I love him.


In [28]:
# Re-use the compression retriever for a longer, multi-part question.
logger.info("Starting document compression and retrieval")
compressed_docs = compression_retriever.invoke(
    "How does the feud between the Montagues and Capulets contribute to both Romeo and Juliet’s deaths, and how could this have been avoided based on earlier events in the play?"
)
logger.info(f"Retrieved {len(compressed_docs)} compressed documents")

# Print the documents returned by the retriever.
pretty_print_docs(compressed_docs)

INFO:__main__:Starting document compression and retrieval
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:Retrieved 3 compressed documents


Document 1:

CAPULET, head of a Veronese family at feud with the Montagues. 
TYBALT, nephew to Lady Capulet.
SAMPSON, servant to Capulet.
GREGORY, servant to Capulet.
----------------------------------------------------------------------------------------------------
Document 2:

LADY CAPULET.
He is a kinsman to the Montague.
Affection makes him false, he speaks not true.
Some twenty of them fought in this black strife,
And all those twenty could but kill one life.
I beg for justice, which thou, Prince, must give;
Romeo slew Tybalt, Romeo must not live.
PRINCE.
Romeo slew him, he slew Mercutio.
Who now the price of his dear blood doth owe?
MONTAGUE.
Not Romeo, Prince, he was Mercutio’s friend;
His fault concludes but what the law should end,
The life of Tybalt.
----------------------------------------------------------------------------------------------------
Document 3:

MONTAGUE, head of a Veronese family at feud with the Capulets. 
ROMEO, son to Montague.


In [29]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_compressors import LLMLinguaCompressor
from langchain_openai import ChatOpenAI
from langchain.retrievers.document_compressors import (
   CrossEncoderReranker,
   DocumentCompressorPipeline,
   EmbeddingsFilter,
   LLMChainExtractor,
)
from flashrank import Ranker
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.document_compressors import FlashrankRerank
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os
import logging

# Logging: INFO level on the root logger, plus a logger for this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FlashRank ranker; model files are cached under ~/.cache/flashrank.
ranker = Ranker(
    model_name="ms-marco-MiniLM-L-12-v2",
    cache_dir=os.path.expanduser("~/.cache/flashrank"),
)

# Chat model reached through the OpenRouter endpoint; the API key comes from
# the OPENROUTER_API_KEY environment variable.
llm = ChatOpenAI(
    model="meta-llama/llama-3.3-70b-instruct",
    api_key=os.getenv("OPENROUTER_API_KEY"),
    openai_api_base="https://openrouter.ai/api/v1",
    temperature=0,
    streaming=False,
)

# LLMLingua compressor using the GPT-2 checkpoint, placed on CPU.
lingua_compressor = LLMLinguaCompressor(
    model_name="openai-community/gpt2",
    device_map="cpu",
)

# Compression pipeline; stages are numbered in the order they are listed.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        # 1. Remove redundant documents (embedding similarity threshold 0.95)
        EmbeddingsRedundantFilter(
            embeddings=embedding,
            similarity_threshold=0.95,
        ),
        # 2. Filter documents by embedding similarity (threshold 0.6)
        EmbeddingsFilter(
            embeddings=embedding,
            similarity_threshold=0.6,
        ),
        # 3. Rerank the remaining documents with the FlashRank ranker
        FlashrankRerank(client=ranker),
        # 4. LLMLingua compression, listed ahead of the LLM extraction step
        lingua_compressor,
        # 5. LLM-based extraction, listed last
        LLMChainExtractor.from_llm(llm),
    ]
)

# Wrap the base retriever so that retrieved documents pass through the pipeline.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=retriever,
)

# Prompt for the answering step: retrieved context first, then the question.
# The template text (including its embedded indentation) is used verbatim.
qa_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""Use the following pieces of context to answer the question at the end. 
   If you don't know the answer, just say that you don't know, don't try to make up an answer.

   {context}

   Question: {question}
   Answer:""",
)

# "stuff" chain over the compression retriever; source documents are returned
# alongside the answer so they can be inspected.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=compression_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": qa_prompt},
    return_source_documents=True,
)

logger.info("QA chain initialized successfully")

# Execute QA chain: ask one question and report the answer with its sources.
question = "Who is Paris to Juliet?"
logger.info(f"Processing question: {question}")

# The chain result is indexed by 'result' (the answer) and 'source_documents'.
result = qa_chain.invoke({"query": question})

logger.info(f"Answer: {result['result']}")
logger.info(f"Retrieved {len(result['source_documents'])} source documents")

# Print results
print(f"Question: {question}")
print(f"Answer: {result['result']}")
print(f"\nSource Documents ({len(result['source_documents'])}):")
for i, doc in enumerate(result['source_documents'], 1):
    # Preview at most 200 characters per source. The "..." marker is appended
    # only when the passage was actually cut, so short passages are no longer
    # displayed as if they had been truncated.
    print(f"{i}. {doc.page_content[:200]}{'...' if len(doc.page_content) > 200 else ''}")

# NOTE: this re-defines (and therefore shadows) the pretty_print_docs helper
# declared at the top of the notebook, using a different output layout.
def pretty_print_docs(docs):
    """Print a banner with the document count, then each document in turn.

    For every document its 1-based position, ``page_content`` and ``metadata``
    are printed, followed by a 30-dash divider.

    Args:
        docs: Sized iterable of objects exposing ``page_content`` and
            ``metadata`` attributes.
    """
    banner = "=" * 50
    divider = "-" * 30

    print("\n" + banner)
    print(f"COMPRESSED DOCUMENTS ({len(docs)})")
    print(banner)

    for position, document in enumerate(docs, start=1):
        print(f"\nDocument {position}:")
        print(f"Content: {document.page_content}")
        print(f"Metadata: {document.metadata}")
        print(divider)

INFO:__main__:QA chain initialized successfully
INFO:__main__:Processing question: Who is Paris to Juliet?
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:Answer: Paris is Juliet's fiancé, as indicated by Friar Lawrence's mention of their upcoming marriage on Thursday.
INFO:__main__:Retrieved 3 source documents


Question: Who is Paris to Juliet?
Answer: Paris is Juliet's fiancé, as indicated by Friar Lawrence's mention of their upcoming marriage on Thursday.

Source Documents (3):
1. He told me Paris should have married Juliet....
2. PARIS. should devotionJuliet early will rill ad and keep holy._] 
JULIET. 
FAR LAWRENCE.O Juliet, I already strains past compass of my witsI thou, may prorogue, Thursday next be married this County...
3. PARIS.
Poor soul, thy face is much abus’d with tears.
JULIET.
The tears have got small victory by that;
For it was bad enough before their spite.
PARIS.
Thou wrong’st it more than tears with that repo...


In [31]:

# Execute QA chain: ask one question and report the answer with its sources.
# Alternative, broader question kept for reference:
# question = "How does the feud between the Montagues and Capulets contribute to both Romeo and Juliet's deaths, and how could this have been avoided based on earlier events in the play?"
question = "How did Romeo die?"
logger.info(f"Processing question: {question}")

# The chain result is indexed by 'result' (the answer) and 'source_documents'.
result = qa_chain.invoke({"query": question})

logger.info(f"Answer: {result['result']}")
logger.info(f"Retrieved {len(result['source_documents'])} source documents")

# Print results
print(f"Question: {question}")
print(f"Answer: {result['result']}")
print(f"\nSource Documents ({len(result['source_documents'])}):")
for i, doc in enumerate(result['source_documents'], 1):
    # Preview at most 200 characters per source. The "..." marker is appended
    # only when the passage was actually cut, so short passages are no longer
    # displayed as if they had been truncated.
    print(f"{i}. {doc.page_content[:200]}{'...' if len(doc.page_content) > 200 else ''}")

# NOTE: pretty_print_docs is already defined earlier in this notebook; this
# definition replaces whichever one was bound before this cell ran.
def pretty_print_docs(docs):
    """Print a count banner followed by the content and metadata of each document.

    Every document is shown with its 1-based position, its ``page_content``
    and its ``metadata``, and is closed by a 30-dash divider.

    Args:
        docs: Sized iterable of objects exposing ``page_content`` and
            ``metadata`` attributes.
    """
    rule = "=" * 50
    print("\n" + rule)
    print(f"COMPRESSED DOCUMENTS ({len(docs)})")
    print(rule)

    number = 0
    for entry in docs:
        number += 1
        print(f"\nDocument {number}:")
        print(f"Content: {entry.page_content}")
        print(f"Metadata: {entry.metadata}")
        print("-" * 30)

INFO:__main__:Processing question: How did Romeo die?
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:Answer: I don't know. The given context only mentions that Romeo fell and turned to fly, but it does not provide information about his death.
INFO:__main__:Retrieved 1 source documents


Question: How did Romeo die?
Answer: I don't know. The given context only mentions that Romeo fell and turned to fly, but it does not provide information about his death.

Source Documents (1):
1. And as he fell did Romeo turn and fly....
