# Context Compression

In [16]:
from langchain.document_loaders import WikipediaLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_chroma import Chroma

In [17]:
embedding_function = OllamaEmbeddings(model="all-minilm:33m")
db_connection = Chroma(persist_directory="./db", embedding_function=embedding_function)


### Contextual Compression

In [18]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [19]:
llm = ChatOllama(model="llama3.2", temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

In [20]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=db_connection.as_retriever()
)

In [21]:
docs = db_connection.similarity_search("When was this declassified?")

In [22]:
docs

[]

In [23]:
compressed_docs = compression_retriever.invoke("When was this declassified?")

In [24]:
compressed_docs

[]

In [25]:
# Import necessary libraries
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS

# from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_text_splitters import CharacterTextSplitter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor


# Helper function to print documents in a readable format
def pretty_print_docs(docs):
    print(
        f"\n{'-' * 100}\n".join(
            [f"Document {i + 1}:\n\n" + d.page_content for i, d in enumerate(docs)]
        )
    )


# Step 1: Load and prepare the documents
# Replace 'your_document_file.txt' with the path to your text file
documents = TextLoader("your_document_file.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = OllamaEmbeddings(model="all-minilm:33m")
# Step 2: Create a base retriever using a vector store
retriever = FAISS.from_documents(texts, embeddings).as_retriever()

# Step 3: Initialize a document compressor
# Use an LLM to extract the most relevant information
llm = ChatOllama(model="llama3.2", temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

# Step 4: Set up a contextual compression retriever
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

# Step 5: Query the retriever
query = "What did the president say about Ketanji Brown Jackson?"
compressed_docs = compression_retriever.invoke(query)

# Step 6: Display the results
pretty_print_docs(compressed_docs)


RuntimeError: Error loading your_document_file.txt