In [None]:
pip install langchain langchain_community langchain_groq faiss-cpu

Collecting langchain_community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain_groq
  Downloading langchain_groq-0.3.4-py3-none-any.whl.metadata (2.6 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting groq<1,>=0.28.0 (from langchain_groq)
  Downloading groq-0.29.0-py3-none-any.whl.metadata (16 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecti

In [None]:
pip install sentence-transformers



In [None]:
def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Each entry is rendered as "Document <n>:" followed by a blank line and the
    document's ``page_content``; consecutive entries are separated by a line of
    100 dashes.

    Args:
        docs: iterable of objects exposing a string ``page_content`` attribute.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(divider.join(sections))

# Vanilla Vector Store Retriever

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [None]:
# Load the speech transcript as LangChain documents.
# The encoding is passed explicitly because the transcript contains non-ASCII
# punctuation (curly apostrophes, em dashes); without it the file is decoded
# with the platform's default encoding, which is not UTF-8 everywhere.
# NOTE(review): the path is Colab-specific (/content); adjust when running elsewhere.
docs = TextLoader("/content/state_of_the_union.txt", encoding="utf-8").load()
len(docs)

1

In [None]:
# Split the loaded document into overlapping chunks that will be embedded.
splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
texts = splitter.split_documents(docs)
len(texts)  # shown as the cell result

95

In [None]:
import os
from getpass import getpass

# Export the Groq API key through the environment (the ChatGroq instances in
# this notebook are constructed without an explicit key).
# On Colab the key comes from the notebook's secret store; where google.colab
# cannot be imported, fall back to an already-exported GROQ_API_KEY or prompt
# for it, so the notebook is not tied to Colab.
try:
    from google.colab import userdata
except ImportError:
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or getpass("GROQ_API_KEY: ")
else:
    GROQ_API_KEY = userdata.get("GROQ_API_KEY")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [None]:
# Embedding model handed to the FAISS index in the next cell.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# Index the chunks in FAISS, then expose the index through the retriever
# interface with its default search settings.
vector_store = FAISS.from_documents(documents=texts, embedding=embeddings)
retriever = vector_store.as_retriever()

In [None]:
# Baseline: query the plain vector-store retriever and print whatever it returns.
response = retriever.invoke(
    "What did the president say about Ketanji Brown Jackson"
)
pretty_print_docs(response)

Document 1:

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.
----------------------------------------------------------------------------------------------------
Document 2:

We cannot let this happen. 

Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
----------------------------------------------------------

In [None]:
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

In [None]:
# Groq-hosted chat model used by the question-answering chain below.
llm = ChatGroq(
    model="gemma2-9b-it",
)

In [None]:
# QA chain that answers from the documents returned by the baseline retriever.
chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

In [None]:
# Ask the baseline chain. The bare `response` on the last line displays the
# returned dict (the recorded output shows 'query' and 'result' keys).
query = "What were the top three priorities outlined in the most recent State of the Union address?"
response = chain.invoke(query)
response

{'query': 'What were the top three priorities outlined in the most recent State of the Union address?',
 'result': 'Based on the text provided, here are the top three priorities outlined in the most recent State of the Union address:\n\n1. **Ending the shutdown of schools and businesses:** This includes getting people back to work, returning to in-person work in the federal government, and keeping schools open. \n2. **Economic support for American families:** This involves passing the Paycheck Fairness Act and paid leave, raising the minimum wage to $15 an hour, extending the Child Tax Credit, increasing Pell Grants, and supporting HBCUs and community colleges.\n3. **Strengthening worker rights and protecting vulnerable populations:**  This includes passing the PRO Act to support unionization and soon strengthening the Violence Against Women Act.  \n\n\n\nLet me know if you have any other questions. \n'}

# Contextual Compression with LLMChainExtractor

In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_groq import ChatGroq

In [None]:
# Rebinds `llm` to a chat model configured the same way as the one created for
# the baseline QA chain earlier in the notebook.
llm = ChatGroq(
    model="gemma2-9b-it",
)

In [None]:
# LLM-backed compressor built from the chat model.
# NOTE(review): presumably it asks the model to keep only the query-relevant
# passages of each retrieved chunk — confirm against the LangChain docs.
compressor = LLMChainExtractor.from_llm(llm=llm)

In [None]:
# Wrap the baseline retriever so its results are handed to the extractor.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [None]:
# Run the nomination question through the extractor-backed retriever.
# The name is written in its correct order ("Ketanji Brown Jackson"), matching
# the baseline query and the transcript, so results are comparable across sections.
compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Brown Jackson"
)
pretty_print_docs(compressed_docs)

Document 1:

> And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.


In [None]:
# The same priorities question asked of the baseline chain, now sent through
# the extractor-backed retriever.
compressed_docs = compression_retriever.invoke(
    input="What were the top three priorities outlined in the most recent State of the Union address?",
)
pretty_print_docs(compressed_docs)

Document 1:

Let’s pass the Paycheck Fairness Act and paid leave.  

Raise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. 

Let’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges. 

And let’s pass the PRO Act when a majority of workers want to form a union—they shouldn’t be stopped.
----------------------------------------------------------------------------------------------------
Document 2:

So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together.  

First, beat the opioid epidemic. 

There is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery.


# Contextual Compression with LLMChainFilter

In [None]:
from langchain_groq import ChatGroq
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain.retrievers import ContextualCompressionRetriever

In [None]:
# Rebinds `llm` to a chat model with the same configuration used in the
# previous sections.
llm = ChatGroq(
    model="gemma2-9b-it",
)

In [None]:
# Named `llm_filter` rather than `filter` so the Python builtin `filter()` is
# not shadowed for the rest of the kernel session. The compressor and the
# retriever that consumes it are defined together so the rename stays consistent.
llm_filter = LLMChainFilter.from_llm(llm)

# Wrap the baseline retriever so its results are handed to the LLM filter.
compression_retriever_filter = ContextualCompressionRetriever(
    base_compressor=llm_filter, base_retriever=retriever
)

In [None]:
# Run the nomination question through the filter-backed retriever.
# The name is written in its correct order ("Ketanji Brown Jackson"), matching
# the baseline query and the transcript.
compressed_docs = compression_retriever_filter.invoke(
    "What did the president say about Ketanji Brown Jackson"
)
pretty_print_docs(compressed_docs)

Document 1:

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.


In [None]:
# Priorities question through the filter-backed retriever.
compressed_docs = compression_retriever_filter.invoke(
    input="What were the top three priorities outlined in the most recent State of the Union address?",
)
pretty_print_docs(compressed_docs)

Document 1:

Let’s pass the Paycheck Fairness Act and paid leave.  

Raise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. 

Let’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges. 

And let’s pass the PRO Act when a majority of workers want to form a union—they shouldn’t be stopped.


# Contextual Compression with EmbeddingsFilter

In [None]:
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq

In [None]:
# Rebinds `llm` with the same configuration as before.
# NOTE(review): none of the cells in this section that build or query the
# embedding filter pass `llm` anywhere — confirm whether this cell is needed.
llm = ChatGroq(
    model="gemma2-9b-it",
)

In [None]:
# Rebinds `embeddings` using the same model name that was used to build the
# FAISS index.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# Embedding-based compressor configured with a similarity cutoff of 0.35.
embedding_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.35,
)

In [None]:
# Wrap the baseline retriever so its results are handed to the embedding filter.
compression_retriever_embedding = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=embedding_filter,
)

In [None]:
# Run the nomination question through the embedding-filter retriever.
# The name is written in its correct order ("Ketanji Brown Jackson"), matching
# the baseline query and the transcript.
compressed_docs = compression_retriever_embedding.invoke(
    "What did the president say about Ketanji Brown Jackson?"
)
pretty_print_docs(compressed_docs)

Document 1:

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.


In [None]:
# Rebuild the embedding filter with a higher cutoff (0.45 instead of 0.35) and
# wrap the baseline retriever again; both names from the cells above are rebound.
embedding_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.45,
)

compression_retriever_embedding = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=embedding_filter,
)

In [None]:
# Priorities question through the embedding-filter retriever built with the
# 0.45 cutoff.
compressed_docs = compression_retriever_embedding.invoke(
    input="What were the top three priorities outlined in the most recent State of the Union address?",
)
pretty_print_docs(compressed_docs)

Document 1:

Because I see the future that is within our grasp. 

Because I know there is simply nothing beyond our capacity. 

We are the only nation on Earth that has always turned every crisis we have faced into an opportunity. 

The only nation that can be defined by a single word: possibilities. 

So on this night, in our 245th year as a nation, I have come to report on the State of the Union. 

And my report is this: the State of the Union is strong—because you, the American people, are strong.


# DocumentCompressorPipeline

In [None]:
from langchain.retrievers.document_compressors import DocumentCompressorPipeline, EmbeddingsFilter
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.text_splitter import CharacterTextSplitter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

In [None]:
# Splitter used as the first stage of the compressor pipeline below.
# NOTE(review): this rebinds `splitter`, the name that earlier held the
# chunk_size=500 / chunk_overlap=100 splitter used on the source document.
splitter = CharacterTextSplitter(
    separator=". ",
    chunk_size=300,
    chunk_overlap=0,
)

In [None]:
# Rebinds `embeddings` using the same model name as the FAISS index; it is
# passed to both filters of the pipeline below.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# Reduces redundancy among the documents, judged by their embeddings.
redundant_filter = EmbeddingsRedundantFilter(
    embeddings=embeddings,
)

In [None]:
# Embedding-based relevance filter configured with a similarity cutoff of 0.3.
relevant_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.3,
)

In [None]:
# Stages are listed in the order they are supplied: splitter, redundancy
# filter, relevance filter.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[splitter, redundant_filter, relevant_filter],
)

In [None]:
# Wrap the baseline retriever so its results are handed to the pipeline.
compression_retriever_pipeline = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=pipeline_compressor,
)

In [None]:
# Priorities question through the pipeline-backed retriever.
compressed_docs = compression_retriever_pipeline.invoke(
    input="What were the top three priorities outlined in the most recent State of the Union address?",
)
pretty_print_docs(compressed_docs)

Document 1:

So on this night, in our 245th year as a nation, I have come to report on the State of the Union. 

And my report is this: the State of the Union is strong—because you, the American people, are strong.
----------------------------------------------------------------------------------------------------
Document 2:

Third – we can end the shutdown of schools and businesses. We have the tools we need. 

It’s time for Americans to get back to work and fill our great downtowns again.  People working from home can feel safe to begin to return to the office.   

We’re doing that here in the federal government
----------------------------------------------------------------------------------------------------
Document 3:

Let’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges. 

And let’s pass the PRO Act when a majority of workers want to form a union—

In [None]:
# Chat model for the final QA chain, with an explicit sampling temperature and
# verbose mode switched on.
llm = ChatGroq(
    model="gemma2-9b-it",
    temperature=0.4,
    verbose=True,
)

In [None]:
# QA chain backed by the pipeline retriever; rebinds `chain`, which previously
# used the baseline retriever.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=compression_retriever_pipeline,
)

In [None]:
# Ask the pipeline-backed chain and print only the answer text from the
# returned dict.
query = "What were the top three priorities outlined in the most recent State of the Union address?"
response = chain.invoke(query)
print(response["result"])

Based on the text provided, here are three priorities outlined in the State of the Union address:

1. **Ending the shutdown of schools and businesses:** The speaker emphasizes the need to get Americans back to work and fill downtowns.
2. **Investing in education:**  The address mentions increasing Pell Grants, supporting HBCUs, and investing in community colleges.
3. **Supporting workers' rights:**  The speaker calls for passing the PRO Act to allow workers to form unions more easily. 


Let me know if you have any other questions. 

