In [None]:
# In this notebook we build a RAG application using LangChain, Gemini and FAISS (which is the vector DB)


In [2]:
# Installing necessary dependecies
!pip install langchain langchain-community langchain-google-genai google-generativeai faiss-cpu



In [3]:
# Never hard-code the API key in the notebook: read it from the environment and
# fall back to an interactive, non-echoing prompt when the variable is not set.
import os
from getpass import getpass

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Enter your Google API key: ")

In [4]:
# Export the key as the GOOGLE_API_KEY environment variable of this process
import os

os.environ.update({"GOOGLE_API_KEY": GOOGLE_API_KEY})
 

In [5]:
# The three stages of RAG:
# 1. Data Ingestion
# 2. Data Retrieval
# 3. Data Generation

In [6]:
# Data Ingestion
import requests
from bs4 import BeautifulSoup
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS


In [7]:
# Source document: the Wikipedia article on the State of the Union.
# The second literal is the URL fragment (everything from '#' onwards).
url = (
    "https://en.wikipedia.org/wiki/State_of_the_Union"
    "#:~:text=Though%20the%20language%20of%20the,as%20late%20as%20March%207"
)

In [8]:
# Download the page. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() turns an HTTP error (4xx/5xx) into an exception
# here instead of letting an error page be parsed and indexed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the raw HTML so it can be converted to plain readable text
soup = BeautifulSoup(response.content, 'html.parser')

In [9]:
# Drop <script> and <style> nodes from the parsed tree
for tag in soup(["script", "style"]):
    tag.extract()

# Flatten what is left of the markup into one text string
text = soup.get_text()

# Whitespace clean-up: trim every line, break lines on double spaces,
# trim the pieces, discard the empty ones and join the rest with single spaces
lines = map(str.strip, text.splitlines())
chunks = (piece.strip() for stripped_line in lines for piece in stripped_line.split("  "))
clean_text = ' '.join(filter(None, chunks))

print("First 500 characters of extracted text:")
print(clean_text[:500])

First 500 characters of extracted text:
State of the Union - Wikipedia Jump to content Main menu Main menu move to sidebar hide Navigation Main pageContentsCurrent eventsRandom articleAbout WikipediaContact us Contribute HelpLearn to editCommunity portalRecent changesUpload fileSpecial pages Search Search Appearance Donate Create account Log in Personal tools Donate Create account Log in Pages for logged out editors learn more ContributionsTalk Contents move to sidebar hide (Top) 1 Formality 2 History 3 Delivery of the speech Toggle D


In [10]:
# Now we will store our response text as the raw data

In [11]:
# Persist the cleaned text to a file on disk
with open("state_of_the_union.txt", mode="w", encoding="utf-8") as out_file:
    out_file.write(clean_text)

In [12]:
# Data ingestion starts here: read the saved text file back through TextLoader.
# The loaded documents are split into chunks and stored in the vector DB further down.
loader = TextLoader('state_of_the_union.txt', encoding="utf8")
documents = loader.load()

In [13]:
# Chunking of the Data
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [14]:
# LangChain offers several text splitters; RecursiveCharacterTextSplitter is the one used here.
# chunk_size is the upper bound on the length of one chunk and chunk_overlap is how much of
# the end of a chunk is repeated at the start of the next one.
# NOTE(review): with the default length function these sizes are counted in characters,
# not tokens (the first chunk printed further down is just under 500 characters) - confirm
# against the LangChain docs if a token budget is what is actually wanted.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)


In [15]:
# Split the loaded documents into overlapping chunks.
text_chunks = text_splitter.split_documents(documents)

# Show only a count and a short preview: printing the whole list floods the
# notebook with every chunk of the article and bloats the saved file.
print(f"Created {len(text_chunks)} chunks; showing the first 2:")
print(text_chunks[:2])

[Document(metadata={'source': 'state_of_the_union.txt'}, page_content='State of the Union - Wikipedia Jump to content Main menu Main menu move to sidebar hide Navigation Main pageContentsCurrent eventsRandom articleAbout WikipediaContact us Contribute HelpLearn to editCommunity portalRecent changesUpload fileSpecial pages Search Search Appearance Donate Create account Log in Personal tools Donate Create account Log in Pages for logged out editors learn more ContributionsTalk Contents move to sidebar hide (Top) 1 Formality 2 History 3 Delivery of the speech Toggle'), Document(metadata={'source': 'state_of_the_union.txt'}, page_content='2 History 3 Delivery of the speech Toggle Delivery of the speech subsection 3.1 Invitations 3.2 Protocol of entry into the House chamber 3.3 Designated survivor and other logistics 3.4 Content of the speech 4 Opposition response 5 Significance 6 Local versions 7 Historic speeches 8 TV ratings 9 See also 10 References 11 External links Toggle the table of 

In [16]:
# text_chunks is a plain list of Document objects; show the text of the first one
first_chunk = text_chunks[0]
print(first_chunk.page_content)

State of the Union - Wikipedia Jump to content Main menu Main menu move to sidebar hide Navigation Main pageContentsCurrent eventsRandom articleAbout WikipediaContact us Contribute HelpLearn to editCommunity portalRecent changesUpload fileSpecial pages Search Search Appearance Donate Create account Log in Personal tools Donate Create account Log in Pages for logged out editors learn more ContributionsTalk Contents move to sidebar hide (Top) 1 Formality 2 History 3 Delivery of the speech Toggle


In [17]:
# We now have the data split into chunks. To store them in the vector DB we first need to embed them

In [18]:
from langchain.vectorstores import FAISS


In [23]:
# Install the required packages
!pip install sentence-transformers
!pip install langchain-huggingface

# Use the updated import (this is the correct way now)
from langchain_huggingface import HuggingFaceEmbeddings

# Create local embeddings for vector store
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

print("Local embeddings created successfully!")
print(embeddings)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting langchain-huggingface
  Downloading langchain_huggingface-0.3.0-py3-none-any.whl.metadata (996 bytes)
Downloading langchain_huggingface-0.3.0-py3-none-any.whl (27 kB)
Installing collected packages: langchain-huggingface
Successfully installed langchain-huggingface-0.3.0
Local embeddings created successfully!
model_name='sentence-transformers/all-MiniLM-L6-v2' cache_folder=None model_kwargs={} encode_kwargs={} query_encode_kwargs={} multi_process=False show_progress=False


In [21]:
!pip install faiss-cpu



In [24]:
# Build the FAISS vector store (an in-memory database) from the chunks,
# using the embeddings object to turn each chunk into a vector
vectorstore = FAISS.from_documents(documents=text_chunks, embedding=embeddings)
print(vectorstore)

<langchain_community.vectorstores.faiss.FAISS object at 0x746c1207ba90>


In [25]:
# Expose the vector store as a retriever; it supplies the "context" part of the RAG chain built below.
# Presumably, given a query, it returns the stored chunks most similar to that query,
# which are then handed to the LLM - confirm the defaults against the LangChain docs.
retriever = vectorstore.as_retriever()

In [29]:
from langchain.prompts import ChatPromptTemplate

In [30]:
# Prompt text for the RAG chain; the {question} and {context} placeholders
# are filled in when the prompt is formatted.
template = (
    "You are an assistant for question-answering tasks.\n"
    "Use the following pieces of retrieved context to answer the question.\n"
    "If you don't know the answer, just say that you don't know.\n"
    "Use ten sentences maximum and keep the answer concise.\n"
    "Question: {question}\n"
    "Context: {context}\n"
    "Answer:\n"
)

In [33]:
# Now we will Import GenAi Chat Model
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate

In [34]:
# Turn the template string into a chat prompt object
prompt = ChatPromptTemplate.from_template(template=template)

In [40]:
# Gemini is the LLM of this pipeline; its settings are collected in one place
gemini_settings = {
    "model": "gemini-2.0-flash",
    "google_api_key": GOOGLE_API_KEY,
    "temperature": 0.1,
}
llm_model = ChatGoogleGenerativeAI(**gemini_settings)

In [41]:
# Output parser: the last stage of the RAG chain assembled below.
# Presumably it converts the model's reply into a plain string - confirm against the LangChain docs.
output_parser = StrOutputParser()

In [42]:
# LangChain Expression Language (LCEL) pipeline: the output of each stage feeds the next.
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot receives the raw list of
    Document objects, so their Python repr (metadata included) is what gets
    sent to the model instead of clean text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RunnablePassthrough forwards the query given at run time unchanged as "question";
# the same query goes to the retriever, whose results are flattened to text as "context".
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm_model
    | output_parser
)

In [52]:
# Alternative query to try: "What is the State of the Union address?"
query = "Expalin me aout State of the Union address in brief"

# Run the whole RAG chain on the query
result = rag_chain.invoke(query)

# Echo the question next to the generated answer
for label, value in (("Question:", query), ("Answer:", result)):
    print(label, value)

Question: Expalin me aout State of the Union address in brief
Answer: The State of the Union Address is an annual message delivered by the President of the United States to a joint session of Congress. It typically occurs near the beginning of most calendar years. The address reports on the nation's budget, economy, news, agenda, progress, and achievements. It also outlines the president's legislative proposals for the coming year. Woodrow Wilson giving his first State of the Union address on December 2, 1913, was the first time since 1801 that such an address was made in person before a joint session of Congress, initiating the modern trend.
