In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WebBaseLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains import RetrievalQA
import langchain

import os
# Resolve the Gemini API key. GEMINI_API_KEY is preferred; an already-exported
# GOOGLE_API_KEY is accepted as a fallback so the notebook also runs in
# environments that only define the Google-style variable.
GEMINI_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
if not GEMINI_KEY:
    # os.environ only accepts str values: assigning None would fail with an
    # opaque "TypeError: str expected, not NoneType". Fail fast with guidance.
    raise RuntimeError(
        "No Gemini API key found. Set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable before starting the kernel."
    )

# Mirror the key under GOOGLE_API_KEY for clients that are constructed later
# without an explicit key argument (the embeddings object is one of them).
os.environ["GOOGLE_API_KEY"] = GEMINI_KEY

In [None]:
# Chat model used by every chain in this notebook.
# The settings are collected in one mapping so they are easy to scan and tweak.
LLM_SETTINGS = {
    "model": "gemini-2.0-flash",
    "google_api_key": GEMINI_KEY,
    "temperature": 0.6,
    "max_tokens": 600,
    "timeout": None,
    "max_retries": 2,
}
llm = ChatGoogleGenerativeAI(**LLM_SETTINGS)

In [None]:
# Fetch the source articles with LangChain's WebBaseLoader.
ARTICLE_URLS = [
    "https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html",
    "https://www.moneycontrol.com/news/business/markets/market-corrects-post-rbi-ups-inflation-forecast-icrr-bet-on-these-top-10-rate-sensitive-stocks-ideas-11142611.html",
]

loader = WebBaseLoader(ARTICLE_URLS)

# `data` holds whatever loader.load() returns for the URLs above.
data = loader.load()

In [None]:
# Break the loaded documents into overlapping chunks with LangChain's
# recursive character splitter. Separators are tried in the order listed.
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", "!", "?", " ", ""],
    chunk_size=1000,
    chunk_overlap=200,
)

# split_documents() is applied to the loaded documents; the text of each
# resulting chunk is then pulled out separately for the embedding demo.
chunks = splitter.split_documents(data)
chunks_str = list(map(lambda doc: doc.page_content, chunks))

# Show how many chunks were produced.
len(chunks_str)

In [None]:
# Embed the chunks with Google's GenAI embedding model. Other embedding
# libraries would work too, but the LLM is also Google GenAI, so the same
# provider is used for both.
# No key is passed to the constructor, so GOOGLE_API_KEY must be set in the
# process environment.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Author's note: models/embedding-001 is a pre-trained embedding model from
# Google's Gemini API that yields one 768-dimensional vector per input string,
# whatever its length.
# NOTE(review): how over-long inputs are handled (e.g. truncation) is not
# visible from this notebook - confirm against the API documentation.

vectors = embeddings.embed_documents(chunks_str)

# Report the shape of the embedding matrix: rows (one per chunk), then columns.
vectors_rows = len(vectors)
vectors_cols = len(vectors[0])
print(vectors_rows, vectors_cols, sep="\n")

In [None]:
# Build a FAISS vector store from the chunked documents, using the embedding
# model configured above.
vectorstore = FAISS.from_documents(chunks, embeddings)

# Persist the store to the local "my_faiss_index" folder so it can be reloaded
# later without rebuilding it.
vectorstore.save_local("my_faiss_index")

In [None]:
# Reload the persisted index from disk. The same embedding model that built
# the index must be supplied again.
# SECURITY: allow_dangerous_deserialization=True should only be used for index
# files from a source you trust.
vectorstore = FAISS.load_local(
    "my_faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Optional sanity check - run a similarity search directly against the store:
# results = vectorstore.similarity_search("Tesla stock", k=2)
# results

In [None]:
# Question answering with source citations: the retriever pulls the chunks
# most similar to the question from the vector store, and the LLM turns them
# into the final answer.
retriever = vectorstore.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)

# Display the assembled chain for inspection.
chain

### Stuff Method

In [None]:
# === STUFF METHOD ===
# 1. Retrieve top-k relevant document chunks from the vector store.
# 2. Concatenate (stuff) all retrieved chunks into a single prompt.
# 3. Send that combined context + question to the LLM in one call.
# 4. LLM generates the final answer in a single step.
#
# ✅ Simpler and faster.
# ⚠️ May hit token limits if many or long documents are retrieved.
#
# NOTE: RetrievalQAWithSourcesChain.from_llm() always assembles a map-reduce
# combine chain, so it does not demonstrate the strategy described above.
# The "stuff" strategy has to be requested explicitly via from_chain_type().
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Ask a question. The chain expects its input under the "question" key.
query = "what has been appointed for a period of five years"

# Tip: set `langchain.debug = True` before invoking to trace each LLM call
# (useful to confirm that only a single call is made for the stuff strategy).
# invoke() replaces the deprecated direct call `chain({...})`.
chain.invoke({"question": query}, return_only_outputs=True)

### Map-Reduce Method

In [None]:
# === MAP-REDUCE METHOD ===
# Step 1 - retrieve: fetch the top-k relevant chunks from the vector store.
# Step 2 - MAP: send each chunk to the LLM on its own, with the same question,
#          producing one intermediate answer/summary per chunk.
# Step 3 - REDUCE: hand all intermediate outputs back to the LLM, which
#          synthesizes the final, aggregated answer.
#
# ✅ Copes with longer documents and supports more complex reasoning.
# ⚠️ Slower and more expensive, because it makes multiple LLM calls.

retriever = vectorstore.as_retriever()
chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True,
)

# Ask a question (set `langchain.debug = True` first to trace the LLM calls).
query = "what has been appointed for a period of five years"
result = chain(query)

# Only the answer text is printed.
answer = result["result"]
print(answer)
