In [1]:
!pip install langchain langchain-google-genai faiss-cpu python-dotenv chromadb langchain_community pypdf


Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.8-py3-none-any.whl.metadata (3.6 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting chromadb
  Downloading chromadb-0.6.2-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.14-py3-none-any.whl.metadata (2.9 kB)
Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (

In [2]:
import os
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.retrievers import BaseRetriever
from typing import List
from langchain.docstore.document import Document
from pydantic import BaseModel, Field

# Resolve the Google API key.
# On Colab the key is read from the notebook secret 'GOOGLE_API_KEY1'.
# Outside Colab (google.colab is not importable) we fall back to whatever
# GOOGLE_API_KEY the .env file / process environment provides.
try:
    from google.colab import userdata
except ImportError:
    api_key = None
else:
    api_key = userdata.get('GOOGLE_API_KEY1')

# Load environment variables from a .env file
load_dotenv()
if api_key is not None:
    # The Colab secret takes precedence over any value coming from .env.
    os.environ["GOOGLE_API_KEY"] = api_key
elif not os.environ.get("GOOGLE_API_KEY"):
    # Fail here with a clear message instead of deep inside the first API call.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set: add the Colab secret 'GOOGLE_API_KEY1' "
        "or define GOOGLE_API_KEY in a .env file / the environment."
    )

# Load embedding model
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [3]:
# Load data
from langchain.document_loaders import PyPDFLoader
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Read the PDF into Document objects (they carry 'page'/'source' metadata).
loader = PyPDFLoader("/content/FinancialServices.pdf")
pages = loader.load()

# Split documents into chunks of at most 500 characters, without overlap.
# `documents` ends up holding the chunks, as before; the raw pages stay
# available under `pages` instead of being overwritten.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
documents = text_splitter.split_documents(pages)

# Create vectorstore
# Without explicit ids every call generates fresh random ids, so re-running
# this cell in the same kernel would add the same chunks again to the default
# in-memory collection and retrieval would return duplicates. Deterministic ids
# make the insert an upsert, which keeps the cell idempotent.
# NOTE(review): if the PDF later yields fewer chunks, entries from a previous
# run with higher indices remain in the collection until the kernel restarts.
chunk_ids = [
    f"{doc.metadata.get('source', 'doc')}:{index}"
    for index, doc in enumerate(documents)
]
vectorstore = Chroma.from_documents(documents, embeddings, ids=chunk_ids)

# Create retriever (default search settings)
retriever = vectorstore.as_retriever()

# Create LLM (temperature 0 for repeatable answers)
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-1.5-flash", max_tokens=4000)

# Create compression retriever: documents fetched by `retriever` are passed
# through an LLM-based extractor before being returned.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

# Checking compressed doc
compressed_docs = compression_retriever.invoke("what are points on a mortgage")
print(compressed_docs)

[Document(metadata={'page': 0, 'source': '/content/FinancialServices.pdf'}, page_content='Response: Points on a mortgage are fees paid directly to the lender at closing in exchange for a reduced interest rate. This is also known as "buying down the rate, " which can lower your monthly mortgage payments. Each point costs 1% of your mortgage'), Document(metadata={'page': 1, 'source': '/content/FinancialServices.pdf'}, page_content='Response: Points on a mortgage are fees paid directly to the lender at closing in exchange for a  reduced interest rate. This is also known as "buying down the rate, " which can lower your monthly  mortgage payments. Each point costs 1% of your mortgage amount.'), Document(metadata={'page': 0, 'source': '/content/FinancialServices.pdf'}, page_content='• Discount Points: These are prepaid interest on the mortgage loan. Each discount point \ntypically lowers the interest rate by 0.25%. \n• Origination Points: These are fees charged by the lender for processing t

In [4]:
# Create document chain
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

template = """
You are a helpful assistant that answers questions based on the following context.
If you don't find the answer in the context, just say that you don't know.
Context: {context}

Question: {input}

Answer:
"""
prompt = ChatPromptTemplate.from_template(template)


def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: iterable of retrieved documents exposing ``page_content``.

    Returns:
        The page contents separated by blank lines ("" for no documents).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Setup RAG pipeline
# The retriever returns a list of Document objects; feeding that list straight
# into the template would insert its Python repr (metadata included) into the
# prompt. Piping through format_docs passes only the passage text.
rag_chain = (
    {"context": compression_retriever | format_docs, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Response
response = rag_chain.invoke("what are points on a mortgage")
print("Contextual RAG Output:", response)

Contextual RAG Output: Points on a mortgage are fees paid directly to the lender at closing in exchange for a reduced interest rate.  This is also known as "buying down the rate," which lowers monthly mortgage payments. Each point costs 1% of the mortgage amount.  There are two types: discount points (prepaid interest lowering the interest rate by approximately 0.25% per point) and origination points (fees for loan processing that don't affect the interest rate).


In [7]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Plain retrieval chain: the vector store is queried directly (similarity
# search, top 5 chunks) with no contextual compression step in between.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=5),
)

# System message = fixed instructions, a blank line, then the retrieved context.
instructions = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
)
system_prompt = instructions + "\n\n" + "{context}"

chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# The inner chain places the retrieved documents into {context}; the outer
# chain runs the retriever first and exposes the reply under "answer".
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

question = "what are points on a mortgage ?"
response = rag_chain.invoke({"input": question})
print("RAG Output:", response["answer"])


RAG Output: Points on a mortgage are fees paid to the lender at closing to reduce the interest rate, a process called "buying down the rate."  Each point costs 1% of the mortgage amount and lowers monthly payments.
