In [38]:
import streamlit as st
import os
from dotenv import load_dotenv
# Read variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# os.environ only accepts string values, so blindly re-assigning the result of
# os.getenv() raises an opaque "TypeError: str expected, not NoneType" when the
# key is missing. Check explicitly and fail with an actionable message instead.
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is None:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Define it in the environment or in a .env "
        "file next to this notebook before running the remaining cells."
    )
os.environ["GOOGLE_API_KEY"] = google_api_key

Loading the PDF

In [58]:
from langchain.document_loaders import PyPDFLoader
from langchain.schema import Document
# Path of the PDF to index, relative to the notebook's working directory.
PDF_PATH = "ch31.pdf"

# Build the loader, then read the PDF into a list of documents.
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

Splitting into Chunks

In [59]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Chunking parameters: target size of each chunk and the overlap shared by
# neighbouring chunks.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded documents into smaller pieces for embedding.
chunks = text_splitter.split_documents(docs)

Storing the chunks in a FAISS vector store

In [42]:
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embed every chunk and index the resulting vectors in a FAISS store.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
db = FAISS.from_documents(documents=chunks, embedding=embeddings)

# Sanity check: run a similarity search and show the text of the top hit.
query = "breeds"
result = db.similarity_search(query)
result[0].page_content

'head shape, body conformation and coat quality. British \ntypes are stocky with a heavier coat. Foreign types are slender and smooth coated. Breeds are also classiﬁ  ed by hair \nlength; Short - hairs and Long - hairs. The difference is due to a single gene, the allele for long coat being recessive. A more recent hair mutation has resulted in three new breeds; the Cornish Rex, the Devon Rex and the American Wire - hair. Colour varieties are caused by less than a dozen mutations. Most seem to affect only pigmentation but that producing blue - eyed white cats is linked with timidity, deafness, ele-\nvated mortality and poor mothering ability. Breeders are now producing breeds in several colours; blurring the dis-tinction between breeds and varieties (a full account of breeds and varieties is provided by Vella  et al.    1999 ).   \n  Sources of  s upply \n It is good practice, and a legal requirement in some countries'

In [43]:
from langchain_core.prompts import ChatPromptTemplate
# Chat prompt with two placeholders: {context} and {input}. The instructions
# tell the model to answer only from the text placed between the <context>
# tags. Presumably {context} is filled with the retrieved chunks by the chain
# built later in the notebook, and {input} with the user's question -- confirm
# against the chain helpers' documentation.
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context.
Think step by step before providing a detailed answer. I will tip you $1000
if the user finds the answer helpful
<context>
{context}
</context>
Question:{input}                                     
""")

Defining the model

In [50]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model that generates the final answers.
llm = ChatGoogleGenerativeAI(model="gemini-pro")

Creating the retrieval chain

In [52]:
from langchain.chains.combine_documents import create_stuff_documents_chain
# Chain that passes a set of documents, formatted through the prompt, to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [53]:
# Expose the FAISS store through the retriever interface so it can be plugged
# into a retrieval chain.
retriever = db.as_retriever()
# Bare expression: display the retriever's repr as the cell output.
retriever

VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000023A5ABF2920>)

In [55]:
from langchain.chains import create_retrieval_chain
# Combine the retriever with the document chain. The variable keeps its original
# spelling ("retreival") because a later cell invokes the chain under this name.
retreival_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [56]:
# Run the retrieval chain for a single question; the question is passed under
# the "input" key.
response = retreival_chain.invoke({"input": "Can you tell about the breeds"})

In [57]:
# Display the generated text stored under the "answer" key of the chain response.
response["answer"]

'**Step 1: Identify the relevant information in the context.**\n\nThe context mentions that "Cats have not been subject to intensive selective breeding programmes with most breeds originating in single gene mutations or a few combinations." It also states that "Breeds are classiﬁed into British (European or American) and Foreign on the basis of head shape, body conformation and coat quality."\n\n**Step 2: Understand the different types of breeds.**\n\nThe context classifies breeds into British (European or American) and Foreign based on head shape, body conformation, and coat quality. British types are stocky with a heavier coat, while Foreign types are slender and smooth coated.\n\n**Step 3: Summarize the information about breeds.**\n\nBreeds are classified based on physical characteristics, such as head shape, body conformation, and coat quality. British breeds are stocky with a heavier coat, while Foreign breeds are slender and smooth coated.\n\n**Answer:**\n\nBreeds are classified 