In [1]:
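# Uncomment to install the dependencies into the kernel's environment:
# %pip install langchain langchain-google-genai chromadb pypdf pandas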

In [2]:
import urllib
import warnings
from pathlib import Path as p
from pprint import pprint

import pandas as pd
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI

warnings.filterwarnings("ignore")
# Restart the Python kernel if the langchain imports fail.

In [3]:
# Path to the source PDF, relative to the notebook's working directory.
pdf_file = "./storybook.pdf"

# Load the PDF; each returned item exposes the `.page_content` text that the
# chunking step reads.
pdf_loader = PyPDFLoader(file_path=pdf_file)
pages = pdf_loader.load_and_split()


In [4]:
# Merge the text of every loaded page into one string, separated by blank lines.
page_texts = [str(page.page_content) for page in pages]
context = "\n\n".join(page_texts)

# Re-split the merged text into large, non-overlapping chunks
# (chunk_size=10000, chunk_overlap=0).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=0)
texts = text_splitter.split_text(context)

In [5]:
# Identifier of the embedding model used to vectorise the text chunks.
EMBEDDING_MODEL = "models/embedding-001"

# NOTE(review): no API key is passed here; presumably it is picked up from the
# environment -- confirm before running on a fresh kernel.
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)

In [6]:
# Build a Chroma store from the text chunks and their embeddings, then wrap it
# as a retriever for querying.
vector_store = Chroma.from_texts(texts, embeddings)
vector_index = vector_store.as_retriever()

In [7]:
# First query: look up the chunks the retriever returns for this question.
question = "What is the moral of the story two frogs"

docs = vector_index.get_relevant_documents(query=question)

In [34]:
# Display the retrieved documents to inspect what the retriever returned.
docs

[Document(page_content='Spoken English: Short Stories  \n5 \n LEVEL 1 : STORI ES FOR PRIMARY SCHOOL CHILDREN  \nTHE WIND AND THE SUN  \n \nOnce the Wind and the Sun had an argument. “I am stronger than you,” said the Wind. “No, \nyou are not,” said the Sun. Just at that moment they saw a traveler walking across the road. \nHe was wrapped in a s hawl. The Sun and the Wind agreed that whoever could separate the \ntraveller from his shawl was stronger.  \nThe Wind took the first turn. He blew with all his might to tear the traveller’s shawl from his \nshoulders. But the harder he blew, the tighter the trav eller gripped the shawl to his body. \nThe struggle went on till the Wind’s turn was over.  \nNow it was the Sun’s turn. The Sun smiled warmly. The traveller felt the warmth of the \nsmiling Sun. Soon he let the shawl fall open. The Sun’s smile grew warmer and wa rmer... \nhotter and hotter. Now the traveller no longer needed his shawl. He took it off and dropped \nit on the ground. The

In [27]:
# Chat model used to generate answers; the sampling temperature is fixed at 0.3.
model = ChatGoogleGenerativeAI(temperature=0.3, model="gemini-pro")

In [28]:
# Prompt for the question-answering chain. It has two placeholders, {context}
# and {question}, matching the input_variables declared on the PromptTemplate.
#
# This is a regular (non-raw) string, so every "\n" escape becomes a real
# newline in addition to the literal line breaks of the triple-quoted text.
#
# Fix: a stray "?" used to follow the {context} placeholder directly, so every
# formatted prompt ended the context with a spurious question mark.
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                    not contained in the context, say "answer not available in context" \n\n
                    Context: \n {context}\n
                    Question: \n {question} \n
                    Answer:
                  """

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [35]:
# Build the question-answering chain around the chat model, using the custom
# prompt and the "stuff" chain type.
stuff_chain = load_qa_chain(
    llm=model,
    chain_type="stuff",
    prompt=prompt,
)

In [38]:
# Run the chain on whatever `docs` and `question` are currently bound to.
chain_inputs = {"input_documents": docs, "question": question}
stuff_answer = stuff_chain(chain_inputs, return_only_outputs=True)

In [39]:
# Print the chain's result; the recorded output is a dict with an 'output_text' key.
print(stuff_answer)

{'output_text': "A farmer had puppies he wanted to sell. He put up a sign advertising them. A little boy came up to the farmer and asked to see the puppies. The farmer told him that they were from good parents and cost a lot of money. The boy didn't have enough money, but the farmer let him look at the puppies anyway. The boy noticed one of the puppies was smaller and limping. He asked the farmer about it, and the farmer said that the puppy would never be able to run and play like the others. The boy said he wanted that puppy because he understood what it was like to have a disability. The farmer was touched by the boy's words and gave him the puppy for free."}


In [37]:
# Second query: rebinds `question` and `docs` to a different story.
# NOTE(review): the execution counts suggest this cell ran *before* the chain
# call (the recorded answer is about the puppies story). On a top-to-bottom run
# the chain sees the earlier question instead -- consider moving this cell
# above the chain call or re-running the chain after it.
question = "Explain me the story puppies for sale in layman terms"
docs = vector_index.get_relevant_documents(query=question)