In [48]:
from langchain_community.document_loaders import PDFPlumberLoader

In [49]:
# Point a pdfplumber-backed loader at the book PDF (path is relative to the
# notebook's working directory).
loader = PDFPlumberLoader(
    file_path="The hundred page machine learning book.pdf",
)

In [50]:
# Read the PDF through the loader. The Documents shown in later output carry
# metadata such as 'source', 'page' and 'total_pages'.
result = loader.load()

In [51]:
from langchain.vectorstores import FAISS
from langchain_google_genai import  GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [52]:
import os

# Embedding client used both to build and to query the FAISS index.
# The API key is read from the GOOGLE_API_KEY environment variable rather than
# being written into the notebook, so the credential never ends up in a saved
# or shared copy. A missing variable raises KeyError naming the variable.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

In [53]:
# Split the loaded documents into chunks using chunk_size=1000 and no overlap
# between neighbouring chunks; `docs` is what gets embedded and indexed next.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(result)

In [54]:
# Embed every chunk and build a FAISS vector store from them, then persist the
# store to the local "faiss_index" folder.
db = FAISS.from_documents(documents=docs, embedding=embeddings)
db.save_local(folder_path="faiss_index")

In [55]:
# Reload the vector store from the "faiss_index" folder, rebinding `db`.
# NOTE(review): allow_dangerous_deserialization=True opts in to deserializing
# data from disk — only use it on index folders you created yourself.
db = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [56]:
from langchain.chains.question_answering import load_qa_chain
from langchain import PromptTemplate

In [57]:
# Prompt for the "stuff" QA chain. `{context}` receives the concatenated text of
# the retrieved document chunks and `{question}` the user's query.
# The wording refers to the text documents actually supplied (not a video
# transcript) and asks the model to flag inferred answers instead of presenting
# them as facts found in the context.
prompt_template = """
  Explore the text documents provided as context.
  If the answer is present in the context, provide comprehensive details.
  If not, give the best answer you can infer from the documents and state clearly that it is an inference.

  Context:
  {context}

  Question:
  {question}

  Answer:
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [73]:
# Query used both for the similarity search and as the question put to the LLM.
question = "Who is the author of the book?"

In [74]:
# Fetch the 20 chunks closest to the question from the vector store.
# Note: this rebinds `docs`, which previously held the full list of split
# chunks used to build the index.
docs = db.similarity_search(query=question, k=20)
docs

[Document(page_content="Andriy Burkov's", metadata={'source': 'The hundred page machine learning book.pdf', 'file_path': 'The hundred page machine learning book.pdf', 'page': 51, 'total_pages': 152, 'CreationDate': 'D:20181218050746Z', 'ModDate': 'D:20190122195134Z', 'Producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)'}),
 Document(page_content="Andriy Burkov's", metadata={'source': 'The hundred page machine learning book.pdf', 'file_path': 'The hundred page machine learning book.pdf', 'page': 106, 'total_pages': 152, 'CreationDate': 'D:20181218050746Z', 'ModDate': 'D:20190122195134Z', 'Producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)'}),
 Document(page_content='The\nHundred-\nPage\nMachine\nLearning\nBook\nAndriy Burkov', metadata={'source': 'The hundred page machine learning book.pdf', 'file_path': 'The hundred page machine learning book.pdf', 'page': 0, 'total_pages': 152, 'CreationDate': 'D:20181218050746Z', 'Mod

In [75]:
import os

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain

# Chat model that answers the question from the retrieved context.
# The API key is read from the GOOGLE_API_KEY environment variable rather than
# being written into the notebook; a missing variable raises KeyError.
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3,
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

In [76]:
# Build a "stuff"-type question-answering chain that uses the chat model and
# the custom prompt defined earlier in the notebook.
chain = load_qa_chain(
    llm=model,
    chain_type="stuff",
    prompt=prompt,
)

In [77]:
# Run the QA chain over the retrieved documents and the question.
# `invoke` is used instead of calling the chain object directly, which is the
# deprecated entry point; return_only_outputs=True keeps the inputs out of the
# returned dict so only the chain's outputs (e.g. 'output_text') are shown.
response = chain.invoke(
    {"input_documents": docs, "question": question},
    return_only_outputs=True,
)
response

{'output_text': 'Andriy Burkov'}