In [2]:
# Standard library imports
import os
import io
import getpass

# Third-party library imports
import PyPDF2
import requests
import ipywidgets as widgets
from IPython.display import display, Markdown

# Google Generative AI imports
import google.generativeai as genai  # For using Google's Generative AI

# LangChain imports
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import CharacterTextSplitter
from langchain.schema.document import Document
from langchain_community.document_loaders import TextLoader
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_community.vectorstores import FAISS

In [None]:
# Ask for the key interactively only when the environment does not already
# provide one, so the secret is never written into the notebook itself.
if os.environ.get("GOOGLE_API_KEY") is None:
  api_key = getpass.getpass("Provide your Google API Key")
  os.environ["GOOGLE_API_KEY"] = api_key

In [4]:
# Answer a question about the indexed text with retrieval-augmented generation (RAG)
def generation(retriever, question=None, model="gemini-pro"):
  """Answer a question about the indexed text with a RAG chain.

  The retriever supplies the context chunks for the question; their text is
  inserted into the prompt template and sent to a Google Generative AI chat
  model.

  Args:
    retriever: LangChain retriever (a Runnable) mapping a query string to a
      list of documents, e.g. the value returned by `generate_embeddings`.
    question: Question to ask. When omitted, the notebook's original
      "Essential prompt keywords" query is used.
    model: Name of the chat model passed to `ChatGoogleGenerativeAI`.

  Returns:
    The model's answer as a plain string.
  """
  if question is None:
    question = "what are the Essential prompt keywords. Provide the list "

  def join_page_contents(documents):
    # Put only the chunk text into the prompt; interpolating the raw list
    # would insert the Document reprs (metadata, escaped newlines) instead.
    return "\n\n".join(doc.page_content for doc in documents)

  llm_text = ChatGoogleGenerativeAI(model=model)
  template = """
  ```
  {context}
  ```

  {information}


  Provide brief information.
  """
  prompt = ChatPromptTemplate.from_template(template)

  rag_chain = (
      {
          "context": retriever | RunnableLambda(join_page_contents),
          "information": RunnablePassthrough(),
      }
      | prompt
      | llm_text
      | StrOutputParser()
  )
  # The question is both the retrieval query and the {information} slot.
  return rag_chain.invoke(question)

In [5]:
# Embed the document chunks and build a FAISS retriever over them
def generate_embeddings(text):
  """Embed document chunks and expose them through a FAISS retriever.

  Args:
    text: Non-empty list of LangChain `Document` chunks (despite the name,
      not a raw string), e.g. the output of `get_text_chunks_langchain`.

  Returns:
    A retriever over a FAISS index built from the chunks.

  Raises:
    ValueError: If `text` is empty, since there would be nothing to index.
  """
  # Fail early with a clear message; an empty list otherwise only surfaces
  # as an obscure error while the vector store is being built.
  if not text:
    raise ValueError("Cannot build a retriever from an empty list of documents")
  embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
  vectorstore = FAISS.from_documents(text, embedding=embeddings)
  return vectorstore.as_retriever()

In [6]:
# Split raw text into LangChain Document chunks
def get_text_chunks_langchain(text, chunk_size=2000, chunk_overlap=10):
  """Split raw text into LangChain `Document` chunks.

  Args:
    text: The full text to split.
    chunk_size: Forwarded to `CharacterTextSplitter`; defaults to the
      notebook's original value of 2000.
    chunk_overlap: Forwarded to `CharacterTextSplitter`; defaults to the
      notebook's original value of 10.

  Returns:
    A list of `Document` objects, one per chunk, in splitter order.
  """
  text_splitter = CharacterTextSplitter(
      chunk_size=chunk_size, chunk_overlap=chunk_overlap
  )
  return [Document(page_content=chunk) for chunk in text_splitter.split_text(text)]

In [None]:
def process_pdf(file_path):
    """Run the full RAG pipeline on a PDF and display the answer.

    Extracts the text of every page, splits it into chunks, builds a
    retriever over the chunks, asks the model the notebook's question and
    renders the answer as Markdown in the cell output.

    Args:
        file_path: Path to the PDF file to read.

    Raises:
        FileNotFoundError: If `file_path` does not exist.
        ValueError: If no text could be extracted from the PDF.
    """
    # Only text extraction needs the file handle; it is closed before the
    # slower embedding and generation steps run.
    with open(file_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # `or ""` guards against pages for which extraction yields nothing.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    # Create Context
    context = "\n\n".join(page_texts)
    if not context.strip():
        # Without this check the failure only appears later, while indexing.
        raise ValueError(f"No extractable text found in {file_path!r}")

    # Generate chunks
    text = get_text_chunks_langchain(context)

    # Generate embeddings of the text
    retriever = generate_embeddings(text)

    # Generate content
    result = generation(retriever)

    # Display response
    display(Markdown(result))


# Location of the PDF to query. NOTE(review): the path looks like a Colab
# "/content" upload; adjust it when running elsewhere.
pdf_file_path = "/content/Blog.pdf"
process_pdf(file_path=pdf_file_path)