In [None]:
from dotenv import load_dotenv
from langchain_groq import ChatGroq

load_dotenv()
chat_model=ChatGroq(model="llama3-8b-8192")

In [None]:
from langchain_community.document_loaders import PyPDFLoader
import os
from langchain_core.tools import tool

def load_pdf(pdfpath: str) -> list:
    """Load pdf at a particular path(only once per file)."""

    if not os.path.exists(pdfpath):
        raise FileNotFoundError(f"PDF file '{pdfpath}' not found.")

    loader = PyPDFLoader(pdfpath)
    pdf = loader.load()
    return pdf

In [4]:
pdf = load_pdf("sp.pdf")
pdf

[Document(metadata={'producer': 'iLovePDF', 'creator': 'Decktape', 'creationdate': '2024-09-05T04:52:38+00:00', 'moddate': '2024-09-11T16:38:45+00:00', 'source': 'sp.pdf', 'total_pages': 231, 'page': 0, 'page_label': '1'}, page_content='Master Spring & Spring Boot\nwith Hibernate & React\n1'),
 Document(metadata={'producer': 'iLovePDF', 'creator': 'Decktape', 'creationdate': '2024-09-05T04:52:38+00:00', 'moddate': '2024-09-11T16:38:45+00:00', 'source': 'sp.pdf', 'total_pages': 231, 'page': 1, 'page_label': '2'}, page_content='Top frameworks in the Java world today\nSpring Framework\nSpring Boot\nBeginners find the first steps very diﬀicult:\nLot of terminology: Dependency Injection, IOC, Auto\nwiring, Auto configuration, Starter Projects ..\nVariety of applications: Web app, REST API, Full Stack\nVariety of other framework, tool and platform\nintegrations: Maven, Gradle, Spring Data, JPA, Hibernate,\nDocker and Cloud\nGetting Started\n2'),
 Document(metadata={'producer': 'iLovePDF', 'c

In [5]:
def text_extract(pdf:list) -> str:
    """
        Extracts the full text of loaded pdf
    """
    full_text = "\n".join([doc.page_content for doc in pdf])
    return full_text

In [6]:
extracted_text = text_extract(pdf)
extracted_text



In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_pdf(pdf:list) -> list:
    """
        Splits the pdf loaded list into chunks
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
    splitted_text = splitter.split_documents(pdf)
    return splitted_text

In [17]:
splitted_text = split_pdf(pdf)

In [18]:
splitted_text

[Document(metadata={'producer': 'iLovePDF', 'creator': 'Decktape', 'creationdate': '2024-09-05T04:52:38+00:00', 'moddate': '2024-09-11T16:38:45+00:00', 'source': 'sp.pdf', 'total_pages': 231, 'page': 0, 'page_label': '1'}, page_content='Master Spring & Spring Boot\nwith Hibernate & React\n1'),
 Document(metadata={'producer': 'iLovePDF', 'creator': 'Decktape', 'creationdate': '2024-09-05T04:52:38+00:00', 'moddate': '2024-09-11T16:38:45+00:00', 'source': 'sp.pdf', 'total_pages': 231, 'page': 1, 'page_label': '2'}, page_content='Top frameworks in the Java world today\nSpring Framework\nSpring Boot'),
 Document(metadata={'producer': 'iLovePDF', 'creator': 'Decktape', 'creationdate': '2024-09-05T04:52:38+00:00', 'moddate': '2024-09-11T16:38:45+00:00', 'source': 'sp.pdf', 'total_pages': 231, 'page': 1, 'page_label': '2'}, page_content='Spring Boot\nBeginners find the first steps very diﬀicult:'),
 Document(metadata={'producer': 'iLovePDF', 'creator': 'Decktape', 'creationdate': '2024-09-05T0

In [20]:
from langchain_community.vectorstores import Chroma
import os
from langchain_huggingface import HuggingFaceEmbeddings

os.environ['HF_HOME'] = 'C:/Users/Aditya/Desktop/Langchain/Langchain_Models/LOCALINSTALLEDMODELS'

embedding_model =  HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
PERSIST_DIR = "./chroma_persist"
CHROMA_COLLECTION_NAME = "pdf_docs"

def embed_pdf(splitted_text:list)->Chroma:
    """
        Embedds the chunks into vectors and store in vector space
        Returns Vector Embeddings
    """
     
    if os.path.exists(PERSIST_DIR) and os.listdir(PERSIST_DIR):
        vectordb = Chroma(
            persist_directory=PERSIST_DIR,
            embedding_function=embedding_model,
            collection_name=CHROMA_COLLECTION_NAME
        )
    else:
        vectordb = Chroma.from_documents(
            splitted_text,
            embedding_model,
            persist_directory=PERSIST_DIR,
            collection_name=CHROMA_COLLECTION_NAME
        )
        vectordb.persist()

    return vectordb


In [21]:
vectordb = embed_pdf(splitted_text)

  vectordb.persist()


In [None]:
def get_similar_pages(user_query:str,vectordb:Chroma)->list:
    vectordb_retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 5})
    similar_pages = vectordb_retriever.get_relevant_documents(user_query)
    return similar_pages

In [35]:
similar_pages = get_similar_pages("What is hibernate",vectordb)
similar_pages

[Document(metadata={'creationdate': '2024-09-05T04:52:38+00:00', 'source': 'sp.pdf', 'creator': 'Decktape', 'moddate': '2024-09-11T16:38:45+00:00', 'producer': 'iLovePDF', 'page': 68, 'page_label': '69', 'total_pages': 231}, page_content='Who manages the entities?\nHibernate is one of the popular\nimplementations of JPA'),
 Document(metadata={'moddate': '2024-09-11T16:38:45+00:00', 'source': 'sp.pdf', 'total_pages': 231, 'creator': 'Decktape', 'producer': 'iLovePDF', 'creationdate': '2024-09-05T04:52:38+00:00', 'page_label': '69', 'page': 68}, page_content='Using Hibernate directly would result in a\nlock in to Hibernate'),
 Document(metadata={'producer': 'iLovePDF', 'creator': 'Decktape', 'moddate': '2024-09-11T16:38:45+00:00', 'page_label': '64', 'creationdate': '2024-09-05T04:52:38+00:00', 'page': 63, 'source': 'sp.pdf', 'total_pages': 231}, page_content='COURSE table\n04: Use JPA and Hibernate to play\nwith COURSE table\n05: Use Spring Data JPA to play'),
 Document(metadata={'page_

In [36]:
from langchain_core.tools import tool

def answer_question_on_query(similar_pages:list,user_query:str):
    """
    Answer a question based on input text from the vector store.
    Retrievers similar pages from store and gives answer
    """
    question = user_query

    prompt = f"Answer the question using the following context:\n\n{similar_pages}\n\nQuestion: {question} also add your suggestions in the answer if lack of context"
    return chat_model.invoke(prompt).content


In [None]:
answer_question_on_query(similar_pages,"what is hibernate")

In [38]:
from langchain_core.tools import tool

def summarize_on_topic(similar_pages:list,topic:str):
    """
    Answer a question based on input text from the vector store.
    Retrievers similar pages from store and gives answer
    """

    prompt = f"Summarize the following content in detail:\n\n{topic} using the following context:\n\n {similar_pages} also add your suggestions in the summary if lack of context"
    return chat_model.invoke(prompt).content


In [None]:
summarize_on_topic(similar_pages,"Hibernate")

In [None]:
def generate_quiz_on_topic(similar_pages:list,user_query:str):
    """
    Generate a 10-question quiz from text 
    Returns a set of 10 questions with answers
    """
    prompt = f"Based on the following content {similar_pages}, create a quiz of 10 questions with answers at the end:\n\n{user_query} also add your suggestions in the quiz if lack of context"
    return chat_model.invoke(prompt).content
    

In [None]:
generate_quiz_on_topic(similar_pages,"Hibernate")

In [None]:
def summarize_text(extracted_text:str):
    """
    Summarize text tool
    Returns Summary of provided Chunks
    """
    prompt = f"Summarize the following content in detail:\n\n{extracted_text}"
    return chat_model.invoke(prompt).content

In [None]:
summarize_text(extracted_text)

In [None]:
def generate_quiz(extracted_text: str):
    """
    Generate a 10-question quiz from text 
    Returns a set of 10 questions with answers
    """
    prompt = (
        "Based on the following content, create a quiz of 10 questions "
        "with answers at the end:\n\n"
        f"{extracted_text}"
    )
    return chat_model.invoke(prompt).content

In [None]:
generate_quiz(extracted_text)