In [1]:
import os
import time
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from dotenv import load_dotenv
load_dotenv()
# Load the GROQ and OpenAI API keys
groq_api_key = os.getenv('GROQ_API_KEY')
os.environ["GOOGLE_API_KEY"] = os.getenv('GOOGLE_API_KEY')

print("RAG Doc Assistant Using Llama3")

RAG Doc Assistant Using Llama3


In [7]:
# Initialize the LLM
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="Llama3-8b-8192"
)

In [None]:
# prompt = ChatPromptTemplate.from_template(
#     """
#     you are a bhagwatgita chatbot , you will have to provide the responses based on the bhagwatgita document provided by the user
#     and the question asked about it you have to provide responses mostly based on the document not in general.
#     <context>
#     {context}
#     <context>
#     Questions:{input}
#     """
# )

In [9]:
# Create the prompt template
prompt = ChatPromptTemplate.from_template(
    """
    You are a document assistant that helps users find information in a context.
    Please provide the most accurate response based on the context and inputs.
    Only give information that is in the context, not general information.
    <context>
    {context}
    <context>
    Questions: {input}
    """
)


In [3]:
# Function to process the uploaded PDF and create vector embeddings
def vector_embedding(file_path):
    print("Processing file:", file_path)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    loader = PyPDFLoader(file_path)  # Load PDF
    docs = loader.load()  # Document Loading
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)  # Chunk Creation
    final_documents = text_splitter.split_documents(docs[:20])  # Splitting
    vectors = FAISS.from_documents(final_documents, embeddings)  # Vector embeddings
    print("Vector Store DB is ready.")
    return vectors

In [None]:
# Input the PDF file path
file_path = input("Enter the path to the PDF file: ").strip()
if not os.path.isfile(file_path):
    print("File not found. Please provide a valid file path.")
else:
    vectors = vector_embedding(file_path)

    # Start the question loop
    while True:
        question = input("Enter your question (or type 'exit' to quit): ").strip()
        if question.lower() == 'exit':
            break
        
        # Create the retrieval chain and get the response
        document_chain = create_stuff_documents_chain(llm, prompt)
        retriever = vectors.as_retriever()
        retrieval_chain = create_retrieval_chain(retriever, document_chain)
        
        start = time.process_time()
        response = retrieval_chain.invoke({'input': question})
        print("Response time:", time.process_time() - start)
        print("Answer:", response['answer'])
        
        # Display similar document chunks
        print("\nDocument Similarity Search Results:")
        for i, doc in enumerate(response["context"]):
            print(f"Chunk {i + 1}:\n{doc.page_content}")
            print("--------------------------------")

Processing file: D:\codespace\ML\NYD\Vedanta_Datasets\PDFs\A4_Gita_Sattvik_Rajasik_Tamasik_Qualities.pdf
Vector Store DB is ready.
Response time: 0.0
Answer: Based on the provided context, it appears that "Gita" refers to the Bhagavad Gita, a Hindu scripture that is part of the Indian epic, the Mahabharata.

Document Similarity Search Results:
Chunk 1:
begrudgingly, is considered to 
be Rajasic.
[Gita 17.21]
The gift that is given in the 
wrong place and at the wrong 
time, to unworthy persons, 
without respect or with insult, is 
declared to be of a Tamasic 
nature.
[Gita 17.22]
ज्ञान
(Knowledge)
That by which one sees the 
unified and indestructible 
reality in all beings, know 
that knowledge to be Sattvic.
[Gita 18.20]
The knowledge, by which one 
perceives the manifold 
entities of different kinds, as 
varying from one another, is 
said to be Rajasic.
[Gita 18.21]
But that which clings blindly to 
one idea as if it were all, without 
logic, significance, or insight, is 
called Tam