In [46]:
from dotenv import load_dotenv , find_dotenv
import os
dotenv_path = find_dotenv()

load_dotenv(find_dotenv())
api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

if api_key is None:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in environment variables")


In [47]:
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator 
from langchain.embeddings import HuggingFaceHubEmbeddings
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoModel
from transformers import AutoModelForCausalLM
from langchain_community.vectorstores import FAISS


In [48]:
#loading the PDF
loader  = PyPDFLoader("C:\\Users\\Admin\\Documents\\gen_ai_training\\pdfs\\rag.pdf")
docs = loader.load()

#splitting
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100, add_start_index=True)
chunks = text_splitter.split_documents(docs)


In [49]:
#Model 
modelPath = "sentence-transformers/all-mpnet-base-v2"
#HuggingFace Embedding 
embeddings = HuggingFaceHubEmbeddings(model=modelPath)

db = FAISS.from_documents(chunks, embedding=embeddings)  # Pass embeddings as parameter
print(f"Total chunks indexed: {db.index.ntotal}")

Total chunks indexed: 276


In [53]:
# Create a pipeline for text generation
generation_model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(generation_model_name)
generation_model = AutoModelForCausalLM.from_pretrained(generation_model_name)


generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)  # device=0 for GPU, -1 for CPU

def generate_response(query, db, top_k=4):
    # Perform similarity search to retrieve relevant text chunks
    chunks = db.similarity_search(query, k=top_k)

    # Combine the content of the top-k chunks into a single text string
    combined_text = "\n".join([chunk.page_content for chunk in chunks])

    # Prepare the prompt
    prompt = f"Based on the following content, answer the query and if it's not in the content then say 'I don't know': {query}\n\nContent:\n{combined_text}\n\nAnswer:"

    # Generate response using Hugging Face model
    response = generator(prompt, max_new_tokens=150, num_return_sequences=1)
    
    # Extract the response text
    response_text = response[0]["generated_text"].strip()
    
    # Return response along with metadata
    return response_text


In [54]:
query = "explain thoroughly the 3 types of RAG?"
response = generate_response(query, db)
print(f"Response to '{query}': {response}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Response to 'explain thoroughly the 3 types of RAG?': Based on the following content, answer the query and if it's not in the content then say 'I don't know': explain thoroughly the 3 types of RAG?

Content:
question, form a comprehensive prompt that empowers LLMs
to generate a well-informed answer.
The RAG research paradigm is continuously evolving, and
we categorize it into three stages: Naive RAG, Advanced
RAG, and Modular RAG, as showed in Figure 3. Despite
RAG method are cost-effective and surpass the performance
of the native LLM, they also exhibit several limitations.
The development of Advanced RAG and Modular RAG is
a response to these specific shortcomings in Naive RAG.
A. Naive RAG
a response to these specific shortcomings in Naive RAG.
A. Naive RAG
The Naive RAG research paradigm represents the earli-
est methodology, which gained prominence shortly after the
In the development of RAG technology, there is a clear
trend towards different specialization directions, such as: 1