### Import Libraries

In [26]:
import os
import warnings
warnings.filterwarnings("ignore")
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
import sentence_transformers
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
import time
from tqdm.auto import tqdm
from pprint import pprint


from dotenv import load_dotenv
load_dotenv(override=True)

True

In [2]:
# check the cwd
%pwd

'd:\\RAGProjects\\Q&A\\RAG-Q-A-ChatBot\\NoteBook'

In [3]:
# back to the main dire
os.chdir("..")
%pwd

'd:\\RAGProjects\\Q&A\\RAG-Q-A-ChatBot'

### Work with the data

In [4]:
# function to extract the data from the pdf files
def extracted_data(data_dir):
     loader = DirectoryLoader(path=data_dir,
                              glob="*.pdf",
                              loader_cls=PyPDFLoader)
     document = loader.load()
     return document

extracted_data = extracted_data("DataSet/")

In [5]:
# extracted_data


In [6]:
# function to split the data into chunks
def chunking(extracted_data):
     splitter = RecursiveCharacterTextSplitter(
                                   chunk_size=1000,
                                   chunk_overlap=200,
                                   separators=["\n\n", "\n", ".", " ", ""],
                                   is_separator_regex=False
                                   )
     text_chunks = splitter.split_documents(extracted_data)
     return text_chunks

text_chunks = chunking(extracted_data=extracted_data)
print(f"Text Chunks: {len(text_chunks)}")

Text Chunks: 1101


### Model Embedding

In [7]:
# function to download the embedding model
def download_embedding_model():
     embedding_model = HuggingFaceEmbeddings(model_name="ibm-granite/granite-embedding-125m-english")
     return embedding_model

embedding_model = download_embedding_model() 

  embedding_model = HuggingFaceEmbeddings(model_name="ibm-granite/granite-embedding-125m-english")
No sentence-transformers model found with name ibm-granite/granite-embedding-125m-english. Creating a new one with mean pooling.


In [8]:
embedding_model

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
), model_name='ibm-granite/granite-embedding-125m-english', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [9]:
# test for embedding model
embed = embedding_model.embed_query("Hello Word")
print(f"Dimension: {len(embed)}")

Dimension: 768


### Create the Pinecone vector database

In [10]:
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "question-answer-chatbot"
pc.create_index(name=index_name,
                dimension=768,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"))

{
    "name": "question-answer-chatbot",
    "metric": "cosine",
    "host": "question-answer-chatbot-u6z9n1i.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 768,
    "deletion_protection": "disabled",
    "tags": null
}

In [11]:
def initialize_pinecone_vector_store(documents, index_name, embedding, retries=3, delay=5):
    for attempt in range(retries):
        try:
            docsearch = PineconeVectorStore.from_documents([], index_name=index_name, embedding=embedding)
            print("✅ Pinecone Vector Store created.")

            for i in tqdm(range(0, len(documents)), desc="Uploading documents", unit="doc"):
                docsearch.add_documents([documents[i]])

            print("✅ All documents uploaded successfully.")
            return docsearch

        except Exception as e:
            print(f"\n❌ Attempt {attempt + 1} failed: {e}")
            if attempt < retries - 1:
                time.sleep(delay)
            else:
                raise e


docsearch = initialize_pinecone_vector_store(
    documents=text_chunks,
    index_name=index_name,
    embedding=embedding_model
)

✅ Pinecone Vector Store created.


Uploading documents: 100%|██████████| 1101/1101 [13:11<00:00,  1.39doc/s]

✅ All documents uploaded successfully.





In [12]:
# load the existing index
doc_search = PineconeVectorStore.from_existing_index(index_name=index_name, embedding=embedding_model)
doc_search

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x203787706d0>

### Retrival and the LLM model

In [13]:
# create the retriever 
retriever = doc_search.as_retriever(search_type="similarity", search_kwargs={"k":10})

In [14]:
# test the retriever
retrieved_docs = retriever.invoke("Who Is This Book For?")
retrieved_docs

[Document(id='67de6514-03c7-409f-9162-c846ea2c89c4', metadata={'creationdate': '', 'creator': 'Zamzar', 'page': 11.0, 'page_label': '12', 'producer': 'Zamzar', 'source': 'DataSet\\Natural Language Processing with Transformers Building Language Applications with Hugging Face by Lewis Tunstall  Leandro von Werra  Thomas Wolf.pdf', 'total_pages': 479.0}, page_content='Who Is This Book For?\nThis book is written for data scientists and machine learning engineers who\nmay have heard about the recent breakthroughs involving transformers, but\nare lacking an in-depth guide to help them adapt these models to their own\nuse cases. The book is not meant to be an introduction to machine learning,\nand we assume you are comfortable programming in Python and has a basic\nunderstanding of deep learning frameworks like PyTorch and TensorFlow.\nWe also assume you have some practical experience with training models\non GPUs. Although the book focuses on the PyTorch API of \nTransformers, Chapter 2 show

In [None]:
# LLMs model
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=GEMINI_API_KEY)

system_prompt = (
    "You are a knowledgeable and professional AI assistant specializing in providing accurate information from the given context. "
    "Your role is to:\n\n"
    "1. Provide clear, concise, and accurate answers based solely on the provided context\n"
    "2. If the context doesn't contain enough information to fully answer a question, acknowledge this limitation\n"
    "3. Maintain a professional and helpful tone while ensuring factual accuracy\n"
    "4. Use direct quotes from the context when relevant to support your answers\n"
    "5. Organize complex responses in a structured, easy-to-read format\n"
    "6. If you need to make assumptions, explicitly state them\n\n"
    "Remember:\n"
    "- Stay within the scope of the provided context\n"
    "- Avoid making up information or speculating beyond the given content\n"
    "- If multiple interpretations are possible, present them clearly\n"
    "- Maintain consistency in your responses\n\n"
    "Format your responses in a clear, professional manner using appropriate markdown formatting when helpful.\n\n"
    "Context:\n{context}\n\n"
    "Question: {input}\n\n"
    "Answer: "
)

prompt = ChatPromptTemplate.from_template(system_prompt)

question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
response = rag_chain.invoke({"input": "Give me a summary for this book."})

In [27]:
pprint(response["answer"])

('This book provides a hands-on approach to building products with '
 'state-of-the-art language-processing features using transformer models. It '
 'covers a range of topics, including:\n'
 '\n'
 '*   Text summarization: Chapter 6 "digs into the complex '
 'sequence-to-sequence task of text summarization and explores the metrics '
 'used for this task."\n'
 '*   Other topics include text generation, question answering, and making '
 'transformers efficient in production.\n'
 '\n'
 'The book also provides "a wealth of tips and tricks for getting everything '
 'to work efficiently."')
