In [1]:
# Install required packages
!pip install torch transformers sentence-transformers PyMuPDF tqdm pandas numpy



In [2]:
# Import all required libraries
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM
import fitz  # PyMuPDF for PDF processing
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import textwrap
import os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("All libraries imported successfully!")

All libraries imported successfully!


In [3]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineG

In [4]:
# Initialize GPU and models
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load embedding model
print("Loading embedding model...")
embedding_model = SentenceTransformer("all-mpnet-base-v2", device=device)
print("Embedding model loaded successfully!")

# Load Gemma LLM model
print("Loading Gemma LLM model...")
model_id = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
llm_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(device)
print("Gemma LLM model loaded successfully!")

# Global variables for chatbot
conversation_history = []
pdf_chunks = []
pdf_embeddings = None
current_pdf_path = None

print("\n All models loaded and ready to use!")

Using device: cpu
Loading embedding model...
Embedding model loaded successfully!
Loading Gemma LLM model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Gemma LLM model loaded successfully!

 All models loaded and ready to use!


In [5]:
def process_pdf(pdf_path):
    """
    Extracts text from a PDF file and splits it into pages.
    """
    doc = fitz.open(pdf_path)
    pages_and_texts = []
    for page_number, page in tqdm(enumerate(doc), desc="Processing PDF pages"):
        text = page.get_text().replace("\n", " ").strip()
        if text:  # Only add non-empty pages
            pages_and_texts.append({"page_number": page_number + 1, "text": text})
    doc.close()
    return pages_and_texts

def chunk_text(text, max_tokens=128):
    """
    Splits a large text into smaller chunks of a specified maximum token length.
    """
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0

    for word in words:
        if current_length + len(word) + 1 > max_tokens:
            if current_chunk:  # Only add non-empty chunks
                chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length += len(word) + 1

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

def load_and_process_pdf(pdf_path):
    """
    Loads and processes a PDF file, returning text chunks.
    """
    global pdf_chunks, pdf_embeddings, current_pdf_path

    if not os.path.exists(pdf_path):
        print(f" Error: PDF file not found at {pdf_path}")
        return False

    try:
        print(f" Loading PDF: {os.path.basename(pdf_path)}")

        # Process PDF
        pages_and_texts = process_pdf(pdf_path)

        # Create chunks from all pages
        all_chunks = []
        for page in pages_and_texts:
            page_chunks = chunk_text(page["text"], max_tokens=128)
            all_chunks.extend(page_chunks)

        pdf_chunks = all_chunks
        current_pdf_path = pdf_path

        print(f" PDF processed successfully! ({len(pdf_chunks)} chunks created)")
        return True

    except Exception as e:
        print(f" Error processing PDF: {str(e)}")
        return False

print(" PDF processing functions defined successfully!")

 PDF processing functions defined successfully!


In [6]:
def embed_chunks(chunks):
    """
    Embeds text chunks using the embedding model.
    """
    if not chunks:
        return None

    print(f" Creating embeddings for {len(chunks)} chunks...")
    embeddings = embedding_model.encode(chunks, batch_size=32, convert_to_tensor=True, show_progress_bar=True)
    print(" Embeddings created successfully!")
    return embeddings

def create_pdf_embeddings():
    """
    Creates embeddings for the currently loaded PDF chunks.
    """
    global pdf_embeddings

    if not pdf_chunks:
        print(" No PDF chunks found. Please load a PDF first.")
        return False

    pdf_embeddings = embed_chunks(pdf_chunks)
    if pdf_embeddings is not None:
        print(f" Created embeddings for {len(pdf_chunks)} chunks")
        return True
    else:
        print(" Failed to create embeddings")
        return False

print(" Embedding functions defined successfully!")

 Embedding functions defined successfully!


In [7]:
def retrieve_relevant_chunks(query, embeddings, chunks, top_k=5):
    """
    Retrieves the most relevant chunks for a query using cosine similarity.
    """
    if not query or embeddings is None or not chunks:
        return []

    # Generate query embedding
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    # Calculate cosine similarity
    scores = util.cos_sim(query_embedding, embeddings)[0]

    # Get top k results
    top_results = torch.topk(scores, k=min(top_k, len(chunks)))

    # Return chunks with their similarity scores
    relevant_chunks = []
    for idx in top_results.indices:
        chunk_text = chunks[idx]
        similarity_score = scores[idx].item()
        relevant_chunks.append((chunk_text, similarity_score))

    return relevant_chunks

def display_retrieved_chunks(relevant_chunks):
    """
    Display the retrieved chunks with their similarity scores.
    """
    if not relevant_chunks:
        print(" No relevant chunks found.")
        return

    print(f"\n Retrieved {len(relevant_chunks)} relevant chunks:")
    print("=" * 60)

    for i, (chunk, score) in enumerate(relevant_chunks, 1):
        print(f"\n Chunk {i} (Similarity: {score:.3f}):")
        print(f" {chunk[:200]}{'...' if len(chunk) > 200 else ''}")
        print("-" * 60)

print(" Context retrieval functions defined successfully!")

 Context retrieval functions defined successfully!


In [8]:
def format_chatbot_prompt(query, context_items, conversation_history):
    """
    Formats the query, context, and conversation history into a structured prompt for the LLM.
    """
    # Format context
    context = "\n".join([f"- {item}" for item in context_items])

    # Include recent conversation history (last 3 exchanges)
    history_context = ""
    if conversation_history:
        recent_history = conversation_history[-6:]  # Last 3 Q&A pairs
        history_context = "\n".join([f"Previous: {entry}" for entry in recent_history])

    # Create the prompt
    prompt = f"""You are a helpful AI assistant that answers questions based on the provided context from a PDF document.
Use the context to provide accurate and helpful answers. If the context doesn't contain enough information to answer the question, say so politely.

Context from PDF:
{context}

{history_context}

Current Question: {query}

Answer:"""
    return prompt.strip()

def generate_response(query):
    """
    Generates a response for a given query using the RAG pipeline.
    """
    try:
        # Check if we have embeddings
        if pdf_embeddings is None or not pdf_chunks:
            return {
                'answer': "Please load a PDF first before asking questions.",
                'verification': "No PDF loaded ",
                'context_items': []
            }

        # Retrieve relevant chunks
        relevant_chunks = retrieve_relevant_chunks(query, pdf_embeddings, pdf_chunks, top_k=5)
        context_items = [item[0] for item in relevant_chunks]

        if not context_items:
            return {
                'answer': "I couldn't find relevant information in the PDF to answer your question.",
                'verification': "No relevant context found ",
                'context_items': []
            }

        # Format the prompt with conversation history
        prompt = format_chatbot_prompt(query, context_items, conversation_history)

        # Generate response
        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)

        with torch.no_grad():
            outputs = llm_model.generate(
                input_ids["input_ids"],
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode the response
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the new response (after the prompt)
        if "Answer:" in full_response:
            answer = full_response.split("Answer:")[-1].strip()
        else:
            answer = full_response[len(prompt):].strip()

        # Clean up the answer
        answer = answer.replace("Assistant:", "").strip()

        return {
            'answer': answer,
            'context_items': context_items,
            'relevant_chunks': relevant_chunks
        }

    except Exception as e:
        print(f" Error generating response: {str(e)}")
        return {
            'answer': "I encountered an error while generating the response. Please try again.",
            'verification': f"Error: {str(e)} ",
            'context_items': []
        }

print(" LLM response generation functions defined successfully!")

 LLM response generation functions defined successfully!


In [9]:
def verify_facts(answer, context_items, threshold=0.7):
    """
    Verifies if the generated answer aligns with the retrieved context.
    """
    if not answer or not context_items:
        return "No content to verify "

    try:
        # Combine all context items
        combined_context = " ".join(context_items)

        # Generate embeddings for answer and context
        answer_embedding = embedding_model.encode(answer, convert_to_tensor=True)
        context_embedding = embedding_model.encode(combined_context, convert_to_tensor=True)

        # Calculate similarity
        similarity_score = util.cos_sim(answer_embedding, context_embedding).item()

        # Verify based on threshold
        if similarity_score >= threshold:
            return f"Fact Verified (Similarity: {similarity_score:.3f})"
        else:
            return f"Fact Verification Uncertain (Similarity: {similarity_score:.3f})"

    except Exception as e:
        return f"Verification Error ({str(e)})"

def display_verification_details(answer, context_items):
    """
    Display detailed verification information.
    """
    verification_result = verify_facts(answer, context_items)

    print(f"\n Fact Verification:")
    print(f" {verification_result}")

    if context_items:
        print(f" Based on {len(context_items)} context chunks")
        print(f" Context relevance helps ensure answer accuracy")

print(" Fact verification functions defined successfully!")

 Fact verification functions defined successfully!


In [10]:
def clear_conversation():
    """
    Clears the conversation history.
    """
    global conversation_history
    conversation_history = []
    print(" Conversation history cleared!")
    if current_pdf_path:
        print(f" Current PDF: {os.path.basename(current_pdf_path)}")

def display_pdf_info():
    """
    Displays information about the currently loaded PDF.
    """
    if current_pdf_path and pdf_chunks:
        print(f"\ Current PDF: {os.path.basename(current_pdf_path)}")
        print(f" Total chunks: {len(pdf_chunks)}")
        print(f" Average chunk length: {sum(len(chunk.split()) for chunk in pdf_chunks) / len(pdf_chunks):.1f} words")
        print(f" Embeddings: {' Ready' if pdf_embeddings is not None else ' Not created'}")
    else:
        print(" No PDF currently loaded.")

def display_help():
    """
    Display help information.
    """
    print("\n Available commands:")
    print("  • Ask any question about the loaded PDF")
    print("  • 'clear' - Clear conversation history")
    print("  • 'info' - Show PDF information")
    print("  • 'help' - Show this help message")
    print("  • 'quit' or 'exit' - Exit the chatbot")
    print("  • 'show context' - Display retrieved context for last question")

def load_pdf_interactive():
    """
    Interactive PDF loading with user input.
    """
    while True:
        pdf_path = input("\n Enter the path to your PDF file: ").strip()

        if pdf_path.lower() in ['quit', 'exit', 'q']:
            print(" Goodbye!")
            return False

        if not pdf_path:
            print(" Please enter a valid PDF path.")
            continue

        if load_and_process_pdf(pdf_path):
            if create_pdf_embeddings():
                display_pdf_info()
                return True
            else:
                print(" Failed to create embeddings. Please try another PDF.")
        else:
            print(" Please try another PDF or 'quit' to exit.")

print(" Helper functions defined successfully!")

 Helper functions defined successfully!


In [11]:
def chat_with_pdf():
    """
    Main interactive chat loop.
    """
    global conversation_history

    print("\n" + "="*60)
    print(" RAG PDF Q&A Chatbot - Interactive Version")
    print("="*60)
    print(" Powered by Gemma LLM and SentenceTransformers")
    print(" Type 'help' for available commands")
    print("="*60)

    # Store last response for context display
    last_response = None

    while True:
        try:
            # Get user input
            user_input = input("\n You: ").strip()

            # Handle special commands
            if user_input.lower() in ['quit', 'exit', 'q']:
                print(" Thank you for using the RAG PDF Chatbot! Goodbye!")
                break

            elif user_input.lower() == 'clear':
                clear_conversation()
                continue

            elif user_input.lower() == 'help':
                display_help()
                continue

            elif user_input.lower() == 'info':
                display_pdf_info()
                continue

            elif user_input.lower() == 'show context':
                if last_response and 'relevant_chunks' in last_response:
                    display_retrieved_chunks(last_response['relevant_chunks'])
                else:
                    print(" No recent context available. Ask a question first.")
                continue

            elif not user_input:
                print(" Please enter a question or command.")
                continue

            # Check if PDF is loaded
            if not pdf_chunks or pdf_embeddings is None:
                print(" Please load a PDF first!")
                continue

            # Generate response
            print(" AI is thinking...")
            response = generate_response(user_input)

            if response and response['answer']:
                timestamp = datetime.now().strftime("%H:%M:%S")

                # Display the response with text wrapping
                wrapped_answer = textwrap.fill(response['answer'], width=80) # Adjust width as needed
                print(f"\n[{timestamp}]  AI: {wrapped_answer}")

                # Display verification if we have context
                if response['context_items']:
                    verification_result = verify_facts(response['answer'], response['context_items'])
                    print(f" {verification_result}")

                # Update conversation history
                conversation_history.append(f"Q: {user_input}")
                conversation_history.append(f"A: {response['answer']}")

                # Keep only last 10 exchanges to manage memory
                if len(conversation_history) > 20:
                    conversation_history = conversation_history[-20:]

                # Store for context display
                last_response = response

            else:
                print(" Sorry, I couldn't generate a response. Please try again.")

        except KeyboardInterrupt:
            print("\n\n Chat interrupted. Goodbye!")
            break
        except Exception as e:
            print(f" Error: {str(e)}")
            print(" Please try again or type 'help' for commands.")

print(" Main chatbot loop function defined successfully!")

 Main chatbot loop function defined successfully!


In [12]:
# Main execution - Load PDF and start chatbot
print(" Welcome to the RAG PDF Q&A Chatbot!")
print(f" Running on: {device}")
print(" All models loaded and ready!")

# Load PDF first
if load_pdf_interactive():
    # Start the interactive chat
    chat_with_pdf()
else:
    print(" No PDF loaded. Please restart and try again.")

 Welcome to the RAG PDF Q&A Chatbot!
 Running on: cpu
 All models loaded and ready!

 Enter the path to your PDF file: /content/Abraham Silberschatz-Operating System Concepts (9th,2012_12).pdf
 Loading PDF: Abraham Silberschatz-Operating System Concepts (9th,2012_12).pdf


Processing PDF pages: 0it [00:00, ?it/s]

 PDF processed successfully! (18831 chunks created)
 Creating embeddings for 18831 chunks...


Batches:   0%|          | 0/589 [00:00<?, ?it/s]

 Embeddings created successfully!
 Created embeddings for 18831 chunks
\ Current PDF: Abraham Silberschatz-Operating System Concepts (9th,2012_12).pdf
 Total chunks: 18831
 Average chunk length: 20.0 words
 Embeddings:  Ready

 RAG PDF Q&A Chatbot - Interactive Version
 Powered by Gemma LLM and SentenceTransformers
 Type 'help' for available commands

 You: what is an operating system?


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


 AI is thinking...

[10:18:20]  AI: An operating system is a program that manages a computer’s hardware. It also provides a basis for programs that are executed on behalf of the user.
 Fact Verified (Similarity: 0.930)

 You: what is cpu scheduling
 AI is thinking...

[10:20:50]  AI: CPU scheduling is the task of selecting a waiting process from the ready queue and allocating the CPU to it.
 Fact Verified (Similarity: 0.825)

 You: what is disk scheduling
 AI is thinking...

[10:23:21]  AI: Disk scheduling is the task of selecting a waiting disk from the ready queue and allocating the disk to it.
 Fact Verified (Similarity: 0.759)

 You: what are the diffrent types of memory
 AI is thinking...

[10:25:42]  AI: The context does not provide information about the different types of memory, so I cannot answer this question from the context.
 Fact Verification Uncertain (Similarity: 0.690)

 You: what is the memory hierarchy
 AI is thinking...

[10:28:39]  AI: The context does not provide i