In [1]:
import os
from sentence_transformers import SentenceTransformer
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pinecone
from dotenv import load_dotenv
from uuid import uuid4 
load_dotenv()

# Load local embedding model (768-dim)
embedding_model = SentenceTransformer("all-mpnet-base-v2")


  from .autonotebook import tqdm as notebook_tqdm


In [58]:
def is_safe_and_relevant(query):
    # Safety check
    inappropriate_keywords = [
        "kill", "sex", "bomb", "racist", "suicide", "violence", "attack", "hack"
    ]

    lowered_query = query.lower()
    for word in inappropriate_keywords:
        if word in lowered_query:
            return False, "Your question contains inappropriate or unsafe content."

    # 🔍 LLM-based domain relevance check
    relevance_prompt = f"""
You are an expert assistant in data science, artificial intelligence, and linear algebra. Respond clearly, accurately, and with step-by-step reasoning when necessary.

- Always assume the user may be continuing a conversation and respond appropriately to follow-ups like "elaborate" or "explain more."
- Do not reject questions that are vague or short; instead, ask clarifying questions if needed.
- Avoid giving generic or evasive responses. Be specific and clear, even if the answer is limited.
- Only say a question is unrelated if it clearly has no connection to data science, AI, or linear algebra.
- If unsure, admit it instead of guessing or hallucinating. Say "I'm not sure" and explain what additional info you need.


Question: {query}
"""

    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": relevance_prompt}],
        temperature=0.0
    )

    if response.choices[0].message.content.strip().lower() != "yes":
        return False, "Your question seems unrelated to data science or linear algebra."

    return True, ""

In [59]:
import os

from groq import Groq

client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
question = input("What is your Question?")
chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": question,
        }
    ],
    model="llama-3.3-70b-versatile",
)

print(chat_completion.choices[0].message.content)

It seems like you didn't type anything. Please feel free to ask me anything or start a conversation, and I'll do my best to help.


In [60]:
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone client
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = 'procurement-chatbot'  # make sure this is correct in your .env

if not index_name:
    raise ValueError("INDEX_NAME is not set in your .env file")

index = pc.Index('procurement-chatbot')






In [61]:
def load_pdfs_from_folder(folder_path="data"):
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Folder '{folder_path}' does not exist. Check the path.")

    documents = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".pdf"):
            loader = PyPDFLoader(os.path.join(folder_path, filename))
            pages = loader.load_and_split()
            for i, page in enumerate(pages):
                documents.append({
                    "content": page.page_content,
                    "metadata": {"filename": filename, "page_number": i + 1}
                })
    return documents

In [62]:
def split_documents(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=512,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=True,
    )

    chunks = []
    for doc in documents:
        split_texts = text_splitter.split_text(doc["content"])
        for i, chunk_content in enumerate(split_texts):
            chunks.append({
                "content": chunk_content,
                "metadata": {**doc["metadata"], "chunk_id": i}
            })
    return chunks

Embedding function

In [63]:
def get_embedding(text="None"):
    embedding = embedding_model.encode(text).tolist()
    return embedding

In [64]:
# This should be in one of your earlier cells (e.g., Cell 3 in our previous numbering)
from uuid import uuid4 # Ensure this is imported at the top of your notebook

def get_embedding(text="None"):
    # You can use this simpler version, or your MockResponse version, either is fine for the embedding itself
    embedding = embedding_model.encode(text).tolist()
    return embedding

def upsert_chunks_to_pinecone(index, chunks, batch_size=100):
    vectors = []
    for i, chunk in enumerate(chunks):
        content = chunk["content"]
        metadata = chunk.get("metadata", {})

        # IMPORTANT: This line ensures the 'text' key is added to the metadata
        metadata["text"] = content

        embedding = get_embedding(content) # Get embedding after 'content' is part of metadata
        
        # Use uuid4 for unique IDs, consistent with your original notebook's intent
        vector_id = str(uuid4())

        vectors.append((vector_id, embedding, metadata))

        if len(vectors) == batch_size or i == len(chunks) - 1:
            index.upsert(vectors=vectors)
            print(f"Upserted batch ending at chunk {i + 1}")
            vectors = []
    print(f"All {len(chunks)} vectors upserted to Pinecone.")

In [65]:
def count_tokens(text):
    return len(text.split())


In [24]:
# Replace with your actual path
data_path = r"C:\Users\Taj\Documents\Procurement_chatbot\data"

# Load, split, embed, and upsert
documents = load_pdfs_from_folder(data_path)
chunks = split_documents(documents)

print(f"Loaded {len(documents)} pages")
print(f"Generated {len(chunks)} chunks")

upsert_chunks_to_pinecone(index, chunks)


Loaded 238 pages
Generated 860 chunks
Upserted batch ending at chunk 100
Upserted batch ending at chunk 200
Upserted batch ending at chunk 300
Upserted batch ending at chunk 400
Upserted batch ending at chunk 500
Upserted batch ending at chunk 600
Upserted batch ending at chunk 700
Upserted batch ending at chunk 800
Upserted batch ending at chunk 860
All 860 vectors upserted to Pinecone.


In [66]:
# Show first 3 chunks with embeddings
for i in range(min(3, len(chunks))):
    content = chunks[i]["content"]
    embedding = get_embedding(content)
    
    print(f"\nChunk {i}:\n{'-'*60}")
    print(f"Text:\n{content[:500]}{'...' if len(content) > 500 else ''}")  # Truncated view
    print(f"\nEmbedding (len={len(embedding)}):\n{embedding[:10]} ...")



Chunk 0:
------------------------------------------------------------
Text:
www.innovatics.ai
+1 (678) 209 9780
register@innovatics.ai
From Zero to Hero 40-Week Training Program
 LIVE ONLINE 
TRAINING PROGRAM
BATCH 22
FEBruary
202522
nd
40 WEEKS
The CPD Certification
Service
Marquis Who’s Who 2024
over 125 years
Internet 2.0 Outstanding
Leadership Award-Dubai 2022

Embedding (len=768):
[-0.01627170667052269, -0.009842539206147194, -0.04329383745789528, 0.0041566104628145695, -0.019876964390277863, -0.03534669056534767, 0.07136531174182892, 0.021396152675151825, -0.04936816915869713, 0.01842551678419113] ...

Chunk 1:
------------------------------------------------------------
Text:
Service
Marquis Who’s Who 2024
over 125 years
Internet 2.0 Outstanding
Leadership Award-Dubai 2022
/gid00042/gid00077/gid00083/gid00068/gid00081/gid00077/gid00064/gid00083/gid00072/gid00078/gid00077/gid00064/gid00075/gid00001/gid00034/gid00082/gid00082/gid00078/gid00066/gid00072/gid00064/gid00083/gid00072/

In [67]:
from sentence_transformers import SentenceTransformer

# Load your embedding model
embedding_model = SentenceTransformer('all-mpnet-base-v2')

# Get the user query
user_query = input("Ask something: ")

# Convert the query to an embedding
query_embedding = embedding_model.encode(user_query).tolist()

# Search Pinecone index
top_k = 5
results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)

# Extract relevant chunks for LLM context
relevant_chunks_text = [match['metadata']['text'] for match in results['matches']]

# Combine chunks into a single context string
context = "\n\n".join(relevant_chunks_text)

# Formulate the prompt for the LLM
prompt_for_llm = f"""Based on the following context, please answer the question.
If the answer is not available in the context, state that you cannot answer from the provided information.

Context:
{context}

Question: {user_query}

Answer:"""

# Call the Groq LLM for inference
chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": prompt_for_llm,
        }
    ],
    model="llama-3.3-70b-versatile", # Or your preferred Groq model
    temperature=0.7,
    max_tokens=500,
)

# Print the LLM's generated response
print("\n--- LLM Generated Response ---")
print(chat_completion.choices[0].message.content)

# Optional: Print retrieved chunks with metadata for verification
print("\n--- Retrieved Relevant Chunks with Metadata ---")
for i, match in enumerate(results['matches']):
    chunk_text = match['metadata']['text']
    filename = match['metadata'].get('filename', 'N/A')
    page_number = match['metadata'].get('page_number', 'N/A')
    score = match['score']

    print(f"\nChunk {i+1}:")
    print(f"  Score: {score:.4f}")
    print(f"  Source File: {filename}")
    print(f"  Page Number: {page_number}")
    print(f"  Content:\n{chunk_text}")
    print("-" * 60)


--- LLM Generated Response ---
It seems like you forgot to include the question. Please go ahead and provide the question, and I'll do my best to answer it based on the provided context. If the answer is not available in the context, I will let you know that as well.

--- Retrieved Relevant Chunks with Metadata ---

Chunk 1:
  Score: 0.2450
  Source File: 1- InnovatiCS - DS & AI Zero to Hero  Batch 22 ( Feb 22 -2025).pdf
  Page Number: 1.0
  Content:
Service
Marquis Who’s Who 2024
over 125 years
Internet 2.0 Outstanding
Leadership Award-Dubai 2022
/gid00042/gid00077/gid00083/gid00068/gid00081/gid00077/gid00064/gid00083/gid00072/gid00078/gid00077/gid00064/gid00075/gid00001/gid00034/gid00082/gid00082/gid00078/gid00066/gid00072/gid00064/gid00083/gid00072/gid00078/gid00077/gid00001/gid00078/gid00069/gid00001
------------------------------------------------------------

Chunk 2:
  Score: 0.2450
  Source File: 1- InnovatiCS - DS & AI Zero to Hero  Batch 22 ( Feb 22 -2025).pdf
  Page Number:

In [68]:
def get_answer_with_guardrails(query):
    # Step 1: Guardrail check
    is_ok, msg = is_safe_and_relevant(query)
    if not is_ok:
        return msg

    # Step 2: Embed the query
    query_embedding = model.encode(query).tolist()

    # Step 3: Query Pinecone for context
    results = index.query(vector=query_embedding, top_k=3, include_metadata=True)

    # Step 4: Relevance guardrail
    if not results.matches or results.matches[0].score < 0.75:
        return "Sorry, I couldn't find enough relevant information to answer that."

    # Step 5: Compile the context
    context = "\n\n".join([match.metadata['text'] for match in results.matches])

    # Step 6: Create separate system + user prompts
    system_prompt = """
You are an assistant that only answers using the provided document context.
If the answer is not in the context, respond:
"I'm sorry, I can't find that information in the provided documents."

Do not answer inappropriate or off-topic questions.
""".strip()

    user_prompt = f"""Context:
{context}

Question: {query}
"""

    # Step 7: Generate answer with llama3
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt.strip()}
        ]
    )

    return completion.choices[0].message.content


In [69]:
# --- Main Chat Logic with Integrated Guardrails ---
import gradio as gr
def rag_chat_with_guardrails(user_message, chat_history):
    """
    This function combines the RAG logic with the guardrails.
    It will be used by the Gradio interface.
    """
    try:
        # 1. --- Input Guardrail ---
        # Check if the query is safe and relevant before doing anything else.
        is_ok, guardrail_msg = is_safe_and_relevant(user_message)
        if not is_ok:
            chat_history.append([user_message, guardrail_msg])
            # Return an empty string for the message box, the updated history,
            # and a message for the chunks display.
            return "", chat_history, f"Query rejected by guardrail: {guardrail_msg}"

        # 2. --- Retrieval ---
        # If the input is valid, proceed with embedding and retrieval.
        query_embedding = embedding_model.encode(user_message).tolist()
        results = index.query(vector=query_embedding, top_k=5, include_metadata=True)

        # ---- TEMPORARY DEBUG CODE START ----
        # This block will print the scores so you can see them in your terminal/output.
        if results.matches:
            print("--- DEBUG: Top Match Scores ---")
            for match in results.matches:
                print(f"  Score: {match['score']:.4f}")
            print("-----------------------------")
        # ---- TEMPORARY DEBUG CODE END ----


        # 3. --- Relevance Guardrail ---
        # Adjust the threshold (e.g., to 0.5) based on the debug output you see.
        # I've set it to 0.5 as a reasonable starting point.
        relevance_threshold = 0.5
        if not results.matches or results.matches[0].score < relevance_threshold:
            response = "Sorry, I couldn't find enough relevant information to answer that."
            chat_history.append([user_message, response])
            # This message below helps you see why it stopped in the UI.
            top_score_msg = f"\n\nTop score was: {results.matches[0].score:.4f}" if results.matches else ""
            chunks_display_str = f"No relevant chunks found with a high enough score (>{relevance_threshold}).{top_score_msg}"
            return "", chat_history, chunks_display_str

        # If chunks are relevant, prepare them for display and for the LLM.
        relevant_chunks_text = [match['metadata']['text'] for match in results['matches']]
        context = "\n\n".join(relevant_chunks_text)

        chunks_display_str = ""
        for i, match in enumerate(results['matches']):
            chunks_display_str += (
                f"Chunk {i+1}: (Score: {match['score']:.4f})\n"
                f"Source: {match['metadata'].get('filename', 'N/A')}, Page: {match['metadata'].get('page_number', 'N/A')}\n"
                f"Content: {match['metadata']['text']}\n"
                "------------------------------------------------------------\n\n"
            )

        # 4. --- Output Guardrail (System Prompt) ---
        # Build the prompt with strict instructions for the LLM.
        system_prompt = """
You are an assistant that only answers using the provided document context.
If the answer is not in the context, respond:
"I'm sorry, I can't find that information in the provided documents."
You are an expert assistant in data science, artificial intelligence, and linear algebra.

- Answer with clarity, depth, and step-by-step reasoning when helpful.
- Maintain context of the ongoing conversation to answer follow-up questions like "explain more" or "elaborate."
- Only answer questions relevant to data science, AI, or linear algebra.
- If a question is unclear or lacks context, ask for clarification rather than guessing.
- If you don’t know the answer or can't find enough information, say "I’m not sure" and suggest what the user can provide to help.
Do not answer inappropriate or off-topic questions.
""".strip()

        user_prompt = f"Context:\n{context}\n\nQuestion: {user_message}"

        # 5. --- LLM Call ---
        # Call the LLM with the guarded prompt.
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            model="llama-3.3-70b-versatile",
            temperature=0.4,
            max_tokens=500,
        )

        answer = chat_completion.choices[0].message.content
        chat_history.append([user_message, answer])

        return "", chat_history, chunks_display_str

    except Exception as e:
        error_message = f"An error occurred: {e}"
        chat_history.append([user_message, error_message])
        return "", chat_history, f"Error during processing: {e}"

# --- Helper functions for UI interaction ---

def edit_last_message(chat_history):
    if chat_history:
        last_user_message = chat_history[-1][0]
        chat_history = chat_history[:-1]
        return last_user_message, chat_history
    return "", chat_history

def delete_last_message(chat_history):
    if chat_history:
        return chat_history[:-1]
    return chat_history

# --- Gradio UI Block ---

with gr.Blocks(theme="default") as demo:
    gr.Markdown("# 🤖 RAG Chatbot with Guardrails")
    gr.Markdown("This chatbot answers questions based on the provided documents. It will reject inappropriate or off-topic questions.")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History", bubble_full_width=False)
            msg = gr.Textbox(placeholder="Ask me about data science or linear algebra...", label="Your Question")
            with gr.Row():
                send_btn = gr.Button("Send")
                edit_btn = gr.Button("✏️ Edit Last")
                delete_btn = gr.Button("🗑️ Delete Last")
                clear_btn = gr.Button("✨ Clear Chat")
        with gr.Column(scale=1):
            relevant_chunks_display = gr.Textbox(
                label="Retrieved Context from Documents (RAG)",
                interactive=False,
                lines=25,
                max_lines=25,
            )

    # Wire up the UI components to the functions
    send_btn.click(
        rag_chat_with_guardrails,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot, relevant_chunks_display]
    )
    msg.submit(
        rag_chat_with_guardrails,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot, relevant_chunks_display]
    )

    clear_btn.click(lambda: ([], "", ""), outputs=[chatbot, msg, relevant_chunks_display])
    edit_btn.click(edit_last_message, inputs=[chatbot], outputs=[msg, chatbot])
    delete_btn.click(delete_last_message, inputs=[chatbot], outputs=[chatbot])

# Launch the full-featured demo
demo.launch()

  chatbot = gr.Chatbot(label="Chat History", bubble_full_width=False)
  chatbot = gr.Chatbot(label="Chat History", bubble_full_width=False)


* Running on local URL:  http://127.0.0.1:7868
* To create a public link, set `share=True` in `launch()`.


