## Import Required Libraries and Initialize Embedding Model

In [28]:
import os
from sentence_transformers import SentenceTransformer
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec
from uuid import uuid4 
from groq import Groq
import gradio as gr
import pinecone
from dotenv import load_dotenv
from uuid import uuid4 
import requests
from langchain_core.documents import Document
load_dotenv()

# Load local embedding model (768-dim)
embedding_model = SentenceTransformer("all-mpnet-base-v2")

## Structured Web Search + Per-Source Scoring + Rendering

In [29]:
# Structured Web Search + Per-Source Scoring + Rendering
# This leaves your original `google_search` untouched.
# Dependencies: GOOGLE_API_KEY / GOOGLE_SEARCH_ENGINE_ID env, `requests` imported earlier, `embedding_model` available.

from sklearn.metrics.pairwise import cosine_similarity

def google_search_structured(query: str, num_results: int = 10):
    """
    Google CSE — returns a list of dicts with title, link, snippet.
    Prints 'Searching the internet...' (as required).
    """
    print("Searching the internet...")
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "key": os.getenv("GOOGLE_API_KEY"),
        "cx": os.getenv("GOOGLE_SEARCH_ENGINE_ID"),
        "q": query,
        "num": num_results,
    }
    resp = requests.get(url, params=params)
    data = resp.json()
    results = []
    for item in data.get("items", []):
        results.append({
            "title": item.get("title", ""),
            "link": item.get("link", ""),
            "snippet": item.get("snippet", "")
        })
    return results



def render_sources_with_scores(results: list, header: str = "Web Sources"):
    """
    Pretty-prints sources with individual scores (Title, URL, Snippet, Score).
    """
    if not results:
        return f"=== {header} ===\nNo web sources found."
    lines = [f"=== {header} (Top {len(results)}) ==="]
    for i, r in enumerate(results, 1):
        lines.append(
            f"[{i}] {r.get('title','')}\n"
            f"URL: {r.get('link','')}\n"
            f"Score: {r.get('score','0')}\n"
            f"Snippet: {r.get('snippet','')}\n"
        )
    return "\n".join(lines)


## Initialize Pinecone Vector Database

In [30]:
# Initialize Pinecone client (connecting to existing index)
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index('procurement-chatbot')

# Load embedding model for queries only  
embedding_model = SentenceTransformer("all-mpnet-base-v2")

## Load embedding model

In [31]:
# Load embedding model
embedding_model = SentenceTransformer('all-mpnet-base-v2')

# Get the user query
user_query = input("Ask something: ")

# Convert the query to an embedding
query_embedding = embedding_model.encode(user_query).tolist()

# Search Pinecone index
top_k = 5
results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)

# Extract relevant chunks for LLM context
relevant_chunks_text = [match['metadata']['text'] for match in results['matches']]

# Combine chunks into a single context string
context = "\n\n".join(relevant_chunks_text)

# Formulate the prompt for the LLM
prompt_for_llm = f"""Based on the following context, please answer the question.
If the answer is not available in the context, state that you cannot answer from the provided information.

Context:
{context}

Question: {user_query}

Answer:"""

# Call the Groq LLM for inference
chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": prompt_for_llm,
        }
    ],
    model="llama-3.3-70b-versatile", # Or preferred Groq model
    temperature=0.7,
    max_tokens=500,
)

# Print the LLM's generated response
print("\n--- LLM Generated Response ---")
print(chat_completion.choices[0].message.content)

print("\n--- Retrieved Relevant Chunks with Metadata ---")
for i, match in enumerate(results['matches']):
    chunk_text = match['metadata']['text']
    filename = match['metadata'].get('filename', 'N/A')
    page_number = match['metadata'].get('page_number', 'N/A')
    score = match['score']

    print(f"\nChunk {i+1}:")
    print(f"  Score: {score:.4f}")
    print(f"  Source File: {filename}")
    print(f"  Page Number: {page_number}")
    print(f"  Content:\n{chunk_text}")
    print("-" * 60)


--- LLM Generated Response ---
You haven't asked a question yet. Please go ahead and ask your question based on the provided context, and I'll do my best to answer it. If the answer is not available in the context, I'll let you know that I cannot answer from the provided information.

--- Retrieved Relevant Chunks with Metadata ---

Chunk 1:
  Score: 0.2451
  Source File: 1- InnovatiCS - DS & AI Zero to Hero  Batch 22 ( Feb 22 -2025).pdf
  Page Number: 1.0
  Content:
Service
Marquis Who’s Who 2024
over 125 years
Internet 2.0 Outstanding
Leadership Award-Dubai 2022
/gid00042/gid00077/gid00083/gid00068/gid00081/gid00077/gid00064/gid00083/gid00072/gid00078/gid00077/gid00064/gid00075/gid00001/gid00034/gid00082/gid00082/gid00078/gid00066/gid00072/gid00064/gid00083/gid00072/gid00078/gid00077/gid00001/gid00078/gid00069/gid00001
------------------------------------------------------------

Chunk 2:
  Score: 0.1941
  Source File: 1- InnovatiCS - DS & AI Zero to Hero  Batch 22 ( Feb 22 -2025).p

## Gradio Chatbot User Interface

In [None]:
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity

# Keep your existing response quality function (unchanged)
def evaluate_response_quality(response, source_chunks):
    if not source_chunks or not response:
        return 0.0
    response_embed = embedding_model.encode([response])
    source_embeds = embedding_model.encode(source_chunks)
    scores = cosine_similarity(response_embed, source_embeds)
    print("scores", scores)
    return float(scores.max())

# Updated strict system prompt
strict_system_prompt = """
You are a STRICT document-based assistant. You can ONLY answer questions that are directly related to the provided document context.

STRICT RULES:
1. If the question cannot be answered using the provided document context, respond with exactly: "This question is outside the scope of the provided documents."
2. Do not use any general knowledge or external information
3. Do not answer questions about general topics (like food, weather, celebrities, etc.) even if you have the knowledge
4. Only answer if the information is explicitly present in the documents
5. Questions related to topics covered in the provided documents ARE in scope
""".strip()

# Helper function to score web results
def score_web_results(query, web_results):
    """Score web results by cosine similarity to query."""
    if not web_results:
        return []
    
    query_embed = embedding_model.encode([query])[0]
    scored_results = []
    
    for result in web_results:
        snippet = result.get("snippet", "")
        if snippet:
            snippet_embed = embedding_model.encode([snippet])[0]
            score = cosine_similarity([query_embed], [snippet_embed])[0][0]
            result["score"] = float(score)
            scored_results.append(result)
    
    # Sort by score descending
    scored_results.sort(key=lambda x: x.get("score", 0), reverse=True)
    return scored_results

def standalone_rag(query):
    try:
        # 1) Embed query and search Pinecone
        query_embedding = embedding_model.encode(query).tolist()
        results = index.query(vector=query_embedding, top_k=10, include_metadata=True)

        # 2) Build RAG doc context + doc source list
        context_chunks = []
        doc_sources = []
        for i, match in enumerate(results.get("matches", []), 1):
            meta = match.get("metadata", {}) or {}
            text = meta.get("text", "")
            doc = meta.get("filename", "Unknown Doc")
            page = meta.get("page_number", "N/A")
            score = match.get("score", 0.0)
            
            context_chunks.append(text)
            doc_sources.append(f"[D{i}] {doc} - Page {page} (Score: {score:.4f})")

        rag_context = "\n\n".join(context_chunks) if context_chunks else "No relevant document context found."
        top_score = results.get("matches", [{}])[0].get("score", 0.0) if results.get("matches") else 0.0

        # 3) LLM decision: Is query related to docs? Need web search?
        decision_prompt = f"""
You are an agent evaluating if a query can be answered from the provided documents.

Query: {query}
Top Vector Score: {top_score:.4f}
Document Context (first 800 chars):
{rag_context[:800]}

Analyze:
1. Is this query related to topics covered in the provided documents?
2. Can it be answered from the provided context?

Reply with EXACTLY ONE of these:
- "NO SEARCH" → Query is related to the documents AND answerable from the context
- "SEARCH" → Query is related to the documents BUT needs additional web context
- "REFUSE" → Query is completely unrelated to the document topics

Consider: If the query is about a topic that appears to be covered in the documents (based on the context), treat it as related even if the exact answer isn't found.
""".strip()

        decision_response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": decision_prompt}],
            temperature=0,
            max_tokens=20
        )
        decision = decision_response.choices[0].message.content.strip().upper()

        # 4) If REFUSE, return early - LLM decided it's off-topic
        if "REFUSE" in decision:
            return "This question is outside the scope of the provided documents.", "No relevant sources (off-topic query).", "0.000"

        # 5) If SEARCH -> fetch web results
        web_used = ("SEARCH" in decision)
        web_results_scored = []
        web_context = ""
        web_sources_display = ""

        if web_used:
            raw_web = google_search_structured(query, num_results=10)
            web_results_scored = score_web_results(query, raw_web)
            web_sources_display = render_sources_with_scores(web_results_scored, header="Web Sources")

            top_snippets = [r.get("snippet", "") for r in web_results_scored]
            web_context = "\n\n".join(top_snippets)

        # 6) Combine contexts
        combined_context = rag_context
        if web_used and web_context:
            combined_context += "\n\n---\n[Web Context]\n" + web_context

        # 7) Final prompt - LLM answers based on combined context
        final_prompt = f"""
{strict_system_prompt}

Context (Docs + Optional Web):
{combined_context}

User Query: {query}

Instructions:
1. First determine: Can this be answered from the provided context?
2. If YES → Answer it clearly and include inline citations like [D1], [D2] for documents or [W1], [W2] for web sources naturally within your answer
3. If NO → Respond exactly: "This question is outside the scope of the provided documents."

IMPORTANT: 
- Include citations WITHIN your answer text (e.g., "According to [D1], the solution is...")
- DO NOT create a separate "References:" or "Sources:" section at the end
- Just provide a natural answer with inline citations
""".strip()

        final_response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": final_prompt}],
            temperature=0.1,
            max_tokens=500
        )
        answer = final_response.choices[0].message.content

        # If answer says it's out of scope, clear sources
        if "outside the scope" in answer.lower():
            return answer, "No relevant sources (query could not be answered).", "0.000"

        # 8) Build sources output
        sources_sections = []
        if doc_sources:
            sources_sections.append("=== Document Sources (10) ===\n" + "\n".join(doc_sources))
        if web_used:
            sources_sections.append(web_sources_display)
        sources_text = "\n\n".join(sources_sections) if sources_sections else "No sources."

        # 9) Compute response quality
        source_chunks_for_quality = context_chunks[:]
        if web_used and web_results_scored:
            source_chunks_for_quality.extend([r.get("snippet", "") for r in web_results_scored])
        quality = evaluate_response_quality(answer, source_chunks_for_quality)
        quality_str = f"{quality:.3f}"

        return answer, sources_text, quality_str
    
    except Exception as e:
        error_msg = f"Error: {str(e)}"
        print(error_msg)
        return error_msg, "Error occurred", "0.000"

# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("# RAG System")
    gr.Markdown("This assistant answers questions related to the provided documents, supplemented by web search when needed.")
    
    with gr.Row():
        with gr.Column(scale=2):
            answer_output = gr.Textbox(label="Assistant", lines=15)
        with gr.Column(scale=1):
            source_output = gr.Textbox(label="Sources", lines=10.5)
            quality_score = gr.Textbox(label="Response Quality Score", lines=1)

    with gr.Row():
        user_input = gr.Textbox(label="Ask a Question", lines=1, show_label=True)
        submit = gr.Button("Submit")

    def on_submit(query):
        answer, sources, score = standalone_rag(query)
        return answer, sources, score, ""

    submit.click(fn=on_submit, inputs=user_input, outputs=[answer_output, source_output, quality_score, user_input])
    user_input.submit(fn=on_submit, inputs=user_input, outputs=[answer_output, source_output, quality_score, user_input])

demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




Searching the internet...
scores [[0.8800718  0.811766   0.71024203 0.685486   0.61453116 0.5918271
  0.53871787 0.5460706  0.52586555 0.48185322 0.88180876 0.8283993
  0.7997967  0.7735303  0.6209624  0.61730707 0.6263708  0.44840848
  0.60307056 0.45156747]]
