In [1]:
!pip install gradio sentence-transformers pymupdf python-docx


Collecting gradio
  Downloading gradio-5.13.2-py3-none-any.whl.metadata (16 kB)
Collecting pymupdf
  Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.7-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.6.0 (from gradio)
  Downloading gradio_client-1.6.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0

In [16]:
import os
import re

import fitz  # PyMuPDF for PDFs
import gradio as gr
import numpy as np
import requests
from sentence_transformers import SentenceTransformer, util

# Initialize Sentence-BERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# NVIDIA API details
NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Credentials must never be committed inside a notebook. Provide the key via
# the NVIDIA_API_KEY environment variable before running this cell
# (e.g. `%env NVIDIA_API_KEY=...` or your platform's secrets mechanism).
# NOTE: the key that used to be hardcoded here is exposed and should be revoked.
API_KEY = os.environ.get("NVIDIA_API_KEY", "")

# Function to load and parse Q&A from PDF
def extract_qa_from_pdf(pdf_path):
    """Extract question/answer pairs from the text of a PDF.

    The text of every page is joined with single spaces and scanned for
    JSON-like fragments of the form ``"question": "...", "answer": "..."``.

    Args:
        pdf_path: Location of the PDF, passed straight to ``fitz.open``.

    Returns:
        A list of ``{"question": str, "answer": str}`` dicts, in order of
        appearance, with surrounding whitespace stripped from both values.
        The list is empty when the text contains no matching fragment.
    """
    # Use the document as a context manager so the underlying file handle is
    # released even if text extraction raises.
    with fitz.open(pdf_path) as doc:
        text = " ".join(page.get_text() for page in doc)

    # Non-greedy groups + DOTALL: questions/answers may span several lines.
    qa_pattern = r'\"question\":\s*\"(.*?)\",\s*\"answer\":\s*\"(.*?)\"'
    return [
        {"question": question.strip(), "answer": answer.strip()}
        for question, answer in re.findall(qa_pattern, text, re.DOTALL)
    ]

# Retrieve the stored answer whose question best matches the user's question
def get_best_answer(user_question, qa_pairs, threshold=0.75):
    """Return the answer of the Q&A pair most similar to ``user_question``.

    Every stored question and the user's question are embedded with the
    module-level ``model``; the pair with the highest cosine similarity wins.

    Args:
        user_question: The question typed by the user.
        qa_pairs: List of dicts with ``"question"`` and ``"answer"`` keys.
        threshold: The best similarity must be strictly greater than this
            value for the match to be accepted (default ``0.75``).

    Returns:
        The matching answer string, or ``None`` when there is nothing to
        compare against, the question is blank, or no stored question is
        similar enough.
    """
    # Nothing to compare: encoding an empty list / arg-maxing an empty score
    # matrix would fail, so report "no match" instead.
    if not qa_pairs or not user_question or not user_question.strip():
        return None

    questions = [qa["question"] for qa in qa_pairs]
    question_embeddings = model.encode(questions, convert_to_tensor=True)
    user_embedding = model.encode(user_question, convert_to_tensor=True)

    # One query against all stored questions: keep the single row of scores.
    scores = util.cos_sim(user_embedding, question_embeddings).cpu().numpy()[0]
    best_match_idx = int(np.argmax(scores))

    if scores[best_match_idx] > threshold:
        return qa_pairs[best_match_idx]["answer"]
    return None  # No sufficiently similar question found

# Generate elaborated answer by providing the original answer as context
def generate_elaborated_answer(question, short_answer, timeout=60):
    """Ask the NVIDIA chat-completions endpoint to elaborate on an answer.

    Args:
        question: The user's question, embedded in the prompt.
        short_answer: The answer to expand, embedded in the prompt.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The stripped text of the first completion choice. A fixed fallback
        message is returned instead when the request fails, times out,
        returns a non-200 status, or the body is not the expected JSON shape.
    """
    fallback = "I'm unable to find an answer. Please try again later with a different question."

    headers = {"Authorization": f"Bearer {API_KEY}"}
    prompt = f"Please provide a detailed and grammatically complete explanation for the following. Expand on the original answer, ensuring it ends with a clear and properly punctuated sentence. \n\nQuestion: {question}\nAnswer: {short_answer}"

    payload = {
        "model": "microsoft/phi-3-mini-128k-instruct",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
        "top_p": 0.9,
        "max_tokens": 1000,  # Increased token limit for longer responses
    }

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(
            NVIDIA_API_URL, json=payload, headers=headers, timeout=timeout
        )
        # Check the status first: error responses are often not JSON at all.
        if response.status_code != 200:
            return fallback
        response_data = response.json()
        return response_data["choices"][0]["message"]["content"].strip()
    except (
        requests.RequestException,  # network failure / timeout
        ValueError,                 # body is not valid JSON
        KeyError, IndexError, TypeError, AttributeError,  # unexpected shape
    ):
        return fallback

# Gradio interface
def qa_interface(pdf, question):
    """Answer ``question`` from the Q&A pairs found in the uploaded PDF.

    Args:
        pdf: Value delivered by the file-upload component; ``None`` when
            nothing was uploaded.
        question: The text typed by the user.

    Returns:
        A string for the answer box: a prompt to supply the missing input,
        the elaborated answer (prefixed with a check mark) when a stored
        question matched, or the model's response when none matched.
    """
    # Pressing "Submit" with missing inputs should give guidance, not a traceback.
    if pdf is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # Accept either an object exposing `.name` or a plain path string.
    # NOTE(review): which one arrives depends on the Gradio version/config — confirm.
    pdf_path = getattr(pdf, "name", pdf)
    qa_pairs = extract_qa_from_pdf(pdf_path)
    answer = get_best_answer(question, qa_pairs)

    if answer:
        elaborated_answer = generate_elaborated_answer(question, answer)
        # Make sure the displayed text ends with terminal punctuation.
        if elaborated_answer and not elaborated_answer.endswith(('.', '!', '?')):
            elaborated_answer += "."
        return f"✅ Elaborated Answer: {elaborated_answer}"

    # No stored question was close enough: let the model respond on its own.
    ai_answer = generate_elaborated_answer(question, "No exact match found")
    return f"{ai_answer}"

# Assemble the Gradio UI: heading, PDF upload, question box, answer box, button
with gr.Blocks() as demo:
    gr.Markdown("## Amrita College: Your Interactive Guide to Campus Insights")

    # Inputs supplied by the user
    pdf_input = gr.File(
        label="Upload PDF",
        file_types=[".pdf"],
    )
    question_input = gr.Textbox(
        label="Enter your question",
        placeholder="Type your question here...",
    )

    # Read-only box that shows the result
    answer_output = gr.Textbox(
        label="Answer",
        lines=5,
        interactive=False,
    )

    submit_button = gr.Button("Submit")

    # Clicking the button runs qa_interface on (pdf, question) and fills the answer box
    submit_button.click(
        fn=qa_interface,
        inputs=[pdf_input, question_input],
        outputs=[answer_output],
    )

# Start the app and request a public share link
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6eb220f22743ae6a99.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


