In [1]:
!pip install gradio sentence-transformers pymupdf python-docx


Collecting gradio
  Downloading gradio-5.13.2-py3-none-any.whl.metadata (16 kB)
Collecting pymupdf
  Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.7-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.6.0 (from gradio)
  Downloading gradio_client-1.6.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0

In [7]:
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import fitz  # PyMuPDF for PDFs
import numpy as np
import re
import requests

# Load the Sentence-BERT encoder once; it is shared by every similarity lookup
model = SentenceTransformer("all-MiniLM-L6-v2")

# NVIDIA API details
import os

NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Credentials must never be committed with the notebook: the key is read from
# the NVIDIA_API_KEY environment variable instead. When the variable is unset
# the key is an empty string. The key that used to be hardcoded on this line
# has been exposed and should be revoked.
API_KEY = os.environ.get("NVIDIA_API_KEY", "")

# Function to load and parse Q&A from PDF
def extract_qa_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = " ".join([page.get_text() for page in doc])

    # Extract structured Q&A pairs using regex
    qa_pairs = []
    qa_pattern = r'\"question\":\s*\"(.*?)\",\s*\"answer\":\s*\"(.*?)\"'
    matches = re.findall(qa_pattern, text, re.DOTALL)

    for match in matches:
        question, answer = match
        qa_pairs.append({"question": question.strip(), "answer": answer.strip()})

    return qa_pairs

# Retrieve best-matching answer
def get_best_answer(user_question, qa_pairs, threshold=0.75):
    """Return the stored answer whose question is most similar to the user's.

    Args:
        user_question: The question text entered by the user.
        qa_pairs: List of dicts with "question" and "answer" keys.
        threshold: Cosine similarity the best match must exceed to be
            accepted. Defaults to 0.75, the value that used to be hard-coded.

    Returns:
        The answer of the best-matching pair, or None when ``qa_pairs`` is
        empty or the best similarity does not exceed ``threshold``.
    """
    # With no stored questions there is nothing to rank; np.argmax raises on
    # an empty score array, so bail out before encoding anything.
    if not qa_pairs:
        return None

    questions = [qa["question"] for qa in qa_pairs]
    question_embeddings = model.encode(questions, convert_to_tensor=True)
    user_embedding = model.encode(user_question, convert_to_tensor=True)

    # The similarity result has a single row (one user question); take that
    # row so the argmax index and the score lookup use the same axis.
    scores = util.cos_sim(user_embedding, question_embeddings).cpu().numpy()[0]
    best_match_idx = int(np.argmax(scores))

    if scores[best_match_idx] > threshold:
        return qa_pairs[best_match_idx]["answer"]
    return None  # No sufficiently similar question found

# NVIDIA API for additional responses
def generate_answer_via_nvidia(question, timeout=30):
    """Ask the NVIDIA chat-completions endpoint to answer ``question``.

    Args:
        question: Text sent as the single user message.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The stripped content of the first returned choice, or the fixed
        fallback string "I'm unable to find an answer." when the request
        fails, the status is not 200, or the body lacks a usable choice.
    """
    fallback = "I'm unable to find an answer."
    headers = {"Authorization": f"Bearer {API_KEY}"}
    payload = {
        "model": "microsoft/phi-3-mini-128k-instruct",
        "messages": [{"role": "user", "content": question}],
        "temperature": 0.2,
        "top_p": 0.8,
        "max_tokens": 300,
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(
            NVIDIA_API_URL, json=payload, headers=headers, timeout=timeout
        )
        response_data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return fallback

    if response.status_code != 200:
        return fallback

    try:
        return response_data["choices"][0]["message"]["content"].strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        # Missing/empty "choices" or an unexpected response shape.
        return fallback

# Gradio interface
def qa_interface(pdf, question):
    """Answer ``question`` from the uploaded PDF, falling back to the AI model.

    Args:
        pdf: The uploaded file from the Gradio file input: None when nothing
            was uploaded, otherwise an object with a ``name`` path attribute
            or a plain path string.
        question: The question text entered by the user.

    Returns:
        A display string: a hint when input is missing, the matched answer
        from the PDF, or an AI-generated answer when no match is found.
    """
    # Submit can be clicked before a file is chosen; avoid None.name.
    if pdf is None:
        return "⚠️ Please upload a PDF file first."
    if not question or not question.strip():
        return "⚠️ Please enter a question."

    # Accept both an upload object exposing .name and a bare path string.
    pdf_path = getattr(pdf, "name", pdf)
    qa_pairs = extract_qa_from_pdf(pdf_path)
    # A PDF without any recognised Q&A pairs cannot produce a match.
    answer = get_best_answer(question, qa_pairs) if qa_pairs else None

    if answer:
        return f"✅ Answer: {answer}"

    ai_answer = generate_answer_via_nvidia(question)
    return f"❌ Exact match not found.\n🤖 AI-Generated Answer: {ai_answer}"

# Create Gradio interface
# Components are declared inside the Blocks context in the order they should
# appear on the page: title, PDF upload, question box, answer box, button.
with gr.Blocks() as demo:
    gr.Markdown("## Amrita College: Your Interactive Guide to Campus Insights")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    question_input = gr.Textbox(label="Enter your question")
    answer_output = gr.Textbox(label="Answer")

    submit_button = gr.Button("Submit")
    # Clicking Submit calls qa_interface(pdf, question) and shows its return
    # value in the answer box.
    submit_button.click(qa_interface, inputs=[pdf_input, question_input], outputs=answer_output)

# Launch the Gradio app
# NOTE: share=True publishes the app on a temporary public gradio.live URL
# (see the cell output), so anyone with the link can use it.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2ad0d3c71d856ba9c5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [9]:
from sentence_transformers import SentenceTransformer, util
import fitz  # PyMuPDF for PDFs
import numpy as np
import re
import requests

# Load the Sentence-BERT encoder once; it is shared by every similarity lookup
model = SentenceTransformer("all-MiniLM-L6-v2")

# NVIDIA API details
import os

NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Credentials must never be committed with the notebook: the key is read from
# the NVIDIA_API_KEY environment variable instead. When the variable is unset
# the key is an empty string. The key that used to be hardcoded on this line
# has been exposed and should be revoked.
API_KEY = os.environ.get("NVIDIA_API_KEY", "")

# Function to load and parse Q&A from PDF
def extract_qa_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = " ".join([page.get_text() for page in doc])

    # Extract structured Q&A pairs using regex
    qa_pairs = []
    qa_pattern = r'\"question\":\s*\"(.*?)\",\s*\"answer\":\s*\"(.*?)\"'
    matches = re.findall(qa_pattern, text, re.DOTALL)

    for match in matches:
        question, answer = match
        qa_pairs.append({"question": question.strip(), "answer": answer.strip()})

    return qa_pairs

# Retrieve best-matching answer
def get_best_answer(user_question, qa_pairs, threshold=0.75):
    """Return the stored answer whose question is most similar to the user's.

    Args:
        user_question: The question text entered by the user.
        qa_pairs: List of dicts with "question" and "answer" keys.
        threshold: Cosine similarity the best match must exceed to be
            accepted. Defaults to 0.75, the value that used to be hard-coded.

    Returns:
        The answer of the best-matching pair, or None when ``qa_pairs`` is
        empty or the best similarity does not exceed ``threshold``.
    """
    # With no stored questions there is nothing to rank; np.argmax raises on
    # an empty score array, so bail out before encoding anything.
    if not qa_pairs:
        return None

    questions = [qa["question"] for qa in qa_pairs]
    question_embeddings = model.encode(questions, convert_to_tensor=True)
    user_embedding = model.encode(user_question, convert_to_tensor=True)

    # The similarity result has a single row (one user question); take that
    # row so the argmax index and the score lookup use the same axis.
    scores = util.cos_sim(user_embedding, question_embeddings).cpu().numpy()[0]
    best_match_idx = int(np.argmax(scores))

    if scores[best_match_idx] > threshold:
        return qa_pairs[best_match_idx]["answer"]
    return None  # No sufficiently similar question found

# NVIDIA API for additional responses
def generate_answer_via_nvidia(question, timeout=30):
    """Ask the NVIDIA chat-completions endpoint to answer ``question``.

    Args:
        question: Text sent as the single user message.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The stripped content of the first returned choice, or the fixed
        fallback string "I'm unable to find an answer." when the request
        fails, the status is not 200, or the body lacks a usable choice.
    """
    fallback = "I'm unable to find an answer."
    headers = {"Authorization": f"Bearer {API_KEY}"}
    payload = {
        "model": "microsoft/phi-3-mini-128k-instruct",
        "messages": [{"role": "user", "content": question}],
        "temperature": 0.2,
        "top_p": 0.8,
        "max_tokens": 300,
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(
            NVIDIA_API_URL, json=payload, headers=headers, timeout=timeout
        )
        response_data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return fallback

    if response.status_code != 200:
        return fallback

    try:
        return response_data["choices"][0]["message"]["content"].strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        # Missing/empty "choices" or an unexpected response shape.
        return fallback

# Main function with a limit on the number of questions (5 by default)
def main(pdf_path, max_questions=5):
    """Run an interactive console Q&A session over the given PDF.

    Prompts for up to ``max_questions`` questions. Each one is answered from
    the Q&A pairs extracted from the PDF when a match is found, otherwise via
    the NVIDIA API. Typing 'exit' (any case, surrounding spaces ignored) ends
    the session early.

    Args:
        pdf_path: Path of the PDF to load the Q&A pairs from.
        max_questions: Maximum number of questions accepted in one session.
    """
    qa_pairs = extract_qa_from_pdf(pdf_path)

    for asked in range(max_questions):
        user_question = input(
            f"({asked + 1}/{max_questions}) Enter your question (or type 'exit' to quit): "
        )
        if user_question.strip().lower() == "exit":
            print("\n👋 Exiting...")
            break

        # A PDF without any recognised Q&A pairs cannot produce a match.
        answer = get_best_answer(user_question, qa_pairs) if qa_pairs else None

        if answer:
            print(f"\n✅ Answer: {answer}")
        else:
            print("\n❌ Exact match not found. Generating a response...")
            ai_answer = generate_answer_via_nvidia(user_question)
            print(f"\n🤖 AI-Generated Answer: {ai_answer}")
    else:
        # Reached only when the loop was not left via 'exit', so the limit
        # message is no longer shown to users who quit early.
        print(f"\n🎯 You have reached the limit of {max_questions} questions. Exiting...")

# Run with your PDF
# NOTE(review): this is an absolute path under /content/drive, so it presumably
# resolves only in a Colab session with Google Drive mounted — adjust the path
# when running elsewhere.
pdf_path = "/content/drive/MyDrive/Campus_Pal Content.pdf"
main(pdf_path)

(1/5) Enter your question (or type 'exit' to quit): Who is the founder and Chancellor of Amrita Vishwa Vidyapeetham? 

✅ Answer: Sri Mata Amritanandamayi Devi, affectionately known as 
AMMA, is the founder and Chancellor of the university.
(2/5) Enter your question (or type 'exit' to quit): How many campuses does Amrita Vishwa Vidyapeetham have, and can  you list them? 

✅ Answer: The university has multiple campuses, including 
Amritapuri, Amaravati, Bengaluru, Chennai, Coimbatore, Kochi, Mysuru, and 
Faridabad.
(3/5) Enter your question (or type 'exit' to quit): What is the size of the Amritapuri campus at Amrita Vishwa  Vidyapeetham? 

✅ Answer: The campus spans over 80 acres of land.
(4/5) Enter your question (or type 'exit' to quit): What is the tuition fee per semester for the B.B.A. (Business Analytics)  program for the academic year 2025-26? 

✅ Answer: Rs. 45,900.
(5/5) Enter your question (or type 'exit' to quit): What is the NAAC accreditation grade for Amrita Vishwa Vidyape