In [6]:
!pip install transformers langchain datasets faiss-cpu
!pip install -U langchain-community
!pip install PyPDF2
!pip install gradio

Collecting gradio
  Downloading gradio-5.15.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.28.1 (from gradio)
  Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Co

In [5]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import pipeline
import gradio as gr

ModuleNotFoundError: No module named 'gradio'

In [None]:
# Extract text from PDF
pdf_path = "/content/HealthScore_FAQs.pdf"
reader = PdfReader(pdf_path)

# Join the pages with a newline so the last line of one page is not fused with
# the first line of the next (the retrieval step below works line by line).
# `or ""` keeps the join safe if extract_text() yields nothing for a page.
faq_text = "\n".join((page.extract_text() or "") for page in reader.pages)

# Print the beginning of the extracted text to verify the extraction worked
print(f"Extracted Text: {faq_text[:729]}")

In [None]:
# Break the extracted text into chunks of at most ~1000 characters,
# letting consecutive chunks share up to 50 characters.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
)
documents = text_splitter.create_documents(texts=[faq_text])

In [None]:
# Build a FAISS index over the chunks, embedding each one with the
# all-MiniLM-L6-v2 sentence-transformers model.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vector_store = FAISS.from_documents(
    documents=documents,
    embedding=embedding_model,
)

In [None]:
#  Define a retrieval function
def retrieve_exact_question_answer(query):
    """Return the line containing ``query`` together with the answer lines below it.

    The ``k=5`` chunks most similar to ``query`` are fetched from the
    module-level ``vector_store`` and scanned line by line in the order they
    are returned. The first line that contains the query (case-insensitive,
    ignoring differences in whitespace) is treated as the question. The lines
    after it are collected as the answer until a blank line, a line starting
    with a digit (taken to be the next numbered question), or the end of the
    chunk is reached.

    Args:
        query: Question text to look for.

    Returns:
        The matched line and its answer lines joined with newlines, or a fixed
        "no exact match" message when the query is blank or no line contains it.
    """
    no_match = "No exact match found. Please try rephrasing the question."

    # Collapse runs of whitespace so a trailing newline or a double space in
    # either the query or the extracted text does not defeat the substring test.
    needle = " ".join(query.split()).lower()
    if not needle:
        # An empty needle is a substring of every line and would "match" the
        # very first line of the first chunk.
        return no_match

    for result in vector_store.similarity_search(query, k=5):
        lines = result.page_content.split("\n")

        for i, line in enumerate(lines):
            if needle not in " ".join(line.split()).lower():
                continue

            answer_lines = [line.strip()]  # Include the matched question
            for following in lines[i + 1:]:
                text = following.strip()
                if not text or text[0].isdigit():  # Detect next question
                    break
                answer_lines.append(text)
            return "\n".join(answer_lines)  # Return the question and full answer

    return no_match


In [None]:
# Smoke-test the retriever with a sample question
test_query = "What causes fatigue and lack of energy?"
test_response = retrieve_exact_question_answer(test_query)
print("Testing query response:", test_response)

In [None]:
# Callback used by the Gradio UI to answer a user's question
def chatbot_interface(user_query):
    """Validate the user's input and return the retrieved answer as text.

    Args:
        user_query: Text typed by the user. ``None``, non-string values and
            whitespace-only strings are rejected with a prompt message.

    Returns:
        The answer from ``retrieve_exact_question_answer``, a fallback message
        when that answer is empty, a prompt to enter a valid question, or an
        error message if the lookup raised.
    """
    # Check the type first: calling .strip() on None would raise outside the try block
    if not isinstance(user_query, str) or not user_query.strip():
        return "Please enter a valid question."

    try:
        # Pass the trimmed text so stray leading/trailing whitespace cannot break matching
        answer = retrieve_exact_question_answer(user_query.strip())
        return answer if answer else "No relevant answer found."
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so report the failure as text
        return f"An error occurred: {str(e)}"

In [None]:
# Launch the Gradio UI
ui = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about the document..."),
    outputs="text",
    title="PDF-Based Question Answering System",
    description="Type a question and get AI-generated answers based on the document.",
    # Gradio 5 renamed `allow_flagging` to `flagging_mode`; the old name is deprecated.
    flagging_mode="never",  # Disable the flagging option
)
# Run the Gradio app with a public shareable link
ui.launch(share=True)
