<a href="https://colab.research.google.com/github/GenAk95/GenAI_Projects/blob/main/QnA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

!pip install PyPDF2 python-docx

In [None]:
import torch
import gradio as gr
from PyPDF2 import PdfReader
from docx import Document

# Use a pipeline as a high-level helper
from transformers import pipeline

# Question-answering pipeline backed by the deepset/roberta-base-squad2 model.
# Built once at module level and reused by get_answer for every request.
pipe = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Returned in place of document text when the upload is neither PDF nor DOCX.
# get_answer compares against it so the model is never run on this message.
_UNSUPPORTED_MESSAGE = "Unsupported file type. Please upload a PDF or DOCX."


def get_answer(file, question):
    """Answer ``question`` using the text of the uploaded document.

    Args:
        file: The upload handed over by the UI -- a file path string or an
            object exposing a ``name`` attribute -- or ``None`` when nothing
            was uploaded.
        question: The question typed by the user.

    Returns:
        The ``"answer"`` field of the pipeline result, or a short user-facing
        message when the inputs cannot be processed.
    """
    # Validate before touching the document or the model: a missing upload
    # has no ``name`` to inspect, and the pipeline rejects empty inputs.
    if file is None:
        return "Please upload a PDF or DOCX file."
    if not question or not question.strip():
        return "Please enter a question."

    context = extract_text_from_file(file)
    if context == _UNSUPPORTED_MESSAGE:
        # Surface the message itself instead of "answering" from it.
        return context
    if not context.strip():
        return "No text could be extracted from the uploaded document."

    answer = pipe(question=question, context=context)
    return answer["answer"]


# --- Text Extraction Logic ---
def extract_text_from_file(file):
    """Return the plain text of a PDF or DOCX upload.

    The result is also stored in the module-level ``document_text`` variable.

    Args:
        file: A file path string, or an object whose ``name`` attribute holds
            the file name. It is passed unchanged to ``PdfReader`` /
            ``Document``.

    Returns:
        The extracted text (PDF pages and DOCX paragraphs are joined with
        newlines), or the "Unsupported file type" message when the extension
        is neither ``.pdf`` nor ``.docx``.
    """
    global document_text  # Store result in a variable for external access
    import os  # local import keeps this block self-contained

    # Accept both plain path strings and file-like objects carrying ``name``.
    file_name = file if isinstance(file, str) else getattr(file, "name", "")
    # splitext only reports a real extension, so a name without a dot
    # (e.g. "pdf") is no longer mistaken for a PDF.
    extension = os.path.splitext(str(file_name))[1].lower()

    if extension == ".pdf":
        reader = PdfReader(file)
        page_texts = (page.extract_text() for page in reader.pages)
        # Join with newlines so words at page boundaries do not fuse together;
        # pages that yield no text are skipped.
        text = "\n".join(chunk for chunk in page_texts if chunk)
    elif extension == ".docx":
        doc = Document(file)
        text = "\n".join(para.text for para in doc.paragraphs)
    else:
        text = _UNSUPPORTED_MESSAGE

    document_text = text
    return text

# --- Gradio Interface ---
# Input widgets, listed in the positional order get_answer takes them:
# the uploaded file first, then the question.
upload_input = gr.File(label="Upload your documents here (PDF or DOCX)")
question_input = gr.Textbox(lines=1, label="Input your question")

# Multi-line box that displays whatever string get_answer returns.
answer_output = gr.Textbox(label="Answer", lines=10)

demo = gr.Interface(
    fn=get_answer,
    inputs=[upload_input, question_input],
    outputs=answer_output,
    title="Questions and Answers from Documents",
    description="Upload a PDF or DOCX file, then ask a question. Uses deepset/roberta-base-squad2 for answering.",
)
demo.launch()




