<a href="https://colab.research.google.com/github/AswathiViswam/micro-projects/blob/main/QuestionAnswering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# Install dependencies
!pip install transformers torch sentence-transformers gradio

# ----------------------------
# Imports
# ----------------------------
from transformers import pipeline
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import torch

# ----------------------------
# Load models
# ----------------------------
# Extractive question-answering pipeline using the deepset RoBERTa SQuAD2 checkpoint.
qa_pipeline = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
# Sentence encoder used to embed the question and the context chunks.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# ----------------------------
# Split text into chunks
# ----------------------------
def chunk_text(text, chunk_size=1000):
    """Split ``text`` into consecutive, non-overlapping character chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be a
            positive integer.

    Returns:
        A list of substrings covering ``text`` in order. Every chunk holds
        exactly ``chunk_size`` characters except possibly the last one,
        which holds the remainder. An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A zero or negative step would never advance past the start of the
    # text, so reject it up front instead of looping forever.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]

# ----------------------------
# QA function with retrieval
# ----------------------------
def answer_question(context, question):
    """Answer ``question`` from ``context`` using retrieval plus extractive QA.

    The context is split into 1000-character chunks, the chunks are ranked
    by cosine similarity between their embeddings and the question
    embedding, and the QA pipeline is run on the (up to) three best-ranked
    chunks. The answer with the highest pipeline score is returned.

    Args:
        context: Source text to search for the answer. ``None``, empty, or
            whitespace-only values are treated as missing.
        question: The question to answer. ``None``, empty, or
            whitespace-only values are treated as missing.

    Returns:
        The best-scoring answer string; a prompt asking for the missing
        input when ``context`` or ``question`` is blank; or
        ``"Could not find an answer."`` when no chunk produced a non-empty
        answer.
    """
    # Imported locally so this function does not depend on a module-level
    # import that the file does not currently have.
    import logging

    # `not x` also covers None, which would otherwise crash on `.strip()`.
    if not context or not context.strip():
        return "Please enter some context text."
    if not question or not question.strip():
        return "Please enter a question."

    chunks = chunk_text(context, 1000)
    chunk_embeddings = embed_model.encode(chunks, convert_to_tensor=True)
    question_embedding = embed_model.encode(question, convert_to_tensor=True)

    # Rank chunks by similarity to the question and keep at most three.
    cos_scores = util.cos_sim(question_embedding, chunk_embeddings)[0]
    top_results = torch.topk(cos_scores, k=min(3, len(chunks)))

    best_answer = ""
    best_score = -1.0
    # `.tolist()` turns the index tensor into plain ints for list indexing.
    for idx in top_results.indices.tolist():
        try:
            result = qa_pipeline(question=question, context=chunks[idx])
        except Exception:
            # Best effort: a failure on one chunk should not discard answers
            # from the others, but it must not disappear silently either.
            logging.getLogger(__name__).exception(
                "QA pipeline failed on chunk %d; skipping it", idx
            )
            continue
        if result['score'] > best_score:
            best_score = result['score']
            best_answer = result['answer']

    return best_answer if best_answer else "Could not find an answer."

# ----------------------------
# Gradio interface
# ----------------------------
# Input widgets, listed in the same order as answer_question's parameters.
context_box = gr.Textbox(
    lines=10,
    placeholder="Paste your text here...",
    label="Context",
)
question_box = gr.Textbox(
    lines=2,
    placeholder="Enter your question here...",
    label="Question",
)

# Connect the two text boxes to answer_question and show its return value as text.
iface = gr.Interface(
    fn=answer_question,
    inputs=[context_box, question_box],
    outputs="text",
    title="Interactive Text QA Bot",
    description="Paste any text in the context box and ask questions. Retrieval-based QA ensures accurate answers.",
)

# Start serving the interface.
iface.launch()




Fetching 0 files: 0it [00:00, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 0 files: 0it [00:00, ?it/s]

Device set to use cuda:0


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a6da3aefa670cfa9bd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


