In [2]:
# Install dependencies
!pip install gradio transformers scikit-learn torch

import gradio as gr
import torch
from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Sample text corpus: the small in-memory knowledge base that both QA modes
# search (generative_qa obtains its context via retrieval_based_qa).
corpus = [
    "Machine learning is a field of artificial intelligence that enables computers to learn from data.",
    "Deep learning is a subset of machine learning that uses neural networks with multiple layers.",
    "Natural language processing allows computers to understand and process human language.",
    "Supervised learning uses labeled data to train models, while unsupervised learning finds patterns in unlabeled data."
]

# Retrieval-based QA using TF-IDF.
# The vectorizer is fitted on the corpus once, up front. X is the resulting
# sparse matrix that retrieval_based_qa scores each question against; the
# index of the best-scoring row is used to index back into `corpus`.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)

def retrieval_based_qa(question):
    """Return the corpus sentence that best matches ``question``.

    The question is transformed with the already-fitted ``vectorizer`` and
    scored against every row of ``X`` with a dot product; the sentence with
    the highest score is returned.

    Args:
        question: Free-text question string.

    Returns:
        The entry of ``corpus`` with the largest score. If the question shares
        no vocabulary with the corpus (every score is zero), the first corpus
        entry is returned, because ``np.argmax`` picks the first maximum.
    """
    question_vector = vectorizer.transform([question])
    # Use the sparse-aware ``@`` operator. ``np.dot`` does not understand SciPy
    # sparse operands and only happened to work via a fallback to ``*``.
    similarities = (X @ question_vector.T).toarray().ravel()
    best_idx = int(np.argmax(similarities))
    return corpus[best_idx]

# Load the pre-trained model and tokenizer used by the "Generative" option and
# wrap them in a Hugging Face "question-answering" pipeline.
# NOTE(review): a question-answering pipeline selects its answer from the
# supplied context rather than generating free text, so "Generative" appears
# to be a UI label only - confirm this naming is intended.
model_name = "distilbert-base-cased-distilled-squad"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

def generative_qa(question):
    """Answer ``question`` using the QA pipeline over the retrieved sentence.

    The sentence returned by ``retrieval_based_qa`` is used as the context for
    ``qa_pipeline``; the ``'answer'`` field of the pipeline result is returned.
    """
    passage = retrieval_based_qa(question)
    prediction = qa_pipeline({"question": question, "context": passage})
    return prediction['answer']

# Gradio Interface
def qa_system(question, method):
    """Dispatch a question to the selected QA backend.

    Args:
        question: Question text entered in the UI.
        method: Either ``"Retrieval-based"`` or ``"Generative"``.

    Returns:
        The answer string produced by the chosen backend, or a short
        user-facing message when the question is blank or ``method`` is not
        one of the two supported values (previously this returned ``None``).
    """
    # Guard blank input here so neither backend is called with nothing to
    # search for.
    if not question or not question.strip():
        return "Please enter a question."
    if method == "Retrieval-based":
        return retrieval_based_qa(question)
    if method == "Generative":
        return generative_qa(question)
    # Make an unexpected selection visible instead of silently returning None.
    return f"Unknown method: {method!r}. Choose 'Retrieval-based' or 'Generative'."

# Build the input and output widgets first so the Interface call stays short.
question_input = gr.Textbox(label="Enter your question")
method_selector = gr.Radio(
    choices=["Retrieval-based", "Generative"],
    label="Choose method",
    value="Retrieval-based",
)
answer_output = gr.Textbox(label="Answer")

# Wire the widgets to qa_system: (question, method) in, answer text out.
gradio_ui = gr.Interface(
    fn=qa_system,
    inputs=[question_input, method_selector],
    outputs=answer_output,
    title="Question Answering System",
    description="Ask a question and choose either retrieval-based or generative QA method.(Partha)",
)

# Launch the app
gradio_ui.launch()


Collecting gradio
  Using cached gradio-5.20.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Using cached aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Using cached fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Using cached ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.2 (from gradio)
  Using cached gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Using cached groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Using cached MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Using cached python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ru

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://26a71185fafc4bfbb1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


