# 03 — Gradio QA Demo (Retrieval + BioGPT)

In [1]:
!pip -q install transformers datasets gradio scikit-learn

In [2]:
# clone once
!rm -rf pubmedqa-llm-bot
!git clone https://github.com/AnnaJazayeri/pubmedqa-llm-bot.git
%cd /content/pubmedqa-llm-bot

# install dependencies
!pip -q install -r requirements.txt

# make sure Python can see the project root so `src` imports work
import sys
if '/content/pubmedqa-llm-bot' not in sys.path:
    sys.path.append('/content/pubmedqa-llm-bot')

# then jump into notebooks folder if you want to open/run them there
%cd notebooks

/content/pubmedqa-llm-bot
/content/pubmedqa-llm-bot/notebooks


In [4]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
from src.retrieval import SimpleTfidfRetriever
from src.utils import PROMPT_TEMPLATE, normalize_label

# Load a small corpus (contexts) to retrieve from
ds = load_dataset("pubmed_qa", "pqa_labeled")
train_ctxs = [ex["context"] for ex in ds["train"][:2000]]  # cap for speed
retriever = SimpleTfidfRetriever(train_ctxs)

# Load BioGPT (base)
model_name = "microsoft/biogpt"
tok = AutoTokenizer.from_pretrained(model_name)
gen = AutoModelForCausalLM.from_pretrained(model_name)
pipe = pipeline("text-generation", model=gen, tokenizer=tok, device_map="auto", max_new_tokens=64)

def answer_question(question):
    top = retriever.topk(question, k=3)
    idxs = [i for i,_ in top]
    contexts = retriever.fetch(idxs)
    merged = "\n\n".join(contexts)
    prompt = PROMPT_TEMPLATE.format(question=question, context=merged[:2000])
    out = pipe(prompt, do_sample=False)[0]["generated_text"]
    ans = out.split("Answer:")[-1].strip()
    return ans

demo = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(lines=3, placeholder="Ask a biomedical question..."),
    outputs=gr.Textbox(label="LLM Answer (Yes/No/Maybe)"),
    title="PubMedQA — BioGPT (Demo)",
    description="Retrieval-augmented QA for Yes/No/Maybe answers. Uses a small TF–IDF retriever and BioGPT generation."
)

demo.launch(share=True)  # Colab will print a public link

TypeError: string indices must be integers, not 'str'