# 02 => BioGPT Prompting on PubMedQA (small subset)

In [1]:
!pip -q install transformers datasets accelerate evaluate pandas numpy sacremoses

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/897.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# clone once
!rm -rf pubmedqa-llm-bot
!git clone https://github.com/AnnaJazayeri/pubmedqa-llm-bot.git
%cd /content/pubmedqa-llm-bot

# install dependencies
!pip -q install -r requirements.txt

# make sure Python can see the project root so `src` imports work
import sys
if '/content/pubmedqa-llm-bot' not in sys.path:
    sys.path.append('/content/pubmedqa-llm-bot')

# then jump into notebooks folder if you want to open/run them there
%cd notebooks


Cloning into 'pubmedqa-llm-bot'...
remote: Enumerating objects: 32, done.[K
remote: Counting objects: 100% (32/32), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 32 (delta 12), reused 9 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (32/32), 15.32 KiB | 2.55 MiB/s, done.
Resolving deltas: 100% (12/12), done.
/content/pubmedqa-llm-bot
/content/pubmedqa-llm-bot/notebooks


In [3]:
import torch, platform
print("CUDA:", torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))


CUDA: True
Tesla T4


In [4]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import numpy as np
import random

# tiny helper: keep the answer to one of {yes, no, maybe}
def normalize_label(text):
    t = (text or "").lower()
    if "yes" in t: return "yes"
    if "no"  in t: return "no"
    return "maybe"

# a short prompt that tells the model exactly what to do
PROMPT = """You are a biomedical Q&A assistant.
Answer with ONE word: Yes, No, or Maybe.
Question: {q}
Context: {c}
Answer:"""

# load the same dataset; this config has only 'train'
ds = load_dataset("pubmed_qa", "pqa_labeled")["train"]

# pick a small random subset to keep it fast (change N if you want)
N = 200
idxs = random.sample(range(len(ds)), k=min(N, len(ds)))
subset = [ds[i] for i in idxs]

# base BioGPT works on free Colab T4 and most GPUs
model_name = "microsoft/biogpt"
tok = AutoTokenizer.from_pretrained(model_name)
mdl = AutoModelForCausalLM.from_pretrained(model_name)

# text-generation pipeline handles tokenization + inference for us
gen = pipeline(
    "text-generation",
    model=mdl,
    tokenizer=tok,
    device_map="auto",
    max_new_tokens=32, # short answers only
    do_sample=False # deterministic output
)

preds, golds = [], []

for ex in subset:
    question = ex["question"]
    # join the list of snippets into one context string
    context = " ".join(ex["context"]["contexts"])
    gold = ex["final_decision"]  # target label

    prompt = PROMPT.format(q=question, c=context[:2000]) # trim very long context
    out = gen(prompt)[0]["generated_text"]

    # take only the text after 'Answer:' and clean it up
    raw_ans = out.split("Answer:")[-1].strip()
    pred = normalize_label(raw_ans)

    preds.append(pred)
    golds.append(gold)

# quick accuracy
acc = (np.array(preds) == np.array(golds)).mean()
print("BioGPT subset accuracy:", round(float(acc), 4))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

pqa_labeled/train-00000-of-00001.parquet:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/595 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.56G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.56G [00:00<?, ?B/s]

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


BioGPT subset accuracy: 0.53
