### If you just want to demo my QA system (no training and no re-encoding of all contexts), you can create a new notebook that only imports my already-saved trained model and embeddings, as shown below:

# 1. Imports + device

In [32]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import re

from transformers import AutoTokenizer, AutoModel


In [33]:
# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device  # bare last expression so the notebook displays the selected device


device(type='cuda')

# 2. Reload data and embeddings

In [34]:
# Load the previously saved dataframe with question / context / id columns.
# (Use read_pickle / read_csv instead if the frame was saved in another format.)
df_all = pd.read_parquet("df_all_pubmedqa.parquet")

# Load the precomputed context embedding matrix, shape [num_contexts, hidden_size].
# NOTE(review): retrieval later uses a row position in this matrix as a df_all["id"] value;
# presumably ids run 0..N-1 in embedding order — confirm against the notebook that saved both files.
context_embs = np.load("context_embs_pubmedqa.npy")

# Sanity check: preview the frame and show the embedding matrix shape.
df_all.head(), context_embs.shape


(                                            question  \
 0  Do mitochondria play a role in remodelling lac...   
 1  Landolt C and snellen e acuity: differences in...   
 2  Syncope during bathing in infants, a pediatric...   
 3  Are the long-term results of the transanal pul...   
 4  Can tailored interventions increase mammograph...   
 
                                              context  id  
 0  Programmed cell death (PCD) is the regulated d...   0  
 1  Assessment of visual acuity depends on the opt...   1  
 2  Apparent life-threatening events in infants ar...   2  
 3  The transanal endorectal pull-through (TERPT) ...   3  
 4  Telephone counseling and tailored print commun...   4  ,
 (273467, 768))

# 3. Recreate the DualEncoder class + load weights

We need the same class definition that was used during training, reproduced below, so that the saved weights can be loaded into it:

In [35]:
# Hugging Face checkpoint name; used for the tokenizer here and passed to DualEncoder below.
model_name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Token limit applied when encoding questions in retrieve_topk_docs.
max_length = 128  # same as during training

class DualEncoder(nn.Module):
    """Shared-weight encoder for questions and contexts.

    Both kinds of text pass through the same ``self.encoder``; each input is
    represented by the L2-normalised hidden state of its first token.
    """

    def __init__(self, encoder_name):
        """Load the pretrained backbone ``encoder_name`` and record its hidden width."""
        super().__init__()
        self.encoder = AutoModel.from_pretrained(encoder_name)
        self.hidden_size = self.encoder.config.hidden_size

    def encode(self, input_ids, attention_mask):
        """Return unit-length first-token embeddings of shape [B, H]."""
        hidden = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
        ).last_hidden_state                      # [B, L, H]
        first_token = hidden[:, 0, :]            # [B, H]
        # Divides by max(norm, 1e-8), so an all-zero vector stays zero instead of NaN.
        return nn.functional.normalize(first_token, p=2, dim=1, eps=1e-8)

    def forward(self, q_input_ids, q_attention_mask, c_input_ids, c_attention_mask):
        """Encode a question batch and a context batch; return ``(q_emb, c_emb)``."""
        return (
            self.encode(q_input_ids, q_attention_mask),
            self.encode(c_input_ids, c_attention_mask),
        )

# Build the model on the selected device; the fine-tuned checkpoint below
# replaces the pretrained backbone weights loaded by the constructor.
dual_encoder = DualEncoder(model_name).to(device)

# Load your fine-tuned weights.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
state_dict = torch.load("dual_encoder_pubmedqa.pt", map_location=device, weights_only=True)
dual_encoder.load_state_dict(state_dict)
# Inference only: switch to eval mode. As the cell's last expression this also
# displays the module tree in the notebook.
dual_encoder.eval()


DualEncoder(
  (encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

# 4. Add a small generative model for plain-language answers

This model is just for generation, not training.

In [36]:
import openai
openai.__version__

'2.8.0'

In [37]:
import os
from openai import OpenAI

# Load the API key from a local text file when it exists; otherwise fall back to
# an OPENAI_API_KEY that is already present in the environment.
_key_path = "key.txt"
if os.path.exists(_key_path):
    with open(_key_path, "r") as f:
        os.environ["OPENAI_API_KEY"] = f.read().strip()

# Fail here with a clear message instead of on the first API call.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "No OpenAI API key found: put it in key.txt or set the OPENAI_API_KEY environment variable."
    )

# Initialize client using the environment variable
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])


# 5. Splitting retrieval into a function that returns the top docs

The following `retrieve_topk_docs` function returns the top contexts for the generator.

In [55]:
def retrieve_topk_docs(question, top_k_docs=3):
    """Return the stored contexts that score highest against ``question``.

    The question is tokenized and embedded with ``dual_encoder``, then scored
    against every row of ``context_embs`` by dot product.

    Args:
        question: Question text to embed.
        top_k_docs: Maximum number of contexts to return. Values larger than the
            number of rows in ``context_embs`` are clamped; values <= 0 yield
            an empty list.

    Returns:
        A list of ``(score, cid, ctx)`` tuples, best score first. ``cid`` is the
        row position in ``context_embs`` and is also the value matched against
        ``df_all["id"]`` to fetch the context text ``ctx``.
    """
    dual_encoder.eval()

    # Clamp k so a request larger than the corpus cannot index past the end.
    k = max(0, min(top_k_docs, len(context_embs)))
    if k == 0:
        return []

    enc = tokenizer(
        question,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    )

    with torch.no_grad():
        q_emb = dual_encoder.encode(
            enc["input_ids"].to(device),
            enc["attention_mask"].to(device)
        ).cpu().numpy()[0]

    sims = context_embs @ q_emb

    # Pick the k best scores without sorting the whole corpus, then order only those k.
    top_indices = np.argpartition(-sims, k - 1)[:k]
    top_indices = top_indices[np.argsort(-sims[top_indices])]

    docs = []
    for cid in top_indices:
        score = sims[cid]
        # First row whose id equals the embedding row position.
        ctx = df_all.loc[df_all["id"] == cid, "context"].values[0]
        docs.append((score, cid, ctx))
    return docs


# 6. Generate a simple English answer from the evidence

Now we use the top-k contexts as “evidence” and ask GPT-4o mini (`gpt-4o-mini`, via the OpenAI API) to answer the question in lay terms.

In [56]:
def generate_plain_answer_gpt4o(question, docs, max_chars_per_doc=400):
    """Ask ``gpt-4o-mini`` for a short answer based on the retrieved contexts.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        docs: Iterable of ``(score, cid, ctx)`` tuples; only ``ctx`` is used.
        max_chars_per_doc: Each context is cut to at most this many characters.

    Returns:
        The message content of the first completion choice.
    """
    # Trim each context to a short snippet; snippets are separated by a blank line.
    evidence_text = "\n\n".join(
        ctx[:max_chars_per_doc] for _score, _cid, ctx in docs
    )

    # Semi-strict RAG prompt: answers must be grounded in the evidence, but
    # light paraphrasing and inference are allowed.
    prompt = f"""
    You are a biomedical assistant. 
    You MUST answer using ONLY the evidence below.
    However, you ARE allowed to:
    - infer likely meaning if multiple snippets point in the same direction,
    - paraphrase or summarize what the evidence implies,
    - generalize a little ONLY if the evidence strongly suggests it.
    
    You are NOT allowed to:
    - use outside medical knowledge,
    - add unsupported facts,
    - contradict the evidence.
    
    Question:
    {question}
    
    Evidence:
    {evidence_text}
    
    OUTPUT RULES:
    1. Start with EXACTLY one of:
       - Short answer: Yes.
       - Short answer: No.
       - Short answer: It leans toward yes.
       - Short answer: It leans toward no.
       - Short answer: Unclear.
    
    2. Then add 1–2 simple sentences summarizing what the evidence suggests.
    3. If evidence is indirect, incomplete, or off-topic -> choose "Short answer: Unclear."
    """

    chat_messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        temperature=0,  # lowest sampling temperature, for the most repeatable output
    )

    first_choice = response.choices[0]
    return first_choice.message.content


# 7. A user-facing question answering system:

The following cell defines a high-level function that:
    
    * Retrieves top-k docs
    * Generates a simple answer
    * Shows both the answer and the evidence snippets

In [57]:
def qa_explained(question, top_k_docs=3, snippet_chars=350):
    """Answer ``question`` end to end and print the answer with its evidence.

    Args:
        question: Question text to answer.
        top_k_docs: Number of contexts to retrieve and pass to the generator.
        snippet_chars: Maximum characters shown per evidence snippet.

    Prints the question, the generated short answer, and for every retrieved
    document its rank, id, similarity and a preview built from its first two
    sentences. Returns nothing.
    """
    # Step 1: fetch the supporting documents.
    retrieved = retrieve_topk_docs(question, top_k_docs=top_k_docs)

    print("QUESTION:")
    print(question)
    print("\nSHORT ANSWER (for non-medical audience):\n")

    # Step 2: generate the plain-language answer from those documents.
    print(generate_plain_answer_gpt4o(question, retrieved))
    print("\n---\nEVIDENCE SNIPPETS FROM ARTICLES:\n")

    # Step 3: show a short preview of each document.
    rank = 1
    for similarity, doc_id, text in retrieved:
        # Split after sentence-ending punctuation and drop empty fragments.
        pieces = (piece.strip() for piece in re.split(r'(?<=[.!?])\s+', text))
        kept = [piece for piece in pieces if piece]
        preview = " ".join(kept[:2])[:snippet_chars]

        print(f"=== Document rank {rank} | id={doc_id} | similarity={similarity:.3f} ===")
        print("Snippet:")
        print(preview, "...\n")
        rank += 1


# 8. Interactive loop using the friendly QA

In [None]:
def interactive_qa_simple():
    """Prompt for questions in a loop and answer each one with ``qa_explained``.

    Each non-empty line is stripped and answered using the top 2 documents and
    350-character snippets. The loop ends on an empty line, on end-of-input, or
    on a keyboard interrupt at the prompt.
    """
    print("Type a biomedical question (or just press Enter to quit).")
    while True:
        try:
            q = input("\nQ: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt means "quit",
            # so leave the loop cleanly instead of ending in a traceback.
            q = ""
        if q == "":
            print("Thank you and see you next time.")
            break
        qa_explained(q, top_k_docs=2, snippet_chars=350)

# Start the interactive demo. This cell keeps prompting for questions until an empty line is entered.
interactive_qa_simple()


Type a biomedical question (or just press Enter to quit).



Q:  is pregnancy cause hair loss?


QUESTION:
is pregnancy cause hair loss?

SHORT ANSWER (for non-medical audience):

Short answer: Unclear.  
The evidence provided does not directly address the relationship between pregnancy and hair loss. It discusses hair loss in the context of androgenetic alopecia and other factors but does not mention pregnancy specifically.

---
EVIDENCE SNIPPETS FROM ARTICLES:

=== Document rank 1 | id=81143 | similarity=0.702 ===
Snippet:
Anecdotal reports suggest that certain disorders are common in African hair and may be associated with hairstyles. A cross-sectional study of 1042 schoolchildren was performed to test this hypothesis. ...

=== Document rank 2 | id=36509 | similarity=0.643 ===
Snippet:
The aim of this study was to evaluate the effects of androgenetic alopecia on males with and without hair loss and to delineate the level of stress gained by this type of alopecia. Two hundred and 52 males (175 with hair loss, 77 without hair loss), between 16 and 72 years of age, participated in


Q:  is smoking bad?


QUESTION:
is smoking bad?

SHORT ANSWER (for non-medical audience):

Short answer: Yes.  
The evidence indicates that tobacco use is a significant public health issue, particularly among Chinese males, suggesting that smoking is associated with various health risks. Additionally, smoking behaviors may be influenced by social acceptance, further highlighting its negative implications.

---
EVIDENCE SNIPPETS FROM ARTICLES:

=== Document rank 1 | id=214442 | similarity=0.858 ===
Snippet:
According to a recent national survey, tobacco use is a critical public health issue in China, with more than two-thirds of Chinese males smoking. Findings in Western populations suggest that smoking may cluster with other health-risk behaviors. ...

=== Document rank 2 | id=230993 | similarity=0.857 ===
Snippet:
Nearly all studies reporting smoking status collect self-reported data. The objective of this study was to assess sociodemographic characteristics and selected, common smoking-related diseases as


Q:  is eating fast food make us fat?


QUESTION:
is eating fast food make us fat?

SHORT ANSWER (for non-medical audience):

Short answer: Unclear.  
The evidence indicates that while fast food consumption is linked to adverse health outcomes, the specific contribution of fast food to overweight or obesity compared to other dietary patterns is not clearly established.

---
EVIDENCE SNIPPETS FROM ARTICLES:

=== Document rank 1 | id=13977 | similarity=0.864 ===
Snippet:
Although fast food consumption has been linked to adverse health outcomes, the relative contribution of fast food itself compared with the rest of the diet to these associations remains unclear. Our objective was to compare the independent associations with overweight/obesity or dietary outcomes for fast food consumption compared with dietary patte ...

=== Document rank 2 | id=88460 | similarity=0.822 ===
Snippet:
To examine associations of the frequency of eating at fast-food restaurants with demographic, behavioural and psychosocial factors and dietary inta


Q:  is eating mercury harmful?


QUESTION:
is eating mercury harmful?

SHORT ANSWER (for non-medical audience):

Short answer: Yes.  
The evidence indicates that high mercury content in fish may negate the cardiovascular benefits of consuming fish, suggesting that mercury exposure can be harmful. Additionally, methylmercury exposure is primarily linked to fish consumption, highlighting the risks associated with mercury in the environment.

---
EVIDENCE SNIPPETS FROM ARTICLES:

=== Document rank 1 | id=201691 | similarity=0.711 ===
Snippet:
Fish consumption is considered the primary pathway of methylmercury (MeHg) exposure for most people in the world. However, in the inland regions of China, most of the residents eat little fish, but they live in areas where a significant amount of mercury (Hg) is present in the environment. ...

=== Document rank 2 | id=52513 | similarity=0.694 ===
Snippet:
Fish consumption has been associated with a decreased risk of coronary artery disease. Recent studies have illustrated that the 


Q:  is Charles the king of England now?


QUESTION:
is Charles the king of England now?

SHORT ANSWER (for non-medical audience):

Short answer: Unclear.  
The provided evidence does not contain any information regarding the current status of Charles as the king of England.

---
EVIDENCE SNIPPETS FROM ARTICLES:

=== Document rank 1 | id=65409 | similarity=0.575 ===
Snippet:
Determining the reasons people choose to study nursing may help educators and managers develop student-focussed and enticing nursing programmes. In Australia, little research has been undertaken with students entering nursing programmes and the reasons for their choice. ...

=== Document rank 2 | id=153117 | similarity=0.574 ===
Snippet:
Between roughly 500 BCE and 300 BCE, three distinct regions, the Yangtze and Yellow River Valleys, the Eastern Mediterranean, and the Ganges Valley, saw the emergence of highly similar religious traditions with an unprecedented emphasis on self-discipline and asceticism and with "otherworldly," often moralizing, doctrines, 