In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from datasets import load_dataset
from huggingface_hub import login
from tqdm import tqdm
from dotenv import load_dotenv
import os
load_dotenv()
token = os.getenv("HF_TOKEN")
print(token)
login(token=token)
import torch.nn.functional as F

from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
from sklearn.preprocessing import LabelEncoder
import numpy as np


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [2]:
assert torch.cuda.is_available(), "GPU not available!"


In [3]:
def evaluate_(model, data_loader,  yes_token_id, no_token_id,maybe_token_id,label_encoder, device='cuda'):
    """
    Function to implement zero-shot distilGPT2 inference
    Args:
    model: GPT2 model
    data_loader: Dataloader for the dataset
    ...: Any other arguments you may need

    Returns:
    preds
    """
    model.eval()
    preds = []
    y_true = []
    # model.to(device)
    print(yes_token_id, no_token_id, maybe_token_id)
    for batch in tqdm(data_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        with torch.no_grad():
            with torch.amp.autocast(device_type=device, dtype=torch.float16):
                output = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = output.logits # (batchsize, chunklen, vocab_size)

                last_indices = attention_mask.sum(-1)-1
                final_logits = logits[torch.arange(len(input_ids)), last_indices] # (batchsize,vocab_size)
                probs = F.softmax(final_logits, dim=-1) # (batchsize,vocab_size)

                yes_probs = probs[:, yes_token_id]
                no_probs = probs[:, no_token_id]
                maybe_probs = probs[:, maybe_token_id]

                stacked = torch.stack([no_probs, maybe_probs, yes_probs], axis=1)
                # print(stacked.shape)
                # print(stacked)
                pred_indices = torch.argmax(stacked, axis=1)
                # print(pred_indices.shape)
                label_order = ['no', 'maybe', 'yes']
                pred_labels = [label_order[i] for i in pred_indices]


                preds.extend(label_encoder.transform(pred_labels))
                y_true.extend(label_encoder.transform(batch['answer']))
    return preds, y_true


def generate(pipeline, dataset):
    
    counter = 1
    for data in dataset[:5]:
        question = data['question']
        contexts = data['context']['contexts']
        context = "\n".join([f"Context {i+1}: {c}" for i, c in enumerate(contexts)])
        prompt = (
            f"Contexts:\n{context}\n\n"
            "Based on the contexts above, answer the question below.\n"
            "Provide a short explanation that justifies your answer using evidence from the context.\n"
            f"Question: {question}?\n"
        )

        generated_answer = pipeline(prompt)
        print(f"Prompt {counter}:")
        print(generated_answer)
        counter += 1
        
    
  
   


In [None]:
from transformers import pipeline

pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-3B")   

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Load model/tokenizer
model_8b = 'meta-llama/Meta-Llama-3-8B'
model_1b_instruct = 'meta-llama/Llama-3.2-1B-Instruct'
model_1b = 'meta-llama/Llama-3.2-1B'
model_3b = 'meta-llama/Llama-3.2-3B'

model_id = model_1b_instruct

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    torch_dtype=torch.float16
)
tokenizer.pad_token = tokenizer.eos_token  # Required
model.config.pad_token_id = tokenizer.pad_token_id

print("Tokenizer and model loaded successfully.")

In [6]:
# load dataset
dataset = load_dataset("qiaojin/PubMedQA", "pqa_artificial")["train"]
dataset

Dataset({
    features: ['pubid', 'question', 'context', 'long_answer', 'final_decision'],
    num_rows: 211269
})

In [7]:
check_dataset = dataset.select(range(2000))
dataset = check_dataset

In [8]:
dataset

Dataset({
    features: ['pubid', 'question', 'context', 'long_answer', 'final_decision'],
    num_rows: 2000
})

In [None]:
def make_tokenize(tokenizer):
    def tokenize(examples):
        
        question = examples['question']
        contexts = examples['context']['contexts']
        context = "\n".join([f"Context {i+1}: {c}" for i, c in enumerate(contexts)])
        # prompt= (
        # f"Contexts:\n{context}\n\n"
        # "Based on the contexts above, answer the question below with 'yes', 'no', or 'maybe'.\n"
        # "Then, provide a short explanation that justifies your answer using evidence from the context.\n"
        # f"Question: {question}\n"
        # )
        
        prompt = (
                f"Contexts:\n{context}\n\n"
                "Based on the contexts above, answer the question below with 'yes', 'no', or 'maybe'.\n"
                "Then, provide a short explanation that justifies your answer using evidence from the context.\n"
                f"Question: {question}\n"
                f"Answer: {examples['final_decision']}\n"
                f"Explanation: {examples['long_answer']}\n"
            )
        return tokenizer(prompt,truncation=True)
    return tokenize


def eval_collator(batch, tokenizer):
  input_ids = pad_sequence([torch.tensor(x['input_ids']) for x in batch], batch_first=True,padding_value=tokenizer.pad_token_id)
  attention_mask = pad_sequence([torch.tensor(x['attention_mask']) for x in batch], batch_first=True, padding_value=0)
  answer = ([x['final_decision'] for x in batch])
  return {'input_ids': input_ids, 'attention_mask': attention_mask, 'answer': answer}


In [44]:

for data in dataset.select(range(15)):
    print(data)
    question = data['question']
    contexts = data['context']['contexts']
    context = "\n".join([f"Context {i+1}: {c}" for i, c in enumerate(contexts)])
    prompt = (
    f"Contexts:\n{context}\n\n"
    f"Question: {question}?\n\n"
    "Based on the contexts and the question above answer only yes/no/maybe based on context provided\n"
    "Answer:"
    )

    # print(prompt)

    generated_answer = pipe(prompt)

    print(generated_answer[0]['generated_text'])


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


{'pubid': 25429730, 'question': 'Are group 2 innate lymphoid cells ( ILC2s ) increased in chronic rhinosinusitis with nasal polyps or eosinophilia?', 'context': {'contexts': ['Chronic rhinosinusitis (CRS) is a heterogeneous disease with an uncertain pathogenesis. Group 2 innate lymphoid cells (ILC2s) represent a recently discovered cell population which has been implicated in driving Th2 inflammation in CRS; however, their relationship with clinical disease characteristics has yet to be investigated.', 'The aim of this study was to identify ILC2s in sinus mucosa in patients with CRS and controls and compare ILC2s across characteristics of disease.', 'A cross-sectional study of patients with CRS undergoing endoscopic sinus surgery was conducted. Sinus mucosal biopsies were obtained during surgery and control tissue from patients undergoing pituitary tumour resection through transphenoidal approach. ILC2s were identified as CD45(+) Lin(-) CD127(+) CD4(-) CD8(-) CRTH2(CD294)(+) CD161(+) c

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Chronic rhinosinusitis (CRS) is a heterogeneous disease with an uncertain pathogenesis. Group 2 innate lymphoid cells (ILC2s) represent a recently discovered cell population which has been implicated in driving Th2 inflammation in CRS; however, their relationship with clinical disease characteristics has yet to be investigated.
Context 2: The aim of this study was to identify ILC2s in sinus mucosa in patients with CRS and controls and compare ILC2s across characteristics of disease.
Context 3: A cross-sectional study of patients with CRS undergoing endoscopic sinus surgery was conducted. Sinus mucosal biopsies were obtained during surgery and control tissue from patients undergoing pituitary tumour resection through transphenoidal approach. ILC2s were identified as CD45(+) Lin(-) CD127(+) CD4(-) CD8(-) CRTH2(CD294)(+) CD161(+) cells in single cell suspensions through flow cytometry. ILC2 frequencies, measured as a percentage of CD45(+) cells, were compared across C

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Phosphatidylethanolamine N-methyltransferase (PEMT), a liver enriched enzyme, is responsible for approximately one third of hepatic phosphatidylcholine biosynthesis. When fed a high-fat diet (HFD), Pemt(-/-) mice are protected from HF-induced obesity; however, they develop steatohepatitis. The vagus nerve relays signals between liver and brain that regulate peripheral adiposity and pancreas function. Here we explore a possible role of the hepatic branch of the vagus nerve in the development of diet induced obesity and steatohepatitis in Pemt(-/-) mice.
Context 2: 8-week old Pemt(-/-) and Pemt(+/+) mice were subjected to hepatic vagotomy (HV) or capsaicin treatment, which selectively disrupts afferent nerves, and were compared to sham-operated or vehicle-treatment, respectively. After surgery, mice were fed a HFD for 10 weeks.
Context 3: HV abolished the protection against the HFD-induced obesity and glucose intolerance in Pemt(-/-) mice. HV normalized phospholipid 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Psammaplin A (PsA) is a natural product isolated from marine sponges, which has been demonstrated to have anticancer activity against several human cancer cell lines via the induction of cell cycle arrest and apoptosis. New drugs that are less toxic and more effective against multidrug-resistant cancers are urgently needed.
Context 2: We tested cell proliferation, cell cycle progression and autophagic cell death pathway in doxorubicin-resistant MCF-7 (MCF-7/adr) human breast cancer cells. The potency of PsA was further determined using an in vivo xenograft model.

Question: Does psammaplin A induce Sirtuin 1-dependent autophagic cell death in doxorubicin-resistant MCF-7/adr human breast cancer cells and xenografts??

Based on the contexts and the question above answer only yes/no/maybe based on context provided
Answer: Yes, it induces Sirtuin 1-dependent autophagic cell death in doxorubicin
{'pubid': 25431941, 'question': 'Is methylation of the FGFR2 gene associate

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: This study examined links between DNA methylation and birth weight centile (BWC), and explored the impact of genetic variation.
Context 2: Using HumanMethylation450 arrays, we examined candidate gene-associated CpGs in cord blood from newborns with low (<15th centile), medium (40-60th centile) and high (>85th centile) BWC (n = 12). Candidates were examined in an investigation cohort (n = 110) using pyrosequencing and genotyping for putative methylation-associated polymorphisms performed using standard PCR.
Context 3: Array analysis identified 314 candidate genes associated with BWC extremes, four of which showed ≥ 4 BWC-linked CpGs. Of these, PM20D1 and MI886 suggested genetically determined methylation levels. However, methylation at three CpGs in FGFR2 remained significantly associated with high BWC (p = 0.004-0.027).

Question: Is methylation of the FGFR2 gene associated with high birth weight centile in humans??

Based on the contexts and the question above ans

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Tumor microenvironment immunity is associated with breast cancer outcome. A high lymphocytic infiltration has been associated with response to neoadjuvant chemotherapy, but the contribution to response and prognosis of immune cell subpopulations profiles in both pre-treated and post-treatment residual tumor is still unclear.
Context 2: We analyzed pre- and post-treatment tumor-infiltrating immune cells (CD3, CD4, CD8, CD20, CD68, Foxp3) by immunohistochemistry in a series of 121 breast cancer patients homogeneously treated with neoadjuvant chemotherapy. Immune cell profiles were analyzed and correlated with response and survival.
Context 3: We identified three tumor-infiltrating immune cell profiles, which were able to predict pathological complete response (pCR) to neoadjuvant chemotherapy (cluster B: 58%, versus clusters A and C: 7%). A higher infiltration by CD4 lymphocytes was the main factor explaining the occurrence of pCR, and this association was validated 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Hidradenitis suppurativa (HS) is a chronic inflammatory disease involving intertriginous skin. Previous epidemiologic studies have been limited by small sample size.
Context 2: We sought to describe the prevalence and comorbidities of HS in a large patient care database.
Context 3: In this retrospective case-control study, we chart-validated all patients within a hospital database who received at least 1 billing code for HS between 1980 and 2013. Verified cases were matched with controls based on age, gender, and race. Prevalences of a priori selected comorbidities were compared between HS and control groups.
Context 4: A total of 2292 patients at Massachusetts General Hospital received at least 1 code for HS. A total of 1776 cases had a validated diagnosis of HS, yielding a prevalence of 0.08%. In unadjusted analysis, all comorbidities were diagnosed significantly more in HS compared with control including (in rank order of likelihood): smoking, arthropathies, dys

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: The interassay variability found in the measurement of testosterone (T) levels warrants the need for laboratories to validate their methods to establish trustworthy cut-off points for diagnosis of male hypogonadism. The aims of this study were to validate measurement of total T (TT) at our laboratory in order to obtain reference ranges for TT, calculated free T (CFT), calculated bioavailable T (CBT), and salivary T (ST) in healthy young men from the Mediterranean region, and to evaluate the potential clinical value of ST by establishing its correlation with serum T.
Context 2: An observational, cross-sectional study with sequential sampling.
Context 3: men aged 18-30 years with body mass index (BMI)<30.
Context 4: chronic diseases, hepatic insufficiency or use of drugs altering circulating T levels. Main outcome measures TT (chemiluminescent immunoassay UniCell DXI 800 [Access T Beckman Coulter]), CFT and CBT (Vermeulen's formula), and ST (radioimmunoassay for seru

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Secretory phospholipases A2 (sPLA2) initiate the biosynthesis of eicosanoids, are increased in the airways of people with severe asthma, and induce mucin hypersecretion. We used IL-13-transformed, highly enriched goblet cells and differentiated (ciliary cell-enriched) human bronchial epithelial cell culture to evaluate the relative contribution of ciliated and goblet cells to airway sPLA2 generation and response. We wished to determine the primary source(s) of sPLA2 and leukotrienes in human airway epithelial cells.
Context 2: Human bronchial epithelial cells from subjects without lung disease were differentiated to a ciliated-enriched or goblet-enriched cell phenotype. Synthesis of sPLA2, cysteinyl leukotrienes (cysLTs), and airway mucin messenger RNA and protein was measured by real-time-polymerase chain reaction and an enzyme-linked immunosorbent assay, and the localization of mucin and sPLA2 to specific cells types was confirmed by confocal microscopy.
Context 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Publication bias compromises the validity of systematic reviews. This problem can be addressed in part through searching clinical trials registries to identify unpublished studies. This study aims to determine how often systematic reviews published in emergency medicine journals include clinical trials registry searches.
Context 2: We identified all systematic reviews published in the 6 highest-impact emergency medicine journals between January 1 and December 31, 2013. Systematic reviews that assessed the effects of an intervention were further examined to determine whether the authors described searching a clinical trials registry and whether this search identified relevant unpublished studies.
Context 3: Of 191 articles identified through PubMed search, 80 were confirmed to be systematic reviews. Our sample consisted of 41 systematic reviews that assessed a specific intervention. Eight of these 41 (20%) searched a clinical trials registry. For 4 of these 8 review

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Obese patients with idiopathic pulmonary fibrosis (IPF) have higher 90-day mortality after lung transplantation. We sought to determine whether body mass index (BMI) differentially modified the effect of transplant procedure type on 90-day mortality in IPF patients.
Context 2: We analyzed data from the Organ Procurement and Transplantation Network (OPTN) for all patients with IPF who were transplanted between 2000 and 2010. Post-transplant survival was examined using Kaplan-Meier estimates. Multivariable logistic regression modeling was used to determine the difference in 90-day survival. The primary variable of interest was the interaction term between body mass index (BMI) and transplant type.
Context 3: A total of 3,389 (58% single-lung transplant [SLT] and 42% bilateral lung transplant [BLT]) subjects were included. Multivariable logistic regression modeling demonstrated a statistically significant interaction between BMI and transplant type (p = 0.047). Patien

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Schizophrenia is recognized as a disorder of the brain and neuronal connectivity. The neural cell adhesion molecule 1 (NCAM1) gene plays a crucial role in regulating neuronal connectivity.
Context 2: We conducted a two-stage association analysis on 17 NCAM1 SNPs in two independent Han Chinese schizophrenia case-control cohorts (discovery sample from Hunan Province: 986 patients and 1040 normal controls; replication sample from Yunnan Province: 564 cases and 547 healthy controls). Allele, genotype and haplotype frequencies were compared between case and control samples. Transcription factor binding site prediction and luciferase reporter assays were employed to assess the potential function of promoter SNPs. We detected developmental changes at the transcriptional level of NCAM1 during neuron differentiation in Macaca mulatta neural progenitor cells (NPC). Serum levels of NCAM1 were measured in 72 cases and 88 controls.
Context 3: A promoter variant, rs2301228, was 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Oxidative stress generated within inflammatory joints can produce autoimmune phenomena and joint destruction. Radical species with oxidative activity, including reactive nitrogen species, represent mediators of inflammation and cartilage damage.
Context 2: To assess serum nitric oxide as a marker of oxidative stress in Egyptian patients with rheumatoid arthritis and its relation to disease activity.
Context 3: 80 patients with rheumatoid arthritis were divided into 2 groups, according to the DAS-28 score: Group I: 42 patients with disease activity, and Group II: 38 patients with no disease activity. Forty age- and sex-matched individuals were included as control group (Group III). Routine laboratory investigations were done, and nitric oxide was measured using Elisa. Hand plain radiographies were done for radiological status scoring using the Sharp method.
Context 4: A comparison between nitric oxide in all three groups showed a highly significant difference (p < 0

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Contexts:
Context 1: Activation of B cells is a hallmark of systemic lupus erythematosus (SLE). Syk and TRAF6 are key signaling molecules in B-cell activation through BCR and CD40/TLR, respectively. Nevertheless, whether expression of Syk and TRAF6 is altered in SLE B cells remains unknown.
Context 2: Phosphorylation and/or expression of Syk and TRAF6 were analyzed by flow cytometry in peripheral blood mononuclear cells isolated from SLE patients.
Context 3: Pronounced phosphorylation and expression of Syk were noted in B cells from SLE patients compared with healthy donors. Levels of Syk phosphorylation correlated with the disease activity score. TRAF6 was significantly over-expressed in B cells of SLE patients as compared with healthy donors, and significant correlation of levels of TRAF6 expression and Syk phosphorylation was observed in SLE patients. Levels of TRAF6 expression were more pronounced in CD27+ memory B cells than in CD27-naïve B cells. In vitro treatment of SLE B cells

In [39]:
ans = pipe(prompt)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [40]:
print(ans[0]['generated_text'])

Contexts:
Context 1: Chronic rhinosinusitis (CRS) is a heterogeneous disease with an uncertain pathogenesis. Group 2 innate lymphoid cells (ILC2s) represent a recently discovered cell population which has been implicated in driving Th2 inflammation in CRS; however, their relationship with clinical disease characteristics has yet to be investigated.
Context 2: The aim of this study was to identify ILC2s in sinus mucosa in patients with CRS and controls and compare ILC2s across characteristics of disease.
Context 3: A cross-sectional study of patients with CRS undergoing endoscopic sinus surgery was conducted. Sinus mucosal biopsies were obtained during surgery and control tissue from patients undergoing pituitary tumour resection through transphenoidal approach. ILC2s were identified as CD45(+) Lin(-) CD127(+) CD4(-) CD8(-) CRTH2(CD294)(+) CD161(+) cells in single cell suspensions through flow cytometry. ILC2 frequencies, measured as a percentage of CD45(+) cells, were compared across C

In [None]:
tokenized_dataset = dataset.map(make_tokenize(tokenizer), batched=False)


In [None]:
tokenized_dataset

In [None]:
val_dataloader = DataLoader(tokenized_dataset, batch_size=4, collate_fn=lambda batch: eval_collator(batch, tokenizer))


In [None]:
# label encoding
label_encoder = LabelEncoder()
label_encoder.fit(["yes", "no", "maybe"])

In [None]:
label_encoder.transform(["yes", "no", "maybe"])

In [None]:
yes_token_id = tokenizer.encode('yes')[1]
no_token_id = tokenizer.encode('no')[1]
maybe_token_id = tokenizer.encode('maybe')[1]

In [None]:
yes_token_id, no_token_id, maybe_token_id

In [None]:
y_pred, y_true  = evaluate_(model, val_dataloader, yes_token_id, no_token_id,maybe_token_id, label_encoder, 'cuda')

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Accuracy
acc = accuracy_score(y_true, y_pred)

# Precision, Recall, F1 (macro average)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')

print(f"Accuracy: {acc:.4f}")
print(f"Precision (macro): {precision:.4f}")
print(f"Recall (macro): {recall:.4f}")
print(f"F1 Score (macro): {f1:.4f}")


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=label_encoder.transform(['no', 'maybe', 'yes']))

# Plot
sns.heatmap(cm, annot=True, fmt='d', xticklabels=['no', 'maybe', 'yes'], yticklabels=['no', 'maybe', 'yes'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Print detailed metrics per class
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))