In [1]:
! pip install transformers datasets scikit-learn evaluate

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.5


In [4]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np




# Preparing Dataset for fine-tuning

In [18]:
import pandas as pd

def load_pubmed_rct(filename):
    with open(filename, "r", encoding="utf-8") as f:
        lines = f.read().splitlines()

    abstracts = []
    abstract = []

    for line in lines:
        if line.startswith("###"):
            if abstract:
                abstracts.append(abstract)
                abstract = []
        elif line.strip() == "":
            continue
        else:
            label, text = line.split('\t')
            abstract.append((label, text))

    if abstract:
        abstracts.append(abstract)

    # Flatten into rows
    data = []
    for abs in abstracts:
        for label, text in abs:
            data.append({"label_name": label, "text": text})

    return pd.DataFrame(data)

train_df = load_pubmed_rct("train.txt")
val_df = load_pubmed_rct("dev.txt")
test_df = load_pubmed_rct("test.txt")

print(train_df.head())


  label_name                                               text
0  OBJECTIVE  To investigate the efficacy of 6 weeks of dail...
1    METHODS  A total of 125 patients with primary knee OA w...
2    METHODS  Outcome measures included pain reduction and i...
3    METHODS  Pain was assessed using the visual analog pain...
4    METHODS  Secondary outcome measures included the Wester...


In [19]:
label_list = ['BACKGROUND', 'OBJECTIVE', 'METHODS', 'RESULTS', 'CONCLUSIONS']
label2id = {label: i for i, label in enumerate(label_list)}
id2label = {i: label for label, i in label2id.items()}

train_df["label"] = train_df["label_name"].map(label2id)
val_df["label"] = val_df["label_name"].map(label2id)
test_df["label"] = test_df["label_name"].map(label2id)


In [20]:
from datasets import Dataset

train_dataset = Dataset.from_pandas(train_df[["text", "label"]])
val_dataset = Dataset.from_pandas(val_df[["text", "label"]])
test_dataset = Dataset.from_pandas(test_df[["text", "label"]])


# Fine Tuning BioBERT

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
checkpoint = "dmis-lab/biobert-base-cased-v1.1"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=5,  # BACKGROUND, OBJECTIVE, METHODS, RESULTS, CONCLUSIONS
    id2label={0: "BACKGROUND", 1: "OBJECTIVE", 2: "METHODS", 3: "RESULTS", 4: "CONCLUSIONS"},
    label2id={"BACKGROUND": 0, "OBJECTIVE": 1, "METHODS": 2, "RESULTS": 3, "CONCLUSIONS": 4}
)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [21]:
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=128)

tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_val = val_dataset.map(tokenize_function, batched=True)
tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Set format for PyTorch
tokenized_train.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_val.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_test.set_format("torch", columns=["input_ids", "attention_mask", "label"])

Map:   0%|          | 0/180040 [00:00<?, ? examples/s]

Map:   0%|          | 0/30212 [00:00<?, ? examples/s]

Map:   0%|          | 0/30135 [00:00<?, ? examples/s]

In [27]:

import evaluate
import numpy as np

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)


In [12]:
!pip install --upgrade transformers

Collecting transformers
  Downloading transformers-4.54.0-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.34.1-py3-none-any.whl.metadata (14 kB)
Downloading transformers-4.54.0-py3-none-any.whl (11.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m118.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading huggingface_hub-0.34.1-py3-none-any.whl (558 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m558.8/558.8 kB[0m [31m42.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface-hub, transformers
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface-hub 0.33.4
    Uninstalling huggingface-hub-0.33.4:
      Successfully uninstalled huggingface-hub-0.33.4
  Attempting uninstall: transfor

In [28]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./biobert-results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=100,
    report_to="none",  # Disable W&B/logging
    fp16=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,

)


  trainer = Trainer(


In [29]:
trainer.train()


  return forward_call(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2463,0.414949,0.877135
2,0.1808,0.439803,0.877334
3,0.1412,0.600075,0.874818


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


TrainOutput(global_step=33759, training_loss=0.17457185921120544, metrics={'train_runtime': 4632.9121, 'train_samples_per_second': 116.583, 'train_steps_per_second': 7.287, 'total_flos': 3.552884277691392e+16, 'train_loss': 0.17457185921120544, 'epoch': 3.0})

# Evaluating Model

In [34]:
metrics = trainer.evaluate(eval_dataset=tokenized_test)
print(metrics)

  return forward_call(*args, **kwargs)


{'eval_loss': 0.6492919325828552, 'eval_accuracy': 0.8663348266135723, 'eval_runtime': 55.9798, 'eval_samples_per_second': 538.32, 'eval_steps_per_second': 33.655, 'epoch': 3.0}


# Saving the model and tokenizer

In [33]:
trainer.save_model("biobert-pubmed20k-classifier")
tokenizer.save_pretrained("biobert-pubmed20k-classifier")


('biobert-pubmed20k-classifier/tokenizer_config.json',
 'biobert-pubmed20k-classifier/special_tokens_map.json',
 'biobert-pubmed20k-classifier/vocab.txt',
 'biobert-pubmed20k-classifier/added_tokens.json',
 'biobert-pubmed20k-classifier/tokenizer.json')

# Testing Performance using example test cases

In [35]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Load the saved model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained("biobert-pubmed20k-classifier")
tokenizer = AutoTokenizer.from_pretrained("biobert-pubmed20k-classifier")


model.eval()


label_names = ["BACKGROUND", "OBJECTIVE", "METHODS", "RESULTS", "CONCLUSIONS"]

def predict_label(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probs = F.softmax(logits, dim=1)
        predicted_class_id = torch.argmax(probs).item()
    return label_names[predicted_class_id], probs[0][predicted_class_id].item()

# Example usage:
example_text = "In this study, we explore the effects of a new drug on heart disease."
label, confidence = predict_label(example_text)
print(f"Predicted label: {label} (confidence: {confidence:.2f})")


  return forward_call(*args, **kwargs)


Predicted label: BACKGROUND (confidence: 1.00)


In [36]:
print(predict_label("The trial demonstrated significant improvement in patient survival rates."))
print(predict_label("The methodology involved double-blind, placebo-controlled experiments."))


('BACKGROUND', 0.8751776218414307)
('METHODS', 0.9985818862915039)


In [38]:
!pip install -U "huggingface_hub[cli]"




Collecting InquirerPy==0.3.4 (from huggingface_hub[cli])
  Downloading InquirerPy-0.3.4-py3-none-any.whl.metadata (8.1 kB)
Collecting pfzy<0.4.0,>=0.3.1 (from InquirerPy==0.3.4->huggingface_hub[cli])
  Downloading pfzy-0.3.4-py3-none-any.whl.metadata (4.9 kB)
Downloading InquirerPy-0.3.4-py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.7/67.7 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pfzy-0.3.4-py3-none-any.whl (8.5 kB)
Installing collected packages: pfzy, InquirerPy
Successfully installed InquirerPy-0.3.4 pfzy-0.3.4


In [40]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
The token `smartfoodie` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `smartfoodie`

# Pushing model to huggingface-hub

In [41]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load your saved model and tokenizer from local directory
model = AutoModelForSequenceClassification.from_pretrained("biobert-pubmed20k-classifier")
tokenizer = AutoTokenizer.from_pretrained("biobert-pubmed20k-classifier")

# Push to your Hugging Face repo
model.push_to_hub("SubhaL/biobert-research-insights")
tokenizer.push_to_hub("SubhaL/biobert-research-insights")


README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/SubhaL/biobert-research-insights/commit/a1b58a95da2c65e4083cb45f179188fee5057576', commit_message='Upload tokenizer', commit_description='', oid='a1b58a95da2c65e4083cb45f179188fee5057576', pr_url=None, repo_url=RepoUrl('https://huggingface.co/SubhaL/biobert-research-insights', endpoint='https://huggingface.co', repo_type='model', repo_id='SubhaL/biobert-research-insights'), pr_revision=None, pr_num=None)

In [42]:
import numpy as np

predictions_output = trainer.predict(tokenized_test)
logits = predictions_output.predictions
labels = predictions_output.label_ids

# Convert logits to predicted class ids
preds = np.argmax(logits, axis=1)


  return forward_call(*args, **kwargs)


In [43]:
from sklearn.metrics import precision_recall_fscore_support, classification_report

precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')

print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-score:  {f1:.4f}")

# Or print full classification report:
print(classification_report(labels, preds, digits=4))


Precision: 0.8667
Recall:    0.8663
F1-score:  0.8662
              precision    recall  f1-score   support

           0     0.6854    0.7468    0.7148      3621
           1     0.6555    0.5945    0.6235      2333
           2     0.9304    0.9444    0.9374      9897
           3     0.9262    0.9118    0.9189      9713
           4     0.8538    0.8342    0.8439      4571

    accuracy                         0.8663     30135
   macro avg     0.8103    0.8063    0.8077     30135
weighted avg     0.8667    0.8663    0.8662     30135

