In [3]:
!pip install numpy torch transformers datasets huggingface_hub

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

In [4]:
from dataclasses import dataclass
import numpy as np
import torch
import transformers
from transformers import (
    BitsAndBytesConfig,
    Gemma2ForSequenceClassification,
    GemmaTokenizerFast,
    Gemma2Config,
    PreTrainedTokenizerBase,
    EvalPrediction,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    pipeline
)

from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline

from datasets import Dataset
from datasets import load_dataset
from huggingface_hub import login
import re

In [8]:

# check toecker length of all data
def len_article_details(split):
    token_lengths = [len(tokenizer(i['article'])['input_ids']) for i in dataset[split]]
    max_length = max(token_lengths)
    print('article max length',max_length)
    # llama3 context window 128k tokens
    # gemma2 context length of 8192 tokens
    if max_length > 128000:
        num_exceeding_128k = sum(1 for length in token_lengths if length > 128000)
        print('number of articles exceeding 128k:',num_exceeding_128k)


In [5]:

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
config = AutoConfig.from_pretrained(model_id)
config.rope_scaling = {"type": "linear", "factor": 2.0}  # Adjust as needed

# Load model with updated config
model = AutoModelForCausalLM.from_pretrained(model_id, config=config, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [8]:
dataset = load_dataset("BioLaySumm/BioLaySumm2025-PLOS")

README.md:   0%|          | 0.00/693 [00:00<?, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/169M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/170M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/169M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/28.2M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/3.19M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/24773 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1376 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/142 [00:00<?, ? examples/s]

In [6]:
dataset.column_names

{'train': ['article',
  'summary',
  'section_headings',
  'keywords',
  'year',
  'title'],
 'validation': ['article',
  'summary',
  'section_headings',
  'keywords',
  'year',
  'title'],
 'test': ['article',
  'summary',
  'section_headings',
  'keywords',
  'year',
  'title']}

In [11]:
len_article_details('train')
len_article_details('validation')
len_article_details('test')

article max length 32623
article max length 24751
article max length 20415


In [9]:

prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a skilled science communicator. Your task is to generate a plain-language summary of biomedical research articles, making them accessible to a general audience without specialized knowledge.

<|start_header_id|>user<|end_header_id|>

Generate a plain-language summary with 200-600 words for the following biomedical research article, ensuring clarity, conciseness, and accessibility to a non-expert audience.

Title: {title}

Full Text: {article}

<|start_header_id|>assistant<|end_header_id|>
""".format(title=dataset['train'][0]['title'], article=dataset['train'][0]['article'])


In [10]:
#inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to("cpu") # CPU for testing purpose only
inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to("cuda")
summary_ids = model.generate(**inputs, max_new_tokens=400, do_sample=True)
output_text= tokenizer.decode(summary_ids[0], skip_special_tokens=True)
match = re.search(r"assistant\nHere is a plain-language summary of the article:\n\n(.+)", output_text, re.DOTALL)
if match:
    cleaned_summary = match.group(1).strip()
else:
    cleaned_summary = "No summary found."

print(cleaned_summary)

# Initialize pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")

example=dataset['train'][0]['article']

# Run test prompt
print(pipe("Summarize this biomedical research paper:", min_length=200, max_length=400))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


**Understanding Kidney Development: How Kidneys Eliminate Waste**

The kidneys eliminate waste in the body using highly specialized structures called nephrons. A nephron is composed of a blood filter, a tub that recovers or secretes solutes, and a collecting duct. The filter contains epithelial cells called pod that form the slit-diaphrag filtration and collection of substances from blood. In some vertebrates, including mammals, the filter is connected to tub by short ciliated epithelium that guides filtrate into tub.

**Development of the Kidneys**

The kidneys develop from intermediate mesoderm (IM) during embryonic development. The proneph, mesoneph, and meteph are three kidneys that form sequentially from IM. The proneph and mesph degenerate, while meteph serves as adult kidney. Lower vertebrates like fish and amphibians develop prone during embryonic stages and form mesph as adult. Each kidney contains neph as its basic unit.

**Zebraf Kidney Development**

Zebraf is an ideal mode