In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

from huggingface_hub import login
import torch

from datasets import load_dataset
from tqdm import tqdm

from dotenv import load_dotenv
import os
load_dotenv()
token = os.getenv("HF_TOKEN")

login(token = token)


In [2]:
print("Loading tokenizer ...")

model_1b = 'meta-llama/Llama-3.2-1B'
model_8b = 'meta-llama/Meta-Llama-3-8B'


current_model = model_1b

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


tokenizer = AutoTokenizer.from_pretrained(current_model)
print("Loading model ...")
model = AutoModelForCausalLM.from_pretrained(
   current_model,
   torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
   device_map="auto",
   # load_in_4bit=True, 
   )
tokenizer.pad_token = tokenizer.eos_token  # Required
print("Tokenizer and model loaded successfully.")


Loading tokenizer ...
Using device: cuda
Loading model ...
Tokenizer and model loaded successfully.


In [3]:
dataset = load_dataset("qiaojin/PubMedQA",'pqa_artificial')

In [4]:
dataset = dataset['train']

In [5]:
dataset

Dataset({
    features: ['pubid', 'question', 'context', 'long_answer', 'final_decision'],
    num_rows: 211269
})

In [6]:
mini_data = dataset.select(range(1000))
mini_data

Dataset({
    features: ['pubid', 'question', 'context', 'long_answer', 'final_decision'],
    num_rows: 1000
})

In [15]:
mini_data.to("auto")

AttributeError: 'Dataset' object has no attribute 'to'

In [7]:
print("\n".join(mini_data[0]['context']['contexts']))

Chronic rhinosinusitis (CRS) is a heterogeneous disease with an uncertain pathogenesis. Group 2 innate lymphoid cells (ILC2s) represent a recently discovered cell population which has been implicated in driving Th2 inflammation in CRS; however, their relationship with clinical disease characteristics has yet to be investigated.
The aim of this study was to identify ILC2s in sinus mucosa in patients with CRS and controls and compare ILC2s across characteristics of disease.
A cross-sectional study of patients with CRS undergoing endoscopic sinus surgery was conducted. Sinus mucosal biopsies were obtained during surgery and control tissue from patients undergoing pituitary tumour resection through transphenoidal approach. ILC2s were identified as CD45(+) Lin(-) CD127(+) CD4(-) CD8(-) CRTH2(CD294)(+) CD161(+) cells in single cell suspensions through flow cytometry. ILC2 frequencies, measured as a percentage of CD45(+) cells, were compared across CRS phenotype, endotype, inflammatory CRS su

In [8]:
def tokenize_train(example):
    context_text = "\n".join(example['context']['contexts'])

    prompt = (
        f"Contexts:\n{context_text}\n\n"
        "Based on the contexts above, answer the question below with 'Yes', 'No', or 'Maybe'.\n"
        "Then, provide a short explanation that justifies your answer using evidence from the context.\n"
        f"Question: {example['question']}\n"
        f"Answer: {example['final_decision']}\n"
        f"Explanation: {example['long_answer']}\n"
        )
    
    tokenized =  tokenizer(
        prompt,
        truncation=True,
        padding="max_length",         # optional, but safer with Trainer
    )

    tokenized["labels"] = tokenized["input_ids"].copy()
    # tokenized["labels"] = [
    #     token if token != tokenizer.pad_token_id else -100
    #     for token in tokenized["labels"]
    # ]

    return tokenized




tokenized_dataset = mini_data.map(tokenize_train)

# remove the columns that are not needed
# tokenized_dataset = tokenized_dataset.remove_columns(
#     [
#         'pubid',
#         'question',
#         'final_decision',
#         'long_answer',
#         'context'
#     ]
# )



Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [9]:
tokenized_dataset

Dataset({
    features: ['pubid', 'question', 'context', 'long_answer', 'final_decision', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 1000
})

In [10]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,                         # Rank of LoRA updates
    lora_alpha=16,               # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Inject into attention blocks
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)


In [13]:
lengths = tokenized_dataset.map(lambda x: {"length": len(x["input_ids"])})
print(max(lengths["length"])) 

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

KeyboardInterrupt: 

In [11]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters() 

trainable params: 851,968 || all params: 1,236,666,368 || trainable%: 0.0689


In [12]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./lora-llama3-8B",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=100,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=2,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)
trainer.train()

I0000 00:00:1746863147.217869   61406 service.cc:146] XLA service 0x5651d17a3660 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
  trainer = Trainer(
I0000 00:00:1746863147.219673   61406 service.cc:154]   StreamExecutor device (0): NVIDIA L4, Compute Capability 8.9
I0000 00:00:1746863147.224489   61406 se_gpu_pjrt_client.cc:897] Using BFC allocator.
I0000 00:00:1746863147.224597   61406 gpu_helpers.cc:114] XLA backend allocating 17677664256 bytes on device 0 for BFCAllocator.
I0000 00:00:1746863147.225184   61406 gpu_helpers.cc:154] XLA backend will use up to 5892554752 bytes on device 0 for CollectiveBFCAllocator.
  super().__init__(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
I0000 00:00:1746863149.976732   61406 cuda_dnn.cc:530] Loa

RuntimeError: Bad StatusOr access: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 1099528404992 bytes.

In [None]:
33554432/1e6

33.554432

In [None]:
print(torch.cuda.memory_allocated() / 1e9, "GB allocated")
print(torch.cuda.memory_reserved() / 1e9, "GB reserved")

16.074154496 GB allocated
16.076767232 GB reserved
