In [1]:
pip install transformers datasets peft accelerate bitsandbytes

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load the "cnn_dailymail" dataset ("3.0.0" configuration) and preview one training record
from datasets import load_dataset

dataset = load_dataset("cnn_dailymail", "3.0.0")

# Index 0 of the train split is printed as a quick sanity check of the record layout.
first_train_example = dataset["train"][0]
print(first_train_example)

{'article': 'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office char

In [9]:
# Preprocessing: load the tokenizer that belongs to the base checkpoint.
from transformers import AutoTokenizer

# Hub identifier of the checkpoint whose tokenizer is loaded here.
model_name = "tiiuae/falcon-rw-1b"
# trust_remote_code=True allows custom code from the model repository to run while loading.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the EOS token as the pad token; later cells pad sequences to a fixed length.
# NOTE(review): presumably this tokenizer ships without a dedicated pad token -- confirm.
tokenizer.pad_token = tokenizer.eos_token

In [4]:
# Turn one raw record into an instruction-style prompt plus its reference summary
def format_sample(example):
    """Build the prompt/target text pair for a single dataset record.

    Args:
        example: mapping with an "article" entry (text to summarize) and a
            "highlights" entry (reference summary).

    Returns:
        dict with "prompt" (the article wrapped in the summarization
        instruction, ending in the "Summary:" cue) and "target" (the
        highlights, unchanged).
    """
    prompt = "Summarize the following article:\n\n{}\n\nSummary:".format(example["article"])
    return {"prompt": prompt, "target": example["highlights"]}

In [5]:
# Run format_sample over every record of every split; its returned "prompt" and "target"
# fields become columns of the mapped dataset.
formatted_dataset = dataset.map(format_sample)


Map:   0%|          | 0/287113 [00:00<?, ? examples/s]

Map:   0%|          | 0/13368 [00:00<?, ? examples/s]

Map:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [10]:
#tokenizing the dataset
_SUMMARY_CUE = "\n\nSummary:"  # trailing cue of the prompt template; preserved when a prompt is shortened
_IGNORE_INDEX = -100  # label value that the loss skips


def _encode(text):
    """Return the token ids of `text`, untruncated and without added special tokens."""
    return list(tokenizer(text, add_special_tokens=False)["input_ids"])


def _fit_prompt(prompt_text, budget):
    """Encode the prompt and shorten it to at most `budget` tokens, keeping the summary cue."""
    prompt_ids = _encode(prompt_text)
    if len(prompt_ids) <= budget:
        return prompt_ids
    if prompt_text.endswith(_SUMMARY_CUE):
        cue_ids = _encode(_SUMMARY_CUE)
        room = budget - len(cue_ids)
        if room > 0:
            # Cut the article body, not the cue, so the model still sees "Summary:".
            body_ids = _encode(prompt_text[: -len(_SUMMARY_CUE)])
            return body_ids[:room] + cue_ids
    # No recognisable cue (or no room for it): plain right-truncation.
    return prompt_ids[:budget]


def tokenize_function(example, max_length=512, max_target_tokens=128):
    """Tokenize one prompt/target pair into fixed-length causal-LM training features.

    The sequence is assembled at token level as ``prompt + target + EOS`` and
    right-padded to ``max_length``. Labels are ``-100`` on the prompt and on the
    padding, so only the summary tokens (and the closing EOS) contribute to the
    loss. Space for the target is reserved first; an over-long prompt is
    shortened instead of letting truncation remove the summary.

    Args:
        example: mapping with "prompt" and "target" strings.
        max_length: total number of tokens per returned sequence.
        max_target_tokens: upper bound on target tokens, including the EOS.

    Returns:
        dict with "input_ids", "attention_mask" and "labels", each a list of
        exactly ``max_length`` ints.

    Raises:
        ValueError: if ``max_length`` is not positive, or the tokenizer has
            neither a pad token nor an EOS token to pad with.
    """
    if max_length < 1:
        raise ValueError("max_length must be positive")

    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else eos_id
    if pad_id is None:
        raise ValueError("tokenizer needs a pad or EOS token to pad to max_length")

    # Encode the target with the same leading space the joined text used.
    target_cap = max(1, min(max_target_tokens, max_length))
    target_ids = _encode(" " + example["target"])
    if eos_id is not None:
        # Always close the summary with EOS so the model learns where to stop.
        target_ids = target_ids[: target_cap - 1] + [eos_id]
    else:
        target_ids = target_ids[:target_cap]

    # Whatever the target does not use is the prompt's budget.
    prompt_ids = _fit_prompt(example["prompt"], max_length - len(target_ids))

    input_ids = prompt_ids + target_ids
    labels = [_IGNORE_INDEX] * len(prompt_ids) + target_ids
    attention_mask = [1] * len(input_ids)

    # Padding is masked by position, so a pad id equal to the EOS id cannot
    # hide the real EOS label or leak padding into the loss.
    pad_count = max_length - len(input_ids)
    return {
        "input_ids": input_ids + [pad_id] * pad_count,
        "attention_mask": attention_mask + [0] * pad_count,
        "labels": labels + [_IGNORE_INDEX] * pad_count,
    }

In [None]:
# Tokenize each split one record at a time, dropping the original text columns so
# only the fields returned by tokenize_function remain.
tokenized_dataset = {}
for split_name, split_data in formatted_dataset.items():
    tokenized_dataset[split_name] = split_data.map(
        tokenize_function,
        batched=False,
        remove_columns=split_data.column_names,
    )


Map:   0%|          | 0/287113 [00:00<?, ? examples/s]

In [10]:
#loading the model and setting the Qlora configs
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
import torch

# 4-bit NF4 quantization with double quantization; matmuls are computed in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# Load the same checkpoint the tokenizer was created from (model_name is set in the tokenizer cell).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Prepare the quantized base model for training before attaching adapters
# (the PEFT-documented step for k-bit fine-tuning).
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 8, scaling 16, 10% dropout, no bias terms trained.
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.
None of the available devices `available_devices = None` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {'"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)', 'mps', 'npu', 'hpu', 'xpu', 'cuda'}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: None of the available devices `available_devices = None` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {'"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)', 'mps', 'npu', 'hpu', 'xpu', 'cuda'}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

In [7]:
#data collator
from transformers import default_data_collator

# The tokenization step already emits fixed-length "input_ids", "attention_mask" and
# prompt-masked "labels", so the batch only needs to be stacked into tensors.
# DataCollatorForLanguageModeling(mlm=False) must not be used here: it rebuilds
# "labels" from "input_ids", discarding the prompt masking, and because the pad
# token is the EOS token it would also mask every EOS label.
data_collator = default_data_collator


In [8]:
#training argument
from transformers import TrainingArguments

# Evaluation is left at the library default (disabled). The explicit
# `evaluation_strategy` keyword was dropped because newer transformers releases
# renamed it to `eval_strategy` and reject the old name; omitting it works on both.
training_args = TrainingArguments(
    output_dir="./falcon-qlora-summarizer",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # effective batch of 16 sequences per device per optimizer step
    num_train_epochs=1,
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=2,
    fp16=True,  # Use mixed precision
    report_to="none",
    remove_unused_columns=False,  # Important for PEFT models
)


NameError: name 'model' is not defined

In [None]:
# Build the Trainer on a small shuffled slice of the training split
from transformers import Trainer

# 1000 examples, shuffled with a fixed seed, keep test runs fast and repeatable.
train_subset = tokenized_dataset["train"].shuffle(seed=42).select(range(1000))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_subset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)


In [None]:
# Run fine-tuning with the Trainer configured above (dataset subset and TrainingArguments).
trainer.train()

In [None]:
# Write the fine-tuned PEFT-wrapped model to the "falcon-qlora-summarizer" directory.
# NOTE(review): for a PEFT model this presumably stores only the adapter weights -- confirm.
model.save_pretrained("falcon-qlora-summarizer")

In [None]:
# Save the tokenizer files into the same "falcon-qlora-summarizer" directory as the model.
tokenizer.save_pretrained("falcon-qlora-summarizer")