In [None]:
!pip install transformers datasets peft accelerate bitsandbytes -q

In [None]:
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

### Load Data

In [None]:
# Load CNN/DailyMail (config "3.0.0") and keep only the first 2000 training
# rows and the first 400 validation rows so the run stays small.
raw_datasets = load_dataset("cnn_dailymail", "3.0.0")
small_train, small_val = (
    raw_datasets[split_name].select(range(row_count))
    for split_name, row_count in (("train", 2000), ("validation", 400))
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


### Loading Base Model

In [None]:
# Hub identifier of the base checkpoint; reused below when loading the model.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

In [None]:
# Padding needs a pad token: register "<PAD>" only when the tokenizer does not
# already define one.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens(dict(pad_token="<PAD>"))

In [None]:
# Load the base checkpoint with 8-bit weights. The quantisation settings are
# passed through `quantization_config`; the bare `load_in_8bit=True` keyword
# is deprecated and slated for removal from `from_pretrained`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

# Grow the embedding matrix only when the tokenizer really has more entries
# than the model has embedding rows (e.g. a new pad token pushed it past the
# end). Resizing unconditionally sets the table to len(tokenizer), which
# shrinks it whenever the checkpoint ships a larger, padded vocabulary.
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
    model.resize_token_embeddings(len(tokenizer))

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/6.62G [00:00<?, ?B/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.61G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Embedding(151665, 3584)

### Preprocessing

In [None]:
def preprocess_function(examples, max_prompt_length=256, max_target_length=128):
    """Turn a batch of articles/highlights into causal-LM training examples.

    Each example becomes ONE token sequence laid out as
    ``instruction + article + "\\n\\nSummary:" + summary + EOS + padding``.
    A decoder-only model is trained on its own input sequence, so the summary
    has to be part of ``input_ids``; keeping it in a separate, shorter
    ``labels`` list means the model never sees it.

    Only the article is truncated, so the trailing ``"\\n\\nSummary:"`` cue is
    always present. The instruction text is the same one ``generate_summary``
    uses, so training and inference prompts agree.

    Args:
        examples: Batch mapping with ``"article"`` and ``"highlights"`` lists
            of strings (as passed by ``Dataset.map(..., batched=True)``).
        max_prompt_length: Token budget for instruction + article + cue.
        max_target_length: Token budget for the summary including its EOS.

    Returns:
        dict with ``input_ids``, ``attention_mask`` and ``labels``; every row
        has length ``max_prompt_length + max_target_length``. ``labels`` is
        ``-100`` on prompt and padding positions so that only summary tokens
        contribute to the loss.

    Raises:
        ValueError: if the budgets leave no room for article or summary tokens.

    NOTE(review): a collator that rebuilds labels from ``input_ids`` (such as
    ``DataCollatorForLanguageModeling(mlm=False)``) discards the prompt mask
    produced here; training then covers the whole sequence, which still
    includes the summary.
    """
    if not examples["article"]:
        return {"input_ids": [], "attention_mask": [], "labels": []}

    instruction = "Summarize the following news article briefly and clearly:\n\n"
    cue = "\n\nSummary:"

    # Tokenise the fixed pieces once. The instruction keeps the tokenizer's
    # default special tokens so the sequence starts the same way as a prompt
    # tokenised at generation time (assumes the tokenizer adds at most a
    # leading BOS, not a trailing EOS -- TODO confirm for other tokenizers).
    head_ids = tokenizer(instruction)["input_ids"]
    cue_ids = tokenizer(cue, add_special_tokens=False)["input_ids"]

    article_budget = max_prompt_length - len(head_ids) - len(cue_ids)
    if article_budget < 1 or max_target_length < 2:
        raise ValueError(
            "max_prompt_length/max_target_length are too small to hold any "
            "article or summary tokens"
        )

    article_ids = tokenizer(
        examples["article"],
        add_special_tokens=False,
        truncation=True,
        max_length=article_budget,
    )["input_ids"]
    # Leading space so the summary starts as a normal continuation of
    # "Summary:"; one slot of the target budget is reserved for EOS.
    summary_ids = tokenizer(
        [" " + summary.strip() for summary in examples["highlights"]],
        add_special_tokens=False,
        truncation=True,
        max_length=max_target_length - 1,
    )["input_ids"]

    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        # Padded positions are masked out below, so any valid id works here.
        pad_id = eos_id if eos_id is not None else 0

    total_length = max_prompt_length + max_target_length
    model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
    for article, summary in zip(article_ids, summary_ids):
        prompt = head_ids + article + cue_ids
        # EOS teaches the model where the summary ends.
        target = summary + ([eos_id] if eos_id is not None else [])
        used = len(prompt) + len(target)
        n_pad = total_length - used

        model_inputs["input_ids"].append(prompt + target + [pad_id] * n_pad)
        model_inputs["attention_mask"].append([1] * used + [0] * n_pad)
        model_inputs["labels"].append(
            [-100] * len(prompt) + target + [-100] * n_pad
        )
    return model_inputs

In [None]:
# Apply the batched preprocessing to the training subset first, then to the
# validation subset.
tokenized_train, tokenized_val = (
    subset.map(preprocess_function, batched=True)
    for subset in (small_train, small_val)
)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]



Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [None]:
# Collator for language modelling with masked-LM mode switched off.
# NOTE(review): per the transformers docs this collator derives labels from
# input_ids when mlm=False -- confirm that is the intended label source.
data_collator = DataCollatorForLanguageModeling(
    mlm=False,
    tokenizer=tokenizer,
)

### Generate Function

In [None]:
def generate_summary(text, max_new_tokens=80):
    """Sample a summary of ``text`` from the global ``model``.

    Args:
        text: Article to summarise.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated continuation only, with the prompt removed and
        surrounding whitespace stripped. ``generate`` on a decoder-only model
        returns prompt and completion together, so decoding the full output
        would echo the whole article back.
    """
    prompt = f"Summarize the following news article briefly and clearly:\n\n{text}\n\nSummary:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Pass an explicit pad id so generate() does not have to guess one (and
    # warn about it); fall back to EOS when the tokenizer defines no pad token.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    # Switch off dropout (including LoRA dropout, if adapters are attached)
    # so sampling is not perturbed by training-mode layers.
    model.eval()
    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_id,
    )

    # Keep only the tokens produced after the prompt.
    completion_ids = output[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

### Sample

In [None]:
# Use the first validation row as the running example: its article is the
# model input and its highlights are the reference summary.
sample_text, reference_summary = (
    small_val[0][column] for column in ("article", "highlights")
)

### Summary Before Finetuning

In [None]:

# Banner, then the example article followed by its reference summary.
print(
    "\n" + "=" * 80,
    "BEFORE FINE-TUNING",
    "=" * 80,
    "\n--- FULL ORIGINAL ARTICLE ---\n",
    sep="\n",
)
print(f"{sample_text} \n")
print("--- FULL REFERENCE SUMMARY ---\n", f"{reference_summary} \n", sep="\n")


BEFORE FINE-TUNING

--- FULL ORIGINAL ARTICLE ---

(CNN)Share, and your gift will be multiplied. That may sound like an esoteric adage, but when Zully Broussard selflessly decided to give one of her kidneys to a stranger, her generosity paired up with big data. It resulted in six patients receiving transplants. That surprised and wowed her. "I thought I was going to help this one person who I don't know, but the fact that so many people can have a life extension, that's pretty big," Broussard told CNN affiliate KGO. She may feel guided in her generosity by a higher power. "Thanks for all the support and prayers," a comment on a Facebook page in her name read. "I know this entire journey is much bigger than all of us. I also know I'm just the messenger." CNN cannot verify the authenticity of the page. But the power that multiplied Broussard's gift was data processing of genetic profiles from donor-recipient pairs. It works on a simple swapping principle but takes it to a much higher le

In [None]:
# Baseline output for the example article, produced before any fine-tuning.
generated_summary_before = generate_summary(text=sample_text)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
# Show the baseline generation under its own heading.
print(
    "--- GENERATED SUMMARY (Before Fine-tuning) ---\n",
    f"{generated_summary_before} \n",
    sep="\n",
)

--- GENERATED SUMMARY (Before Fine-tuning) ---

Summarize the following news article briefly and clearly:

(CNN)Share, and your gift will be multiplied. That may sound like an esoteric adage, but when Zully Broussard selflessly decided to give one of her kidneys to a stranger, her generosity paired up with big data. It resulted in six patients receiving transplants. That surprised and wowed her. "I thought I was going to help this one person who I don't know, but the fact that so many people can have a life extension, that's pretty big," Broussard told CNN affiliate KGO. She may feel guided in her generosity by a higher power. "Thanks for all the support and prayers," a comment on a Facebook page in her name read. "I know this entire journey is much bigger than all of us. I also know I'm just the messenger." CNN cannot verify the authenticity of the page. But the power that multiplied Broussard's gift was data processing of genetic profiles from donor-recipient pairs. It works on a sim

### LoRA Config

In [None]:
# Prepare the quantised base model for training before attaching adapters.
# This is the step PEFT documents for k-bit (8-bit/4-bit) models; see the
# `prepare_model_for_kbit_training` reference for what it changes.
model = prepare_model_for_kbit_training(model)

# LoRA adapters of rank 8 on the attention query and value projections only.
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# NOTE: this rebinds `model`; re-running the cell wraps the already wrapped
# model again, so reload the base model before executing it a second time.
model = get_peft_model(model, peft_config)



### Trainer

In [None]:
# Hyper-parameters for a single short fine-tuning pass.
training_args = TrainingArguments(
    # Where checkpoints go, keeping at most one on disk.
    output_dir="./results",
    save_total_limit=1,
    # One epoch with batches of two per device for both training and evaluation.
    num_train_epochs=1,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    # Evaluate once per epoch and log every 10 steps.
    eval_strategy="epoch",
    logging_steps=10,
    # fp16 mixed precision; no external experiment tracker.
    fp16=True,
    report_to="none",
)

In [None]:
# Bundle the adapter-wrapped model, the tokenised splits, the collator and
# the training arguments into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
)

In [None]:
# Run the fine-tuning loop. As the last expression of the cell, the returned
# training summary is displayed as the cell output.
trainer.train()

Epoch,Training Loss,Validation Loss
1,2.624,2.882715


TrainOutput(global_step=1000, training_loss=2.77337154006958, metrics={'train_runtime': 1510.6188, 'train_samples_per_second': 1.324, 'train_steps_per_second': 0.662, 'total_flos': 2.1724300050432e+16, 'train_loss': 2.77337154006958, 'epoch': 1.0})

### Summary After Finetuning

In [None]:
# Same example article as before, now generated with the fine-tuned model.
generated_summary_after = generate_summary(text=sample_text)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
# Article, reference summary and post-fine-tuning generation, one after another.
print(
    "\n--- FULL ORIGINAL ARTICLE ---\n",
    f"{sample_text} \n",
    "--- FULL REFERENCE SUMMARY ---\n",
    f"{reference_summary} \n",
    "--- GENERATED SUMMARY (After Fine-tuning) ---\n",
    generated_summary_after,
    sep="\n",
)


--- FULL ORIGINAL ARTICLE ---

(CNN)Share, and your gift will be multiplied. That may sound like an esoteric adage, but when Zully Broussard selflessly decided to give one of her kidneys to a stranger, her generosity paired up with big data. It resulted in six patients receiving transplants. That surprised and wowed her. "I thought I was going to help this one person who I don't know, but the fact that so many people can have a life extension, that's pretty big," Broussard told CNN affiliate KGO. She may feel guided in her generosity by a higher power. "Thanks for all the support and prayers," a comment on a Facebook page in her name read. "I know this entire journey is much bigger than all of us. I also know I'm just the messenger." CNN cannot verify the authenticity of the page. But the power that multiplied Broussard's gift was data processing of genetic profiles from donor-recipient pairs. It works on a simple swapping principle but takes it to a much higher level, according to Ca