In [1]:
!pip install unsloth # install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git # Also get the latest version Unsloth!

Collecting unsloth
  Downloading unsloth-2025.5.1-py3-none-any.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.5.1 (from unsloth)
  Downloading unsloth_zoo-2025.5.1-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.20-py3-none-any.whl.metadata (10 kB)
Collecting transformers!=4.47.0,==4.51.3 (from unsloth)
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.15.2,>=0.7.9 (from unsloth)
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fssp

### Import Libraries & Initialize Huggingface & WnB (Weight and Bias)

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Modules for fine-tuning
from unsloth import FastLanguageModel
import torch # Import PyTorch
from trl import SFTTrainer # Trainer for supervised fine-tuning (SFT)
from unsloth import is_bfloat16_supported # Checks if the hardware supports bfloat16 precision
# Hugging Face modules
from huggingface_hub import login # Lets you login to API
from transformers import TrainingArguments # Defines training hyperparameters
from datasets import load_dataset # Lets you load fine-tuning datasets
# Import weights and biases
import wandb
# Import kaggle secrets
from kaggle_secrets import UserSecretsClient

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-05-12 17:35:53.955525: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747071354.143049      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747071354.202744      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
# Initialize Hugging Face & WnB tokens
user_secrets = UserSecretsClient() # from kaggle_secrets import UserSecretsClient
hugging_face_token = user_secrets.get_secret("HK_TOKEN")
wnb_token = user_secrets.get_secret("wnb")

# Login to Hugging Face
login(hugging_face_token) # from huggingface_hub import login

# Login to WnB
wandb.login(key=wnb_token) # import wandb
run = wandb.init(
    project='Fine-tune-DeepSeek-R1-Distill-Llama-8B for Text Summarization', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mstephen-j-pratama[0m ([33mstephen-j-pratama-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [5]:
# Set parameters
max_seq_length = 2048 # Define the maximum sequence length a model can handle (i.e. how many tokens can be processed at once)
dtype = None # Set to default 
load_in_4bit = True # Enables 4 bit quantization — a memory saving optimization 

# Load the DeepSeek R1 model and tokenizer using unsloth — imported using: from unsloth import FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",  # Load the pre-trained DeepSeek R1 model (8B parameter version)
    max_seq_length=max_seq_length, # Ensure the model can process up to 2048 tokens at once
    dtype=dtype, # Use the default data type (e.g., FP16 or BF16 depending on hardware support)
    load_in_4bit=load_in_4bit, # Load the model in 4-bit quantization to save memory
    token=hugging_face_token, # Use hugging face token
)

==((====))==  Unsloth 2025.5.1: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 6.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

### Summarization Example without Finetuning

In [6]:
# Define a system prompt under prompt_style 
prompt_style = """Below is an instruction that describes a task, paired with an input that provides text that needs to be process. 
Write a response that appropriately completes the request.

### Instruction:
You are a language expert capable of both summarizing and expanding (elongating) text depending on the task.
Your task is to generate a concise **summary** that captures the core message.

### Input:
{}

### Response:
<Summary>{}
"""

In [7]:
# Creating a test text to summarize
text = """Hypertension, commonly known as high blood pressure, is a chronic medical condition in which the blood pressure in the arteries is persistently elevated. 
          It is one of the most important risk factors for cardiovascular diseases, including heart attack and stroke. 
          Often referred to as a 'silent killer,' hypertension typically presents with no symptoms, making routine screening crucial. 
          Lifestyle factors such as a high-sodium diet, lack of physical activity, excessive alcohol intake, and chronic stress can contribute to the development of hypertension. 
          If left untreated, it can lead to long-term complications, including damage to the heart, kidneys, and eyes. 
          Management includes both lifestyle modifications and pharmacologic interventions tailored to the individual’s risk profile.
       """

# Enable optimized inference mode for Unsloth models (improves speed and efficiency)
FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!

# Format the text using the structured prompt (`prompt_style`) and tokenize it
inputs = tokenizer([prompt_style.format(text, "", "")], return_tensors="pt").to("cuda")  # Convert input to PyTorch tensor & move to GPU

# Generate a response using the model
outputs = model.generate(
    input_ids=inputs.input_ids, # Tokenized input text
    attention_mask=inputs.attention_mask, # Attention mask to handle padding
    max_new_tokens=1200, # Limit response length to 1200 tokens (to prevent excessive output)
    use_cache=True, # Enable caching for faster inference
)

# Decode the generated output tokens into human-readable text
response = tokenizer.batch_decode(outputs)

# Extract and print only the relevant response part (after "### Response:")
print(response[0].split("### Response:")[1])  


<Summary>
Hypertension, also known as high blood pressure, is a chronic condition where blood pressure consistently stays elevated. It's a major risk factor for heart attacks and strokes, often called a 'silent killer' because it usually has no noticeable symptoms. Regular checking is important because hypertension doesn't always show signs. Factors like a high-sodium diet, lack of exercise, heavy drinking, and stress can contribute to it. If left untreated, it can harm the heart, kidneys, and eyes. Treatment involves both lifestyle changes and medications based on the person's risk factors.
</Summary>

Okay, so I need to figure out how to approach this task. The user provided an instruction where I act as a language expert, capable of summarizing or expanding text. The task is to generate a concise summary that captures the core message of the given input. 

Looking at the input, it's a detailed paragraph about hypertension, explaining what it is, its risks, contributing factors, com

### Load the Dataset

In [8]:
dataset = load_dataset("csv", data_files="/kaggle/input/newspaper-text-summarization-cnn-dailymail/cnn_dailymail/train.csv", split = "train[0:500]",trust_remote_code=True)
dataset

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['id', 'article', 'highlights'],
    num_rows: 500
})

In [9]:
dataset[1]

{'id': '0002095e55fcbd3a2f366d9bf92a95433dc305ef',
 'article': '(CNN) -- Ralph Mata was an internal affairs lieutenant for the Miami-Dade Police Department, working in the division that investigates allegations of wrongdoing by cops. Outside the office, authorities allege that the 45-year-old longtime officer worked with a drug trafficking organization to help plan a murder plot and get guns. A criminal complaint unsealed in U.S. District Court in New Jersey Tuesday accuses Mata, also known as "The Milk Man," of using his role as a police officer to help the drug trafficking organization in exchange for money and gifts, including a Rolex watch. In one instance, the complaint alleges, Mata arranged to pay two assassins to kill rival drug dealers. The killers would pose as cops, pulling over their targets before shooting them, according to the complaint. "Ultimately, the (organization) decided not to move forward with the murder plot, but Mata still received a payment for setting up the 

### Data Preparation

In [10]:
# We need to format the dataset to fit our prompt training style 
EOS_TOKEN = tokenizer.eos_token  # Define EOS_TOKEN which the model when to stop generating text during training
EOS_TOKEN

'<｜end▁of▁sentence｜>'

In [11]:
def formatting_prompts_func(examples):  # Takes a batch of dataset examples as input
    inputs = examples["article"]       # Extracts the text from the article dataset
    outputs = examples["highlights"]      # Extracts the final model-generated response (answer)
    
    texts = []  # Initializes an empty list to store the formatted prompts
    
    # Iterate over the dataset, formatting each article text, and response
    for input, output in zip(inputs, outputs):  
        text = prompt_style.format(input, output) + EOS_TOKEN  # Insert values into prompt template & append EOS token
        texts.append(text)  # Add the formatted text to the list

    return {
        "text": texts,  # Return the newly formatted dataset with a "text" column containing structured prompts
    }

In [12]:
# Update dataset formatting
dataset_finetune = dataset.map(formatting_prompts_func, batched = True)
dataset_finetune["text"][0]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

"Below is an instruction that describes a task, paired with an input that provides text that needs to be process. \nWrite a response that appropriately completes the request.\n\n### Instruction:\nYou are a language expert capable of both summarizing and expanding (elongating) text depending on the task.\nYour task is to generate a concise **summary** that captures the core message.\n\n### Input:\nBy . Associated Press . PUBLISHED: . 14:11 EST, 25 October 2013 . | . UPDATED: . 15:36 EST, 25 October 2013 . The bishop of the Fargo Catholic Diocese in North Dakota has exposed potentially hundreds of church members in Fargo, Grand Forks and Jamestown to the hepatitis A virus in late September and early October. The state Health Department has issued an advisory of exposure for anyone who attended five churches and took communion. Bishop John Folda (pictured) of the Fargo Catholic Diocese in North Dakota has exposed potentially hundreds of church members in Fargo, Grand Forks and Jamestown t

### Build the Model

In [13]:
import torch

print("CUDA Available:", torch.cuda.is_available())
print("Device:", torch.cuda.get_device_name(0))
print("BF16 supported:", torch.cuda.is_bf16_supported())
print("FP16 supported:", torch.cuda.get_device_capability(0)[0] >= 7)


CUDA Available: True
Device: Tesla P100-PCIE-16GB
BF16 supported: False
FP16 supported: False


In [14]:
# Apply LoRA (Low-Rank Adaptation) fine-tuning to the model 
model_lora = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank: Determines the size of the trainable adapters (higher = more parameters, lower = more efficiency)
    target_modules=[  # List of transformer layers where LoRA adapters will be applied
        "q_proj",   # Query projection in the self-attention mechanism
        "k_proj",   # Key projection in the self-attention mechanism
        "v_proj",   # Value projection in the self-attention mechanism
        "o_proj",   # Output projection from the attention layer
        "gate_proj",  # Used in feed-forward layers (MLP)
        "up_proj",    # Part of the transformer’s feed-forward network (FFN)
        "down_proj",  # Another part of the transformer’s FFN
    ],
    lora_alpha=16,  # Scaling factor for LoRA updates (higher values allow more influence from LoRA layers)
    lora_dropout=0,  # Dropout rate for LoRA layers (0 means no dropout, full retention of information)
    bias="none",  # Specifies whether LoRA layers should learn bias terms (setting to "none" saves memory)
    use_gradient_checkpointing="unsloth",  # Saves memory by recomputing activations instead of storing them (recommended for long-context fine-tuning)
    random_state=3407,  # Sets a seed for reproducibility, ensuring the same fine-tuning behavior across runs
    use_rslora=False,  # Whether to use Rank-Stabilized LoRA (disabled here, meaning fixed-rank LoRA is used)
    loftq_config=None,  # Low-bit Fine-Tuning Quantization (LoFTQ) is disabled in this configuration
)

Unsloth 2025.5.1 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [15]:
# Initialize the fine-tuning trainer — Imported using from trl import SFTTrainer
trainer = SFTTrainer(
    model=model_lora,  # The model to be fine-tuned
    tokenizer=tokenizer,  # Tokenizer to process text inputs
    train_dataset=dataset_finetune,  # Dataset used for training
    dataset_text_field="text",  # Specifies which field in the dataset contains training text
    max_seq_length=max_seq_length,  # Defines the maximum sequence length for inputs
    dataset_num_proc=2,  # Uses 2 CPU threads to speed up data preprocessing

    # Define training arguments
    args=TrainingArguments(
        per_device_train_batch_size=2,  # Number of examples processed per device (GPU) at a time
        gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps before updating weights
        num_train_epochs=1, # Full fine-tuning run
        warmup_steps=5,  # Gradually increases learning rate for the first 5 steps
        max_steps=60,  # Limits training to 60 steps (useful for debugging; increase for full fine-tuning)
        learning_rate=2e-4,  # Learning rate for weight updates (tuned for LoRA fine-tuning)
        fp16=not is_bfloat16_supported(),  # Use FP16 (if BF16 is not supported) to speed up training
        bf16=is_bfloat16_supported(),  # Use BF16 if supported (better numerical stability on newer GPUs)
        logging_steps=10,  # Logs training progress every 10 steps
        optim="adamw_8bit",  # Uses memory-efficient AdamW optimizer in 8-bit mode
        weight_decay=0.01,  # Regularization to prevent overfitting
        lr_scheduler_type="linear",  # Uses a linear learning rate schedule
        seed=3407,  # Sets a fixed seed for reproducibility
        output_dir="outputs",  # Directory where fine-tuned model checkpoints will be saved
    ),
)


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

### Train the Model

In [16]:
# Start the fine-tuning process
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 500 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,2.65
20,2.2125
30,2.1075
40,2.0957
50,2.0884
60,2.0521


In [17]:
# Save the fine-tuned model
wandb.finish()

0,1
train/epoch,▁▂▄▅▇██
train/global_step,▁▂▄▅▇██
train/grad_norm,█▂▁▂▂▂
train/learning_rate,█▇▅▄▂▁
train/loss,█▃▂▂▁▁

0,1
total_flos,2.648207955129139e+16
train/epoch,0.96
train/global_step,60.0
train/grad_norm,0.27704
train/learning_rate,0.0
train/loss,2.0521
train_loss,2.20103
train_runtime,3917.4214
train_samples_per_second,0.123
train_steps_per_second,0.015


### Testing the Model

In [18]:
text = """In a groundbreaking discovery, marine biologists from the Oceanic Research Institute have uncovered a previously unknown species of deep-sea creature during a routine expedition to the Mariana Trench. The organism, a bioluminescent cephalopod tentatively named Abyssalia lumina, was found at a depth of over 10,800 meters.
Researchers describe the species as having translucent skin, intricate patterns of light-emitting cells, and highly adaptive camouflage abilities. “This is one of the most unique organisms we've ever encountered,” said Dr. Leila Vargas, the lead scientist on the expedition. “Its ability to produce dynamic light displays suggests complex communication or hunting strategies.”
The discovery has sparked excitement in the scientific community, offering new insights into life in extreme environments. The team is currently analyzing DNA samples and plans to publish their full findings later this year.
Environmental groups have also praised the expedition, emphasizing the importance of preserving deep-sea ecosystems that remain largely unexplored."""

# Load the inference model using FastLanguageModel (Unsloth optimizes for speed)
FastLanguageModel.for_inference(model_lora)  # Unsloth has 2x faster inference!

# Tokenize the input text with a specific prompt format and move it to the GPU
inputs = tokenizer([prompt_style.format(text, "")], return_tensors="pt").to("cuda")

# Generate a response using LoRA fine-tuned model with specific parameters
outputs = model_lora.generate(
    input_ids=inputs.input_ids,          # Tokenized input IDs
    attention_mask=inputs.attention_mask, # Attention mask for padding handling
    max_new_tokens=1200,                  # Maximum length for generated response
    use_cache=True,                        # Enable cache for efficient generation
)

# Decode the generated response from tokenized format to readable text
response = tokenizer.batch_decode(outputs)

# Extract and print only the model's response part after "### Response:"
print(response[0].split("### Response:")[1])


<Summary>
Marine biologists have uncovered a new species of deep-sea creature .
Abyssalia lumina was found at a depth of over 10,800 meters in the Mariana Trench .
The creature has translucent skin and intricate patterns of light-emitting cells .
<｜end▁of▁sentence｜>


In [19]:
text = """A 12-year-old girl is being hailed as a hero after she bravely rescued her 78-year-old neighbor from a house fire on Monday afternoon. Emily Carter was walking home from school when she noticed smoke pouring from the home of Mr. Leonard Fields, a longtime resident of the neighborhood.
          Without hesitation, Emily ran to the front door, heard Mr. Fields calling for help, and crawled through the smoke-filled hallway to guide him out. Firefighters arrived minutes later and extinguished the blaze, which officials believe was caused by faulty wiring in the kitchen.
          “Her quick thinking and courage saved a life,” said Fire Chief Ronald Greene. “Most adults wouldn’t have reacted as bravely as she did.”
          Mr. Fields was treated for minor smoke inhalation but is expected to make a full recovery. Emily, a sixth grader at Cedarville Middle School, says she wants to become a firefighter when she grows up. “I just did what I thought was right,” she said, smiling shyly.
          Local officials say they plan to recognize her with a community service award next week."""


# Tokenize the input text with a specific prompt format and move it to the GPU
inputs = tokenizer([prompt_style.format(text, "")], return_tensors="pt").to("cuda")

# Generate a response using LoRA fine-tuned model with specific parameters
outputs = model_lora.generate(
    input_ids=inputs.input_ids,          # Tokenized input IDs
    attention_mask=inputs.attention_mask, # Attention mask for padding handling
    max_new_tokens=1200,                  # Maximum length for generated response
    use_cache=True,                        # Enable cache for efficient generation
)

# Decode the generated response from tokenized format to readable text
response = tokenizer.batch_decode(outputs)

# Extract and print only the model's response part after "### Response:"
print(response[0].split("<Summary>")[1].split("<｜end▁of▁sentence｜>")[0])


Emily Carter, 12, rescued her 78-year-old neighbor Leonard Fields from house fire .
Firefighters arrived minutes later and extinguished the blaze .
Emily was hailed as a hero after her quick thinking saved a life .



In [20]:
text = """A 12-year-old girl is being hailed as a hero after she bravely rescued her 78-year-old neighbor from a house fire on Monday afternoon. Emily Carter was walking home from school when she noticed smoke pouring from the home of Mr. Leonard Fields, a longtime resident of the neighborhood.
          Without hesitation, Emily ran to the front door, heard Mr. Fields calling for help, and crawled through the smoke-filled hallway to guide him out. Firefighters arrived minutes later and extinguished the blaze, which officials believe was caused by faulty wiring in the kitchen.
          “Her quick thinking and courage saved a life,” said Fire Chief Ronald Greene. “Most adults wouldn’t have reacted as bravely as she did.”
          Mr. Fields was treated for minor smoke inhalation but is expected to make a full recovery. Emily, a sixth grader at Cedarville Middle School, says she wants to become a firefighter when she grows up. “I just did what I thought was right,” she said, smiling shyly.
          Local officials say they plan to recognize her with a community service award next week."""


# Tokenize the input text with a specific prompt format and move it to the GPU
inputs = tokenizer([prompt_style.format(text, "")], return_tensors="pt").to("cuda")

# Generate a response using LoRA fine-tuned model with specific parameters
outputs = model_lora.generate(
    input_ids=inputs.input_ids,          # Tokenized input IDs
    attention_mask=inputs.attention_mask, # Attention mask for padding handling
    max_new_tokens=1200,                  # Maximum length for generated response
    use_cache=True,                        # Enable cache for efficient generation
)

# Decode the generated response from tokenized format to readable text
response = tokenizer.batch_decode(outputs)

# Extract and print only the model's response part after "### Response:"
print(response[0].split("<Summary>")[1].split("<｜end▁of▁sentence｜>")[0])


Emily Carter, 12, helped rescue 78-year-old neighbor from house fire .
Firefighters arrived minutes later and extinguished the blaze .
Emily was hailed as a hero after saving Mr. Leonard Fields .



In [21]:
data_files = {"train": "train.csv", "test": "test.csv"}

test_dataset = load_dataset("/kaggle/input/newspaper-text-summarization-cnn-dailymail/cnn_dailymail/",  data_files=data_files, trust_remote_code=True)
test_dataset['test']

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['id', 'article', 'highlights'],
    num_rows: 11490
})

In [22]:
test_dataset_finetune = test_dataset['test'].map(formatting_prompts_func, batched = True)
test_dataset_finetune["text"][0]

Map:   0%|          | 0/11490 [00:00<?, ? examples/s]

"Below is an instruction that describes a task, paired with an input that provides text that needs to be process. \nWrite a response that appropriately completes the request.\n\n### Instruction:\nYou are a language expert capable of both summarizing and expanding (elongating) text depending on the task.\nYour task is to generate a concise **summary** that captures the core message.\n\n### Input:\nEver noticed how plane seats appear to be getting smaller and smaller? With increasing numbers of people taking to the skies, some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable - it's putting our health and safety in danger. More than squabbling over the arm rest, shrinking space on planes putting our health and safety in danger? This week, a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set s

In [23]:
response[0].split("<Summary>\n")[1].split("\n<｜end▁of▁sentence｜>")[0]

'Emily Carter, 12, helped rescue 78-year-old neighbor from house fire .\nFirefighters arrived minutes later and extinguished the blaze .\nEmily was hailed as a hero after saving Mr. Leonard Fields .'

In [24]:
# test_dataset['test']["highlights"]

predictions = []

for i, text in enumerate(test_dataset['test']["article"][:100]):
    # Tokenize the input text with a specific prompt format and move it to the GPU
    inputs = tokenizer([prompt_style.format(text, "")], return_tensors="pt").to("cuda")
    
    # Generate a response using LoRA fine-tuned model with specific parameters
    outputs = model_lora.generate(
        input_ids=inputs.input_ids,          # Tokenized input IDs
        attention_mask=inputs.attention_mask, # Attention mask for padding handling
        max_new_tokens=1200,                  # Maximum length for generated response
        use_cache=True,                        # Enable cache for efficient generation
    )
    
    # Decode the generated response from tokenized format to readable text
    response = tokenizer.batch_decode(outputs)
    
    # Extract and print only the model's response part after "### Response:"
    result = response[0].split("<Summary>\n")[1].split("\n<｜end▁of▁sentence｜>")[0]
    predictions.append(result)

    print(f"Step {i}: {result}")

Step 0: Passengers on a packed plane may face health and safety risks .
Experts say crowded planes are putting passengers at risk .
The U.S Department of Transportation does not set minimum space for humans .
Step 1: Teenager Rahul Kumar, 17, climbed into lions' enclosure in Ahmedabad .
He shouted he would 'kill them' and tried to run towards the animals .
He fell into a moat and was rescued by zoo security guards .
Step 2: Dougie Freedman is on the verge of agreeing a new two-year deal with Nottingham Forest .
The Scot has stabilised Forest since replacing Stuart Pearce in February .
Forest made an audacious attempt on the play-off places when Freedman replaced Pearce .
Step 3: Liverpool are linked with a move for Fiorentina goalkeeper Neto .
The Brazilian's agent Stefano Castagna says he will leave Fiorentina in the summer .
Neto was reportedly close to a move to Juventus but his agent says no decision has been made .
Step 4: Bruce Jenner will break his silence on his gender identity

In [25]:
print(predictions[0])

Passengers on a packed plane may face health and safety risks .
Experts say crowded planes are putting passengers at risk .
The U.S Department of Transportation does not set minimum space for humans .


### Rogue Evaluation

In [27]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [29]:
# Calculate the ROUGE scores between the predicted summaries and the actual summaries

from rouge import Rouge
rouge = Rouge()
scores = rouge.get_scores(predictions, test_dataset['test']["highlights"][:100], avg=True)

# Print the ROUGE scores
print(f"ROUGE-1: {scores['rouge-1']['f']:.4f}")
print(f"ROUGE-2: {scores['rouge-2']['f']:.4f}")
print(f"ROUGE-L: {scores['rouge-l']['f']:.4f}")

ROUGE-1: 0.3384
ROUGE-2: 0.1199
ROUGE-L: 0.3163


### Saving Model & Results

In [30]:
import pandas as pd

# Example data
data = {
    "articles": test_dataset['test']["article"][:100],
    "prediction": predictions,
    "actual": test_dataset['test']["highlights"][:100]
}

# Create DataFrame
df = pd.DataFrame(data)
df

Unnamed: 0,articles,prediction,actual
0,Ever noticed how plane seats appear to be gett...,Passengers on a packed plane may face health a...,Experts question if packed out planes are put...
1,A drunk teenage boy had to be rescued by secur...,"Teenager Rahul Kumar, 17, climbed into lions' ...",Drunk teenage boy climbed into lion enclosure ...
2,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,Liverpool target Neto is also wanted by PSG an...,Liverpool are linked with a move for Fiorentin...,Fiorentina goalkeeper Neto has been linked wit...
4,Bruce Jenner will break his silence in a two-h...,Bruce Jenner will break his silence on his gen...,"Tell-all interview with the reality TV star, 6..."
...,...,...,...
95,As Zlatan Ibrahimovic famously believes the Wo...,Ibrahimovic has never won the Champions League...,Zlatan Ibrahimovic will line up against former...
96,"Jameela spent £3,000 on having all her amalgam...",Jameela Jamil had all her amalgam fillings rem...,"Jameela Jamil, 29, is convinced dental work tr..."
97,A paramedic who pretended he was gay to get cl...,"Paramedic Christopher Bridger, 25, from Steven...","Christopher Bridger, 25, attacked three women ..."
98,"Paris Saint-Germain face Nice on Saturday, hop...",Thiago Silva has been out of action with a thi...,Paris Saint-Germain captain Thiago Silva suffe...


In [31]:
df.to_csv("News Summary Prediction Result.csv")

In [33]:
model_lora.save_pretrained("fine_tuned_deepseek_lora")
tokenizer.save_pretrained("fine_tuned_deepseek_lora")

('fine_tuned_deepseek_lora/tokenizer_config.json',
 'fine_tuned_deepseek_lora/special_tokens_map.json',
 'fine_tuned_deepseek_lora/tokenizer.json')

In [34]:
# Load the DeepSeek R1 model and tokenizer using unsloth — imported using: from unsloth import FastLanguageModel

model_testing, tokenizer_testing = FastLanguageModel.from_pretrained("fine_tuned_deepseek_lora")

==((====))==  Unsloth 2025.5.1: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 6.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [35]:
text = """A 12-year-old girl is being hailed as a hero after she bravely rescued her 78-year-old neighbor from a house fire on Monday afternoon. Emily Carter was walking home from school when she noticed smoke pouring from the home of Mr. Leonard Fields, a longtime resident of the neighborhood.
          Without hesitation, Emily ran to the front door, heard Mr. Fields calling for help, and crawled through the smoke-filled hallway to guide him out. Firefighters arrived minutes later and extinguished the blaze, which officials believe was caused by faulty wiring in the kitchen.
          “Her quick thinking and courage saved a life,” said Fire Chief Ronald Greene. “Most adults wouldn’t have reacted as bravely as she did.”
          Mr. Fields was treated for minor smoke inhalation but is expected to make a full recovery. Emily, a sixth grader at Cedarville Middle School, says she wants to become a firefighter when she grows up. “I just did what I thought was right,” she said, smiling shyly.
          Local officials say they plan to recognize her with a community service award next week."""


# Tokenize the input text with a specific prompt format and move it to the GPU
inputs = tokenizer([prompt_style.format(text, "")], return_tensors="pt").to("cuda")

# Generate a response using LoRA fine-tuned model with specific parameters
outputs = model_testing.generate(
    input_ids=inputs.input_ids,          # Tokenized input IDs
    attention_mask=inputs.attention_mask, # Attention mask for padding handling
    max_new_tokens=1200,                  # Maximum length for generated response
    use_cache=True,                        # Enable cache for efficient generation
)

# Decode the generated response from tokenized format to readable text
response = tokenizer_testing.batch_decode(outputs)

# Extract and print only the model's response part after "### Response:"
print(response[0].split("<Summary>")[1].split("<｜end▁of▁sentence｜>")[0])


Emily Carter saved her 78-year-old neighbor from a house fire .
She was walking home from school and noticed smoke coming from Mr. Fields' home .
Firefighters arrived minutes later and extinguished the blaze .

