In [1]:
import torch
from datasets import load_dataset, concatenate_datasets
from transformers import (AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments,
                          Seq2SeqTrainer, DataCollatorForSeq2Seq)
from peft import LoraConfig, get_peft_model
import evaluate

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


In [2]:
from torch.backends import cuda

cuda.matmul.allow_tf32 = True
cuda.enable_flash_sdp(True)
cuda.enable_math_sdp(True)
cuda.enable_mem_efficient_sdp(True)


In [3]:
# Load datasets (only the 'train' splits, as we'll split manually)
cnn = load_dataset("cnn_dailymail", "3.0.0", split="train")
xsum = load_dataset("xsum", split="train")

# Take a subset of 1000 each
cnn_small = cnn.select(range(20000))
xsum_small = xsum.select(range(20000))

# Add prefix and rename fields to unify format
def add_prefix_cnn(example):
    return {
        "text": "detailed: " + example["article"], 
        "summary": example["highlights"]
    }
def add_prefix_xsum(example):
    return {
        "text": "extreme: " + example["document"], 
        "summary": example["summary"]
    }

cnn_pfx = cnn_small.map(add_prefix_cnn, remove_columns=cnn_small.column_names)
xsum_pfx = xsum_small.map(add_prefix_xsum, remove_columns=xsum_small.column_names)

# Combine and split (train:80%, val:10%, test:10%)
dataset = concatenate_datasets([cnn_pfx, xsum_pfx])
split1 = dataset.train_test_split(test_size=0.05, seed=42)  # 9500 train, 500 temp
split2 = split1['test'].train_test_split(test_size=0.50, seed=42)  # 250 val, 250 test

train_dataset = split1['train']
val_dataset = split2['train']
test_dataset = split2['test']
print(f"Train/Val/Test sizes: {len(train_dataset)}/{len(val_dataset)}/{len(test_dataset)}")


Using the latest cached version of the dataset since xsum couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at C:\Users\Ignke\.cache\huggingface\datasets\xsum\default\1.2.0\082863bf4754ee058a5b6f6525d0cb2b18eadb62c7b370b095d1364050a52b71 (last modified on Wed Dec  3 03:57:12 2025).


Train/Val/Test sizes: 38000/1000/1000


In [4]:
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")

max_source_length = 512
max_target_length = 160

def preprocess(batch):
    inputs = tokenizer(batch["text"], max_length=max_source_length, truncation=True, padding="max_length")
    # Tokenize targets with the `text_target` argument (T5)
    labels = tokenizer(text_target=batch["summary"], max_length=max_target_length, truncation=True, padding="max_length")
    # Replace pad token id's in labels by -100 so they are ignored in loss
    labels["input_ids"] = [
        [(tok if tok != tokenizer.pad_token_id else -100) for tok in label] 
        for label in labels["input_ids"]
    ]
    inputs["labels"] = labels["input_ids"]
    return inputs

# Apply preprocessing
tokenized_train = train_dataset.map(preprocess, batched=True, remove_columns=["text", "summary"])
tokenized_val   = val_dataset.map(preprocess, batched=True, remove_columns=["text", "summary"])
tokenized_test  = test_dataset.map(preprocess, batched=True, remove_columns=["text", "summary"])


In [5]:
model = AutoModelForSeq2SeqLM.from_pretrained(
    "google/flan-t5-large",
    torch_dtype=torch.bfloat16,  # IMPORTANT for speed
    device_map="auto"
)

model.config.use_cache = False
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

# activates xformers / memory-efficient attention where possible
model = model.to("cuda")
model = model.to(memory_format=torch.contiguous_format)



`torch_dtype` is deprecated! Use `dtype` instead!


In [6]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[
        "SelfAttention.q",
        "SelfAttention.k",
        "SelfAttention.v",
        "SelfAttention.o",
        "EncDecAttention.q",
        "EncDecAttention.k",
        "EncDecAttention.v",
        "EncDecAttention.o",
        "DenseReluDense.wi",
        "DenseReluDense.wo"
    ],
    bias="none",
    task_type="SEQ_2_SEQ_LM"
)

model = get_peft_model(model, lora_config)
print(model.print_trainable_parameters())


trainable params: 12,386,304 || all params: 795,536,384 || trainable%: 1.5570
None


In [7]:
training_args = Seq2SeqTrainingArguments(
    output_dir="./flan-t5-summ-peft",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,
    learning_rate=5e-5,
    num_train_epochs=1,
    logging_steps=200,
    logging_strategy="steps",
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    predict_with_generate=True,
    generation_max_length=128,
    bf16=True,
    fp16=False,
    gradient_checkpointing=True,
    label_smoothing_factor=0.0,
    report_to="tensorboard",
)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, label_pad_token_id=-100)


In [8]:
import numpy as np
import evaluate
rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    preds, labels = eval_pred
    
    # preds may be (generated_ids, scores)
    if isinstance(preds, tuple):
        preds = preds[0]

    preds = np.asarray(preds)
    labels = np.asarray(labels)

    # Replace all negative values (including -100)
    preds = np.where(preds < 0, tokenizer.pad_token_id, preds)
    labels = np.where(labels < 0, tokenizer.pad_token_id, labels)

    # Convert to lists
    preds_list = preds.tolist()
    labels_list = labels.tolist()

    decoded_preds = tokenizer.batch_decode(
        preds_list,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True
    )
    decoded_labels = tokenizer.batch_decode(
        labels_list,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True
    )

    decoded_preds = [p.strip() for p in decoded_preds]
    decoded_labels = [l.strip() for l in decoded_labels]

    result = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=True,
    )

    out = {}
    for k, v in result.items():
        if hasattr(v, "mid"):
            out[k] = v.mid.fmeasure * 100
        else:
            out[k] = v * 100

    return {
        "rouge1": round(out.get("rouge1", 0), 4),
        "rouge2": round(out.get("rouge2", 0), 4),
        "rougeL": round(out.get("rougeL", 0), 4),
    }


In [9]:
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

model.to(torch.bfloat16)

trainer.train()

trainer.save_model("flan_t5_detailed_extreme")
tokenizer.save_pretrained("flan_t5_detailed_extreme")

  trainer = Seq2SeqTrainer(
The model is already on multiple devices. Skipping the move to device specified in `args`.


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
1,1.7363,1.528251,38.548,15.9894,29.4461


('flan_t5_detailed_extreme\\tokenizer_config.json',
 'flan_t5_detailed_extreme\\special_tokens_map.json',
 'flan_t5_detailed_extreme\\tokenizer.json')

In [10]:
trainer.save_model("flan_t5_detailed_extreme")
tokenizer.save_pretrained("flan_t5_detailed_extreme")


('flan_t5_detailed_extreme\\tokenizer_config.json',
 'flan_t5_detailed_extreme\\special_tokens_map.json',
 'flan_t5_detailed_extreme\\tokenizer.json')

In [18]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load the fine-tuned model and tokenizer
model = AutoModelForSeq2SeqLM.from_pretrained("flan_t5_detailed_extreme").to(device)
tokenizer = AutoTokenizer.from_pretrained("flan_t5_detailed_extreme")

def summarize(text, style="detailed"):
    prefix = "detailed: " if style=="detailed" else "extreme: "
    input_text = prefix + text
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=max_source_length).to(device)
    outputs = model.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage:
article = ("""
           The gaming industry has transformed dramatically over the past few decades, evolving from a niche hobby into a global cultural and economic powerhouse. What began as simple pixelated games on arcade machines and early home consoles has grown into a complex ecosystem encompassing PC, console, and mobile platforms, competitive esports, and immersive virtual realities. Its influence now extends beyond entertainment, affecting technology, culture, and even social interaction.

One key factor behind the gaming industry’s growth is technological innovation. Advances in graphics, processing power, and internet connectivity have enabled increasingly sophisticated and visually stunning games. Open-world environments, realistic physics, and dynamic storytelling immerse players in experiences that were unimaginable just a decade ago. Mobile technology has expanded gaming’s reach, allowing millions to play anywhere, anytime, breaking down traditional barriers of access and making gaming a truly global phenomenon.

The rise of online multiplayer and social gaming has also reshaped how people interact with games. Platforms like Xbox Live, PlayStation Network, and Steam allow players to connect across continents, forming communities and competing in real-time. Social features—such as streaming on platforms like Twitch or sharing gameplay on YouTube—have turned gaming into a shared cultural experience. Competitive gaming, or esports, has grown into a billion-dollar industry with professional players, sponsored tournaments, and dedicated fanbases rivaling traditional sports.

Economic growth in the gaming sector has been staggering. Global revenue now surpasses that of the film and music industries combined, driven by digital distribution, in-game purchases, and subscription models. Game development studios range from small indie teams creating niche titles to multinational corporations producing blockbuster franchises. This diversity fosters creativity and innovation, allowing unique experiences to reach audiences that were once considered too small to support niche games.

However, the industry faces challenges alongside its growth. Concerns over game addiction, online harassment, and the monetization of microtransactions have sparked debates about regulation and ethical practices. Additionally, the rapid pace of technological change pressures developers to continually innovate, while cybersecurity threats pose risks for both companies and players. Balancing profitability, player engagement, and ethical responsibility is a central challenge for the modern gaming industry.

Looking forward, emerging technologies like virtual reality (VR), augmented reality (AR), and cloud gaming promise to redefine the gaming experience. VR immerses players fully in digital worlds, while AR blends real and virtual environments for interactive experiences. Cloud gaming enables high-quality gameplay without expensive hardware, further democratizing access. These innovations suggest that gaming will continue to expand its influence, blurring the lines between entertainment, education, and social connection.

In conclusion, the gaming industry has grown from a small pastime into a major economic, cultural, and technological force. Its success stems from innovation, accessibility, and community-driven experiences, while its challenges highlight the need for responsible development and ethical consideration. As technology continues to evolve, gaming is poised to play an even larger role in shaping how people interact, learn, and entertain themselves worldwide.
           """)

print("Article words: ", len(article.split()))
print("Detailed summary:", summarize(article, style="detailed"))
print("Extreme summary:", summarize(article, style="extreme"))
print("Detailed summary words: ", len(summarize(article, style="detailed").split()))
print("Extreme summary words: ", len(summarize(article, style="extreme").split()))

Article words:  480
Detailed summary: The gaming industry has grown from a niche hobby into a global cultural and economic powerhouse . Advances in graphics, processing power, and internet connectivity have enabled increasingly sophisticated and visually stunning games . Mobile technology has expanded gaming’s reach, breaking down traditional barriers of access . Social features have turned gaming into a shared cultural experience .
Extreme summary: The gaming industry has grown from a niche hobby into a global cultural and economic powerhouse.
Detailed summary words:  58
Extreme summary words:  16
