use this enviroment to prevent crash the model when it's training

In [1]:
import time
import torch
import datasets
import evaluate
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    GenerationConfig,
)
from peft import LoraConfig, PeftConfig, get_peft_model, TaskType


dataset = datasets.load_dataset("knkarthick/dialogsum")
dataset

  from .autonotebook import tqdm as notebook_tqdm
Using the latest cached version of the dataset since knkarthick/dialogsum couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /home/ai/.cache/huggingface/datasets/knkarthick___dialogsum/default/0.0.0/a968e7aee0602e257935f1321a02e4287f7d5848 (last modified on Wed Oct  1 16:22:48 2025).


DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [2]:
model_name='google/flan-t5-base'

origin_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [3]:
def NumOfTrainableParams(model):
    total = origin_model.num_parameters()
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    percent = round(trainable/total*100, 2)
    print(f"Number of All parameters: {total}\tNumber of Trainable: {trainable}\nPercentage: {percent}%")

NumOfTrainableParams(origin_model)

Number of All parameters: 247577856	Number of Trainable: 247577856
Percentage: 100.0%


# Pre-Test the FLAN-T5 model

In [4]:
_tdial = dataset["test"][421]["dialogue"]
_tsumm = dataset["test"][421]["summary"]
_tprompt = (
    f""""Write a short summary for this text: {_tdial}"""
)
_tinput = tokenizer(_tprompt, return_tensors="pt")["input_ids"]
_tanswer = tokenizer.decode(
    origin_model.generate(_tinput, max_new_tokens=50)[0], skip_special_tokens=True
)

print(_tprompt)
print(50*"=")
print(f"Human Summary: {_tsumm}\n\nModel Summary: {_tanswer}")

"Write a short summary for this text: #Person1#: Honey, of course I forgive you! I love you so much! I've really missed you. I was wrong to get upset over nothing. 
#Person2#: I'm sorry I haven't called or anything, but right after you decided you wanted a break, I was called up north to put out some major forest fires! I was in the middle of nowhere, working day and night, trying to prevent the blaze from spreading! It was pretty intense. 
#Person1#: Oh, honey, I'm glad you're okay! But I have some exciting news. . . I think I'm pregnant! 
#Person2#: Really? Wow, that's amazing! This is great news! I've always wanted to be a father! We'll go to the doctor first thing in the morning! 
#Person3#: We have your test results back and, indeed, you are pregnant. Let's see here. . . everything seems to be in order. Your approximate due date is October twenty-seventh two thousand and nine, so that means that the baby was conceived on February third, two thousand and nine. 
#Person2#: Are you s

# Fullfiled Fine-Tuning

In [5]:
def tokenize_function(example):
    start_prompt = "Summarize the following conversation.\n\n"
    end_prompt = "\n\nSummary: "
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example["input_ids"] = tokenizer(
        prompt, padding="max_length", truncation=True, return_tensors="pt"
    ).input_ids
    example["labels"] = tokenizer(
        example["summary"], padding="max_length", truncation=True, return_tensors="pt"
    ).input_ids

    return example


ds_tokenized = dataset.map(tokenize_function, batched=True)
ds_tokenized = ds_tokenized.remove_columns(["id", "dialogue", "summary", "topic"])

In [6]:
print(f"shapes of dataset\n", 50*'=')
print(f'Train: {ds_tokenized['train'].shape}')
print(f'Test: {ds_tokenized['test'].shape}')
print(f'Validation: {ds_tokenized['validation'].shape}')

print(f'tokenized dataset:\n{ds_tokenized}')
print(f'\n\ntwo samples:\n {ds_tokenized['train'][:2]['labels']}')

shapes of dataset
Train: (12460, 2)
Test: (1500, 2)
Validation: (500, 2)
tokenized dataset:
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 500
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 1500
    })
})


two samples:
 [[1363, 5, 3931, 31, 7, 652, 3, 9, 691, 18, 413, 6, 11, 7582, 12833, 77, 7, 7786, 7, 376, 12, 43, 80, 334, 215, 5, 12833, 77, 7, 31, 195, 428, 128, 251, 81, 70, 2287, 11, 11208, 12, 199, 1363, 5, 3931, 10399, 10257, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

## Fine Tune Section

In [7]:
torch.cuda.empty_cache()
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=32,
    lora_alpha=16,
    target_modules=['v', 'q'],
    bias='none', 
    lora_dropout=0.05
)
lora_model = get_peft_model(origin_model, lora_config)
NumOfTrainableParams(lora_model)

Number of All parameters: 251116800	Number of Trainable: 3538944
Percentage: 1.41%


In [None]:
torch.cuda.empty_cache()
output_dir = f'./saved_models/mtrain-{str(int(time.time()))}'
training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_steps=1,
    max_steps=1,
    label_names=["labels"],
    fp16=True
)

trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=ds_tokenized['train'],
    eval_dataset=ds_tokenized['validation']
)

# less memory using but it's slower
# origin_model.gradient_checkpointing_enable()

In [9]:
torch.cuda.empty_cache()
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 11.59 GiB of which 51.50 MiB is free. Including non-PyTorch memory, this process has 11.38 GiB memory in use. Of the allocated memory 11.10 GiB is allocated by PyTorch, and 67.05 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)