In [4]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
model_name = "google/flan-t5-base"
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [6]:
from peft import LoraConfig,get_peft_model, TaskType
lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

In [7]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"
print(print_number_of_trainable_model_parameters(original_model))

trainable model parameters: 247577856
all model parameters: 247577856
percentage of trainable model parameters: 100.00%


#Reduce learnable parameters in original model

In [8]:
peft_model = get_peft_model(original_model,lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 3538944
all model parameters: 251116800
percentage of trainable model parameters: 1.41%


In [9]:
ds = load_dataset("knkarthick/dialogsum")
ds

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [10]:
def tokenized_function(example):
  strat_prompt = "Summarize the following conversation.\n\n"
  end_prompt = "\n\nSummary: "
  prompt = [strat_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
  example["input_ids"] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
  example["labels"] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
  return example

tokenized_dataset = ds.map(tokenized_function, batched=True)
tokenized_dataset = tokenized_dataset.remove_columns(["id", "topic", "dialogue", "summary",])

In [11]:
tokenized_dataset = tokenized_dataset.filter(lambda example, index: index % 100 == 0, with_indices=True)

In [12]:
output_dir = f'./peft-dialogue-summary-training-{str(int(time.time()))}'
peft_training_args = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3,
    num_train_epochs=1,
    logging_steps=1,
    max_steps=1
)
peft_trainer = Trainer(
    model=original_model,
    args=peft_training_args,
    train_dataset=tokenized_dataset["train"],
)

max_steps is given, it will override any value given in num_train_epochs


In [13]:
#peft_trainer.train()
#peft_model_path = "./peft-dialogue-summary-checkpoint-local/"
#peft_trainer.model.save_pretrained(peft_model_path)
#tokenizer.save_pretrained(peft_model_path)

In [14]:
peft_trainer.train()
peft_model_path = "./peft-dialogue-summary-checkpoint-local/"
peft_model.save_pretrained(peft_model_path)  # Ensure this is called on the actual PEFT model
tokenizer.save_pretrained(peft_model_path)


100%|██████████| 1/1 [05:15<00:00, 315.81s/it]

{'loss': 48.0, 'grad_norm': 9.35593032836914, 'learning_rate': 0.0, 'epoch': 0.06}


100%|██████████| 1/1 [05:17<00:00, 317.66s/it]


{'train_runtime': 317.6583, 'train_samples_per_second': 0.025, 'train_steps_per_second': 0.003, 'train_loss': 48.0, 'epoch': 0.06}


('./peft-dialogue-summary-checkpoint-local/tokenizer_config.json',
 './peft-dialogue-summary-checkpoint-local/special_tokens_map.json',
 './peft-dialogue-summary-checkpoint-local/tokenizer.json')

In [15]:
from peft import PeftModel, PeftConfig
peft_model_base = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
peft_model = PeftModel.from_pretrained(peft_model,'./peft-dialogue-summary-checkpoint-local/', torch_dtype=torch.bfloat16, is_trainable=False)

In [16]:
input_text = "Summarize the following dialogue: 'Hi, how are you?' 'I'm good, thanks! How about you?'"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = peft_model.generate(**inputs, max_length=100)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Generated Summary:", generated_text)

Generated Summary: I'm fine.


In [17]:
input_text = "Summarize the following dialogue: 'What do you think about the recent election results?' 'I think it's a clear reflection of how divided the country is. The urban areas overwhelmingly supported one candidate, while rural regions backed the other.' 'Yes, but it seems the economic policies played a major role in this division. One candidate promised tax cuts"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = peft_model.generate(**inputs, max_length=100)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Generated Summary:", generated_text)

Generated Summary: The other candidate promised tax cuts.
