In [None]:
%pip install --upgrade pip
%pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1 --quiet

%pip install \
    transformers==4.27.2 \
    datasets==2.11.0 \
    evaluate==0.4.0 \
    rouge_score==0.1.2 \
    loralib==0.1.1 \
    peft==0..3.0 --quiet

In [14]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

In [15]:
dataset = load_dataset("knkarthick/dialogsum")

Found cached dataset csv (C:/Users/mobeenH20/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
model_name = 'google/flan-t5-base'

original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)


In modern deep learning, it is possible to pull out number of parameters & see which one of them are trainable parameters.

In [5]:
def trainable_model_parameters(model):
    trainable_model_params = 0
    total_model_params = 0
    for _, param in model.named_parameters():
        total_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable Model Params: {trainable_model_params} total Model Params: {total_model_params} percentage of trainable Params: {(trainable_model_params/total_model_params)*100}"

In [6]:
trainable_model_parameters(original_model)

'trainable Model Params: 247577856 total Model Params: 247577856 percentage of trainable Params: 100.0'

## PEFT/LoRA model for Fine-Tuning

First we need to setup LoRA/PEFT model  for fine-tuning. Instead of fine-tuning all the parameters of LLM, it fine-tunes small number of trainable parameters to reduce computation and cost.

In [7]:
def tokenize_function(data_instance):
    start_prompt = "Summarize the following dialogue.\n\n"
    end_prompt = "\n\nSummary: "
    
    prompt = [start_prompt + dialogue + end_prompt for dialogue in data_instance['dialogue']]
    
    data_instance['input_ids'] = tokenizer(prompt, padding="max_length", 
                                           truncation=True, return_tensors="pt").input_ids
    data_instance['labels'] = tokenizer(data_instance['summary'], padding="max_length", 
                                           truncation=True, return_tensors="pt").input_ids
    
    return data_instance
def tokenize_function(data_instance):
    start_prompt = "Summarize the following dialogue.\n\n"
    end_prompt = "\n\nSummary: "
    
    prompt = [start_prompt + dialogue + end_prompt for dialogue in data_instance['dialogue']]
    
    data_instance['input_ids'] = tokenizer(prompt, padding="max_length", 
                                           truncation=True, return_tensors="pt").input_ids
    data_instance['labels'] = tokenizer(data_instance['summary'], padding="max_length", 
                                           truncation=True, return_tensors="pt").input_ids
    
    return data_instance
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r = 32,    # rank: defines the dimension of the adatpter model to be trained
    lora_alpha=32,
    target_modules=['q', 'v'],
    lora_dropout=0.05,
    bias='none',
    task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
)

In [8]:
# adding the LoRA layers to the original model

peft_model = get_peft_model(original_model, lora_config)
print(f"number of trainable parameters: {trainable_model_parameters(peft_model)}")

number of trainable parameters: trainable Model Params: 3538944 total Model Params: 251116800 percentage of trainable Params: 1.4092820552029972


## Train PEFT Model

In [10]:
def tokenize_function(data_instance):
    start_prompt = "Summarize the following dialogue.\n\n"
    end_prompt = "\n\nSummary: "
    
    prompt = [start_prompt + dialogue + end_prompt for dialogue in data_instance['dialogue']]
    
    data_instance['input_ids'] = tokenizer(prompt, padding="max_length", 
                                           truncation=True, return_tensors="pt").input_ids
    data_instance['labels'] = tokenizer(data_instance['summary'], padding="max_length", 
                                           truncation=True, return_tensors="pt").input_ids
    
    return data_instance

In [11]:
# to handle dataset for all batches and splits

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary']) 

Loading cached processed dataset at C:\Users\mobeenH20\.cache\huggingface\datasets\knkarthick___csv\knkarthick--dialogsum-cd36827d3490488d\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-66f989879bb1839a.arrow


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Loading cached processed dataset at C:\Users\mobeenH20\.cache\huggingface\datasets\knkarthick___csv\knkarthick--dialogsum-cd36827d3490488d\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-0c39af25fee56ecc.arrow


In [12]:
output_dir = f"./dialogue-summary-peft-training"

training_args = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3,     #higher learning rate than full-fine tuing
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=3,
    max_steps=3
)

peft_trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_datasets['train']
)

In [13]:
peft_trainer.train()
peft_trainer.model.save_pretrained("./dialogue-summary-peft-training-save")
tokenizer.save_pretrained("./dialogue-summary-peft-training-save")



Step,Training Loss
3,46.8333


('./dialogue-summary-peft-training-save\\tokenizer_config.json',
 './dialogue-summary-peft-training-save\\special_tokens_map.json',
 './dialogue-summary-peft-training-save\\spiece.model',
 './dialogue-summary-peft-training-save\\added_tokens.json',
 './dialogue-summary-peft-training-save\\tokenizer.json')

In [16]:
# the trained LoRA/PEFT adapter will be combined with the original FLAN-t5 model 
from peft import PeftModel, PeftConfig

peft_model_base = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

peft_model = PeftModel.from_pretrained(
    peft_model_base,
    "./dialogue-summary-peft-training-save",
    torch_dtype=torch.bfloat16,
    is_trainable=True            # if we need to prepare the model for further training we set to is_trainable=True. rn its  infering PEFT model
)

In [18]:
print(f"number of trainable parameters: {trainable_model_parameters(peft_model)}")

number of trainable parameters: trainable Model Params: 3538944 total Model Params: 251116800 percentage of trainable Params: 1.4092820552029972


## Evaluation of Model (Human Evaluation)

In [19]:
# Model trained previously (full-fine-tuned). 
intruct_model = AutoModelForSeq2SeqLM.from_pretrained("./dialogue-summary-training-save", torch_dtype=torch.float32)

In [21]:
index = 200
dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation:
{dialogue}

Summary:
"""

generation_config = GenerationConfig(max_new_tokens=200, num_beams=1)

inputs = tokenizer(prompt, return_tensors="pt")

original_model_output = tokenizer.decode(original_model.generate(input_ids=inputs['input_ids'].to('cuda'), generation_config=generation_config)[0], skip_special_tokens = True)
intruct_model_output = tokenizer.decode(intruct_model.generate(input_ids=inputs['input_ids'], generation_config=generation_config)[0], skip_special_tokens = True)
peft_model_output = tokenizer.decode(peft_model.generate(input_ids=inputs['input_ids'], generation_config=generation_config)[0], skip_special_tokens = True)


dash_line = "-".join("" for x in range(0,100))
print(dash_line)
print(f"Baseline Human Summary: {summary}")
print(dash_line)
print(f"Original Model Summary: {original_model_output}")
print(dash_line)
print(f"Instruct Model: {intruct_model_output}")
print(dash_line)
print(f"PEFT Model: {peft_model_output}")

---------------------------------------------------------------------------------------------------
Baseline Human Summary: #Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
---------------------------------------------------------------------------------------------------
Original Model Summary: #Person1#: I'm not sure what exactly I would need to upgrade my software. #Person1#: I'm not sure what exactly I would need to upgrade my hardware. #Person1#: I'm not sure what exactly I would need to upgrade my hardware. #Person1#: I'm not sure what exactly I would need to upgrade my hardware. #Person2#: I'm not sure what I would need to upgrade my hardware. #Person1#: I'm not sure what I would need to upgrade my software. #Person1#: I'm not sure what I would need to upgrade my hardware. #Person2#: I might also want to upgrade my hardware. #Person1#: You might want to add a CD-ROM drive.
--------------------------------------------------------------------

## Evaluation using Rouge (Metric Evaluation)

In [22]:
rouge = evaluate.load('rouge')

In [23]:
dialogues = dataset['test'][0:10]['dialogue']
base_line_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

generation_config = GenerationConfig(max_new_tokens=200, num_beams=1)

for _, dialogue in enumerate(dialogues):
    
    prompt = f"""
Summarize the following conversation:
{dialogue}

Summary:
"""
    inputs = tokenizer(prompt, return_tensors="pt")

    original_model_output = tokenizer.decode(original_model.generate(inputs['input_ids'].to('cuda'), generation_config=generation_config)[0], skip_special_tokens = True)
    original_model_summaries.append(original_model_output)
    
    intruct_model_output = tokenizer.decode(intruct_model.generate(inputs['input_ids'], generation_config=generation_config)[0], skip_special_tokens = True)
    instruct_model_summaries.append(intruct_model_output)
    
    peft_model_output = tokenizer.decode(peft_model.generate(input_ids=inputs['input_ids'], generation_config=generation_config)[0], skip_special_tokens = True)
    peft_model_summaries.append(peft_model_output)
    
zipped_summaries = list(zip(base_line_summaries, original_model_summaries, instruct_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns=['human_base_line', 'original_model_summaries', 'instruct_model_summaries', 'peft_model_summaries'])

df
    

Unnamed: 0,human_base_line,original_model_summaries,instruct_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,"Ms. Dawson, please take dictation.",#Person1#: I need to take a dictation for you.,The memo is to be distributed to all employees...
1,In order to prevent employees from wasting tim...,#Person1: This is an intra-office memo. #Perso...,#Person1#: I need to take a dictation for you.,The memo is to be distributed to all employees...
2,Ms. Dawson takes a dictation for #Person1# abo...,Message to all employees by the President this...,#Person1#: I need to take a dictation for you.,The memo is to be distributed to all employees...
3,#Person2# arrives late because of traffic jam....,The person who got stuck in traffic is a car d...,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
4,#Person2# decides to follow #Person1#'s sugges...,"The person is a little late, but he's finally ...",The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
5,#Person2# complains to #Person1# about the tra...,The driver of a car is having a problem.,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
7,#Person1# tells Kate that Masha and Hero are g...,#Person1: #Person2: What a shame! #Person1: We...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are getting married.,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
9,#Person1# and Brian are at the birthday party ...,Brian is celebrating his birthday.,"#Person1#: Happy Birthday, Brian. #Person2#: I...",Brian's birthday is coming up.


In [24]:

original_model_results = rouge.compute(
        predictions=original_model_summaries, 
        references=base_line_summaries[0:len(instruct_model_summaries)],
        use_aggregator=True,
        use_stemmer=True
    )

instruct_model_results = rouge.compute(
        predictions=instruct_model_summaries,
        references=base_line_summaries[0:len(instruct_model_summaries)],
        use_aggregator=True,
        use_stemmer=True
    )


peft_model_results = rouge.compute(
        predictions=peft_model_summaries,
        references=base_line_summaries[0:len(instruct_model_summaries)],
        use_aggregator=True,
        use_stemmer=True
    )


print("Original Model: ")
print(original_model_results)

print("Instruct Model: ")
print(instruct_model_results)

print("PEFT Model: ")
print(peft_model_results)

Original Model: 
{'rouge1': 0.1937527185828484, 'rouge2': 0.05396814464055102, 'rougeL': 0.16514758907387836, 'rougeLsum': 0.16799179815988158}
Instruct Model: 
{'rouge1': 0.3226723646723646, 'rouge2': 0.1411928370659991, 'rougeL': 0.27076383358992057, 'rougeLsum': 0.27249606271345406}
PEFT Model: 
{'rouge1': 0.30155707905707907, 'rouge2': 0.10492587199223355, 'rougeL': 0.24782560032560028, 'rougeLsum': 0.25171791296791296}


#### PEFT model is little less better than Instruct model but it saves alot more computational power