In [None]:
%pip install --upgrade pip
%pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1 --quiet

%pip install \
    transformers==4.27.2 \
    datasets==2.11.0 \
    evaluate==0.4.0 \
    rouge_score==0.1.2 \
    loralib==0.1.1 \
    peft==0..3.0 --quiet

In [3]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

In [4]:
dataset = load_dataset("knkarthick/dialogsum")

Downloading readme:   0%|          | 0.00/4.65k [00:00<?, ?B/s]

Downloading and preparing dataset csv/knkarthick--dialogsum to C:/Users/mobeenH20/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/442k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to C:/Users/mobeenH20/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
model_name = 'google/flan-t5-base'

original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Downloading config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]


In modern deep learning, it is possible to pull out number of parameters & see which one of them are trainable parameters.

In [81]:
def trainable_model_parameters(model):
    trainable_model_params = 0
    total_model_params = 0
    for _, param in model.named_parameters():
        total_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable Model Params: {trainable_model_params} total Model Params: {total_model_params} percentage of trainable Params: {(trainable_model_params/total_model_params)*100}"

In [82]:
trainable_model_parameters(original_model)

'trainable Model Params: 3538944 total Model Params: 251116800 percentage of trainable Params: 1.4092820552029972'

## PEFT/LoRA model for Fine-Tuning

First we need to setup LoRA/PEFT model  for fine-tuning. Instead of fine-tuning all the parameters of LLM, it fine-tunes small number of trainable parameters to reduce computation and cost.

In [64]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r = 32,    # rank: defines the dimension of the adatpter model to be trained
    lora_alpha=32,
    target_modules=['q', 'v'],
    lora_dropout=0.05,
    bias='none',
    task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
)

In [69]:
# adding the LoRA layers to the original model

peft_model = get_peft_model(original_model, lora_config)
print(f"number of trainable parameters: {trainable_model_parameters(peft_model)}")

number of trainable parameters: trainable Model Params: 3538944 total Model Params: 251116800 percentage of trainable Params: 1.4092820552029972


## Train PEFT Model

In [76]:
output_dir = f"./dialogue-summary-peft-training"

training_args = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3,     #higher learning rate than full-fine tuing
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=3,
    max_steps=3
)

peft_trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_datasets['train']
)

In [77]:
peft_trainer.train()
peft_trainer.model.save_pretrained("./dialogue-summary-peft-training-save")
tokenizer.save_pretrained("./dialogue-summary-peft-training-save")



Step,Training Loss
3,47.5833


('./dialogue-summary-peft-training-save\\tokenizer_config.json',
 './dialogue-summary-peft-training-save\\special_tokens_map.json',
 './dialogue-summary-peft-training-save\\spiece.model',
 './dialogue-summary-peft-training-save\\added_tokens.json',
 './dialogue-summary-peft-training-save\\tokenizer.json')

In [79]:
# the trained LoRA/PEFT adapter will be combined with the original FLAN-t5 model 
from peft import PeftModel, PeftConfig

peft_model_base = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

peft_model = PeftModel.from_pretrained(
    peft_model_base,
    "./dialogue-summary-peft-training-save",
    torch_dtype=torch.bfloat16,
    is_trainable=False            # if we need to prepare the model for further training we set to is_trainable=True. rn its  infering PEFT model
)

In [86]:
print(f"number of trainable parameters: {trainable_model_parameters(peft_model)}")

number of trainable parameters: trainable Model Params: 0 total Model Params: 251116800 percentage of trainable Params: 0.0


## Evaluation of Model (Human Evaluation)

In [90]:
# Model trained previously (full-fine-tuned). 
intruct_model = AutoModelForSeq2SeqLM.from_pretrained("./dialogue-summary-training-save", torch_dtype=torch.float32)

In [95]:
index = 200
dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation:
{dialogue}

Summary:
"""

generation_config = GenerationConfig(max_new_tokens=200, num_beams=1)

inputs = tokenizer(prompt, return_tensors="pt")

original_model_output = tokenizer.decode(original_model.generate(input_ids=inputs['input_ids'].to('cuda'), generation_config=generation_config)[0], skip_special_tokens = True)
intruct_model_output = tokenizer.decode(intruct_model.generate(input_ids=inputs['input_ids'], generation_config=generation_config)[0], skip_special_tokens = True)
peft_model_output = tokenizer.decode(peft_model.generate(input_ids=inputs['input_ids'], generation_config=generation_config)[0], skip_special_tokens = True)


print(dash_line)
print(f"Baseline Human Summary: {summary}")
print(dash_line)
print(f"Original Model Summary: {original_model_output}")
print(dash_line)
print(f"Instruct Model: {intruct_model_output}")
print(dash_line)
print(f"PEFT Model: {peft_model_output}")

---------------------------------------------------------------------------------------------------
Baseline Human Summary: #Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
---------------------------------------------------------------------------------------------------
Original Model Summary: You'd like to upgrade your system, but you'd like to make your own flyers and banners.
---------------------------------------------------------------------------------------------------
Instruct Model: #Person1#: You'd probably want to upgrade your computer. #Person2#: You could also upgrade your hardware. #Person1#: You'd probably want a faster processor, more memory and a faster modem. #Person2#: You might want to add a CD-ROM drive too.
---------------------------------------------------------------------------------------------------
PEFT Model: #Person1#: You'd probably want to upgrade your computer. #Person2#: You could also upgrade your hardware. #

## Evaluation using Rouge (Metric Evaluation)

In [96]:
rouge = evaluate.load('rouge')

In [97]:
dialogues = dataset['test'][0:10]['dialogue']
base_line_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

generation_config = GenerationConfig(max_new_tokens=200, num_beams=1)

for _, dialogue in enumerate(dialogues):
    
    prompt = f"""
Summarize the following conversation:
{dialogue}

Summary:
"""
    inputs = tokenizer(prompt, return_tensors="pt")

    original_model_output = tokenizer.decode(original_model.generate(inputs['input_ids'].to('cuda'), generation_config=generation_config)[0], skip_special_tokens = True)
    original_model_summaries.append(original_model_output)
    
    intruct_model_output = tokenizer.decode(intruct_model.generate(inputs['input_ids'], generation_config=generation_config)[0], skip_special_tokens = True)
    instruct_model_summaries.append(intruct_model_output)
    
    peft_model_output = tokenizer.decode(peft_model.generate(input_ids=inputs['input_ids'], generation_config=generation_config)[0], skip_special_tokens = True)
    peft_model_summaries.append(peft_model_output)
    
zipped_summaries = list(zip(base_line_summaries, original_model_summaries, instruct_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns=['human_base_line', 'original_model_summaries', 'instruct_model_summaries', 'peft_model_summaries'])

df
    

Unnamed: 0,human_base_line,original_model_summaries,instruct_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,The memo should go out to all employees by thi...,#Person1#: I need to take a dictation for you.,The memo is to be distributed to all employees...
1,In order to prevent employees from wasting tim...,#Person1#: This memo is for the intra-office a...,#Person1#: I need to take a dictation for you.,The memo is to be distributed to all employees...
2,Ms. Dawson takes a dictation for #Person1# abo...,"#Person1#: Ms. Dawson, I need to take a dictat...",#Person1#: I need to take a dictation for you.,The memo is to be distributed to all employees...
3,#Person2# arrives late because of traffic jam....,People are talking about the traffic jams in t...,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
4,#Person2# decides to follow #Person1#'s sugges...,#Prsect1: I'm finally here. #Prsect1: I got st...,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
5,#Person2# complains to #Person1# about the tra...,#Person1#: I got stuck in traffic. #Person2#: ...,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
7,#Person1# tells Kate that Masha and Hero are g...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are divorced.,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
9,#Person1# and Brian are at the birthday party ...,Brian's birthday.,"#Person1#: Happy Birthday, Brian. #Person2#: I...",Brian's birthday is coming up.


In [98]:

original_model_results = rouge.compute(
        predictions=original_model_summaries, 
        references=base_line_summaries[0:len(instruct_model_summaries)],
        use_aggregator=True,
        use_stemmer=True
    )

instruct_model_results = rouge.compute(
        predictions=instruct_model_summaries,
        references=base_line_summaries[0:len(instruct_model_summaries)],
        use_aggregator=True,
        use_stemmer=True
    )


peft_model_results = rouge.compute(
        predictions=peft_model_summaries,
        references=base_line_summaries[0:len(instruct_model_summaries)],
        use_aggregator=True,
        use_stemmer=True
    )


print("Original Model: ")
print(original_model_results)

print("Instruct Model: ")
print(instruct_model_results)

print("PEFT Model: ")
print(peft_model_results)

Original Model: 
{'rouge1': 0.2405180862338082, 'rouge2': 0.09032938076416336, 'rougeL': 0.210439169181522, 'rougeLsum': 0.2116263920463764}
Instruct Model: 
{'rouge1': 0.3226723646723646, 'rouge2': 0.1411928370659991, 'rougeL': 0.27076383358992057, 'rougeLsum': 0.27249606271345406}
PEFT Model: 
{'rouge1': 0.30155707905707907, 'rouge2': 0.10492587199223355, 'rougeL': 0.24782560032560028, 'rougeLsum': 0.25171791296791296}


#### PEFT model is little less better than Instruct model but it saves alot more computational power