In [60]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import pandas as pd 
import numpy as np
import time 
import evaluate


In [61]:
device = torch.device('cuda' if torch.cuda.is_available else 'cpu')

In [62]:
# Load the Dataset 
dataset = load_dataset ('knkarthick/dialogsum')
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [63]:
checkpoint = 'google/flan-t5-base'
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, torch_dtype= torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)



In [64]:
def print_trainable_parameters(model):
    trainable_params = 0
    model_params = 0
    for _, params in model.named_parameters():
        model_params += params.numel()
        if params.requires_grad:
            trainable_params += params.numel()
    return f'trainable model parameters: {trainable_params}\n all model parameters: {model_params} \n percentage of trainable model parameters: {100*trainable_params/model_params:.2f}'
print(print_trainable_parameters(model))

trainable model parameters: 247577856
 all model parameters: 247577856 
 percentage of trainable model parameters: 100.00


In [65]:
index = 100
dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

prompt = f'''Summarize the following Conversation.
{dialogue}
Summary: '''

inputs = tokenizer(prompt, return_tensors='pt')
output = model.generate(inputs['input_ids'], max_new_tokens = 100)
output = tokenizer.decode(
    output[0],
    skip_special_tokens = True
)

dash_line = '-'.join('' for x in range(100))

print(dash_line)
print(f'INPUT PROMPT: \n {prompt}')
print(dash_line)
print(f'HUMAN SUMMARY: \n{summary}\n')
print(dash_line)
print(f'MODEL SUMMARY ZERO SHOT : \n {output}')

---------------------------------------------------------------------------------------------------
INPUT PROMPT: 
 Summarize the following Conversation.
#Person1#: OK, that's a cut! Let's start from the beginning, everyone.
#Person2#: What was the problem that time?
#Person1#: The feeling was all wrong, Mike. She is telling you that she doesn't want to see you any more, but I want to get more anger from you. You're acting hurt and sad, but that's not how your character would act in this situation.
#Person2#: But Jason and Laura have been together for three years. Don't you think his reaction would be one of both anger and sadness?
#Person1#: At this point, no. I think he would react the way most guys would, and then later on, we would see his real feelings.
#Person2#: I'm not so sure about that.
#Person1#: Let's try it my way, and you can see how you feel when you're saying your lines. After that, if it still doesn't feel right, we can try something else.
Summary: 
-------------------

# Fine Tuning

In [66]:
# First we change the format of the Dataset
def tokenize_function (example):
    start_prompt = 'Summarize the Following Dialogue. \n\n'
    end_prompt = '\n\n Summary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example['dialogue']]
    example['input_ids'] = tokenizer(prompt, padding = 'max_length', truncation=True,return_tensors='pt').input_ids
    example['labels'] = tokenizer(example['summary'], padding = 'max_length', truncation=True, return_tensors = 'pt').input_ids
    return example

In [67]:
tokenized_dataset = dataset.map(tokenize_function, batched= True)
tokenized_dataset = tokenized_dataset.remove_columns(['id', 'topic','dialogue','summary'])

In [68]:
# Make a smaller size dataset to save time
tokenized_dataset  = tokenized_dataset.filter(lambda example, index:index&100==0, with_indices=True)

In [69]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 1568
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 64
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 192
    })
})

In [70]:
tokenized_dataset['train'].shape

(1568, 2)

In [71]:
tokenized_dataset['test'].shape

(192, 2)

In [72]:
tokenized_dataset['validation'].shape

(64, 2)

In [73]:
output_dir = f'./dialog-summary-trained-flan-t5'

training_args= TrainingArguments(
    output_dir = output_dir,
    learning_rate = 1e-5,
    num_train_epochs=8,
    weight_decay=0.01,
    logging_steps = 2,
    max_steps = 10
)

trainer = Trainer(
    model=model,
    args = training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset = tokenized_dataset['validation']
)

max_steps is given, it will override any value given in num_train_epochs


In [74]:
trainer.train()

  0%|          | 0/10 [00:00<?, ?it/s]

{'loss': 47.75, 'grad_norm': 450.0, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.01}
{'loss': 49.0, 'grad_norm': 430.0, 'learning_rate': 6e-06, 'epoch': 0.02}
{'loss': 47.125, 'grad_norm': 418.0, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.03}
{'loss': 49.0, 'grad_norm': 368.0, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.04}
{'loss': 48.5, 'grad_norm': 442.0, 'learning_rate': 0.0, 'epoch': 0.05}
{'train_runtime': 106.9253, 'train_samples_per_second': 0.748, 'train_steps_per_second': 0.094, 'train_loss': 48.275, 'epoch': 0.05}


TrainOutput(global_step=10, training_loss=48.275, metrics={'train_runtime': 106.9253, 'train_samples_per_second': 0.748, 'train_steps_per_second': 0.094, 'total_flos': 54780588195840.0, 'train_loss': 48.275, 'epoch': 0.05102040816326531})

In [76]:
index = 100
dialogue = dataset['test'][index]['dialogue']
human_summary = dataset['test'][index]['summary']

prompt = f'''Summarize the following : {dialogue} Summary: '''

input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
output = model.generate(input_ids, GenerationConfig(max_new_tokens = 200,num_beams = 1 ))
output = tokenizer.decode(output[0], skip_special_tokens=True)

print(dash_line)
print(f'HUMAN Summary: \n {human_summary}')
print(dash_line)
print(f'Model Summary: \n {output}')

---------------------------------------------------------------------------------------------------
HUMAN Summary: 
 #Person1# and Mike have a disagreement on how to act out a scene. #Person1# proposes that Mike can try to act in #Person1#'s way.
---------------------------------------------------------------------------------------------------
Model Summary: 
 The first person to respond to a woman's request is the one who is the most angry.


# Evaluate The Model with ROUGE Metric

In [78]:
rouge = evaluate.load('rouge')

Downloading builder script: 0.00B [00:00, ?B/s]

In [79]:
dialogue = dataset['test'][10:20]['dialogue']
human_summaries = dataset['test'][10:20]['summary']

model_summaries = []

for _, dialogue in enumerate(dialogue):
    prompt = f'''Summarize the Following Coversation: '
    {dialogue}
    Summary: '''

    inputs_ids = tokenizer(prompt, return_tensors = 'pt').input_ids
    output = model.generate(input_ids, generation_config=GenerationConfig(max_new_tokens = 200))
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    model_summaries.append(output)

zipped_summaries = list(zip(human_summaries, model_summaries))
df = pd.DataFrame(zipped_summaries, columns=['Human Summaries', 'FT Model Summaries'])
df

Unnamed: 0,Human Summaries,FT Model Summaries
0,#Person1# attends Brian's birthday party. Bria...,The two people are trying to figure out how Ja...
1,#Person1# has a dance with Brian at Brian's bi...,Identify the two people who are causing the pr...
2,#Person1# is surprised at the Olympic Stadium'...,#Person1#: It's a cut. #Person1#: I'm not so s...
3,#Person2# shows #Person1# around the construct...,The two speakers are going to try to cut the ice.
4,#Person2# introduces the Olympic Stadium's fin...,"#Person1: I'm not sure what to do, but I think..."
5,#Person1# wants to create a company and is goi...,The problem is that Jason and Laura have been ...
6,#Person1# abandons the idea of creating a comp...,- #P1: I don't know what happened. - #P2: I do...
7,#Person1# wants to start #Person1#'s own busin...,"#Person1#: I'm sorry, but I'm not sure about t..."
8,#Person2# feels itchy. #Person1# doubts it is ...,#Person1: I'm not sure that's the problem. #Pe...
9,#Person1# suspects that #Person2# has chicken ...,#Person1#: I'm sorry to hear that you're actin...


In [80]:
model_results = rouge.compute(
    predictions=model_summaries,
    references=human_summaries[0:len(model_summaries)],
    use_aggregator = True,
    use_stemmer = True
)
print('Model Rouge Score : ', model_results)

Model Rouge Score :  {'rouge1': np.float64(0.1252167060540037), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.1008329165931994), 'rougeLsum': np.float64(0.10173012790887119)}
