# Fine-tune FLAN-T5 with RL (PPO) and PEFT to generate less-toxic summaries

In [1]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM, GenerationConfig
from datasets import load_dataset
from peft import PeftModel, PeftConfig, LoraConfig, TaskType

# trl: Transofrmer Reinforcement Learning that provides an access to PPO
from trl import PPOTrainer, PPOConfig, AutoModelForSeq2SeqLMWithValueHead
from trl import create_reference_model
from trl.core import LengthSampler 

import torch
import evaluate
import numpy as np
import pandas as pd

from tqdm import tqdm
tqdm.pandas()

# Load FLAN-T5 model, prepare Reward model and Toxicity Evaluator

In [2]:
model_name = "google/flan-t5-base"
huggingface_dataset_name = "knkarthick/dialogsum"

dataset_original = load_dataset(huggingface_dataset_name)
dataset_original

Found cached dataset csv (/home/tslab/phusaeng/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [14]:
idx = 50
print(dataset_original['train']['dialogue'][:50+idx][idx])
print('\nSUMMARY:\n')
print(dataset_original['train']['summary'][:50+idx][idx])

#Person1#: You have the right to remain silent. Anything you say can and will be used against you in a court of law. You have the right to have an attorney present during questioning. If you cannot afford an attorney, one will be appointed for you. Do you understand?
#Person2#: Yes.
#Person1#: What's your name?
#Person2#: My name is James.
#Person1#: What's your nationality?
#Person2#: American.
#Person1#: What's your relationship with the victim?
#Person2#: I don't know him.
#Person1#: Why did you attack the victim?
#Person2#: Because he beat me first when I tried to stop him from grabbing my bag and running away.
#Person1#: How many times did you stab the victim?
#Person2#: I stabbed his belly three times.
#Person1#: Did you know that your actions might cause serous injuries or death?
#Person2#: I knew, but I couldn't control myself.
#Person1#: Was it your intention to kill the victim?
#Person2#: No. I didn't kill him on purpose, madam. It's him who caused the incident. I need to see

In [7]:
# preprocess dataset
def build_dataset(model_name, 
                dataset_name,
                input_min_text_length,
                input_max_text_length,
                filtered_dataset=True):
    # load dataset
    dataset = load_dataset(dataset_name, split='train')
    if filtered_dataset: #filter the dialogues of length between input_min_text_length and input_max_text_length
        dataset = dataset.filter(lambda x: len(x['dialogue']) > input_min_text_length and len(x['dialogue']) <= input_max_text_length, batched=False) 

    # Prepare tokenizer. Setting device_map="auto" allows to switch between CPU and GPU automatically.
    tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")

    def tokenize(sample):
        prompt = f"""
Summarize the following conversation.

{sample['dialogue']}

Summary:
"""
        sample['input_ids'] = tokenizer.encode(prompt)

        # This must be called "query", which is a requirement of our PPO library.
        sample['query'] = tokenizer.decode(sample['input_ids'])
        return sample

    # Tokenize each dialogue
    dataset = dataset.map(tokenize, batched=False)
    dataset.set_format(type='torch')

    # split the dataset into train and test.
    dataset_splits = dataset.train_test_split(test_size=0.2, shuffle=False, seed=42)

    return dataset_splits

dataset = build_dataset(model_name=model_name,
                        dataset_name=huggingface_dataset_name,
                        input_min_text_length=200,
                        input_max_text_length=1000)

print(dataset)

Found cached dataset csv (/home/tslab/phusaeng/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


Filter:   0%|          | 0/12460 [00:00<?, ? examples/s]

Map:   0%|          | 0/10022 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'query'],
        num_rows: 8017
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'query'],
        num_rows: 2005
    })
})


In [28]:
# helper function to count number of parameters
def count_parameters(model):
    def num_to_readable_str(num):
        return format(num, ',')

    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"\ntrainable params: {num_to_readable_str(trainable_model_params)}\nall params: {num_to_readable_str(all_model_params)}\npercentage of trainable params: {num_to_readable_str(round(100*trainable_model_params/all_model_params, 3))}%"

In [29]:
# add the adapter to the original FLAN-T5
lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
)

model = AutoModelForSeq2SeqLM.from_pretrained(model_name, 
                                              torch_dtype=torch.bfloat16,)

peft_path = "./weights/peft-dialogue-summary-training-1690379059/checkpoint-500"
peft_model = PeftModel.from_pretrained(model, 
                                       peft_path,
                                       config=lora_config,
                                       torch_dtype=torch.bfloat16,
                                       device_map="auto",
                                       is_trainable=True)
print(f'PEFT model parameters to be updated: \n{count_parameters(peft_model)}')

PEFT model parameters to be updated: 

trainable params: 3,538,944
all params: 251,116,800
percentage of trainable params: 1.409%


In [34]:
ppo_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(peft_model,
                                                               torch_dtype=torch.bfloat16,
                                                               is_trainable=True,)
print(f'Number of parameters to be updated (ValueHead + 769 params):\n{count_parameters(ppo_model)}')
print(ppo_model.v_head)

Number of parameters to be updated (ValueHead + 769 params):

trainable params: 3,539,713
all params: 251,117,569
percentage of trainable params: 1.41%
ValueHead(
  (dropout): Dropout(p=0.1, inplace=False)
  (summary): Linear(in_features=768, out_features=1, bias=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
)


In [35]:
# init ref model for preventing reward hacking
ref_model = create_reference_model(ppo_model)

print(f'Referece model parameters to be updated:\n{count_parameters(ref_model)}')

Referece model parameters to be updated:

trainable params: 0
all params: 251,117,569
percentage of trainable params: 0.0%


In [45]:
# Prepare reward model (that is representing human preference). In this case, we are trying to detoxify the LLM.
# Our reward model will provide the rewards based on the toxicity of the generated text. We use pre-trained sentiment analysis model, 
# which will classify not hate and hate.
toxicity_model_name = 'facebook/roberta-hate-speech-dynabench-r4-target'
toxicity_tokenizer = AutoTokenizer.from_pretrained(toxicity_model_name, device_map='auto')
toxicity_model = AutoModelForSequenceClassification.from_pretrained(toxicity_model_name, device_map='auto')
print(toxicity_model.config.id2label)

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


{0: 'nothate', 1: 'hate'}


In [144]:
# example
not_hate_index = 0
def example_getting_rewards(text, not_hate_index=0):
    print(f'input text: {text}')
    non_toxic_text = text
    toxicity_input_ids = toxicity_tokenizer.encode(non_toxic_text, return_tensors='pt')
    logits = toxicity_model(input_ids=toxicity_input_ids).logits
    print(f'logits [not hate, hate]: {logits.tolist()[0]}')
    # print probabilities of not hate and hate
    probs = torch.softmax(logits, dim=1).tolist()[0]
    print(f'logits prob [not hate, hate]: {probs}')
    # get the reward for not hate
    nothate_reward = (logits[:, not_hate_index]).tolist()
    print(f'reward: {nothate_reward}')
non_toxic_text = "I want to kiss you"
toxic_text = "fuck you damn hate you"
example_getting_rewards(non_toxic_text, not_hate_index)

input text: I want to kiss you
logits [not hate, hate]: [4.657958030700684, -4.078615188598633]
logits prob [not hate, hate]: [0.9998394250869751, 0.000160577503265813]
reward: [4.657958030700684]


In [145]:
example_getting_rewards(toxic_text, not_hate_index)

input text: fuck you damn hate you
logits [not hate, hate]: [-2.2604756355285645, 1.990470051765442]
logits prob [not hate, hate]: [0.01405052188783884, 0.9859494566917419]
reward: [-2.2604756355285645]


In [147]:
# Try to generate reward using pipeline
device = 0 if torch.cuda.is_available() else "cpu"

sentiment_pipe = pipeline("sentiment-analysis",
                          model=toxicity_model,
                          tokenizer=toxicity_tokenizer,
                          device=device) 
reward_logits_kwargs = {
    "top_k": None, # return all scores
    "function_to_apply": "none",  # set to "none" to retrieve raw logits
    "batch_size": 16
}
reward_probabilities_kwargs = {
    "top_k": None, # return all scores
     "function_to_apply": "softmax",  # set to "softmax" to retrieve probabilities
    "batch_size": 16
}

print("Reward model output for non-toxic text:")
print(f'non-toxic text: {non_toxic_text}')
print(sentiment_pipe(non_toxic_text, **reward_logits_kwargs))
print(sentiment_pipe(non_toxic_text, **reward_probabilities_kwargs))
print("Reward model output for toxic text:")
print(f'toxic text: {toxic_text}')
print(sentiment_pipe(toxic_text, **reward_logits_kwargs))
print(sentiment_pipe(toxic_text, **reward_probabilities_kwargs))

Reward model output for non-toxic text:
non-toxic text: I want to kiss you
[{'label': 'nothate', 'score': 4.657958030700684}, {'label': 'hate', 'score': -4.078615188598633}]
[{'label': 'nothate', 'score': 0.9998394250869751}, {'label': 'hate', 'score': 0.00016057751781772822}]
Reward model output for toxic text:
toxic text: fuck you damn hate you
[{'label': 'hate', 'score': 1.990470051765442}, {'label': 'nothate', 'score': -2.2604756355285645}]
[{'label': 'hate', 'score': 0.9859494566917419}, {'label': 'nothate', 'score': 0.014050522819161415}]


# Evaluate Toxicity

In [148]:
toxicity_evaluator = evaluate.load("toxicity",
                                   toxicity_model_name,
                                   module_type="measurement",
                                   toxic_label="hate")

Downloading builder script:   0%|          | 0.00/6.08k [00:00<?, ?B/s]

In [149]:
# try to calculate toxicity for the same sentences
toxicity_score = toxicity_evaluator.compute(predictions=[
    non_toxic_text
])
print(f'toxic score for non-toxic text: {toxicity_score}')

toxicity_score = toxicity_evaluator.compute(predictions=[
    toxic_text
])
print(f'toxic score for toxic text: {toxicity_score}')

toxic score for non-toxic text: {'toxicity': [0.00016057751781772822]}
toxic score for toxic text: {'toxicity': [0.9859494566917419]}


In [180]:
def evaluate_toxicity(model,
                      toxicity_evaluator,
                      tokenizer,
                      dataset,
                      num_samples):
    max_new_tokens = 100

    toxicities = []
    # input_texts =[]
    for i, sample in tqdm(enumerate(dataset)):
        input_text = sample['query']

        if i > num_samples:
            break

        input_ids = tokenizer(input_text, return_tensors='pt', padding=True).input_ids.to(device)

        generation_config = GenerationConfig(max_new_tokens=max_new_tokens,
                                             top_k=0.0,
                                             top_p=1.0,
                                             do_sample=True)
        response_token_ids = model.generate(input_ids=input_ids,
                                            generation_config=generation_config,)
        generated_text = tokenizer.decode(response_token_ids[0], skip_special_tokens=True)
        toxicity_score = toxicity_evaluator.compute(predictions=[input_text + " " + generated_text])
        toxicities.append(toxicity_score['toxicity'])
    print(toxicities)
    mean = np.mean(toxicities)
    std = np.std(toxicities)
    return mean, std

In [181]:
# perform the calculation of the mdoel toxicity before fine-tuning/detoxification
tokenizer = AutoTokenizer.from_pretrained(model_name, device_map='auto')
mean_before_detoxification, std_before_detoxification = evaluate_toxicity(ref_model.to(device), 
                                                                        toxicity_evaluator=toxicity_evaluator, 
                                                                        tokenizer=tokenizer, 
                                                                        dataset=dataset['test'],
                                                                        num_samples=10)

11it [00:07,  1.41it/s]

[[0.0016826004721224308], [0.06922271847724915], [0.023195229470729828], [0.0024963051546365023], [0.0014516387600451708], [0.017385272309184074], [0.0683618113398552], [0.0051985434256494045], [0.19036133587360382], [0.017040222883224487], [0.013291467912495136]]





In [182]:
print(f'toxiciy [mean, std] before detoxification: [{mean_before_detoxification}, {std_before_detoxification}]')

toxiciy [mean, std] before detoxification: [0.0372442860071632, 0.053758338404269385]


# Perform Fine-tuning to Detoxify the summaries

In [183]:
# init PPOTrainer
learning_rate=1.41e-5
max_ppo_epochs=5
mini_batch_size=4
batch_size=16

config = PPOConfig(
    model_name=model_name,
    learning_rate=learning_rate,
    ppo_epochs=max_ppo_epochs,
    mini_batch_size=mini_batch_size,
    batch_size=batch_size,
)

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

ppo_trainer = PPOTrainer(config=config, 
                         model=ppo_model,
                         ref_model=ref_model,
                         tokenizer=tokenizer,
                         dataset=dataset['train'],
                         data_collator=collator)

# Fine-tune the model using RLHF

In [186]:
output_min_length = 100
output_max_length = 400
output_length_sampler = LengthSampler(output_min_length, output_max_length)

generation_kwargs = {
    "min_length": 5,
    "top_k":0.0,
    "top_p":1.0,
    "do_sample":True,
}
reward_kwargs = {
    "top_k": None, # return all scores
    "function_to_apply": "none", # we want the raw logits without softmax
    "batch_size": 16
}

max_ppo_steps = 10
for step, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    # Break when you reach max_steps.
    if step >= max_ppo_steps:
        break   

    prompt_tensors = batch["input_ids"]

    # Get response from FLAN-T5/PEFT LLM.
    summary_tensors = []

    for prompt_tensor in prompt_tensors:
        max_new_tokens = output_length_sampler()        
            
        generation_kwargs["max_new_tokens"] = max_new_tokens
        summary = ppo_trainer.generate(prompt_tensor, **generation_kwargs)
        
        summary_tensors.append(summary.squeeze()[-max_new_tokens:])
        
    # This needs to be called "response".
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in summary_tensors]

    # Compute reward outputs.
    query_response_pairs = [q + r for q, r in zip(batch["query"], batch["response"])]    
    rewards = sentiment_pipe(query_response_pairs, **reward_kwargs)

    # You use the `nothate` item because this is the score for the positive `nothate` class.
    reward_tensors = [torch.tensor(reward[not_hate_index]["score"]) for reward in rewards]    

    # Run PPO step.
    stats = ppo_trainer.step(prompt_tensors, summary_tensors, reward_tensors)
    ppo_trainer.log_stats(stats, batch, reward_tensors)
    
    print(f'objective/kl: {stats["objective/kl"]}')
    print(f'ppo/returns/mean: {stats["ppo/returns/mean"]}')
    print(f'ppo/policy/advantages_mean: {stats["ppo/policy/advantages_mean"]}')
    print('-'.join('' for x in range(100)))

0it [00:00, ?it/s]You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
1it [00:16, 16.35s/it]

objective/kl: 30.14926528930664
ppo/returns/mean: -0.7324256300926208
ppo/policy/advantages_mean: 9.761287333986957e-10
---------------------------------------------------------------------------------------------------


2it [00:28, 13.79s/it]

objective/kl: 25.74405288696289
ppo/returns/mean: -0.6467403173446655
ppo/policy/advantages_mean: -1.1885238349051974e-09
---------------------------------------------------------------------------------------------------


3it [00:39, 12.48s/it]

objective/kl: 18.96527862548828
ppo/returns/mean: -0.27608832716941833
ppo/policy/advantages_mean: 2.396524312331394e-09
---------------------------------------------------------------------------------------------------


4it [00:50, 11.81s/it]

objective/kl: 15.906065940856934
ppo/returns/mean: 0.05636598542332649
ppo/policy/advantages_mean: -2.8726185874461407e-09
---------------------------------------------------------------------------------------------------


5it [01:01, 11.75s/it]

objective/kl: 20.988178253173828
ppo/returns/mean: -0.2991626262664795
ppo/policy/advantages_mean: -3.004546389462348e-09
---------------------------------------------------------------------------------------------------


6it [01:11, 11.12s/it]

objective/kl: 16.159339904785156
ppo/returns/mean: 0.18312254548072815
ppo/policy/advantages_mean: -1.1994056414721399e-08
---------------------------------------------------------------------------------------------------


7it [01:21, 10.75s/it]

objective/kl: 15.339092254638672
ppo/returns/mean: 0.07548432052135468
ppo/policy/advantages_mean: -2.7277082814691767e-09
---------------------------------------------------------------------------------------------------


8it [01:31, 10.50s/it]

objective/kl: 17.691499710083008
ppo/returns/mean: 0.008339645341038704
ppo/policy/advantages_mean: -1.619415379572331e-09
---------------------------------------------------------------------------------------------------


9it [01:40,  9.91s/it]

objective/kl: 12.772361755371094
ppo/returns/mean: 0.4266948699951172
ppo/policy/advantages_mean: -1.4454728969326425e-08
---------------------------------------------------------------------------------------------------


10it [01:48, 10.84s/it]

objective/kl: 12.81867504119873
ppo/returns/mean: 0.5007023215293884
ppo/policy/advantages_mean: 3.1157996183139858e-09
---------------------------------------------------------------------------------------------------





# Evaluate the model qualitatively

In [204]:
batch_size = 20
compare_results = {}
df_batch = dataset['test'][0:batch_size]

compare_results['query'] = df_batch['query']
prompt_tensors = df_batch['input_ids']

summary_tensors_ref = []
summary_tensors = []

# Get response from ppo and base model
for i in tqdm(range(batch_size)):
    gen_len = output_length_sampler()
    generation_kwargs['max_new_tokens'] = gen_len

    summary = ref_model.generate( 
        input_ids=torch.as_tensor(prompt_tensors[i]).unsqueeze(dim=0).to(device),
        **generation_kwargs
    ).squeeze()[-gen_len:]
    summary_tensors_ref.append(summary)

    summary = ppo_model.generate(
        input_ids=torch.as_tensor(prompt_tensors[i]).unsqueeze(dim=0).to(device),
        **generation_kwargs
    ).squeeze()[-gen_len:]
    summary_tensors.append(summary)

# decode response
compare_results['response_before'] = [tokenizer.decode(summary_tensors_ref[i]) for i in range(batch_size)]
compare_results['response_after'] = [tokenizer.decode(summary_tensors[i]) for i in range(batch_size)]

# Sentiment analysis of query/response pairs before/after
texts_before = [d+s for d, s in zip(compare_results['query'], compare_results['response_before'])]
rewards_before = sentiment_pipe(texts_before, **reward_kwargs)
compare_results['reward_before'] = [reward[not_hate_index]['score'] for reward in rewards_before]

texts_after = [d+s for d, s in zip(compare_results['query'], compare_results['response_after'])]
rewards_after = sentiment_pipe(texts_after, **reward_kwargs)
compare_results['reward_after'] = [reward[not_hate_index]['score'] for reward in rewards_after]

100%|██████████| 20/20 [00:22<00:00,  1.14s/it]


In [206]:
compare_results.keys()

dict_keys(['query', 'response_before', 'response_after', 'reward_before', 'reward_after'])

In [207]:
pd.set_option('display.max_colwidth', 500)
df_compare_results = pd.DataFrame(compare_results)
df_compare_results['reward_diff'] = df_compare_results['reward_after'] - df_compare_results['reward_before']
df_compare_results_sorted = df_compare_results.sort_values(by=['reward_diff'], ascending=False).reset_index(drop=True)
df_compare_results_sorted

Unnamed: 0,query,response_before,response_after,reward_before,reward_after,reward_diff
0,"Summarize the following conversation. #Person1#: Mom, I just finished my paper. Can you proofread it before I hand it in? #Person2#: Sure, let's take a look. Sweetie, this is terrific. Your ideas are so original. #Person1#: Thanks. #Person2#: I can tell you worked hard on it. #Person1#: I really did! I started thinking about what I wanted to say three weeks ago. #Person2#: Well, it was definitely worth all the time. #Person1#: Let's just hope my teacher agrees. Summary: </s>","<pad> #Person1# proofread the paper and keep #Person2# thinking about it. #Person1# shouts it's excellent but #Person1# hopes it's worth the time to ask ""My teacher"" to agree.</s>",<pad> Mom suggested that #Person1# read a paper that is terrific and work on it.</s>,1.850283,2.806221,0.955938
1,"Summarize the following conversation. #Person1#: I'd like to have this cashed, please. #Person2#: Please put you name and address here. May I see your passport? #Person1#: Yes. #Person2#: How would you like it? #Person1#: Ten hundreds and ten twenties, and the rest in small change, please. #Person2#: OK. Here you are. Summary: </s>","<pad> #Person1# needs to get a handcash cashed. #Person2# tells #Person1# how many bills she wants in the appearance of your passport, and the rest of the sum in small change.</s>",<pad> #Person1# looks after the chip.</s>,1.217562,2.08429,0.866728
2,"Summarize the following conversation. #Person1#: How much are you asking for this? #Person2#: I'm offering them to you at 150 yuan a piece. Is that all right? #Person1#: Is tax already included in their price? #Person2#: Yes. Our price can't be matched. #Person1#: Would you consider a volume discount? #Person2#: If you buy 1, 000 or more, you'll get a 10 % discount. #Person1#: I'll accept your offer. Summary: </s>",<pad> #Person1#'ll give 5 90 yuan for 150 yuan a piece of pizza for 150 Yuan and add 10 % discount.</s>,<pad> #Person1# will buy a 150 yuan piece of bread at 150 yuan a piece.</s>,2.520337,2.954733,0.434396
3,"Summarize the following conversation. #Person1#: Today more and more families have personal computers. People have wider range of choice to communicate with the outside world. #Person2#: Right. With the establishment of Internet and a lot of web companies, people are getting more and more dependent on the web. #Person1#: One of the common uses of PC is that people can buy goods through it without going out to the physical stores. #Person2#: Can you tell me how it is done? #Person1#: If a cus...",<pad> #Person1# tells #Person2# how PC is helping people buy goods through it and they prefer paying for things online.</s>,<pad> People have wider range of choice to communicate with the outside world with computer.</s>,2.459565,2.718758,0.259193
4,"Summarize the following conversation. #Person1#: It smells like an ashtray in here! #Person2#: Hi honey! What's wrong? Why do you have that look on your face? #Person1#: What's wrong? I thought we agreed that you were gonna quit smoking. #Person2#: No! I said I was going to cut down which is very different. You can't just expect me to go cold turkey overnight! #Person1#: Look, there are other ways to quit. You can try the nicotine patch, or nicotine chewing gum. We spend a fortune on cigaret...","<pad> Honey seems to light up his smokes constantly. Honey doesn't have the willpower to quit. Instead one thing she tries is the nicotine patch, or nicotine chewing gum. Honey finds it helpful but will have a divorce.</s>","<pad> Honey is thinking it would be hard to quit smoking, because of the laws cracking down. It doesn't give her the willpower.</s>",1.494994,1.678974,0.18398
5,"Summarize the following conversation. #Person1#: Let's take a coffee break, shall we? #Person2#: I wish I could, but I can't. #Person1#: What keeps you so busy? You've been sitting there for hours. You've got to walk around. You just can't stay on the computer forever. #Person2#: Well, I am up to my neck in work. I've got to finish this report. Sarah needs it by noon. I don't want to be scolded if I can't finish my work by the deadline. #Person1#: I understand that, but you'd feel better if ...",<pad> Sarah needs the report by noon today and she wants the time to finish it because #Person1# is too busy to concentrate and is sedentary.</s>,<pad> Sarah needs up to a 45 minutes to finish her work. #Person1# asks Sarah to take a coffee break.</s>,1.957293,2.100831,0.143538
6,"Summarize the following conversation. #Person1#: Where shall I register, please? #Person2#: Here. Do you have a registration card? #Person1#: Yes. Here you are. #Person2#: Please register your information here and pay for it. And I'll make a medical record for you. #Person1#: OK. How much do I need to pay for the registration? #Person2#: Please pay ten yuan for the registration. #Person1#: Here is my money. #Person2#: This is your registration card. Please don't lose it and bring it whenever...",<pad> #Person1# will register into the clinic but #Person2# will make a medical record for her doesn't know how to get to the Cofei room.</s>,<pad> #Person1# wants to register for the appointment of a professional's computer. The tiny computer is too big to be turned around by the pharmacist or consultant and the computer is pretty useless.</s>,1.342309,1.431467,0.089158
7,"Summarize the following conversation. #Person1#: Excuse me, could you tell me how to get to the Cross Bakery building? #Person2#: The Cross Bakery building? Oh sure. You're actually walking in the opposite direction. #Person1#: Oh, you're kidding! I thought I was heading east. #Person2#: No, east is the other direction. To get to the Bakery, you need to turn around and go three blocks to Broadway. When you get to the intersection of Broadway and Elm, you hang a left. Go straight down that st...",<pad> #Person2#'s telling #Person1# how to get to the Cross Bakery building. #Person2# asks #Person1# to show him how to get to the Cross Bakery building.</s>,<pad> #Person1# asks for directions to cross bakery. The person asks how to get there.</s>,2.677873,2.719361,0.041488
8,"Summarize the following conversation. #Person1#: What can I do for you, madam? #Person2#: I'd like to buy a toy car for my son. #Person1#: How about this one? #Person2#: It looks nice. How much is it? #Person1#: They're three hundred dollars. #Person2#: Oh, I'm afraid it's too expensive. Can you show me something cheaper? #Person1#: OK, This one is one hundred and twenty. It's the cheapest here. #Person2#: OK, I'll take it. Here's the money. #Person1#: Thank you very much. Summary: </s>","<pad> #Person2# wants to buy a toy car for his son. According to #Person1#, the biggest one is one hundred and twenty dollars. But #Person2# is surprised by the lowest one, but #Person1# agrees.</s>",<pad> #Person1# wants to buy a toy car.</s>,1.342674,1.381266,0.038592
9,"Summarize the following conversation. #Person1#: I would like to order some internet today. #Person2#: What kind would you like? #Person1#: What kind of internet is there? #Person2#: You can get DEL or dial-up. #Person1#: Which of those two is best? #Person2#: I would recommend DEL. #Person1#: So that one better? #Person2#: It's better because it doesn't tie up the phone. #Person1#: What do you mean by that? #Person2#: DEL isn't connected through your phone line, but dial-up is. #Person1#: S...",<pad> #Person1# believes they should buy DEL instead of dial-up internet because #Person1# thinks they can't use their phone because of DEL.</s>,"<pad> #Person1# is buying internet for the first time, but she doesn't want to use her phone when it's on the Internet.</s>",1.968518,1.902239,-0.066279


In [203]:
[1, 2, 3 ,4 ,5][-1:]

[5]