In [1]:
import torch

In [1]:
# Model configuration used throughout the notebook.

# Hub id of the base checkpoint; loads the tokenizer and is embedded in the fine-tuned model path.
TOKENIZER_NAME = "openai-community/gpt2-large"

# Whether to evaluate the PEFT fine-tuned checkpoint (True) or the non-PEFT one (False).
use_peft = False

# Half precision requested when the model weights are loaded.
torch_dtype = torch.float16

In [2]:
# Local directory of the fine-tuned checkpoint; its name records whether PEFT was used.
# bool(...) renders as "True" / "False", which are exactly the two directory suffixes.
MODEL_NAME = f"./{TOKENIZER_NAME}-peft={bool(use_peft)}-fine-tuned-model"

In [3]:
# Formatting libraries
import black
import jupyter_black

# Activate jupyter_black for this notebook (JupyterLab front end, 170-character line limit).
jupyter_black.load(lab=True, line_length=170)

## Load and prepare data

In [4]:
from datasets import load_dataset
import torch

In [5]:
# Fetch the DialogSum dataset (train / validation / test splits) from the Hugging Face Hub.
dataset = load_dataset(path="knkarthick/dialogsum")

In [6]:
# Tokenizer for the base checkpoint; at this stage it is only used to count tokens per example.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    TOKENIZER_NAME,
    padding_side="right",
)

In [7]:
# Prompt template: an instruction line, the conversation under a "### Conversation:" header,
# and a closing "### Summary:" header so that the summary follows the prompt.
# `{dialogue}` is the only placeholder; the leading and trailing blank lines are part of the prompt text.
prompt_template = """
Summarize the following conversation.

### Conversation:

{dialogue}

### Summary:

"""


# create prompt
def create_prompt(data):
    """Build the summarization prompt for one dataset row and measure token counts.

    Args:
        data: Mapping with a "dialogue" string and a "summary" string.

    Returns:
        dict with four entries:
          - "input": `prompt_template` with the dialogue filled in,
          - "output": the reference summary, unchanged,
          - "n_tokens_input": number of tokens in the prompt,
          - "n_tokens_output": number of tokens in the summary.
        Token counts come from the module-level `tokenizer`, encoded without special tokens.

    Raises:
        KeyError: if "dialogue" or "summary" is missing from `data`.
    """
    dialogue = data["dialogue"]
    summary = data["summary"]

    # The template only contains a {dialogue} placeholder. The reference summary is
    # intentionally not handed to format(), so it can never end up inside the prompt.
    prompt = prompt_template.format(dialogue=dialogue)

    def count_tokens(text):
        # Length of the encoded text, excluding any special tokens.
        return len(tokenizer.encode(text, add_special_tokens=False))

    return {
        "input": prompt,
        "output": summary,
        "n_tokens_input": count_tokens(prompt),
        "n_tokens_output": count_tokens(summary),
    }

In [8]:
# Add the "input", "output" and token-count columns to every split, then display the result.
dataset = dataset.map(function=create_prompt)
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input', 'output', 'n_tokens_input', 'n_tokens_output'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input', 'output', 'n_tokens_input', 'n_tokens_output'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input', 'output', 'n_tokens_input', 'n_tokens_output'],
        num_rows: 1500
    })
})

In [9]:
# 0.99 quantile of the prompt length (in tokens) over the training split
train_prompt_lengths = dataset["train"].to_pandas()["n_tokens_input"]
train_prompt_lengths.quantile(0.99)

597.4099999999999

In [10]:
# 0.99 quantile of the reference-summary length (in tokens) over the training split
train_summary_lengths = dataset["train"].to_pandas()["n_tokens_output"]
train_summary_lengths.quantile(0.99)

82.0

In [11]:
# Summary statistics of the numeric columns (the two token counts) of the training split
train_frame = dataset["train"].to_pandas()
train_frame.describe()

Unnamed: 0,n_tokens_input,n_tokens_output
count,12460.0,12460.0
mean,231.361637,34.867014
std,103.897943,15.124341
min,74.0,7.0
25%,167.0,24.0
50%,212.0,32.0
75%,278.0,42.0
max,1448.0,247.0


In [12]:
# Drop examples that are too long. Both limits are exclusive: a prompt must have fewer than
# 470 tokens and a reference summary fewer than 70 tokens to be kept.
def _within_token_limits(example):
    """Return True when the prompt and the summary are both below their token limits."""
    return example["n_tokens_input"] < 470 and example["n_tokens_output"] < 70


dataset = dataset.filter(_within_token_limits)

In [13]:
# Show the prompt of the first training example.
print(dataset["train"][0]["input"])


Summarize the following conversation.

### Conversation:

#Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.

### Summ

In [14]:
# Show the reference summary of the first training example.
print(dataset["train"][0]["output"])

Mr. Smith's getting a check-up, and Doctor Hawkins advises him to have one every year. Hawkins'll give some information about their classes and medications to help Mr. Smith quit smoking.


## Load fine-tuned model and prepare Tokenizer

In [15]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import LLMInference
from peft import AutoPeftModelForCausalLM

In [16]:
# Re-create the tokenizer that will be handed to the inference helper.
tokenizer = AutoTokenizer.from_pretrained(
    TOKENIZER_NAME,
    add_special_tokens=False,
)

# Print the special tokens that ship with the checkpoint (before any pad token is assigned below).
print(f"Special Tokens: \n{tokenizer.special_tokens_map}")

# When the tokenizer defines no padding token, reuse the end-of-sequence token for padding.
needs_pad_token = tokenizer.pad_token is None
if needs_pad_token:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

Special Tokens: 
{'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>'}


In [17]:
# Load the fine-tuned model onto the GPU.
# The loader class depends on whether the checkpoint was fine-tuned with PEFT. Both loaders
# receive the same torch_dtype so that either checkpoint is evaluated at the configured precision.
model_loader = AutoPeftModelForCausalLM if use_peft else AutoModelForCausalLM
model = model_loader.from_pretrained(MODEL_NAME, device_map="cuda", torch_dtype=torch_dtype)

In [18]:
from typing import Tuple, List, Union
from transformers import GenerationConfig
from tqdm.notebook import tqdm
import torch
from torch.nn.utils.rnn import pad_sequence
import os
import transformers
import pandas as pd

In [19]:
# Wrap the loaded model and tokenizer in the project's inference helper (imported from utils).
llm_inference = LLMInference(model, tokenizer)

In [27]:
# Generation below samples (do_sample=True). Seed Python, NumPy and PyTorch first so that the
# predictions, and therefore the reported BLEU / ROUGE scores, are the same on every run.
transformers.set_seed(42)

# Generate summaries for the test split and compute the metrics.
# source_max_len and max_new_tokens equal the limits used when filtering the dataset (470 / 70 tokens).
llm_inference.make_predictions_and_compute_metrics(
    dataset=dataset["test"],
    batch_size=15,
    source_max_len=470,
    padding_side="left",
    max_new_tokens=70,
    do_sample=True,
    temperature=0.5,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.0,
    # generation_config_kwargs={"num_beams": 5},
)

  0%|          | 0/33 [00:00<?, ?it/s]

Computing BLEU scores
Computing ROUGE scores


In [28]:
# Display the metrics held by the inference helper (BLEU and ROUGE columns in the output below).
# NOTE(review): presumably filled in by the make_predictions_and_compute_metrics call above - confirm in utils.
llm_inference.metrics

Unnamed: 0,bleu_score,1-gram precision,2-gram precision,3-gram precision,4-gram precision,rouge1,rouge2,rougeL,rougeLsum
0,0.283,0.576,0.355,0.231,0.136,0.479,0.23,0.4,0.4


In [29]:
# Load the saved predictions. The path has no placeholders, so a plain string literal is used.
# NOTE(review): presumably written by make_predictions_and_compute_metrics - confirm in utils.
predictions = pd.read_csv("./predictions_and_metric/predictions.csv")

In [30]:
# Row label of the `predictions` entry inspected in the following cells.
idx = 0

In [31]:
# Prompt of the selected row.
print(predictions.loc[idx, "input"])


Summarize the following conversation.

### Conversation:

#Person1#:  So is there any other area I should look at as well?
#Person2#: Yes, I'd recommend West Derby. That will be closer to your office.
#Person1#:  That sounds good.
#Person2#:  Yes, and if you have children, it also has very good schools.
#Person1#:  That's not my concern. I live on my own so I'm only looking for a cheap single room, something like a flat.
#Person2#:  Umm, that may be a problem here in this area then, because there are mostly larger houses here. You'd probably be able to share one with other people who want to rent though.
#Person1#:  No, I'm only interested in flats at the moment.
#Person2#:  We actually have another office in South Derby, and the guy who works there is a really good friend of mine. His name is John Godfrey.
#Person1#:  Could you tell me his telephone number?
#Person2#:  It's 074263951.
#Person1#:  Great. Is there a good time to call him? I'm here for a whole week until Sunday tenth.
#

In [32]:
# Reference summary of the selected row.
print(predictions.loc[idx, "output"])

#Person1# wants a cheap single room. #Person2# recommends calling John Godfrey and see him on Saturday.


In [33]:
# Model-generated summary of the selected row.
print(predictions.loc[idx, "prediction"])

#Person1# wants a flat. #Person2# recommends West Derby and #Person1#'s willing to share a flat with other people. #Person2# recommends another office in South Derby and John's willing to see #Person1# on Saturday.
