In [1]:
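# Uncomment to install the dependencies into this kernel's environment
# (`%pip` targets the running kernel; `!pip` may hit a different Python).
# %pip install -q torch torchdata transformers datasets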

In [2]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, set_seed

In [3]:
# Identifier handed to `load_dataset`. The cells below read the "train" split
# of the result and its "dialogue" / "summary" columns.
dataset_name = "knkarthick/dialogsum"
dataset = load_dataset(path=dataset_name)

# Human Labelled Summary

In [4]:
# Print a couple of training dialogues next to their human-written reference
# summaries. These are the baseline the model outputs below are compared to.
example_indices = [12, 30]
seg = "-" * 100
train_split = dataset["train"]
for ind in example_indices:
    # Index the row first: `dataset["train"]["dialogue"][ind]` asks for the
    # whole column just to pick a single element out of it.
    example = train_split[ind]
    print("Dialogue:")
    print(seg)
    print(example["dialogue"])
    print("Human summary:")
    print(example["summary"])

Dialogue:
----------------------------------------------------------------------------------------------------
#Person1#: Did Bean send these dirty jokes to you, too? Look!
#Person2#: What a creep! Phony good luck e-mails are one thing, but sexual harassment is crossing the line.
#Person1#: No wonder he asked for my address first-he just wants to harass me!
#Person2#: You could try using a spam filter to reject stuff that's obviously pornographic or anything else you don't want.
Human summary:
#Person2# suggests #Person1# use a spam filter to reject Bean's pornographic stuff.
Dialogue:
----------------------------------------------------------------------------------------------------
#Person1#: May I take your order?
#Person2#: We haven't decided yet. Could you give us a little longer?
#Person1#: Yes, take your time, please.
#Person2#: Can we get something to drink? We want two bottles of beer.
#Person1#: Fine.
#Person2#: Could you tell us your specials today?
#Person1#: The special t

# Load Model

In [15]:
# Checkpoint used for every generation cell below, loaded through the
# seq2seq auto class together with its matching tokenizer.
model_name = "google/flan-t5-small"
# `trust_remote_code=True` is intentionally not passed: that flag lets Python
# code shipped inside the model repository execute locally, and this T5
# checkpoint is served by classes bundled with `transformers`, so the extra
# trust is unnecessary.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

# Sentence Completion

In [39]:
# Baseline: hand the raw dialogue to the model with no instruction at all and
# print whatever it generates.
example_indices = [12, 30]
seg = "-" * 100
for ind in example_indices:
    # Row-first indexing fetches one example instead of the whole column.
    input_sentence = dataset["train"][ind]["dialogue"]
    print("Dialogue:")
    print(seg)
    print(input_sentence)
    print("AI Completion:")
    # Calling the tokenizer returns a mapping that is unpacked into `generate`
    # as keyword arguments.
    encoded = tokenizer(input_sentence, return_tensors="pt")
    out = model.generate(**encoded)
    decoded_sentence = tokenizer.decode(out[0], skip_special_tokens=True)
    print(decoded_sentence)

Dialogue:
----------------------------------------------------------------------------------------------------
#Person1#: Did Bean send these dirty jokes to you, too? Look!
#Person2#: What a creep! Phony good luck e-mails are one thing, but sexual harassment is crossing the line.
#Person1#: No wonder he asked for my address first-he just wants to harass me!
#Person2#: You could try using a spam filter to reject stuff that's obviously pornographic or anything else you don't want.
AI Completion:
Bean is a sexy person, but he doesn't want to harass you
Dialogue:
----------------------------------------------------------------------------------------------------
#Person1#: May I take your order?
#Person2#: We haven't decided yet. Could you give us a little longer?
#Person1#: Yes, take your time, please.
#Person2#: Can we get something to drink? We want two bottles of beer.
#Person1#: Fine.
#Person2#: Could you tell us your specials today?
#Person1#: The special today is steak.
#Person2#: W

# Prompt to Ask for Summary

In [46]:
# Zero-shot: prepend an explicit "summarise" instruction to each dialogue and
# let the model generate with its default generation settings.
example_indices = [12, 30]
seg = "-" * 100
for ind in example_indices:
    # Row-first indexing fetches one example instead of the whole column.
    dialogue = dataset["train"][ind]["dialogue"]
    # Prompt text is kept exactly as before (including its surrounding
    # whitespace) so the model sees the same input.
    template = f"""
Summarise the following dialogue:
------
{dialogue}
    """
    print("Dialogue:")
    print(seg)
    print(template)
    # Use the tokenizer's __call__ form, as in the sentence-completion cell,
    # so the attention mask is passed to `generate` along with the input ids.
    encoded = tokenizer(template, return_tensors="pt")
    output = model.generate(**encoded)
    output_sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    print("AI Summary:")
    print(output_sentence)

Dialogue:
----------------------------------------------------------------------------------------------------

Summarise the following dialogue:
------
#Person1#: Did Bean send these dirty jokes to you, too? Look!
#Person2#: What a creep! Phony good luck e-mails are one thing, but sexual harassment is crossing the line.
#Person1#: No wonder he asked for my address first-he just wants to harass me!
#Person2#: You could try using a spam filter to reject stuff that's obviously pornographic or anything else you don't want.
    
AI Summary:
Bean is a sexy guy.
Dialogue:
----------------------------------------------------------------------------------------------------

Summarise the following dialogue:
------
#Person1#: May I take your order?
#Person2#: We haven't decided yet. Could you give us a little longer?
#Person1#: Yes, take your time, please.
#Person2#: Can we get something to drink? We want two bottles of beer.
#Person1#: Fine.
#Person2#: Could you tell us your specials today?
#P

# One-, Two-, and Few-Shot Prompting

In [52]:
# One-shot: show the model one worked example (dialogue 30 followed by its
# human summary) and then ask it to summarise dialogue 12.
seg = "-" * 100

train_split = dataset["train"]
# Fetch each row once; indexing the column first would request the entire
# column for every single lookup.
shot_example = train_split[30]
input_sentence = shot_example["dialogue"]
human_summary = shot_example["summary"]
to_summarise = train_split[12]["dialogue"]
template = f"""
{input_sentence}

{human_summary}

Now Summarise the following dialogue:

{to_summarise}
"""
print("Dialogue:")
print(seg)
print(template)
# Use the tokenizer's __call__ form, as in the sentence-completion cell, so
# the attention mask is passed to `generate` along with the input ids.
encoded = tokenizer(template, return_tensors="pt")
output = model.generate(**encoded)
output_sentence = tokenizer.decode(output[0], skip_special_tokens=True)
print("AI Summary:")
print(output_sentence)

Dialogue:
----------------------------------------------------------------------------------------------------

#Person1#: May I take your order?
#Person2#: We haven't decided yet. Could you give us a little longer?
#Person1#: Yes, take your time, please.
#Person2#: Can we get something to drink? We want two bottles of beer.
#Person1#: Fine.
#Person2#: Could you tell us your specials today?
#Person1#: The special today is steak.
#Person2#: We'll take this steak dinner.
#Person1#: What would you like to go with your steak?
#Person2#: Peas and carrots.
#Person1#: I see. What would you like for dessert?
#Person2#: Icecream, please.

#Person1# serves #Person2# to order two bottles of beer, a steak dinner, and ice cream.

Now Summarise the following dialogue:

#Person1#: Did Bean send these dirty jokes to you, too? Look!
#Person2#: What a creep! Phony good luck e-mails are one thing, but sexual harassment is crossing the line.
#Person1#: No wonder he asked for my address first-he just wants

# Config Tuning

In [62]:
# Same one-shot prompt as the previous section, but generated with an explicit
# GenerationConfig to see how the decoding settings change the summary.
seg = "-" * 100

train_split = dataset["train"]
# Fetch each row once; indexing the column first would request the entire
# column for every single lookup.
shot_example = train_split[30]
input_sentence = shot_example["dialogue"]
human_summary = shot_example["summary"]
to_summarise = train_split[12]["dialogue"]
template = f"""
{input_sentence}

{human_summary}

Now Summarise the following dialogue:

{to_summarise}
"""
print("Dialogue:")
print(seg)
print(template)


# `temperature` and `top_k` are sampling parameters: without `do_sample=True`
# generation stays greedy and both values are ignored, so the "tuning" would
# have no effect on the output.
config = GenerationConfig(
    max_new_tokens=100,
    min_new_tokens=20,
    do_sample=True,
    temperature=0.2,
    top_k=100,
)
# Sampling is stochastic; fix the seed so re-running the cell reproduces the
# same summary.
set_seed(42)
encoded = tokenizer.encode(template, return_tensors="pt")
output = model.generate(encoded, generation_config=config)
output_sentence = tokenizer.decode(output[0], skip_special_tokens=True)
print("AI Summary:")
print(output_sentence)

Dialogue:
----------------------------------------------------------------------------------------------------

#Person1#: May I take your order?
#Person2#: We haven't decided yet. Could you give us a little longer?
#Person1#: Yes, take your time, please.
#Person2#: Can we get something to drink? We want two bottles of beer.
#Person1#: Fine.
#Person2#: Could you tell us your specials today?
#Person1#: The special today is steak.
#Person2#: We'll take this steak dinner.
#Person1#: What would you like to go with your steak?
#Person2#: Peas and carrots.
#Person1#: I see. What would you like for dessert?
#Person2#: Icecream, please.

#Person1# serves #Person2# to order two bottles of beer, a steak dinner, and ice cream.

Now Summarise the following dialogue:

#Person1#: Did Bean send these dirty jokes to you, too? Look!
#Person2#: What a creep! Phony good luck e-mails are one thing, but sexual harassment is crossing the line.
#Person1#: No wonder he asked for my address first-he just wants