In [59]:
from datasets import load_dataset
from transformers import  AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig

In [60]:
dataset=load_dataset("knkarthick/dialogsum")

In [61]:
examples_indicees=[40,200]

dash_lines='-'.join('' for x in range(100))

for i, index in enumerate(examples_indicees):
    print(dash_lines)
    print("Example",i+1)
    print(dash_lines)
    print("INPUT DIALOGUE:")
    print(dataset['test'][index]['dialogue'])
    print(dash_lines)
    print("BASELINE HUMAN SUMMARY")
    print(dataset['test'][index]['summary'])
    print(dash_lines)
    print()


---------------------------------------------------------------------------------------------------
Example 1
---------------------------------------------------------------------------------------------------
INPUT DIALOGUE:
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
---------------------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------------------
Examp

In [62]:
model_name='google/flan-t5-base'
model=AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [63]:
tokenizer=AutoTokenizer.from_pretrained(model_name,use_fast=True)

In [64]:
sentence="what time is it,Tom?"
sentence_encoded=tokenizer(sentence,return_tensors='pt')

sentence_decoded=tokenizer.decode(
    sentence_encoded["input_ids"][0],
    skip_special_tokens=True
)

print("ENCODED SENTENCE:")
print(sentence_encoded["input_ids"][0])
print('\nDecoded sentence:')
print(sentence_decoded)

ENCODED SENTENCE:
tensor([ 125,   97,   19,   34,    6, 3696,   51,   58,    1])

Decoded sentence:
what time is it,Tom?


In [66]:
for i,index in enumerate(examples_indicees):
    dialogue=dataset['test'][index]['dialogue']
    summary=dataset['test'][index]['summary']

    inputs=tokenizer(dialogue,return_tensors='pt')
    output=tokenizer.decode(
        model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0],
        skip_special_tokens=True)
    print(dash_lines)
    print("Example",i+1)
    print(dash_lines)
    print(f'INPUT PROMPT:\n{dialogue}')
    print(dash_lines)
    print(f'BASHLINE HUMAN SUMMARY:\n{summary}')
    print(dash_lines)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')
    

---------------------------------------------------------------------------------------------------
Example 1
---------------------------------------------------------------------------------------------------
INPUT PROMPT:
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
---------------------------------------------------------------------------------------------------
BASHLINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
---------------------------------------------------------------------------------------------------
MODEL GENERATION - WITHOUT PROMPT ENGINEERING:
Person1: It's ten to nine.

--------------------------------

## Zeroshot

In [67]:
for i,index in enumerate(examples_indicees):
    dialogue=dataset['test'][index]['dialogue']
    summary=dataset['test'][index]['summary']

    prompt=f"""
Summarize the following conversation.

{dialogue}

Summary:
    """

    inputs=tokenizer(prompt,return_tensors='pt')
    output=tokenizer.decode(
        model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0],
        skip_special_tokens=True)
    print(dash_lines)
    print("Example",i+1)
    print(dash_lines)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_lines)
    print(f'BASHLINE HUMAN SUMMARY:\n{summary}')
    print(dash_lines)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')

---------------------------------------------------------------------------------------------------
Example 1
---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Summarize the following conversation.

#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

Summary:
    
---------------------------------------------------------------------------------------------------
BASHLINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
---------------------------------------------------------------------------------------------------
MODEL GENERATION - WITHOUT PROMPT ENGINEERING:
The t

In [68]:
for i,index in enumerate(examples_indicees):
    dialogue=dataset['test'][index]['dialogue']
    summary=dataset['test'][index]['summary']

    prompt=f"""
Dialogue

{dialogue}

what was going on?
    """

    inputs=tokenizer(prompt,return_tensors='pt')
    output=tokenizer.decode(
        model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0],
        skip_special_tokens=True)
    print(dash_lines)
    print("Example",i+1)
    print(dash_lines)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_lines)
    print(f'BASHLINE HUMAN SUMMARY:\n{summary}')
    print(dash_lines)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')

---------------------------------------------------------------------------------------------------
Example 1
---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Dialogue

#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

what was going on?
    
---------------------------------------------------------------------------------------------------
BASHLINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
---------------------------------------------------------------------------------------------------
MODEL GENERATION - WITHOUT PROMPT ENGINEERING:
Tom is late. He has to c

## One shot

In [77]:
def make_prompt(example_indices_full,example_index_to_summarize):
    prompt=''
    for index in example_indices_full:
        dialogue=dataset['test'][index]['dialogue']
        summary=dataset['test'][index]['summary']

        prompt+=f"""
Dialogue:
{dialogue}

What was going on?
{summary}

"""
        dialogue=dataset['test'][example_index_to_summarize]['dialogue']
        prompt+=f"""
Dialogue:

{dialogue}

What was going on?

"""
    return prompt

In [78]:
example_indices_full=[40]
example_index_to_summarize=200

one_shot_prompt=make_prompt(example_indices_full,example_index_to_summarize)

print(one_shot_prompt)


Dialogue:
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

What was going on?
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.


Dialogue:

#Person1#: Have you considered upgrading your system?
#Person2#: Yes, but I'm not sure what exactly I would need.
#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
#Person2#: That would be a definite bonus.
#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.
#Person2#: How can we do that?
#Person1#: You'd probably need a faster processor, to begin with. And you also need

In [79]:
summary=dataset['test'][example_index_to_summarize]['summary']

inputs=tokenizer(one_shot_prompt,return_tensors='pt')

output=tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
    )[0],
        skip_special_tokens=True
    )


    



In [80]:
print(dash_lines)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_lines)
print(f'MODEL GENERATION - ONE SHOT:\n{output}')

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.

---------------------------------------------------------------------------------------------------
MODEL GENERATION - ONE SHOT:
#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to add a CD-ROM drive.


## Few shot

In [81]:
example_indices_full= [40, 80, 120]
example_index_to_summarize = 200

In [82]:


few_shot_prompt=make_prompt(example_indices_full,example_index_to_summarize)

print(few_shot_prompt)


Dialogue:
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

What was going on?
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.


Dialogue:

#Person1#: Have you considered upgrading your system?
#Person2#: Yes, but I'm not sure what exactly I would need.
#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
#Person2#: That would be a definite bonus.
#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.
#Person2#: How can we do that?
#Person1#: You'd probably need a faster processor, to begin with. And you also need

In [83]:
summary=dataset['test'][example_index_to_summarize]['summary']

inputs=tokenizer(few_shot_prompt,return_tensors='pt')
output=tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
    )[0],
    skip_special_tokens=True
)

print(dash_lines)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_lines)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')

Token indices sequence length is longer than the specified maximum sequence length for this model (1275 > 512). Running this sequence through the model will result in indexing errors


---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.

---------------------------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT:
#Person1 wants to upgrade his computer. #Person2 wants to upgrade his computer.


## Generation Config

In [89]:
generation_config=GenerationConfig(max_new_tokens=50)
#generation_config=GenerationConfig(max_new_tokens=50,do_sample=True,temperature=0.1)
#generation_config=GenerationConfig(max_new_tokens=50,do_sample=True,temperature=0.5)
#generation_config=GenerationConfig(max_new_tokens=50,do_sample=True,temperature=0.7)


inputs=tokenizer(few_shot_prompt,return_tensors='pt')
output=tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        generation_config=generation_config,
    )[0],
    skip_special_tokens=True
)

print(dash_lines)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')
print(dash_lines)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')

---------------------------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT:
#Person1 wants to upgrade his computer. #Person2 wants to upgrade his computer.
---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.

