In [54]:
import torch
import torchdata
import transformers
from datasets import load_dataset

from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
import huggingface_hub


In [55]:
from datasets import load_dataset

# Hub id of the dataset; later cells read the 'dialogue' and 'summary'
# fields of its 'test' split.
DATASET_ID = "knkarthick/dialogsum"
ds = load_dataset(DATASET_ID)

In [56]:
## Print a few test dialogues next to their human-written reference summaries

example_indices = [40, 200]

# 99 underscores: exactly what '_'.join('' for x in range(100)) evaluates to
dash_line = '_' * 99

for example_no, index in enumerate(example_indices, start=1):
  # Pull the row once and read both fields from it
  sample = ds['test'][index]
  print(dash_line)
  print('Example', example_no)
  print(dash_line)
  print('Input')
  print(sample['dialogue'])
  print(dash_line)
  print('Baseline Human summary')
  print(sample['summary'])
  print()

___________________________________________________________________________________________________
Example 1
___________________________________________________________________________________________________
Input
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
___________________________________________________________________________________________________
Baseline Human summary
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.

___________________________________________________________________________________________________
Example 2
___________________________________________________________________________________________________
Input

In [57]:
# Checkpoint id; the tokenizer cell loads from this same name so the
# vocabulary matches the model.
model_name = "google/flan-t5-base"

# Sequence-to-sequence model used by every generation cell that follows
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [58]:
# Tokenizer paired with `model_name`; use_fast=True requests the fast implementation
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

In [59]:
# Round-trip one sentence through the tokenizer: text -> token ids -> text
sentence = 'What time is it, Tom?'

# return_tensors='pt' yields PyTorch tensors; [0] selects the single
# sequence in the batch
sentence_encoded = tokenizer(sentence, return_tensors='pt')
token_ids = sentence_encoded['input_ids'][0]

# skip_special_tokens=True leaves special markers out of the decoded text
# (presumably the trailing id 1 in the printed tensor is one of them)
sentence_decoded = tokenizer.decode(token_ids, skip_special_tokens=True)

print(token_ids)
print(sentence_decoded)

tensor([ 363,   97,   19,   34,    6, 3059,   58,    1])
What time is it, Tom?


In [83]:
## Decode a pair of hand-picked token ids to see which words they map to
tensor1 = torch.tensor([563, 19])

sentence_decoded1 = tokenizer.decode(
    tensor1,
    skip_special_tokens=True,
)
print(sentence_decoded1)

group is


In [121]:
# Baseline generation: feed the raw dialogue to the model with no
# instruction and keep the human reference summary for the comparison cell.
sample = ds['test'][41]  # fetch the row once instead of indexing the dataset twice
dialogue = sample['dialogue']
print(dialogue)
summary = sample['summary']

# Bound to `inputs`, not `input`: rebinding `input` would shadow the
# builtin input() for the rest of the kernel session.
inputs = tokenizer(dialogue, return_tensors='pt')
input_tensor = inputs['input_ids']

# Pass the tokenizer's attention mask explicitly rather than leaving
# generate() to infer it from the ids.
output = model.generate(
    input_tensor,
    attention_mask=inputs['attention_mask'],
    max_new_tokens=500,
)


#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.


In [122]:
# Show the raw generated ids first, then the decoded text beside the
# human-written reference.
print(output)
decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
for label, text in (
    ('model summary : ', decoded_output),
    ('baseline summary: ', summary),
):
  print(label, text)

tensor([[   0, 5780,  536,   10,   94,   31,    7,    3,  324,   12, 4169,    5,
            1]])
model summary :  Person1: It's ten to nine.
baseline summary:  #Person1# is rushing to catch a train but Tom thinks it isn't necessary.


## Zero-shot inference
Instruct the model to perform the task through the prompt alone, without giving it any worked examples.

In [128]:
## Zero-shot prompt: an instruction precedes the dialogue and a "Summary:" cue follows it

example_indices = [40,42]

# 99 underscores: exactly what '_'.join('' for x in range(100)) evaluates to
dash_line = '_' * 99

for i, index in enumerate(example_indices):
  # Fetch the row once; both fields are read from it
  sample = ds['test'][index]
  dialogue = sample['dialogue']
  summary = sample['summary']
  # NOTE: the blank lines and two-space indents inside this literal are part
  # of the prompt text sent to the model; they are left exactly as they were
  # so the generated summaries stay comparable.
  prompt = f"""

  summarize the following conversation :
  {dialogue}

  Summary:
  """

  # Bound to `inputs`, not `input`: rebinding `input` would shadow the
  # builtin input() for the rest of the kernel session.
  inputs = tokenizer(prompt, return_tensors='pt')
  input_tensor = inputs['input_ids']

  # Pass the tokenizer's attention mask explicitly rather than leaving
  # generate() to infer it from the ids.
  output = model.generate(
      input_tensor,
      attention_mask=inputs['attention_mask'],
      max_new_tokens=50,
  )
  decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
  print(dash_line)
  print('Example', i+1)
  print(dash_line)
  print('Input')
  print(dialogue)
  print(dash_line)
  print('Zero shot model summary ')
  print(decoded_output)
  print('Baseline Human summary')
  print(summary)
  print()

___________________________________________________________________________________________________
Example 1
___________________________________________________________________________________________________
Input
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
___________________________________________________________________________________________________
Zero shot model summary 
The train is about to leave.
Baseline Human summary
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.

___________________________________________________________________________________________________
Example 2
___________________________________________________

In [129]:
## Zero-shot prompt variant: same instruction, but the closing cue asks what is happening

example_indices = [40,42]

# 99 underscores: exactly what '_'.join('' for x in range(100)) evaluates to
dash_line = '_' * 99

for i, index in enumerate(example_indices):
  # Fetch the row once; both fields are read from it
  sample = ds['test'][index]
  dialogue = sample['dialogue']
  summary = sample['summary']
  # NOTE: the blank lines and two-space indents inside this literal are part
  # of the prompt text sent to the model; they are left exactly as they were
  # so the generated summaries stay comparable.
  prompt = f"""

  summarize the following conversation :
  {dialogue}

  what is going on here, tell what is happening:
  """

  # Bound to `inputs`, not `input`: rebinding `input` would shadow the
  # builtin input() for the rest of the kernel session.
  inputs = tokenizer(prompt, return_tensors='pt')
  input_tensor = inputs['input_ids']

  # Pass the tokenizer's attention mask explicitly rather than leaving
  # generate() to infer it from the ids.
  output = model.generate(
      input_tensor,
      attention_mask=inputs['attention_mask'],
      max_new_tokens=50,
  )
  decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
  print(dash_line)
  print('Example', i+1)
  print(dash_line)
  print('Input')
  print(dialogue)
  print(dash_line)
  print('Zero shot model summary ')
  print(decoded_output)
  print('Baseline Human summary')
  print(summary)
  print()

___________________________________________________________________________________________________
Example 1
___________________________________________________________________________________________________
Input
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
___________________________________________________________________________________________________
Zero shot model summary 
#Person1: It's nine thirty.
Baseline Human summary
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.

___________________________________________________________________________________________________
Example 2
____________________________________________________