In [37]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
from transformers import set_seed

In [38]:
# Identifier of the dataset passed to `load_dataset`.
huggingface_dataset_name = "knkarthick/dialogsum"
# Load every split; the repr shown in the next cell lists train/validation/test,
# each with 'id', 'dialogue', 'summary' and 'topic' features.
dataset = load_dataset(huggingface_dataset_name)

In [39]:
# Bare expression: the notebook renders the DatasetDict repr (splits, features, row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [40]:
# Build a prompt from test-set rows: every listed row except the last is shown
# as a worked example (dialogue followed by its reference summary); the last
# row contributes only its dialogue plus an open "Summary: " cue for the model.
example_index = [0, 1]
last_position = len(example_index) - 1
template_parts = []
for i, index in enumerate(example_index):
    record = dataset['test'][index]
    dialogue = record['dialogue']
    summary = '\nSummary: ' + record['summary']
    # The final entry is the query, so its summary is left blank.
    suffix = '\nSummary: ' if i == last_position else summary + '\n\n'
    template_parts.append(dialogue + suffix)
one_shot_template = ''.join(template_parts)
print(one_shot_template)

#Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Messaging to communicate with their clients.
#Person1#: They will just have to change their communication methods. I don't want any - one using Instant Messaging in this office. It wastes too much time! Now, please continue with the memo. Wh

In [47]:
# NOTE(review): AutoTokenizer is already imported in the first cell, and
# AutoModelForCausalLM is not referenced in the cells visible here — consider
# consolidating imports in the top cell; confirm nothing later needs this line.
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint name shared by the model and its tokenizer so the two stay in sync.
model_name='google/flan-t5-base'
# Loaded through the seq2seq auto-class (not the causal-LM class imported above).
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [54]:
# Sanity check: encode a short sentence to token ids and decode it back.
sentence = 'Hi, how are you today?'
encoded_sentence = tokenizer(sentence, return_tensors='pt')
# The tokenizer returns a batch; take the ids of its single entry.
token_ids = encoded_sentence['input_ids'][0]
# Special tokens are skipped on decode so only the sentence text comes back.
decoded_sentence = tokenizer.decode(token_ids, skip_special_tokens=True)

print('Encoded - ', token_ids)
print('Decoded - ', decoded_sentence)

Encoded -  tensor([2018,    6,  149,   33,   25,  469,   58,    1])
Decoded -  Hi, how are you today?


In [None]:
# Inline draft of the generation step (tokenize -> generate -> decode).
# Uses `one_shot_template` directly: `prompt` is only assigned by a later cell,
# so referencing it here would fail on a fresh kernel run from top to bottom.
inputs = tokenizer(one_shot_template, return_tensors='pt')
output = tokenizer.decode(
    # generate() returns a batch of sequences; decode the first one.
    model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
    )[0],
    skip_special_tokens=True
)

In [70]:
def summarize_conversation(prompt, tokenizer, model, generation_config):
    """Generate text for ``prompt`` and return it together with the prompt.

    Args:
        prompt: Text handed to ``tokenizer``.
        tokenizer: Callable invoked as ``tokenizer(prompt, return_tensors='pt')``
            whose result is indexed with ``"input_ids"``; must also provide
            ``decode(ids, skip_special_tokens=...)``.
        model: Object exposing ``generate(input_ids, generation_config=...)``.
        generation_config: Forwarded unchanged to ``model.generate``.

    Returns:
        A ``(prompt, output)`` tuple, where ``output`` is the decoded first
        generated sequence with special tokens skipped.
    """
    encoded_prompt = tokenizer(prompt, return_tensors='pt')
    generated_sequences = model.generate(
        encoded_prompt["input_ids"],
        generation_config=generation_config,
    )
    # Only the first returned sequence is decoded.
    first_sequence = generated_sequences[0]
    decoded_text = tokenizer.decode(first_sequence, skip_special_tokens=True)
    return prompt, decoded_text

In [73]:
# Sampling (do_sample=True) draws from the RNG, so fix the seed first;
# otherwise re-running this cell can produce a different summary each time.
SEED = 42
set_seed(SEED)

# Generate up to 100 new tokens, sampling at temperature 0.5.
generation_config = GenerationConfig(
    max_new_tokens=100, do_sample=True, temperature=0.5)
# summarize_conversation echoes the prompt back alongside the generated text.
prompt, output = summarize_conversation(prompt=one_shot_template,
                                        tokenizer=tokenizer,
                                        model=model,
                                        generation_config=generation_config)

In [74]:
# Show the full prompt that was sent, then the model's generated summary.
print(prompt)
print('Model Summary:', output)

#Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Messaging to communicate with their clients.
#Person1#: They will just have to change their communication methods. I don't want any - one using Instant Messaging in this office. It wastes too much time! Now, please continue with the memo. Wh