In [4]:
from datasets import load_dataset

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig


In [6]:
huggingface_datset_name="knkarthick/dialogsum"

dataset=load_dataset(huggingface_datset_name)

In [12]:
example_indices=[40, 200]

dash_line="_".join("" for i in range(100))

for i, index in enumerate(example_indices):
    print(dash_line)
    print("Example: ", i+1)
    print(dash_line)
    print("INPUT DIALOGUE:")
    print(dataset["test"][index]["dialogue"])
    print(dash_line)
    print("BASELINE HUMAN SUMMARY:")
    print(dataset["test"][index]["summary"])
    print(dash_line)
    print()
    
    
    

___________________________________________________________________________________________________
Example:  1
___________________________________________________________________________________________________
INPUT DIALOGUE:
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
___________________________________________________________________________________________________
BASELINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
___________________________________________________________________________________________________

___________________________________________________________________________________________________
Ex

In [11]:
model_name="google/flan-t5-base"

model=AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [13]:
tokenizer=AutoTokenizer.from_pretrained(model_name, use_fast=True)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

### Zero shot Inference

In [18]:
for i, index in enumerate(example_indices):
    dialogue=dataset["test"][index]["dialogue"]
    summary=dataset["test"][index]["summary"]
    
    prompt=f'''summarize the following conversation:
    
    {dialogue}
    
    Summary: 
    '''
    inputs= tokenizer(prompt, return_tensors="pt", verbose=True)
    output=tokenizer.decode(model.generate(inputs["input_ids"], max_new_tokens=50)[0],skip_special_tokens=True)
    
    print(dash_line)
    print("Example 1: ", i+1)
    print(dash_line)
    print(f"INPUT PROMPT:\n{prompt}")
    print(dash_line)
    print(f"SUMMARY:\n{summary}")
    print(dash_line)
    print(f"MODEL GENERATED SUMMARY:\n{output}")
    
    
    
    
    

___________________________________________________________________________________________________
Example 1:  1
___________________________________________________________________________________________________
INPUT PROMPT:
summarize the following conversation:
    
    #Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
    
    Summary: 
    
___________________________________________________________________________________________________
SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
___________________________________________________________________________________________________
MODEL GENERATED SUMMARY:
The train is about to 

In [19]:
def make_prompt(example_indices_full,example_indices_to_summary):
    prompt=""
    for index in example_indices_full:
        dialogue=dataset["test"][index]["dialogue"]
        summary=dataset["test"][index]["summary"]
        
        prompt+=f'''
        Dialoague: {dialogue}
        
        Summary: {summary}
        '''
    dialogue=dataset["test"][example_indices_to_summary]["dialogue"]
    
    prompt+=f'''
        Dialoague: {dialogue}
        
        Summary: 
        '''
    return prompt

### One shot inference

In [20]:
example_indices_full=[40]
example_indices_to_summary=200
one_shot_prompt=make_prompt(example_indices_full,example_indices_to_summary )

print(one_shot_prompt)


        Dialoague: #Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
        
        Summary: #Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
        
        Dialoague: #Person1#: Have you considered upgrading your system?
#Person2#: Yes, but I'm not sure what exactly I would need.
#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
#Person2#: That would be a definite bonus.
#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.
#Person2#: How can we do that?
#Person1#: You'd probably need a faster processor, to

In [21]:
summary=dataset["test"][example_indices_to_summary]["summary"]

inputs=tokenizer(one_shot_prompt,return_tensors="pt")
output=tokenizer.decode(model.generate(inputs["input_ids"], max_new_tokens=50)[0],skip_special_tokens=True)


print(f"SUMMARY:\n{summary}")
print(dash_line)
print(f"MODEL GENERATED SUMMARY:\n{output}")
    

SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
___________________________________________________________________________________________________
MODEL GENERATED SUMMARY:
#Person1#: Have you considered upgrading your system? #Person2#: Yes, but I'm not sure what exactly I would need. #Person1#: You could add a painting program to your software.


### Few shot inference

In [22]:
example_indices_full1=[40, 80, 120]
example_indices_to_summary1=200
one_shot_prompt1=make_prompt(example_indices_full1,example_indices_to_summary1 )

print(one_shot_prompt1)


        Dialoague: #Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
        
        Summary: #Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
        
        Dialoague: #Person1#: May, do you mind helping me prepare for the picnic?
#Person2#: Sure. Have you checked the weather report?
#Person1#: Yes. It says it will be sunny all day. No sign of rain at all. This is your father's favorite sausage. Sandwiches for you and Daniel.
#Person2#: No, thanks Mom. I'd like some toast and chicken wings.
#Person1#: Okay. Please take some fruit salad and crackers for me.
#Person2#: Done. Oh, don't forget to take napkins disposable plates, cups and picnic b

In [24]:
summary=dataset["test"][example_indices_to_summary]["summary"]

inputs=tokenizer(one_shot_prompt1,return_tensors="pt")
output=tokenizer.decode(model.generate(inputs["input_ids"], max_new_tokens=50)[0],skip_special_tokens=True)


print(f"SUMMARY:\n{summary}")
print(dash_line)
print(f"MODEL GENERATED SUMMARY:\n{output}")
    

SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
___________________________________________________________________________________________________
MODEL GENERATED SUMMARY:
#Person1#: Have you considered upgrading your system? #Person2#: Yes, but I'm not sure what exactly I would need. #Person1#: You could consider adding a painting program to your software


In [25]:
generation_config=GenerationConfig(max_new_tokens=50)

inputs=tokenizer(one_shot_prompt1,return_tensors="pt")
output=tokenizer.decode(model.generate(inputs["input_ids"], generation_config=generation_config)[0],skip_special_tokens=True)




print(f"SUMMARY:\n{summary}")
print(dash_line)
print(f"MODEL GENERATED SUMMARY:\n{output}")

SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
___________________________________________________________________________________________________
MODEL GENERATED SUMMARY:
#Person1#: Have you considered upgrading your system? #Person2#: Yes, but I'm not sure what exactly I would need. #Person1#: You could consider adding a painting program to your software
