In [1]:
# One-time environment setup: uncomment and run only the lines your environment needs.
#!pip install torch===1.4.0 torchvision===0.5.0 -f https://download.pytorch.org/whl/torch_stable.html
#!pip install transformers==2.9.0
#!pip install pytorch_lightning==0.7.5
#!pip install ipywidgets
#!jupyter nbextension enable --py widgetsnbextension
#!pip install notebook --upgrade
#!pip install --upgrade jupyter_client
#!conda install -n base -c conda-forge widgetsnbextension
#!conda install -n py36 -c conda-forge ipywidgets

## Model trained on Quora dataset

In [6]:
import torch
from transformers import T5ForConditionalGeneration,T5Tokenizer

def set_seed(seed):
    """Seed PyTorch's random number generators so sampling is repeatable.

    Always seeds via ``torch.manual_seed``; when CUDA is available it also
    seeds every CUDA device via ``torch.cuda.manual_seed_all``.

    Args:
        seed: Integer seed value.
    """
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Fix the RNG state before sampling so the generated paraphrases are repeatable.
set_seed(42)

# Fine-tuned paraphrasing checkpoint, used together with the stock t5-base tokenizer.
model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_paraphraser')
tokenizer = T5Tokenizer.from_pretrained('t5-base')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device ", device)
model = model.to(device)

# Single length limit shared by the tokenizer (padding/truncation) and by
# generation. Loop-invariant, so it is defined once here.
max_len = 256

sentences = ["Jack died because he could not breathe.",
             "Did Jack die because he could not breathe?"]

for sentence in sentences:
    # Prompt format: task prefix, the sentence, then an explicit end-of-sequence token.
    text = "paraphrase: " + sentence + " </s>"

    # Pad to max_len explicitly; without max_length the tokenizer pads to the
    # model's maximum length instead.
    encoding = tokenizer.encode_plus(
        text,
        max_length=max_len,
        pad_to_max_length=True,
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"].to(device)
    attention_masks = encoding["attention_mask"].to(device)

    # Top-k / nucleus sampling: top_k=120, top_p=0.98, 10 candidates per input.
    # NOTE: despite the variable name, this is sampling, not beam search
    # (num_beams is left at its default), so no beam-search options are passed.
    beam_outputs = model.generate(
        input_ids=input_ids, attention_mask=attention_masks,
        do_sample=True,
        max_length=max_len,
        top_k=120,
        top_p=0.98,
        num_return_sequences=10,
    )

    print("\nOriginal Sentence:")
    print(sentence)
    print("Paraphrased Sentence:")

    # Decode every sampled sequence. No filtering is applied: candidates that
    # merely echo the input (case-insensitively) and repeated candidates are kept.
    final_outputs = [
        tokenizer.decode(beam_output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for beam_output in beam_outputs
    ]
    for i, final_output in enumerate(final_outputs):
        print("{}: {}".format(i, final_output))

device  cuda

Original Sentence:
Jack died because he could not breathe.
Paraphrased Sentence:
0: Jack Morton died after a heart attack.
1: Is Jack still unconscious?
2: Why did Jack die, even though he could not breathe?
3: Is Jack dead?
4: What happened to Jack when he couldn’t breathe?
5: Jack died out of a respiratory illness.
6: Jack killed himself because he couldn't breathe.
7: Will Jack die because he unable to breathe?
8: Jack died because he could not breathe. If he could not see someone see him he would probably die.
9: How did Jack die, because he couldn’t breathe?

Original Sentence:
Did Jack die because he could not breathe?
Paraphrased Sentence:
0: Did Jack die because he couldn't breathe?
1: Can Jack die because he cannot breathe?
2: Why & Why did Jack Die?
3: Why did Jack die because he couldn't breathe?
4: Did Jack die when he could not breathe?
5: Did Jack die because he couldn't breathe?
6: Did Jack died because he could not breathe?
7: Did Jack die because he could

## Model trained on MSCOCO dataset

In [8]:
import torch
from transformers import T5ForConditionalGeneration,T5Tokenizer

def set_seed(seed):
    """Seed PyTorch's random number generators so sampling is repeatable.

    Always seeds via ``torch.manual_seed``; when CUDA is available it also
    seeds every CUDA device via ``torch.cuda.manual_seed_all``.

    Args:
        seed: Integer seed value.
    """
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Fix the RNG state before sampling so the generated paraphrases are repeatable.
set_seed(42)

# Checkpoint loaded from the local directory ./t5_paraphrase (the MSCOCO-trained
# model per this section's heading), used together with the stock t5-base tokenizer.
model = T5ForConditionalGeneration.from_pretrained('./t5_paraphrase')
tokenizer = T5Tokenizer.from_pretrained('t5-base')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device ", device)
model = model.to(device)

# Single length limit shared by the tokenizer (padding/truncation) and by
# generation. Loop-invariant, so it is defined once here.
max_len = 256

sentences = ["Jack died because he could not breathe.",
             "Did Jack die because he could not breathe?"]

for sentence in sentences:
    # Prompt format: task prefix, the sentence, then an explicit end-of-sequence token.
    text = "paraphrase: " + sentence + " </s>"

    # Pad to max_len explicitly; without max_length the tokenizer pads to the
    # model's maximum length instead.
    encoding = tokenizer.encode_plus(
        text,
        max_length=max_len,
        pad_to_max_length=True,
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"].to(device)
    attention_masks = encoding["attention_mask"].to(device)

    # Top-k / nucleus sampling: top_k=120, top_p=0.98, 10 candidates per input.
    # NOTE: despite the variable name, this is sampling, not beam search
    # (num_beams is left at its default), so no beam-search options are passed.
    beam_outputs = model.generate(
        input_ids=input_ids, attention_mask=attention_masks,
        do_sample=True,
        max_length=max_len,
        top_k=120,
        top_p=0.98,
        num_return_sequences=10,
    )

    print("\nOriginal Sentence:")
    print(sentence)
    print("Paraphrased Sentence:")

    # Decode every sampled sequence. No filtering is applied: candidates that
    # merely echo the input (case-insensitively) and repeated candidates are kept.
    final_outputs = [
        tokenizer.decode(beam_output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for beam_output in beam_outputs
    ]
    for i, final_output in enumerate(final_outputs):
        print("{}: {}".format(i, final_output))

device  cuda

Original Sentence:
Jack died because he could not breathe.
Paraphrased Sentence:
0: A male cowboy is looking at the camera.
1: a man in jeans and a jacket laying on the floor
2: A man dies of his lung damage on a bed.
3: a man is close to a window with a shirt on
4: The young man is holding a stick and close to the camera
5: A man in a white sweater and black mask with red flowers and paper wreaths.
6: A boy wearing an orange suit is on a bike.
7: A dog in a splinter bag lies in a cave.
8: A person that is sitting in the middle of an office.
9: A person asleep in a lounge chair while a group of people watch and a camera.

Original Sentence:
Did Jack die because he could not breathe?
Paraphrased Sentence:
0: A boy is unresponsive and trying to play with a cell phone.
1: A person with a knife and a small boy for a shower.
2: A man wearing a hat who is bending down.
3: A woman holds her hand up to a hat on a black and white drawing.
4: An old image of a person in a car.
5: a