In [1]:
import torch
from transformers import T5ForConditionalGeneration,T5Tokenizer
import random

def set_seed(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)

model = T5ForConditionalGeneration.from_pretrained('F:\\Paraphrase\\try_new\\model\\')
tokenizer = T5Tokenizer.from_pretrained('F:\\Paraphrase\\try_new\\tokenizer\\')
# model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_paraphraser')
# tokenizer = T5Tokenizer.from_pretrained('t5-base')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print ("device ",device)
model = model.to(device)

  return torch._C._cuda_getDeviceCount() > 0


device  cpu


In [2]:
sentence = "While it’s easy to get starstruck by its Pro sibling, the OnePlus 9 is a capable alternative at a lower cost. You’re looking at quite a gap, especially in the US where (for some unknown reason), the base 8/128 GB version of the 9 Pro is not available. This means you can have the vanilla phone for $730 or the 12/256 GB Pro for $1,070"

In [9]:
text =  sentence.split('.')

max_len = 64

    

encoding = tokenizer.prepare_seq2seq_batch(text, return_tensors="pt")
input_ids, attention_masks = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)


# set top_k = 50 and set top_p = 0.95 and num_return_sequences = 3
beam_outputs = model.generate(
    input_ids=input_ids, attention_mask=attention_masks,
    do_sample=True,
    min_length= len(text[0].split()) - 2,
    max_length= len(text[0].split()) + 10,
    top_k=120,
    top_p=0.95,
    temperature=0.98,
    early_stopping=True,
    num_return_sequences=5,
    no_repeat_ngram_size = 5
)

print ("\nOriginal Sentence ::")
print (sentence)
print ("\n")
print ("Paraphrased Sentence :: ")

sent = tokenizer.batch_decode(random.choice(beam_outputs), skip_special_tokens=True,clean_up_tokenization_spaces=True)

print(sent)


Original Sentence ::
While it’s easy to get starstruck by its Pro sibling, the OnePlus 9 is a capable alternative at a lower cost. You’re looking at quite a gap, especially in the US where (for some unknown reason), the base 8/128 GB version of the 9 Pro is not available. This means you can have the vanilla phone for $730 or the 12/256 GB Pro for $1,070


Paraphrased Sentence :: 
['<pad>', 'Though', 'it', '’', 's', 'easy', 'to', 'get', 'stars', 'truck', 'by', 'its', 'Pro', 'si', 'bling', ',', 'the', '', 'OnePlus', '9', 'is', '', 'a', 'capable', 'alternative', 'at', '', 'a', 'lower', 'cost', '.']


In [12]:
final_outputs = []
for beam_output in beam_outputs:
    sent = tokenizer.decode(beam_output, skip_special_tokens=True,clean_up_tokenization_spaces=True)
    final_outputs.append(sent)
    
for i,output in enumerate(final_outputs):
    print(f'{i}: {output}')

0: The prolonged closure of schools is causing an education crisis that may cost Bangladesh and other South Asian countries over $1 trillion in lost earnings in the long run, according to a World Bank report.
1: Sri Lanka's closure of schools is causing an education crisis that may cost Bangladesh and other South Asian countries over $1 trillion in lost earnings in the long run, according to a World Bank report.
2: The prolonged closure of schools is causing an education crisis that may cost Bangladesh and other South Asian countries over $1 trillion in lost earnings in the long run, according to a World Bank report.
3: The prolonged closure of schools is causing an education crisis that may cost Bangladesh and other South Asian countries over $1 trillion in lost earnings in the long run, according to a World Bank report.
4: The prolonged closure of schools is causing an education crisis that may cost Bangladesh and other South Asian countries over $1 trillion in lost earnings in the l

In [None]:
# ======================================== NEW ======================================

In [18]:
def get_response(input_text):
    encoding = tokenizer.prepare_seq2seq_batch(input_text, return_tensors="pt")
    input_ids, attention_masks = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)
    translated = model.generate(input_ids=input_ids, 
                                attention_mask=attention_masks,
                                do_sample=True,
                                min_length= len(input_text[0].split()) - 2,
                                max_length= len(input_text[0].split()) + 10,
                                top_k=120,
                                top_p=0.95,
                                temperature=0.98,
                                early_stopping=True,
                                num_return_sequences=1,
                                no_repeat_ngram_size = 3
                               )
    
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    return tgt_text

In [19]:
sentence = "While it’s easy to get starstruck by its Pro sibling, the OnePlus 9 is a capable alternative at a lower cost. You’re looking at quite a gap, especially in the US where (for some unknown reason), the base 8/128 GB version of the 9 Pro is not available. This means you can have the vanilla phone for $730 or the 12/256 GB Pro for $1,070"

In [20]:
context = sentence.split('.')
target = get_response(context)

print(' '.join(context))
print()
print(' '.join(target))

While it’s easy to get starstruck by its Pro sibling, the OnePlus 9 is a capable alternative at a lower cost  You’re looking at quite a gap, especially in the US where (for some unknown reason), the base 8/128 GB version of the 9 Pro is not available  This means you can have the vanilla phone for $730 or the 12/256 GB Pro for $1,070

Will the OnePlus 9 be a more affordable alternative to its Android rival, the XDA? In the US, the base 8/128 GB version of the 9 Pro is not available. So where is the gap? If I wanted a new phone with 32 GB RAM, I could buy a 256 GB phone for $700 or the vanilla Galaxy


In [8]:
final_outputs[0]

tensor([   0,   37,  359, 2005,   19, 1852,    8,  337, 2005,    5,   37,  359])

In [18]:
from PyDictionary import PyDictionary

In [19]:
dictionary=PyDictionary()

In [29]:
dictionary.synonym('why')

['reason', 'wherefore', 'ground']

In [28]:
dictionary.meaning('sporadic')

{'Adjective': ['recurring in scattered and irregular or unpredictable instances']}

In [54]:
s = ""
for i in text.split()[:-1]:
    try:
        temp = dictionary.synonym(i)[0]
        s+=" "+str(temp)
    except:
        s+=" "+str(i)

OnePlus has no Synonyms in the API
launches, has no Synonyms in the API
it's has no Synonyms in the API
this has no Synonyms in the API


In [55]:
text

"OnePlus has always enjoyed extensive teaser campaigns for its upcoming product launches, but it's definitely outdone itself this time around."

In [56]:
s

" OnePlus old person ever bask extended advert race unwanted engineering future generic launches, only it's emphatically break content this clip"

In [60]:
import nltk

In [None]:
nltk.download(download_dir='F:\\Paraphrase\\',info_or_id='all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to F:\Paraphrase\...
[nltk_data]    |   Unzipping corpora\abc.zip.
[nltk_data]    | Downloading package alpino to F:\Paraphrase\...
[nltk_data]    |   Unzipping corpora\alpino.zip.
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     F:\Paraphrase\...
[nltk_data]    |   Unzipping corpora\biocreative_ppi.zip.
[nltk_data]    | Downloading package brown to F:\Paraphrase\...
[nltk_data]    |   Unzipping corpora\brown.zip.
[nltk_data]    | Downloading package brown_tei to F:\Paraphrase\...
[nltk_data]    |   Unzipping corpora\brown_tei.zip.
[nltk_data]    | Downloading package cess_cat to F:\Paraphrase\...
[nltk_data]    |   Unzipping corpora\cess_cat.zip.
[nltk_data]    | Downloading package cess_esp to F:\Paraphrase\...
[nltk_data]    |   Unzipping corpora\cess_esp.zip.
[nltk_data]    | Downloading package chat80 to F:\Paraphrase\...
[nltk_data]    |   Unzipping corp

In [2]:
from transformers import T5ForConditionalGeneration,T5Tokenizer
T5Tokenizer.from_pretrained('F:\\Paraphrase\\try_new\\tokenizer\\')

PreTrainedTokenizer(name_or_path='F:\Paraphrase\try_new\tokenizer\', vocab_size=32100, model_max_len=512, is_fast=False, padding_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>', '<extra_

In [None]:
tagged = nltk.pos_tag(tokens)