In [84]:
import nltk
sentences = [
    "Thank your message to show our words to the doctor, as his next contract checking, to all of us",
    "Anyway, I believe the team, although bit delay and less communication at recent days, they really tried best for paper and cooperation",
]
# simple context free grammar
grammar = nltk.CFG.fromstring("""
    S -> NP VP | VP | Adv S | S Conj S | VP VP
    NP -> Det N | Det N PP | Det Adj N | Det Adj N PP | NP PP | Pronoun | NP Conj NP | Adj N | N 
    PP -> P NP
    VP -> V NP | VP PP | V | Adv VP | VP Adv | V Pronoun
    Det -> 'the' | 'a' | 'your' | 'our' | 'his' | 'all' | 'which' | 'some' | 'their'
    Adj -> 'next' | 'recent' | 'bit' | 'less' | 'best' 
    Adv -> 'anyway' | 'really'  | 'even' | 'lately' | 'there'
    Pronoun -> 'us' | 'they' | 'i' | 'you' | 'he' | 'she' | 'who'
    N -> 'doctor' | 'team' | 'message' | 'words' | 'contract' | 'days' | 'paper' | 'cooperation' | 'delay' | 'communication' | 'thoughts'
    P -> 'to' | 'as' | 'for' | 'of' | 'at' | 'with' | 'in' | 'on' | 'by'
    V -> 'show' | 'believe' | 'tried' | 'thank' | 'helped' | 'assist' | 'convey' | 'will' | 'checking' | 'were' | 'delays' | 'did'
    Conj -> 'and' | 'though' | 'although'
""")
stop_words = [ '.', ',']
# create a parser
parser = nltk.ChartParser(grammar)
# parse the sentences
# fix the sentences
# to make them more grammatically correct
# and to match the grammar
def sentence_fixer(sentence):
    replace = {
        "your message": "you for your message,",
        "to show our words": "which helped convey our thoughts",
        "as his": "who will assist all of us with checking the",
        "checking, to all of us": ".",
        "Anyway, ": "",
        ", although": " really did their best with the paper,",
        "bit delay": "even though there were some delays",
        "at recent days,": "lately.",
        "they really tried best for paper and cooperation": ""
    }
    for key, value in replace.items():
        sentence = sentence.lower().replace(key, value)
    return sentence

sentences_fixed = [sentence_fixer(sentence) for sentence in sentences]

for sentence in sentences_fixed:
    print(f"Parsing: {sentence}")
    tokens = nltk.word_tokenize(sentence)
    tokens = [token.lower() for token in tokens if token not in stop_words]
    print(f"Tokens: {tokens}")
    
    for tree in parser.parse(tokens):
        print(tree)
        tree.pretty_print()
    print("\n")


Parsing: thank you for your message, which helped convey our thoughts to the doctor, who will assist all of us with checking the next contract .
Tokens: ['thank', 'you', 'for', 'your', 'message', 'which', 'helped', 'convey', 'our', 'thoughts', 'to', 'the', 'doctor', 'who', 'will', 'assist', 'all', 'of', 'us', 'with', 'checking', 'the', 'next', 'contract']


Parsing: anyway, i believe the team really did their best with the paper, even though there were some delays and less communication lately. 
Tokens: ['anyway', 'i', 'believe', 'the', 'team', 'really', 'did', 'their', 'best', 'with', 'the', 'paper', 'even', 'though', 'there', 'were', 'some', 'delays', 'and', 'less', 'communication', 'lately']




In [48]:
def extract_n_gram(sentences, n):
    n_grams = []
    for sentence in sentences:
        tokens = nltk.word_tokenize(sentence.lower())
        n_grams.extend(list(nltk.ngrams(tokens, n)))
    return n_grams

def fix_sentence_with_ngrams(sentence, bad_to_good_map, n=2):
    tokens = nltk.word_tokenize(sentence.lower())
    i = 0
    fixed_tokens = []

    while i < len(tokens):
        match_found = False
        if i + n <= len(tokens):
            current_ngram = tuple(tokens[i:i+n])
            if current_ngram in bad_to_good_map:
                # Replace with the good n-gram
                fixed_tokens.extend(bad_to_good_map[current_ngram])
                i += n  # Skip ahead by n tokens
                match_found = True

        if not match_found:
            fixed_tokens.append(tokens[i])
            i += 1

    return ' '.join(fixed_tokens)

# Example replacement rule
bad_to_good = {
    ('thank', 'your'): ('thank', 'you'),
    ('your', 'message'): ('for', 'your', 'message'),
    ('message', 'to'): ('to'),

}

fixed = fix_sentence_with_ngrams(sentences[0], bad_to_good)
print("Fixed sentence:", fixed)
        

Fixed sentence: thank you t o show our words to the doctor , as his next contract checking , to all of us


In [58]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

device = "cpu"

tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")

model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)

def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=3,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=128
):
    input_ids = tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids.to(device)
    
    outputs = model.generate(
        input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )

    res = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    return res

text1 = ["Today is our dragon boat festival, in our Chinese culture, to celebrate it with all safe and great in our lives.",
"Hope you too, to enjoy it as my deepest wishes.",
"Thank your message to show our words to the doctor, as his next contract checking, to all of us.",
"I got this message to see the approved message.",
"In fact, I have received the message from the professor, to show me, this, a couple of days ago.",
"I am very appreciated the full support of the professor, for our Springer proceedings publication"
]

for text in text1:
    print(f"Original: {text}")
    paraphrased = paraphrase(text)
    for p in paraphrased:
        print(f"Paraphrased: {p}")
    print("\n")


Original: Today is our dragon boat festival, in our Chinese culture, to celebrate it with all safe and great in our lives.
Paraphrased: Our Chinese culture celebrates today's dragon boat festival to ensure the safety and prosperity of all who come from this world.
Paraphrased: In our Chinese culture, we celebrate today's dragon boat festival to make it a day to remember with great safety and comfort.
Paraphrased: According to Chinese customs, today's celebration of the dragon boat is a way to honor the safety and greatness of our lives.


Original: Hope you too, to enjoy it as my deepest wishes.
Paraphrased: Let's all have a wonderful time together, dear friends.
Paraphrased: May you also have a good time as my thoughts are with you.
Paraphrased: I hope you enjoy it as much as I do.


Original: Thank your message to show our words to the doctor, as his next contract checking, to all of us.
Paraphrased: As the doctor conducts his next contract check, I thank you for your message on how 

chatgpt_paraphraser:
  author={Vladimir Vorobev, Maxim Kuznetsov},
  title={A paraphrasing model based on ChatGPT paraphrases},
  year={2023}
