In [8]:
import sys
sys.path.append("..")

import datasets
import pandas as pd

from transformers import pipeline
# Grammar-correction model, applied later to the raw antonym-substituted text.
# Use the first GPU when CUDA is available; otherwise fall back to the CPU
# (device=-1) so the notebook still runs on machines without a GPU.
import torch

pipe = pipeline(
    "text2text-generation",
    model="visheratin/t5-efficient-tiny-grammar-correction",
    max_length=256,  # length cap forwarded to the pipeline
    batch_size=16,
    device=0 if torch.cuda.is_available() else -1,
)

from src import antonym, G

1. The antonym modifier may not pick the correct adjective to modify, e.g., it changed the quantifier "last" to "first".
2. Some verbs may not have an obvious antonym, e.g., "doubled".
3. Taking antonyms directly from the synset may result in unnatural sentences.
4. Some returned antonyms are wrong, e.g., "rise-go_to_bad" and "receive-say_farewell".
5. Using a seq2seq paraphraser like `pegasus` can make the sentence grammatically correct and more natural.

In [17]:
# Load the Financial PhraseBank file (`Sentences_75Agree.txt`), where every
# line has the form "<sentence>@<label>", then keep a fixed random sample of
# 128 sentences whose label is not "neutral".
dataset = (
    pd.read_csv(
        "../data/financial-phrasebank/Sentences_75Agree.txt",
        sep="@",
        encoding="iso-8859-1",
        names=["original", "label"],
    )
    .loc[lambda frame: frame["label"] != "neutral"]
    .sample(128, random_state=42)  # fixed seed keeps the sample reproducible
    .reset_index(drop=True)
)
dataset.head(10)

Unnamed: 0,original,label
0,Earnings per share ( EPS ) amounted to EUR1 .3...,negative
1,"Kiosk and cinema operations have suffered , in...",negative
2,"Last week , the Finnish metals and technology ...",positive
3,"According to Karhinen , OP-Pohjola is an excit...",positive
4,"Sales climbed 19.2 pct to 1.002 bln eur , surp...",positive
5,"The Helsinki-based company , which also owns t...",positive
6,Since the association 's data do not cover sal...,negative
7,It also turned in earnings per share ( EPS ) o...,positive
8,Operating profit in the fourth quarter went do...,negative
9,Clothing chain Seppälä 's net sales increase...,positive


In [21]:
# Antonym substitution: `antonym` is called once per original sentence and its
# two return values are split into the rewritten text and a flag column
# (presumably whether any word was replaced - confirm against `src.antonym`).
substitutions = dataset["original"].map(lambda sentence: antonym(sentence, is_sub_all=True))
raw_texts, replaced_flags = zip(*substitutions)
dataset["antonym_raw"] = raw_texts
dataset["has_any_replace"] = replaced_flags

# Pass the raw substitutions through the grammar-correction pipeline and keep
# only the generated string from each result.
corrected = pipe(dataset["antonym_raw"].tolist())
dataset["antonym"] = [result["generated_text"] for result in corrected]
dataset.head()

Unnamed: 0,original,label,antonym_raw,has_any_replace,antonym
0,Earnings per share ( EPS ) amounted to EUR1 .3...,negative,Earnings per share ( EPS ) amounted to EUR1 .3...,False,"Earnings per share (EPS) amounted to EUR1 .37,..."
1,"Kiosk and cinema operations have suffered , in...",negative,"Kiosk and cinema operations have enjoy , in pa...",True,"Kiosk and cinema operations have enjoyed, in p..."
2,"Last week , the Finnish metals and technology ...",positive,"first week , the Finnish metals and technology...",True,"First week, the Finnish metals and technology ..."
3,"According to Karhinen , OP-Pohjola is an excit...",positive,"According to Karhinen , OP-Pohjola is an unexc...",True,"According to Karhinen, OP-Pohjola is an unexci..."
4,"Sales climbed 19.2 pct to 1.002 bln eur , surp...",positive,"Sales wane 19.2 pct to 1.002 bln eur , surpass...",True,"Sales wane 19.2 pct to 1.002 bln eur, surpassi..."


In [25]:
# Show every field of the row labelled 64 in full (a plain list is not truncated
# the way the DataFrame preview is).
dataset.loc[64].to_numpy().tolist()

['The Finnish supplier of BSS-OSS and VAS for telecom operators , Tecnotree , has received expansion orders worth a total US$ 7.3 mn for its convergent charging and next generation messaging solutions in Latin America , the company announced without specifying which operators had placed the orders .',
 'positive',
 'The Finnish supplier of BSS-OSS and VAS for telecom operators , Tecnotree , has say_farewell contraction disorder worthlessness a total US $ 7.3 mn for its convergent charging and next generation messaging solutions in Latin America , the company announced without generalize which operators had divest the disorder .',
 True,
 'The Finnish supplier of BSS-OSS and VAS for telecom operators, Tecnotree, has say_farewell contraction disorder worthlessness a total US$7.3 mn for its convergent charging and next generation messaging solutions in Latin America, the company announced without generalizing which operators had divest the disorder.']