In [1]:
import sys
sys.path.append("..")

import datasets
import pandas as pd

from transformers import pipeline
pipe = pipeline("text2text-generation", 
                model="vennify/t5-base-grammar-correction",
                max_length=256,
                batch_size=16,
                device=0)

from src import antonym, add_not, G

1. Antonym modifier may not modify the correct adjective, e.g., modified quantifier "last" to "first".
2. Some verbs may not have obvious antonym, e.g., "doubled".
3. Antonym from synset directly may result in unnatural sentences.
4. Wrong antonyms like "rise-go_to_bad" and "receive-say_fare_well"
5. Using a seq2seq paraphraser like `pegasus` can make the sentence grammatically correct and more natural.

In [2]:
# Read in the Finance Phrasebank dataset
dataset = pd.read_csv("../data/financial-phrasebank/Sentences_75Agree.txt", 
                      names=["original", "label"],
                      encoding="iso-8859-1",
                      sep="@",)
dataset = dataset.loc[dataset["label"] != "neutral"] \
            .sample(128, random_state=42) \
            .reset_index(drop=True)
dataset.head(10)

Unnamed: 0,original,label
0,Earnings per share ( EPS ) amounted to EUR1 .3...,negative
1,"Kiosk and cinema operations have suffered , in...",negative
2,"Last week , the Finnish metals and technology ...",positive
3,"According to Karhinen , OP-Pohjola is an excit...",positive
4,"Sales climbed 19.2 pct to 1.002 bln eur , surp...",positive
5,"The Helsinki-based company , which also owns t...",positive
6,Since the association 's data do not cover sal...,negative
7,It also turned in earnings per share ( EPS ) o...,positive
8,Operating profit in the fourth quarter went do...,negative
9,Clothing chain Sepp+ñl+ñ 's net sales increase...,positive


In [6]:
def driver(p):
    """Do antonym replacement and add not"""
    
    ret_ant, ret_add = [], []
    for s in G.sent_tokenize(p):
        ret_add.append(add_not(s))
        ret_ant.append(antonym(s))
    return " ".join(ret_ant), " ".join(ret_add)

dataset["antonym_raw"], dataset["add_not_raw"] = zip(*dataset["original"].map(driver))

# Apply the grammar correction model to the generated texts
dataset["antonym"] = [o["generated_text"] for o in pipe(dataset["antonym_raw"].tolist())]
dataset["add_not"] = [o["generated_text"] for o in pipe(dataset["add_not_raw"].tolist())]
dataset.head()

Unnamed: 0,original,label,antonym_raw,add_not_raw,antonym,add_not
0,Earnings per share ( EPS ) amounted to EUR1 .3...,negative,Earnings per share ( EPS ) amounted to EUR1 .3...,Earnings per share ( EPS ) didn't amount to EU...,Earnings per share ( EPS ) amounted to EUR1 .3...,Earnings per share ( EPS ) didn't amount to EU...
1,"Kiosk and cinema operations have suffered , in...",negative,"Kiosk and cinema operations have enjoy , in pa...",Kiosk and cinema operations don't have suffere...,"Kiosk and cinema operations have enjoyed , in ...",Kiosk and cinema operations don't have suffere...
2,"Last week , the Finnish metals and technology ...",positive,"Last week , the Finnish metals and technology ...","Last week , the Finnish metals and technology ...","Last week, the Finnish metals and technology g...","Last week, the Finnish metals and technology g..."
3,"According to Karhinen , OP-Pohjola is an excit...",positive,"According to Karhinen , OP-Pohjola is an unexc...","not According to Karhinen , OP-Pohjola is an e...","According to Karhinen, OP-Pohjola is an exciti...","According to Karhinen, OP-Pohjola is an exciti..."
4,"Sales climbed 19.2 pct to 1.002 bln eur , surp...",positive,"Sales wane 19.2 pct to 1.002 bln eur , surpass...","Sales didn't climb 19.2 pct to 1.002 bln eur ,...","Sales waned 19.2 pct to 1.002 bln eur , surpas...",Sales didn't climb 19.2 pct to 1.002 billion e...


In [7]:
# Save the sample dataset
dataset.to_csv("../data/financial-phrasebank/sample-neg-128.csv", index=False)