In [1]:
import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR

In [2]:
editor = Editor()
editor.tg

KeyboardInterrupt: 

In [3]:
import checklist
from checklist.test_suite import TestSuite
suite_path = 'Data/sentiment_suite.pkl'
suite = TestSuite.from_file(suite_path)

TypeError: an integer is required (got type bytes)

In [None]:
pred_path = 'Data/our.'
suite.run_from_file(pred_path, overwrite=True)
suite.summary() # or suite.visual_summary_table()

## Generate cases

#### Negation

In [5]:
from collections import defaultdict
import numpy as np
import pandas as pd
import json
import torch
import torch.nn as nn
import torch.optim as optim
import csv
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

def load_data(path):
    data = []
    cols = {'verified':0,'reviewTime':1,'reviewerID':2,'asin':3,"reviewText":4,"summary":5,"unixReviewTime":6,"sentiment":7,"id":8}
    for line in open(path):
        review_data = json.loads(line)
        tmp = [None]*len(cols)
        for key in review_data:
            if key in cols:
                if key == "sentiment":
                    tmp[cols[key]] = 1 if review_data[key] == "positive" else 0
                else:
                    tmp[cols[key]] = review_data[key]
        data.append(tmp)
    X = pd.DataFrame(data, columns=cols)
    # set empty reviews to '' (instead of None)
    X.loc[X['reviewText'].isna(), 'reviewText'] = ''
    X.loc[X['summary'].isna(), 'summary'] = ''
    y = X['sentiment']
    X.drop(columns='sentiment', inplace=True)
    return X, y

df, target = load_data('Data/music_reviews_train.json')
df

ModuleNotFoundError: No module named 'nlp'

In [84]:
df.columns

Index(['verified', 'reviewTime', 'reviewerID', 'asin', 'reviewText', 'summary',
       'unixReviewTime', 'id'],
      dtype='object')

In [86]:
offset = 7533
end = offset + 250
data, labels = df["reviewText"][offset:end], target[offset:end]

import en_core_web_sm
nlp = spacy.load('en_core_web_sm')

In [92]:
output = []
label = []
for row, lab in zip(data, labels):
    try:
        row = row.split('.')[0]
        pdata = list(nlp.pipe([row]))
        ret = Perturb.perturb(pdata, Perturb.add_negation)
        
        if ret.data:
            output.append( ret.data[0][1] )
            label.append( int(not lab) )
    except:
        pass

In [96]:
with open('cases_negation.csv', 'w') as f:
    for r, l in zip(output, label):
        f.write(f'{r};{l}\n')

In [None]:
for t in ['This is not good', 'He didn\'t play the guitar', 'He doesn\'t play anything', 'She wasn\'t sad']:
    print(t)
    print(Perturb.remove_negation(nlp(t)))
    print()

#### Editor

In [None]:
editor.template('{neg_verb_present}').data

In [10]:
data[:100]

0     So creative!  Love his music - the words, the ...
1     This tape can hardly be understood and it was ...
2     Buy the CD.  Do not buy the MP3 album.  Downlo...
3     I love Dallas Holms music and voice!  Thank Yo...
4            Great memories of my early years in Christ
                            ...                        
95    Mark and the gang have shown the world just ho...
96    This album is awesome. I love Casting Crowns m...
97    Casting Crowns has again recorded a fantastic ...
98    I was really looking forward to getting this c...
99    This latest effort by Casting Crowns is an out...
Name: reviewText, Length: 100, dtype: object

In [53]:
type(pdata.sent)

AttributeError: 'list' object has no attribute 'sent'

#### Temporal and minority opinion

In [24]:
from checklist.test_suite import TestSuite
suite = TestSuite()


In [35]:
pos_adj = ['good', 'great', 'excellent', 'amazing', 'extraordinary', 'beautiful', 'fantastic', 'nice', 'incredible', 'exceptional', 'awesome', 'perfect', 'fun', 'happy', 'adorable', 'brilliant', 'exciting', 'sweet', 'wonderful']
neg_adj = ['awful', 'bad', 'horrible', 'weird', 'lousy', 'unhappy', 'average', 'poor', 'sad', 'frustrating', 'lame', 'nasty', 'annoying', 'boring', 'creepy', 'dreadful', 'ridiculous', 'terrible', 'unpleasant']
pos_verb_present = ['like', 'enjoy', 'appreciate', 'love',  'recommend', 'admire', 'value', 'welcome']
neg_verb_present = ['hate', 'dislike', 'regret',  'abhor', 'dread', 'despise' ]
pos_verb_past = ['liked', 'enjoyed', 'appreciated', 'loved', 'admired', 'valued', 'welcomed']
neg_verb_past = ['hated', 'disliked', 'regretted',  'abhorred', 'dreaded', 'despised']
noun = ["singer", "musician", "song writer", "band", "CD", "track", "artist", "album", "song", "lyrics","instruments"]
change_temporal = ['but now', 'however, at late', 'whereas lately', 'although', 'but recently']
change = ['although', 'however', 'but', 'whereas']
others = ['some people', 'my parents', 'my friends', 'people', 'haters']

editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True )

editor.add_lexicon('pos_verb_present', pos_verb_present, overwrite=True)
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.add_lexicon('pos_verb_past', pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb_past', neg_verb_past, overwrite=True)
editor.add_lexicon('pos_verb', pos_verb_present + pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb', neg_verb_present + neg_verb_past, overwrite=True)

editor.add_lexicon('noun', noun, overwrite=True)

In [43]:
templates = ["This used to be {pos_adj}, {change} it's {neg_adj}.;0",
             ]
t = editor.template(templates, change=change_temporal, noun = noun, unroll=True, labels=0, save=True)

templates = ['{others} {pos_verb_present} the {noun}, but I {neg_verb_present} it.;0',
             'I {neg_verb_present} the {noun}, but {others} {pos_verb_present} it.;0',
            ]
t += editor.template(templates, others=others,noun = noun, change=change, unroll=True, labels=0, save=True)

templates = ["I think the {noun} used to be {neg_adj}, {change} it's {pos_adj}.;1",
             "I used to {neg_verb_present} the {noun}, {change} I have changed my mind.;1",
             "I never thought the {noun} was {pos_adj}, {change} I've been convinced;1",
             ]
t += editor.template(templates, change=change_temporal, noun = noun, unroll=True, labels=1, save=True)



test = MFT(**t)
description = '''Have conflicting statements where the author has an opinion and a third party has a contrary opinion.
Expect sentiment to be the authors'. Example:
"Some people think you are great, but I think you are terrible" -> should be negative
'''
suite.add(test, 'my opinion is what matters', 'SRL', description, overwrite=True)

In [44]:
suite.to_raw_file(r'Data\our_test.txt', n=400, seed=1)

for test in suite.tests:
    suite.tests[test].name = test
    suite.tests[test].description = suite.info[test]['description]']
    suite.tests[test].capability = suite.info[test]['capability']

path = 'Data\our_sentiment_suite.pkl'
suite.save(path)