In [5]:
import pickle
from checklist.editor import Editor
from checklist.pred_wrapper import PredictorWrapper
from checklist.test_suite import TestSuite
from checklist.test_types import MFT
import numpy as np

In [6]:
with open('baseline_model.pickle', 'rb') as f:
    model = pickle.load(f)

In [7]:
def predict_proba(inputs):
    p = np.array([model.predict_proba([x]) for x in inputs]).reshape(-1, 2)
    
    return p
wrapped_pp = PredictorWrapper.wrap_softmax(predict_proba)

## Capability: Vocabulary

In [32]:
editor = Editor()
suite = TestSuite()

In [33]:
nouns = ['record', 'instrument', 'disk', 'sound', 'music', 'tune', 'artist', 'singer']
editor.add_lexicon('nouns', nouns)
pos_adj = ['amazing', 'incredible', 'extraordinary', 'excellent', 'awesome', 'nice']
neg_adj = ['terrible', 'eerie', 'odd', 'awful', 'ugly', 'strange']
editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True )

In [34]:
pos_verb_present = ['like', 'enjoy', 'appreciate', 'love',  'recommend', 'admire', 'value', 'welcome']
neg_verb_present = ['hate', 'dislike', 'regret',  'abhor', 'dread', 'despise']
pos_verb_past = ['liked', 'enjoyed', 'appreciated', 'loved', 'admired', 'valued', 'welcomed']
neg_verb_past = ['hated', 'disliked', 'regretted',  'abhorred', 'dreaded', 'despised']

editor.add_lexicon('pos_verb_present', pos_verb_present, overwrite=True)
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.add_lexicon('pos_verb_past', pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb_past', neg_verb_past, overwrite=True)
editor.add_lexicon('pos_verb', pos_verb_present+ pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb', neg_verb_present + neg_verb_past, overwrite=True)

In [35]:
# Testing individual positive words
test = MFT(pos_adj + pos_verb_present + pos_verb_past, labels=1) #1 == positive
suite.add(test, 'single positive words', 'Vocabulary', '')
# Testing individual negative words
test = MFT(neg_adj + neg_verb_present + neg_verb_past, labels=0)
suite.add(test, 'single negative words', 'Vocabulary', '')

In [36]:
# Testing words in context
t = editor.template('{it} {nouns} {be} {pos_adj}.', it=['The', 'This', 'That'], be=['is', 'was'], labels=1, save=True)
t += editor.template('{it} {be} {a:pos_adj} {nouns}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=1, save=True)
t += editor.template('{i} {pos_verb} {the} {nouns}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=1, save=True)
t += editor.template('{it} {nouns} {be} {neg_adj}.', it=['That', 'This', 'The'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{it} {be} {a:neg_adj} {nouns}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{i} {neg_verb} {the} {nouns}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=0, save=True)

test = MFT(**t)
suite.add(test, 'Sentiment-laden words in context', 'Vocabulary', 
          'Use positive and negative verbs and adjectives with music nouns such as artist, tune, instrument, etc. E.g. "This was a bad cd"')

In [37]:
suite.run(wrapped_pp, overwrite=True)

Running single positive words
Predicting 21 examples
Running single negative words
Predicting 18 examples
Running Sentiment-laden words in context
Predicting 2448 examples


In [38]:
suite.summary()

Vocabulary

single positive words
Test cases:      21
Fails (rate):    0 (0.0%)


single negative words
Test cases:      18
Fails (rate):    3 (16.7%)

Example fails:
0.8 eerie
----
0.6 dread
----
0.7 regret
----


Sentiment-laden words in context
Test cases:      2448
Fails (rate):    440 (18.0%)

Example fails:
0.7 It was an eerie record.
----
0.7 That is an eerie record.
----
0.5 The artist was odd.
----






## Capability: Negation

In [39]:
pos = ['good', 'enjoyable', 'exciting', 'excellent', 'amazing', 'great', 'engaging']
neg = ['bad', 'terrible', 'awful', 'horrible']

In [40]:
ret = editor.template('This is not {a:pos} {mask}.', pos=pos, labels=0, save=True, nsamples=100)
ret += editor.template('This is not {a:neg} {mask}.', neg=neg, labels=1, save=True, nsamples=100)

test = MFT(**ret, name='Simple negation',
           capability='Negation', description='Very simple negations.')

In [41]:
test.run(wrapped_pp, overwrite=True)

Predicting 200 examples


In [42]:
test.summary()

Test cases:      200
Fails (rate):    178 (89.0%)

Example fails:
0.2 This is not a bad taste.
----
0.1 This is not a terrible experience.
----
0.3 This is not a bad letter.
----


## Capability: Temporal Awareness

In [43]:
suite = TestSuite()

editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.template('{neg_verb_present}').data

['hate', 'dislike', 'regret', 'abhor', 'dread', 'despise']

In [44]:
change = ['but', 'even though', 'although', '']
t = editor.template(['I used to think this music was {neg_adj}, {change} now I think it is {pos_adj}.',
                                 'I think this artist is {pos_adj}, {change} I used to think it was {neg_adj}.',
                                 'In the past I thought this cd was {neg_adj}, {change} now I think it is {pos_adj}.',
                                 'I think this album is {pos_adj}, {change} in the past I thought it was {neg_adj}.',
                                ] ,
                                 change=change, unroll=True, nsamples=500, save=True, labels=1)
t += editor.template(['I used to {neg_verb_present} this music, {change} now I {pos_verb_present} it.',
                                 'I {pos_verb_present} this album, {change} I used to {neg_verb_present} it.',
                                 'In the past I would {neg_verb_present} this album, {change} now I {pos_verb} it.',
                                 'I {pos_verb_present} this cd, {change} in the past I would {neg_verb_present} it.',
                                ] ,
                                change=change, unroll=True, nsamples=500, save=True, labels=1)
t += editor.template(['I used to think this album was {pos_adj}, {change} now I think it is {neg_adj}.',
                                 'I think this cd is {neg_adj}, {change} I used to think it was {pos_adj}.',
                                 'In the past I thought this cd was {pos_adj}, {change} now I think it is {neg_adj}.',
                                 'I think this tape is {neg_adj}, {change} in the past I thought it was {pos_adj}.',
                                ] ,
                                 change=change, unroll=True, nsamples=500, save=True, labels=0)
t += editor.template(['I used to {pos_verb_present} this music, {change} now I {neg_verb_present} it.',
                                 'I {neg_verb_present} this album, {change} I used to {pos_verb_present} it.',
                                 'In the past I would {pos_verb_present} this cd, {change} now I {neg_verb_present} it.',
                                 'I {neg_verb_present} this album, {change} in the past I would {pos_verb_present} it.',
                                ] ,
                                change=change, unroll=True, nsamples=500, save=True, labels=0)

In [45]:
test = MFT(**t)
description = '''Have two conflicing statements, one about the past and one about the present.
Expect the present to carry the sentiment. Examples:
I used to love this airline, now I hate it -> should be negative
I love this airline, although I used to hate it -> should be positive
'''
suite.add(test, 'used to, but now', 'Temporal', description)

In [46]:
suite.run(wrapped_pp, overwrite=True)

Running used to, but now
Predicting 8000 examples


In [47]:
suite.summary()

Temporal

used to, but now
Test cases:      8000
Fails (rate):    3913 (48.9%)

Example fails:
0.7 I used to admire this music, although now I regret it.
----
0.3 I think this artist is nice,  I used to think it was awful.
----
0.6 I used to think this album was incredible, but now I think it is ugly.
----




