In [1]:
import numpy as np
import pandas as pd
from Load import json_to_df
import pickle
import re

In [2]:
from checklist.editor import Editor
from checklist.pred_wrapper import PredictorWrapper
from checklist.perturb import Perturb
from checklist.test_suite import TestSuite
from checklist.test_types import INV

In [3]:
# Load the pickled baseline model from the current working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only load model files that come from a trusted source.
with open('baseline_model.pickle', 'rb') as f:
    model = pickle.load(f)
    
def predict_proba(inputs):
    """Return the model's class probabilities for a batch of inputs.

    Parameters
    ----------
    inputs : iterable
        Examples to score (presumably raw review strings -- confirm against
        how the pickled model was trained).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(inputs), 2)``: one row per example with the two
        class probabilities produced by ``model.predict_proba``.
    """
    examples = list(inputs)
    if not examples:
        # Keep the empty-batch contract: a (0, 2) array, never a call into
        # the model with zero samples.
        return np.array([]).reshape(-1, 2)

    # Score the whole batch in one call instead of one call per example.
    # NOTE(review): assumes ``model.predict_proba`` accepts a list of several
    # examples; it is already being called with a one-element list.
    return np.asarray(model.predict_proba(examples)).reshape(-1, 2)
# Wrap predict_proba with checklist's PredictorWrapper.wrap_softmax; the
# wrapped callable is what each test's .run() receives.
wrapped_pp = PredictorWrapper.wrap_softmax(predict_proba)

# Invariance, change positive adjectives

In [4]:
# Load the training reviews. A forward-slash path is used because it resolves
# on Windows as well as on POSIX systems, whereas a hard-coded backslash
# separator only works on Windows.
df = json_to_df("data/music_reviews_train.json")

# Replace missing values with empty strings so every review text is a str
# (the regex-based perturbation function cannot handle NaN floats).
df = df.replace(np.nan, '', regex=True)

#df['reviewText'] = df.reviewText + ' ' + df.summary

# Keep only the text of reviews labelled "positive".
data = df.loc[df["sentiment"] == "positive", "reviewText"].tolist()

In [5]:
def change_pos_adjectives(x, meta=False, *args, **kwargs):
    """Create variants of ``x`` by swapping positive adjectives for each other.

    For every adjective in the fixed list that occurs in ``x`` as a whole word
    (case-sensitive ``\\b...\\b`` match), one variant is produced per *other*
    adjective in the list, with all occurrences of the found adjective
    replaced by the other one.

    Parameters
    ----------
    x : str
        Text to perturb.
    meta : bool, optional
        When truthy, also return the ``(original, replacement)`` pair that
        produced each variant.
    *args, **kwargs
        Accepted and ignored.

    Returns
    -------
    list of str, or (list of str, list of tuple)
        The variants, plus the parallel list of swap pairs when ``meta`` is
        truthy. Both lists are empty when no adjective is found.
    """
    pos_adjectives = ["good", "nice", "amazing", "perfect", "exciting", "great"]

    variants = []
    swaps = []
    for source in pos_adjectives:
        word_pattern = re.compile(r'\b%s\b' % source)
        if word_pattern.search(x) is None:
            continue
        for target in pos_adjectives:
            if target == source:
                continue
            variants.append(word_pattern.sub(target, x))
            swaps.append((source, target))

    return (variants, swaps) if meta else variants

In [6]:
# Pass the texts in `data` and the change_pos_adjectives perturbation to
# checklist's Perturb.perturb; the result is unpacked into INV(**ret, ...) in
# the next cell.
ret= Perturb.perturb(data, change_pos_adjectives)

In [7]:
# Build the invariance (INV) test from the perturbation result.
# NOTE(review): labels=1 presumably denotes the positive class -- confirm
# against the model's class order.
test1 = INV(**ret, labels=1)

In [8]:
# Run the positive-adjective invariance test against the wrapped predictor.
test1.run(wrapped_pp)

Predicting 121190 examples


In [9]:
# Print the number of test cases, the failure rate and example failures.
# NOTE(review): n=2 presumably caps how many failing examples are printed.
test1.summary(n=2)

Test cases:      17740
Fails (rate):    161 (0.9%)

Example fails:
0.4 Like that you can purchase just a single song instead of the whole album.  Already purchased another album which is good but when you only like one song this is the way to do it.
0.5 Like that you can purchase just a single song instead of the whole album.  Already purchased another album which is perfect but when you only like one song this is the way to do it.
0.5 Like that you can purchase just a single song instead of the whole album.  Already purchased another album which is amazing but when you only like one song this is the way to do it.

----
0.5 This album is amazing!!! The only track I didn't really care much for was "A Dream That Cannot Be" but the rest of the album is solid. I definitely recommend!
0.4 This album is exciting!!! The only track I didn't really care much for was "A Dream That Cannot Be" but the rest of the album is solid. I definitely recommend!
0.4 This album is good!!! The only track I di

# Invariance, change negative adjectives

In [10]:
# Load the training reviews. A forward-slash path is used because it resolves
# on Windows as well as on POSIX systems, whereas a hard-coded backslash
# separator only works on Windows.
df = json_to_df("data/music_reviews_train.json")

# Replace missing values with empty strings so every review text is a str
# (the regex-based perturbation function cannot handle NaN floats).
df = df.replace(np.nan, '', regex=True)

#df['reviewText'] = df.reviewText + ' ' + df.summary

# Keep only the text of reviews labelled "negative".
data = df.loc[df["sentiment"] == "negative", "reviewText"].tolist()

In [11]:
def change_neg_adjectives(x, meta=False, *args, **kwargs):
    """Create variants of ``x`` by swapping negative adjectives for each other.

    For every adjective in the fixed list that occurs in ``x`` as a whole word
    (case-sensitive ``\\b...\\b`` match), one variant is produced per *other*
    adjective in the list, with all occurrences of the found adjective
    replaced by the other one.

    Parameters
    ----------
    x : str
        Text to perturb.
    meta : bool, optional
        When truthy, also return the ``(original, replacement)`` pair that
        produced each variant.
    *args, **kwargs
        Accepted and ignored.

    Returns
    -------
    list of str, or (list of str, list of tuple)
        The variants, plus the parallel list of swap pairs when ``meta`` is
        truthy. Both lists are empty when no adjective is found.
    """
    neg_adjectives = ["bad","awful","abhorrent","annoying","poor","inferior","dreadful","nasty","terrible"]

    variants = []
    swaps = []
    for source in neg_adjectives:
        word_pattern = re.compile(r'\b%s\b' % source)
        if word_pattern.search(x) is None:
            continue
        replacements = [other for other in neg_adjectives if other != source]
        variants += [word_pattern.sub(other, x) for other in replacements]
        swaps += [(source, other) for other in replacements]

    if not meta:
        return variants
    return variants, swaps

In [12]:
# Pass the texts in `data` and the change_neg_adjectives perturbation to
# checklist's Perturb.perturb; the result is unpacked into INV(**ret, ...) in
# the next cell.
ret= Perturb.perturb(data, change_neg_adjectives)

In [13]:
# Build the invariance (INV) test from the perturbation result.
# NOTE(review): labels=0 presumably denotes the negative class -- confirm
# against the model's class order.
test2 = INV(**ret, labels=0)

In [14]:
# Run the negative-adjective invariance test against the wrapped predictor.
test2.run(wrapped_pp)

Predicting 55250 examples


In [15]:
# Print the number of test cases, the failure rate and example failures.
test2.summary()

Test cases:      5514
Fails (rate):    161 (2.9%)

Example fails:
0.6 This is a very bad take-off on Ravel et al. It is a modern adaptation with heavy electric bass and long chords. Do not buy this.
0.5 This is a very awful take-off on Ravel et al. It is a modern adaptation with heavy electric bass and long chords. Do not buy this.
0.5 This is a very terrible take-off on Ravel et al. It is a modern adaptation with heavy electric bass and long chords. Do not buy this.

----
0.6 My daughter picked this song.  It is so bad.  These guys need to go away, far away.  The apocalypse is near.
0.4 My daughter picked this song.  It is so awful.  These guys need to go away, far away.  The apocalypse is near.
0.4 My daughter picked this song.  It is so terrible.  These guys need to go away, far away.  The apocalypse is near.

----
0.3 Just God awful.
10/10
0.6 Just God bad.
10/10
0.6 Just God abhorrent.
10/10

----


# Invariance, changing media format

In [17]:
# Load the training reviews. A forward-slash path is used because it resolves
# on Windows as well as on POSIX systems, whereas a hard-coded backslash
# separator only works on Windows.
df = json_to_df("data/music_reviews_train.json")

# Replace missing values with empty strings so every review text is a str
# (the regex-based perturbation function cannot handle NaN floats).
df = df.replace(np.nan, '', regex=True)

#df['reviewText'] = df.reviewText + ' ' + df.summary

# Use the text of every review, regardless of its sentiment label.
data = df["reviewText"].tolist()

In [20]:
def change_media(x, meta=False, *args, **kwargs):
    """Create variants of ``x`` by swapping music-media nouns for each other.

    For every word in the fixed list that occurs in ``x`` as a whole word
    (case-sensitive ``\\b...\\b`` match), one variant is produced per *other*
    word in the list, with all occurrences of the found word replaced by the
    other one.

    Parameters
    ----------
    x : str
        Text to perturb.
    meta : bool, optional
        When truthy, also return the ``(original, replacement)`` pair that
        produced each variant.
    *args, **kwargs
        Accepted and ignored.

    Returns
    -------
    list of str, or (list of str, list of tuple)
        The variants, plus the parallel list of swap pairs when ``meta`` is
        truthy. Both lists are empty when no listed word is found.
    """
    media = ["album","cd","song","track","mixtape","cut","tune","melody","ditty","shanty","anthem","hymn","carol","ballad"]

    # Collect (variant, swap-pair) records first, then split them into the
    # two parallel lists that the caller expects.
    records = []
    for found in media:
        matcher = re.compile(r'\b%s\b' % found)
        if not matcher.search(x):
            continue
        records.extend(
            (matcher.sub(substitute, x), (found, substitute))
            for substitute in media
            if substitute != found
        )

    texts = [text for text, _ in records]
    if meta:
        return texts, [pair for _, pair in records]
    return texts

In [21]:
# Pass the texts in `data` and the change_media perturbation to checklist's
# Perturb.perturb; the result is unpacked into INV(**ret) in the next cell.
ret= Perturb.perturb(data, change_media)

In [22]:
# Build the invariance (INV) test from the perturbation result; no labels are
# passed for this test.
test3 = INV(**ret)

In [23]:
# Run the media-word invariance test against the wrapped predictor.
test3.run(wrapped_pp)

Predicting 810530 examples


In [24]:
# Print the number of test cases, the failure rate and example failures.
test3.summary()

Test cases:      47755
Fails (rate):    2929 (6.1%)

Example fails:
0.4 Keyshia Cole has one of the best voices in R&B today. Unfortunately, the subject matter on this album is limited to juvenile love-drama, mainly orchestrated around break-ups, cheating, and circumspect relationships. Lyrically, she comes across far younger than her 31 years, relying far too much on slang and occasional vulgarity. It's hard to get past the first song, "Enough of No Love" where featured guest Lil' Wayne raps tastelessly with over-the-top curse words. I experienced a difficult time listening to the album once, with no interest in listening to it again. Hopefully, she will present a mature album her next time around.
0.6 Keyshia Cole has one of the best voices in R&B today. Unfortunately, the subject matter on this ditty is limited to juvenile love-drama, mainly orchestrated around break-ups, cheating, and circumspect relationships. Lyrically, she comes across far younger than her 31 years, relying far 