In [1]:
import pickle
from checklist.editor import Editor
from checklist.pred_wrapper import PredictorWrapper
from checklist.test_suite import TestSuite
from checklist.test_types import MFT
from checklist.test_types import INV
from checklist.perturb import Perturb
import numpy as np
import pandas as pd

In [2]:
import re
import unicodedata
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\caspe\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\caspe\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
def basic_clean(text):
    wnl = nltk.stem.WordNetLemmatizer()
    stopwords = nltk.corpus.stopwords.words('english')
    text = (unicodedata.normalize('NFKD', text)
        .encode('ascii', 'ignore')
        .decode('utf-8', 'ignore')
        .lower())
    words = re.sub(r'[^\w\s]', '', text).split()
    return [wnl.lemmatize(word) for word in words if word not in stopwords]

# EDA
Before we can do any kind of checklisting, we need to know what kind of nouns we have in the dataset for our vocabulary, as well as which sentences are most common and get a general understanding of the dataset

## Sports

In [4]:
df_sports = pd.read_csv("data\\sports_reviews(subset).csv")

In [5]:
words_sports = basic_clean(''.join(str(df_sports["review_body"].tolist())))

### Top 20 unigrams

In [6]:
(pd.Series(nltk.ngrams(words_sports, 1)).value_counts())[:20]

(one,)        219282
(great,)      189620
(would,)      182692
(like,)       162036
(good,)       160369
(product,)    153989
(get,)        143740
(br,)         142202
(use,)        136076
(work,)       133889
(fit,)        131198
(time,)       124648
(well,)       121507
(quality,)     97865
(dont,)        97056
(bought,)      90236
(back,)        86339
(really,)      84986
(even,)        83490
(buy,)         81763
dtype: int64

### Top 20 bigrams

In [7]:
(pd.Series(nltk.ngrams(words_sports, 2)).value_counts())[:20]

(would, recommend)     21061
(work, great)          17450
(well, made)           14909
(waste, money)         14791
(look, like)           14694
(work, well)           14155
(good, quality)        14089
(great, product)       13690
(year, old)            13561
(first, time)          12342
(customer, service)    11262
(highly, recommend)     9971
(feel, like)            8934
(dont, know)            8708
(much, better)          8572
(look, great)           8427
(dont, buy)             7668
(great, price)          7522
(heart, rate)           7489
(fit, well)             7378
dtype: int64

### Top 20 trigrams

In [8]:
(pd.Series(nltk.ngrams(words_sports, 3)).value_counts())[:20]

(dont, waste, money)              4972
(would, recommend, anyone)        3028
(would, recommend, product)       2568
(first, time, used)               2444
(would, highly, recommend)        2298
(heart, rate, monitor)            2283
(year, old, son)                  1950
(would, definitely, recommend)    1810
(last, long, time)                1554
(cant, go, wrong)                 1418
(buy, another, one)               1396
(broke, first, time)              1383
(get, job, done)                  1382
(easy, put, together)             1381
(waste, time, money)              1321
(dont, waste, time)               1249
(give, 5, star)                   1192
(10, year, old)                   1190
(well, worth, money)              1180
(worth, every, penny)             1126
dtype: int64

# Checklisting

## Loading model

In [9]:
with open("baseline_model_sports.pickle", "rb") as file:
    model_sports = pickle.load(file)

In [10]:
def predict_proba_sports(inputs):
    p = np.array([model_sports.predict_proba([x]) for x in inputs]).reshape(-1, 2)
    
    return p
wrapped_pp_sports = PredictorWrapper.wrap_softmax(predict_proba_sports)

In [11]:
df_sports

Unnamed: 0.1,Unnamed: 0,marketplace,customer_id,review_id,product_id,product_parent,product_title,product_category,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date,sentiment
0,0,US,50252441,R6TAXL600Q7HM,B003BYKJH6,477541264,Organized Fishing Regular Sized Rubber Replace...,Sports,2,0,0,N,Y,Two Stars,Rubber is too soft and will not hold rods secu...,2014-10-12,negative
1,1,US,33797792,R2I8433XLIGLC4,B0055DNHLQ,265512225,MLB Boston Red Sox Carl Crawford Signature Ser...,Sports,1,1,1,N,N,they should've showed me the back,I assumed the back just had a number or was pl...,2014-06-19,negative
2,2,US,948790,RXAODS7YI7F5F,B00MG1VQ4A,30042226,Nike Men's Mercurial Victory IV FG Soccer Shoe,Sports,5,0,0,N,Y,Five Stars,Amazing,2015-06-07,positive
3,3,US,13064212,R22UO05PUKW6R6,B001AGP3FK,490903426,"Men's Nike Feather Light Cap, Black",Sports,2,2,3,N,Y,fit too large for average male,the hat is humongous unless you got watermelon...,2014-05-05,negative
4,4,US,28760389,R3KBFYWQ1KJ7A4,B00162PWI6,234218806,Lee Precision 308 Win Loader,Sports,5,1,1,N,Y,Love the classics,"If you know Lee products, and you just want to...",2014-04-26,positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
888547,888547,US,49397927,R10ZA0US8S23ZH,B00RPZJ7HU,886866209,SNIPER® Rear Sight with Elevation Adjustable P...,Sports,5,0,0,N,Y,Nice,Good quality piece. Fit my gun good and the si...,2015-08-14,positive
888548,888548,US,29803250,R9MZ4RTFXJR9V,B001RMUOKG,567842073,Barnett Outdoors Junior Archery 28-Inch Arrows...,Sports,1,0,0,N,Y,Terrible,"Really bad, even for cheap youth bows.",2015-06-09,negative
888549,888549,US,48027112,R8M5RI5Z2HFBS,B00030A4AS,58599184,Mustad 3366 Classic Sproat Large Ring Hook (10...,Sports,1,0,5,N,Y,small.,small. too small,2015-02-08,negative
888550,888550,US,26343792,RQZ0KYJFR13UD,B001O0D6QA,104661074,Attwood 9065-1 Battery Box Standard,Sports,5,0,0,N,Y,Its a box,Ordered this battery box because the previous ...,2012-12-11,positive


## Sports

## Capability: Vocabulary

In [12]:
editor = Editor()
suite = TestSuite()

In [45]:
nouns = ['cap','shoe','ball','bow',"golf club", "tracksuit","scope"]
editor.add_lexicon('nouns', nouns, overwrite=True)
pos_adj = ['amazing', 'incredible', 'extraordinary', 'excellent', 'awesome', 'nice']
neg_adj = ['terrible', 'eerie', 'odd', 'awful', 'ugly', 'strange']
editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True )

In [14]:
pos_verb_present = ['like', 'enjoy', 'appreciate', 'love',  'recommend', 'admire', 'value', 'welcome']
neg_verb_present = ['hate', 'dislike', 'regret',  'abhor', 'dread', 'despise']
pos_verb_past = ['liked', 'enjoyed', 'appreciated', 'loved', 'admired', 'valued', 'welcomed']
neg_verb_past = ['hated', 'disliked', 'regretted',  'abhorred', 'dreaded', 'despised']

editor.add_lexicon('pos_verb_present', pos_verb_present, overwrite=True)
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.add_lexicon('pos_verb_past', pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb_past', neg_verb_past, overwrite=True)
editor.add_lexicon('pos_verb', pos_verb_present+ pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb', neg_verb_present + neg_verb_past, overwrite=True)

In [15]:
# Testing individual positive words
test = MFT(pos_adj + pos_verb_present + pos_verb_past, labels=1) #1 == positive
suite.add(test, 'single positive words', 'Vocabulary', '')
# Testing individual negative words
test = MFT(neg_adj + neg_verb_present + neg_verb_past, labels=0)
suite.add(test, 'single negative words', 'Vocabulary', '')

In [16]:
# Testing words in context
t = editor.template('{it} {nouns} {be} {pos_adj}.', it=['The', 'This', 'That'], be=['is', 'was'], labels=1, save=True)
t += editor.template('{it} {be} {a:pos_adj} {nouns}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=1, save=True)
t += editor.template('{i} {pos_verb} {the} {nouns}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=1, save=True)
t += editor.template('{it} {nouns} {be} {neg_adj}.', it=['That', 'This', 'The'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{it} {be} {a:neg_adj} {nouns}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{i} {neg_verb} {the} {nouns}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=0, save=True)

test = MFT(**t)
suite.add(test, 'Sentiment-laden words in context', 'Vocabulary', 
          'Use positive and negative verbs and adjectives with beauty product nouns such as product, cream, wig, etc. E.g. "This was a bad cream"')

In [17]:
suite.run(wrapped_pp_sports, overwrite=True)

Running single positive words
Predicting 21 examples
Running single negative words
Predicting 18 examples
Running Sentiment-laden words in context
Predicting 1836 examples


In [18]:
suite.summary()

Vocabulary

single positive words
Test cases:      21
Fails (rate):    0 (0.0%)


single negative words
Test cases:      18
Fails (rate):    4 (22.2%)

Example fails:
0.6 dread
----
0.6 despised
----
0.6 eerie
----


Sentiment-laden words in context
Test cases:      1836
Fails (rate):    265 (14.4%)

Example fails:
0.5 I like this ball.
----
0.5 We like the shoe.
----
0.6 That was an eerie shoe.
----






## Capability: Negation

In [19]:
editor = Editor()
suite = TestSuite()

In [20]:
pos = ['good', 'enjoyable', 'exciting', 'excellent', 'amazing', 'great', 'engaging']
neg = ['bad', 'terrible', 'awful', 'horrible']

In [21]:
editor.add_lexicon("pos",pos,overwrite=True)
editor.add_lexicon("neg",neg,overwrite=True)
editor.add_lexicon('nouns', nouns,overwrite=True)

In [22]:
ret = editor.template('This is not {a:pos} {nouns}.', labels=0, save=True)
ret += editor.template('This is not {a:neg} {nouns}.',  labels=1, save=True)

test = MFT(**ret, name='Simple negation',
           capability='Negation', description='Very simple negations.')

In [24]:
test.run(wrapped_pp_sports, overwrite=True)

Predicting 66 examples


In [25]:
test.summary()

Test cases:      66
Fails (rate):    56 (84.8%)

Example fails:
0.1 This is not a horrible ball.
----
0.1 This is not a horrible cap.
----
0.1 This is not a horrible bow.
----


In [26]:
negation_df_sports = pd.DataFrame({"text":dict(ret)["data"],"sentiment":dict(ret)["labels"]})

## Capability: Invariance

In [27]:
def change_product(x,meta=False, *args, **kwargs):
    product = nouns
    ret = []
    ret_meta = []
    for p in product:
        if re.search(r'\b%s\b' % p, x):
            ret.extend([re.sub(r'\b%s\b' % p, p2, x) for p2 in product if p != p2])
            ret_meta.extend([(p, p2) for p2 in product if p != p2])
            
    if meta:
        return ret, ret_meta
    else:
        return ret

In [28]:
data_pos = list(df_sports[df_sports["sentiment"] =="positive"]["review_body"].replace(np.nan, '', regex=True))
data_neg = list(df_sports[df_sports["sentiment"] =="negative"]["review_body"].replace(np.nan, '', regex=True))

In [29]:
ret = Perturb.perturb(data_pos, change_product, labels=1, nsamples=2000)

In [30]:
test = INV(**ret, labels=1)

In [33]:
test.run(wrapped_pp_sports)

Predicting 12120 examples


In [34]:
test.summary()

Test cases:      2000
Fails (rate):    39 (1.9%)

Example fails:
0.4 This tee helps to solve the two biggest swing problems Ive seen in youth baseball.  It forces them to stay inside the ball (no casting) and come down to the ball (no upper cut).  This is a line drive machine.
0.5 This tee helps to solve the two biggest swing problems Ive seen in youth baseball.  It forces them to stay inside the golf club (no casting) and come down to the golf club (no upper cut).  This is a line drive machine.

----
0.4 I don't know why people complain about the pump included with this ball.  Using the pump I had the ball inflated within 10 to 15 minutes at most.  The ball is great.  I am using it for exercises and as a chair.  I liked it so well that I bought another for work.
0.6 I don't know why people complain about the pump included with this golf club.  Using the pump I had the golf club inflated within 10 to 15 minutes at most.  The golf club is great.  I am using it for exercises and as a cha

In [35]:
inv_pos_samples = pd.DataFrame({"review_body":dict(ret)["data"]}).sample(n=10,random_state=1)

In [36]:
ret = Perturb.perturb(data_neg, change_product, labels=0, nsamples=2000)

In [37]:
test = INV(**ret, labels=0)

In [38]:
test.run(wrapped_pp_sports)

Predicting 12090 examples


In [39]:
test.summary()

Test cases:      2000
Fails (rate):    45 (2.2%)

Example fails:
0.5 Don't plan on playing any real soccer with this ball.  This is without a doubt a low impact practice ball.  If you want a real ball, add about $10 and for $30 you can get a Select Numero 10 or a Brine Voracity.
0.6 Don't plan on playing any real soccer with this golf club.  This is without a doubt a low impact practice golf club.  If you want a real golf club, add about $10 and for $30 you can get a Select Numero 10 or a Brine Voracity.

----
0.5 Was not happy with the ball.  The colors are nice but the ball when compared to other size 5 balls is obviously smaller.
0.6 Was not happy with the golf club.  The colors are nice but the golf club when compared to other size 5 balls is obviously smaller.

----
0.4 Item arrived there was no exercise ball or air pump
0.5 Item arrived there was no exercise golf club or air pump

----


In [40]:
inv_neg_samples = pd.DataFrame({"review_body":dict(ret)["data"]}).sample(n=10,random_state=1)

## Capability: Temporal Awareness

In [41]:
suite = TestSuite()

editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.template('{neg_verb_present}').data

['hate', 'dislike', 'regret', 'abhor', 'dread', 'despise']

In [48]:
nouns = ['cap','shoe','ball','bow',"golf club", "tracksuit","scope"]
editor.add_lexicon('nouns', nouns, overwrite=True)
pos_adj = ['amazing', 'incredible', 'extraordinary', 'excellent', 'awesome', 'nice']
neg_adj = ['terrible', 'eerie', 'odd', 'awful', 'ugly', 'strange']
editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True )

In [47]:
pos_verb_present = ['like', 'enjoy', 'appreciate', 'love',  'recommend', 'admire', 'value', 'welcome']
neg_verb_present = ['hate', 'dislike', 'regret',  'abhor', 'dread', 'despise']
pos_verb_past = ['liked', 'enjoyed', 'appreciated', 'loved', 'admired', 'valued', 'welcomed']
neg_verb_past = ['hated', 'disliked', 'regretted',  'abhorred', 'dreaded', 'despised']

editor.add_lexicon('pos_verb_present', pos_verb_present, overwrite=True)
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.add_lexicon('pos_verb_past', pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb_past', neg_verb_past, overwrite=True)
editor.add_lexicon('pos_verb', pos_verb_present+ pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb', neg_verb_present + neg_verb_past, overwrite=True)

In [49]:
change = ['but', 'even though', 'although', '']
t = editor.template(['I used to think this bow was {neg_adj}, {change} now I think it is {pos_adj}.',
                                 'I think this product is {pos_adj}, {change} I used to think it was {neg_adj}.',
                                 'In the past I thought this cap was {neg_adj}, {change} now I think it is {pos_adj}.',
                                 'I think this golf club is {pos_adj}, {change} in the past I thought it was {neg_adj}.',
                                ] ,
                                 change=change, unroll=True, nsamples=500, save=True, labels=1)
t += editor.template(['I used to {neg_verb_present} this shoe, {change} now I {pos_verb_present} it.',
                                 'I {pos_verb_present} this product, {change} I used to {neg_verb_present} it.',
                                 'In the past I would {neg_verb_present} this tracksuit, {change} now I {pos_verb} it.',
                                 'I {pos_verb_present} this bow, {change} in the past I would {neg_verb_present} it.',
                                ] ,
                                change=change, unroll=True, nsamples=500, save=True, labels=1)
t += editor.template(['I used to think this product was {pos_adj}, {change} now I think it is {neg_adj}.',
                                 'I think this shoe is {neg_adj}, {change} I used to think it was {pos_adj}.',
                                 'In the past I thought this ball was {pos_adj}, {change} now I think it is {neg_adj}.',
                                 'I think this scope is {neg_adj}, {change} in the past I thought it was {pos_adj}.',
                                ] ,
                                 change=change, unroll=True, nsamples=500, save=True, labels=0)
t += editor.template(['I used to {pos_verb_present} this product, {change} now I {neg_verb_present} it.',
                                 'I {neg_verb_present} this golf club, {change} I used to {pos_verb_present} it.',
                                 'In the past I would {pos_verb_present} this bag, {change} now I {neg_verb_present} it.',
                                 'I {neg_verb_present} this bow, {change} in the past I would {pos_verb_present} it.',
                                ] ,
                                change=change, unroll=True, nsamples=500, save=True, labels=0)

In [50]:
test = MFT(**t)
description = '''Have two conflicing statements, one about the past and one about the present.
Expect the present to carry the sentiment. Examples:
I used to love this airline, now I hate it -> should be negative
I love this airline, although I used to hate it -> should be positive
'''
suite.add(test, 'used to, but now', 'Temporal', description)

In [52]:
suite.run(wrapped_pp_sports, overwrite=True)

Running used to, but now
Predicting 8000 examples


In [53]:
suite.summary()

Temporal

used to, but now
Test cases:      8000
Fails (rate):    4208 (52.6%)

Example fails:
0.4 In the past I would dislike this tracksuit,  now I recommend it.
----
0.6 I dislike this golf club,  I used to like it.
----
0.6 In the past I would value this bag, even though now I dislike it.
----






In [54]:
temporal_df_sports = pd.DataFrame({"text":dict(t)["data"],"sentiment":dict(t)["labels"]})

# Hardcases
These are the different cases we're going to include in our tests. These are a mix of different tests, each test descriped below. The aim of these tests is to evaluate the annotators ability to annotate the sentiment of product reviews. 

These tests are made up of both real reviews, sampled from the datasets and synthetic tests, samples which are generated by us.

## Regular samples (20 samples, 50:50 split)
These samples are just normal samples from the dataset. These are intended to be a control test of the annotators generel ability to annotate. If they have a 5% failure rate here, it can be seen as a reflection of their general ability to annotate and should be taken into account when validating their performance on the hard cases.

These tests are real reviews

In [55]:
reg_pos_samples = df_sports[df_sports["sentiment"] == "positive"].sample(n=10, random_state=1)
reg_neg_samples = df_sports[df_sports["sentiment"] == "negative"].sample(n=10, random_state=1)

In [56]:
reg_df = pd.concat((reg_pos_samples[["review_body","sentiment"]], reg_neg_samples[["review_body","sentiment"]]))
reg_df["casetype"] = "regular"

In [57]:
reg_df

Unnamed: 0,review_body,sentiment,casetype
269706,"Works as advertised, mounts perfectly, looks g...",positive,regular
704037,I bought 2! One for the wife and one for myse...,positive,regular
474952,"Of all my aerobic dvds I rotate through, I alw...",positive,regular
569828,Look and act just like real cops cuffs making ...,positive,regular
104643,Thanks I am very happy with the unit and don`t...,positive,regular
238152,"it is OK,a little spend y.Haven't really used ...",positive,regular
285830,"I love this shirt! Nice fabric, good feel, go...",positive,regular
477262,"What I expected, looks and feels great. Plenty...",positive,regular
625542,Grandmaster from the UNITED STATES EXPERT KARA...,positive,regular
477555,very well made knife,positive,regular


## Invariance product name (20 samples, 50:50 split)
For invariance we want to see if changing domain specific words in the data has an influence on the annotators ability to correnctly annotate sentiment of the review. The change in words should not change the sentiment of the text, but should test if there is a bias towards different domain specific words being used in either negative or positive contexts. As an example, is there a negative connotation or bias towards the word "wig" that might make the model or annotator more inclined to put the review as negative than positive, regardless of the actual sentiment of the text. 

These tests are real reviews which have been slightly modified

In [58]:
import random

In [59]:
inv_neg_samples["review_body"] = inv_neg_samples["review_body"].apply(lambda x: random.choice(x))
inv_pos_samples["review_body"] = inv_pos_samples["review_body"].apply(lambda x: random.choice(x))

In [60]:
inv_pos_samples["sentiment"] = "positive"
inv_neg_samples["sentiment"] = "negative"

In [61]:
inv_df = pd.concat((inv_pos_samples, inv_neg_samples))
inv_df["casetype"] = "invariance"

In [62]:
inv_df

Unnamed: 0,review_body,sentiment,casetype
674,There are a few insulated water bottles out th...,positive,invariance
1699,The title says it all. It's a nice looking ba...,positive,invariance
1282,My tracksuit came with a fur rest and a faux l...,positive,invariance
1315,Perfect for the youth Genesis ball. Nice colo...,positive,invariance
1210,Wish it was more bouncy but still a great shoe,positive,invariance
1636,This scope has been a big help to use sighting...,positive,invariance
613,My son has been looking for a ball and arrow f...,positive,invariance
447,I went to the local golf shop to test out driv...,positive,invariance
1131,These are great items. My sons ball laces were...,positive,invariance
808,I purchased this to help myself with practice....,positive,invariance


## Negation (20 samples, 50:50 split)
Negation is quite a straight forward test. It simply tests the annotators ability to handle double negatives and positive reviews with negative adjectives

These tests are synthetic

In [63]:
nega_pos_samples = negation_df_sports[negation_df_sports["sentiment"]==1].sample(n=10, random_state= 1)
nega_neg_samples = negation_df_sports[negation_df_sports["sentiment"]==0].sample(n=10, random_state= 1)

In [64]:
nega_pos_samples["sentiment"] = "positive"
nega_neg_samples["sentiment"] = "negative"

In [65]:
nega_df = pd.concat((nega_pos_samples, nega_neg_samples))
nega_df["casetype"] = "negation"

In [66]:
nega_df.columns = ['review_body', 'sentiment', 'casetype']

In [67]:
nega_df

Unnamed: 0,review_body,sentiment,casetype
55,This is not an awful shoe.,positive,negation
60,This is not a horrible cap.,positive,negation
45,This is not a bad bow.,positive,negation
56,This is not an awful ball.,positive,negation
62,This is not a horrible ball.,positive,negation
59,This is not an awful tracksuit.,positive,negation
52,This is not a terrible golf club.,positive,negation
46,This is not a bad golf club.,positive,negation
44,This is not a bad ball.,positive,negation
61,This is not a horrible shoe.,positive,negation


## Temporal (20 samples, 50:50 split)
For temporal hard cases, we want to see if participents have a hard time annotating if text has a temporal element to it. In this case, the reviewer describes how they used to have a relationship to the product, but now it's changed "used to, but now..."

The baseline model is not able to handle these kinds of sentences very well, most likely because it goes off of the word counts and not the context of which they are used in the sentence.

These samples are synthetic

In [68]:
temp_pos_samples = temporal_df_sports[temporal_df_sports["sentiment"] == 1].sample(n=10, random_state= 1)
temp_neg_samples = temporal_df_sports[temporal_df_sports["sentiment"] == 0].sample(n=10, random_state= 1)

In [69]:
temp_pos_samples["sentiment"] = "positive"
temp_neg_samples["sentiment"] = "negative"

In [70]:
temp_df = pd.concat((temp_pos_samples, temp_neg_samples))
temp_df["casetype"] = "temporal"

In [71]:
temp_df.columns = ['review_body', 'sentiment', 'casetype']

In [72]:
temp_df

Unnamed: 0,review_body,sentiment,casetype
200,"I used to think this bow was odd, now I think...",positive,temporal
1078,"In the past I thought this cap was odd, even t...",positive,temporal
610,"In the past I thought this cap was eerie, even...",positive,temporal
2159,"I admire this bow, although in the past I woul...",positive,temporal
1169,"I think this product is incredible, even thoug...",positive,temporal
2448,"I used to despise this shoe, although now I va...",positive,temporal
2938,"In the past I would dislike this tracksuit, bu...",positive,temporal
813,"I think this product is nice, even though I us...",positive,temporal
1268,"I used to think this bow was terrible, althoug...",positive,temporal
870,"In the past I thought this cap was ugly, now ...",positive,temporal


## Fairness, Demographic based (20 samples, 50:50 split)
For fairness we want to see if our participents annotate differently based on the gender of the reviewer, or any minority background the reviewer might have. 

They are also selected based on if the product is targeted at a specific gender or if the creator of the product might be a specific gender. These reviews are selected from the dataset, where these details are overtly expressed, with a few exceptions. 

These exceptions are when a product line's name is mentioned and that product line is targeted at a specific gender or if a creator of a product is a well known person and we know their gender or minority background based on their name being present in the review. As an example, if a review states "This Nicki Minaj album is great!", we can infere that this album is made by a woman of colour.

These tests are sampled from the dataset

In [183]:
"""
specify which words you're looking for in the string down below in regex format
"""
mask = df_sports[["review_headline","review_body"]].apply(
    lambda x: x.str.contains(
        "autism",
        regex=True
    )
).any(axis=1)

In [167]:
df_sports[mask]

Unnamed: 0.1,Unnamed: 0,marketplace,customer_id,review_id,product_id,product_parent,product_title,product_category,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date,sentiment
12573,12573,US,27384562,R1LWA3Z2AOOU32,B002SRRFYC,853415729,Maxam Mesh Army Green Hammock,Sports,1,1,11,N,Y,ha,dont buy this it hold upto..<br /><br />------...,2013-01-15,negative
28312,28312,US,6881633,R3OPAPIQ9MTGF3,B00N3IEKCK,81660830,Mitchell and Ness NBA Chicago Bulls Custom Sna...,Sports,1,0,1,N,N,poop,look at it. its gay as aids.,2015-01-07,negative
64671,64671,US,50101831,RKTV1A4Q5995T,B001OQ5ONC,167443939,iFit Jillian Michaels Weight Loss Program Level 2,Sports,1,0,6,N,N,No I will not own a NordicTrac or workout on one.,This iFit 2 did never work either for my exerc...,2015-03-03,negative
72542,72542,US,11762464,R1UNRONI8T6WFA,B000YZTXBS,596431623,NFL Dart Cabinet Set w/ Steel Tip Bristle Dart...,Sports,1,2,3,N,Y,"Box was trashed when it arrived, hinges uneven...","Box was trashed when it arrived, hinges uneven...",2015-04-17,negative
100079,100079,US,46697276,R3G31PXTCYAUPS,B002TUSJIO,408199061,UTG Airsoft UHC 1911 Pistol,Sports,5,0,0,N,Y,Ridiculously good gun,This gun is fantastic all around. I've had pro...,2013-09-04,positive
104090,104090,US,10083929,R1W09CBWOH0BG1,B005GQ7FP6,153304452,Bleach Hitsugaya Hyourinmaru Zanpakuto Sword 1...,Sports,5,0,1,N,Y,AMAZINGGG!!!,Amazing! I ordered this sword because I am a h...,2013-11-14,positive
113650,113650,US,21031239,R26VEH57FDX9VC,B004FGUZOG,431664037,Rock Sauce Topical Pain Reliever for use with ...,Sports,1,0,0,N,N,Big rip off,I was very disappointed when I used Rock Sauce...,2012-08-21,negative
138432,138432,US,17231562,RV862H0GBG2A6,B001F4S0Y8,491771472,Ace Martial Arts Supply Single Sword Display S...,Sports,2,0,0,N,Y,You get what you pay for,"First off, the picture is very misleading. The...",2013-07-07,negative
151416,151416,US,52425231,R4PT7O5YRH1LN,B004FEOUNA,522068993,Bullseye Dragon Throwing Target,Sports,1,2,3,N,N,Punishing made in china bs,I came to this page from a google search for a...,2011-08-29,negative
154994,154994,US,13353534,R2Z7NU3A8TR4VW,B000PSQWOK,228644467,BBTac M4 M16 Replica Airsoft Gun M83 A2 Electr...,Sports,1,2,4,N,N,"Pretty bad, first gun material",its pretty bad in everything: range accuracy...,2011-04-12,negative


In [186]:
print(len(df_sports[mask]))
sample = df_sports[mask].sample(n=1)[["review_body","sentiment"]]
print(sample["review_body"].iloc[0])
print(sample["sentiment"].iloc[0])

30
Nice yoga ball... But it was a lot smaller than the description reads... It is for my son who has autism.. He loves to bounce..
negative


In [187]:
fair_pos_samples = [
    "Great quality and Momma even likes to nail it sometimes to relieve stress. :) My son is on the autism spectrum and a psychologist recommended this to help him with learning visual distance with regards to playing with balls/sports in the future - visual therapy type stuff. He is 4 and it seems to be helping. My husband and I were very impressed because the price is so reasonable (esp. when compared to the overpriced special needs toys). My neuro-typical daughter loves it too! I'm telling everyone about it and I'm giving one to my 9 year old nephew for Christmas for sure! He needs to get a little aggression out in a healthy way. Punch or kick it!",
    "Great for hand-push wheelchair users, weight lifting, holding hand-weights while cardio, bike riding. Stitching will wear faster with mesh when hand-breaking the w.chair, however. But they provide enough give in the thumbs so as the user does not feel discomfort when pushing. For cardio workout, the back-strap will leave a black mark on the back of your hand, not a problem, just noticeable!",
    "I'm in a wheelchair and have tennis elbow from gripping the tire and wheels so often. During basketball season it really flares up especially when driving. I've tried many others but these work the best and don't irritate or pinch my arm..I would recommend.",
    "much better than the plastic nose clips.  Kinda works on my asian nose.",
    "My lesbian child loves them!!",
    "I got these for my handicapped daughter she plays with them all the time & loves them she reads braille really good & these were perfect<br />Thank You",
    "What I love about this blanket is that it keeps you warm without overheating you. I bought this for my bed-bound handicapped son. He overheats easily and this blanket keeps him at the perfect temperature on cool nights. It's very soft, too. As for the &#34;Cardinals&#34;... well, I've lived in Arizona for 28 years...what can I say? ",
    "Normally i just use bags to shoot on, but i wanted something a little more stable.  I am not disappointed with this bipod.  It was super easy to install; and I'm mechanically handicapped.  (installed on the sling swivel) It also has a place for the sling swivel it replaces, so you don't have to go without a sling.  Oh yeah and it looks cool also!",
    "good light weight blocks, sturdy enough to take full body weight for a small-ish woman in hand balances.  arrived very fast, no strange factory smell (believe me, this is a bonus)",
    "I wore this one for a few hours, nice and comfy. Weirdly designed to get on but works fine! I ended up getting a size to big because I was afraid it would be too small. I would suggest if you are a full C and above going XL or bigger. I'll write a review for the large when it comes in. Both I still like the XL I got, it still fits nice but i'm curious how well the large would fit. Very soft to touch, the design is weird and a little difficult but still works out.<br /><br />I am using this because i'm transgender ftm, after I wear it for a while I will right some more reviews to let others who are also ftm know if this is good or not."
]

In [188]:
fair_neg_samples = [
    "This is a great idea for kids with sensory issues. However this product is poorly made. The seams ripped immediately and in my opinion the fabric isn't stretchy enough. I have two boys with autism and the size small didn't even fit my 3 year old, who is very thin. I will not be ordering products made by this company.",
    "I purchased these gloves as protection for my hands while rolling in my wheelchair. Much to my chagrin, they failed to last even a week. Three holes are already visible in my right glove. Even at this price point, the durability of the gloves is unacceptably subpar.",
    "We thought it worked well before we hit the water... It fit neither my nose nor my boyfriend's. I have a flatter asian nose and he has a predominant Caucasian one, and it worked on neither. He's had experiences with nose clips but could not make this one work without it hurting on me. I have resorted to using swim masks now.",
    "I'm<br />Not sure whose designing this caps but they are definitely not designing them<br />For people of color. The damn thing is useless. Would be nice if you actually spoke to someone of color to help design a hat to fit our heads.",
    "dreadful fabric...and I am gay! This might be good for a nite out on the town. Sleezy town. Yikes! word.",
    "stupid.  if you miss the ball you have to get up and down constantly!  not for handicapped or elders.",
    "Quality is great but it looks more like a mans hat than a woman's.  The bill on this hat is way too big.  If you're guy- I totally recommend it.  If you're a gal- look for something else.  I will probably only wear it when my other hats are in the laundry.",
    "This is a mans hoodie with a man size chart so be ware. If you are a woman size down, way down",
    "Fits fine for width and large chest but was not made correctly to fit any woman it's super super short and without the other strap it doesn't stay up at all.",
    "Nice yoga ball... But it was a lot smaller than the description reads... It is for my son who has autism.. He loves to bounce.."
]

In [189]:
fair_pos_df = pd.DataFrame({"review_body":fair_pos_samples})
fair_pos_df["sentiment"] = "positive"

In [190]:
fair_neg_df = pd.DataFrame({"review_body":fair_neg_samples})
fair_neg_df["sentiment"] = "negative"

In [191]:
fair_df = pd.concat((fair_pos_df,fair_neg_df))

In [192]:
fair_df["casetype"] = "fairness"

In [193]:
fair_df

Unnamed: 0,review_body,sentiment,casetype
0,Great quality and Momma even likes to nail it ...,positive,fairness
1,"Great for hand-push wheelchair users, weight l...",positive,fairness
2,I'm in a wheelchair and have tennis elbow from...,positive,fairness
3,much better than the plastic nose clips. Kind...,positive,fairness
4,My lesbian child loves them!!,positive,fairness
5,I got these for my handicapped daughter she pl...,positive,fairness
6,What I love about this blanket is that it keep...,positive,fairness
7,"Normally i just use bags to shoot on, but i wa...",positive,fairness
8,"good light weight blocks, sturdy enough to tak...",positive,fairness
9,"I wore this one for a few hours, nice and comf...",positive,fairness


## Compiling hardcases

In [194]:
hardcases_sports = pd.concat((reg_df,inv_df,nega_df,temp_df,fair_df)).reset_index(drop=True)

In [195]:
hardcases_sports = hardcases_sports.sample(frac=1,random_state=1).reset_index(drop=True)

In [196]:
hardcases_sports

Unnamed: 0,review_body,sentiment,casetype
0,Great quality and Momma even likes to nail it ...,positive,fairness
1,My lesbian child loves them!!,positive,fairness
2,I was very excited to give this golf club as a...,negative,invariance
3,"Great for hand-push wheelchair users, weight l...",positive,fairness
4,I'm<br />Not sure whose designing this caps bu...,negative,fairness
...,...,...,...
95,"I used to like this product, even though now I...",negative,temporal
96,very well made knife,positive,regular
97,"In the past I thought this ball was awesome, e...",negative,temporal
98,Received these in the factory package (unopene...,negative,regular


In [198]:
with open("hardcases_sports_cheatsheet.csv", "w") as file:
    hardcases_sports.to_csv(file)