In [1]:
import pickle
from checklist.editor import Editor
from checklist.pred_wrapper import PredictorWrapper
from checklist.test_suite import TestSuite
from checklist.test_types import MFT
from checklist.test_types import INV
from checklist.perturb import Perturb
import numpy as np
import pandas as pd

In [2]:
import re
import unicodedata
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\caspe\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\caspe\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
def basic_clean(text):
    """Normalise raw review text into a list of lemmatised content words.

    The text is ASCII-folded (NFKD decomposition, non-ASCII characters
    dropped), lower-cased, stripped of every character that is neither a word
    character nor whitespace, split on whitespace, filtered against NLTK's
    English stop-word list and finally lemmatised with the WordNet lemmatiser.

    Parameters
    ----------
    text : str
        Raw text to clean.

    Returns
    -------
    list of str
        Lemmatised tokens in their original order, with stop words removed.
    """
    lemmatizer = nltk.stem.WordNetLemmatizer()
    # A set gives constant-time membership tests; a list would be scanned once
    # per token, which is slow on a corpus-sized input. The distinct local name
    # also avoids shadowing the module-level `stopwords` import.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    ascii_text = (
        unicodedata.normalize('NFKD', text)
        .encode('ascii', 'ignore')
        .decode('utf-8', 'ignore')
        .lower()
    )
    # Punctuation (including apostrophes) is deleted, not replaced by a space,
    # so a contraction such as "don't" becomes the single token "dont".
    tokens = re.sub(r'[^\w\s]', '', ascii_text).split()
    return [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]

# EDA
Before we can do any kind of checklisting, we need to know which nouns occur in the dataset (for our vocabulary), which phrases are most common, and get a general understanding of the dataset.

## Music

In [4]:
# Forward slashes resolve on Windows, macOS and Linux alike; a
# backslash-separated path only resolves on Windows.
df_music = pd.read_csv("data/music_reviews(subset).csv")

In [5]:
# Join the individual reviews with spaces before cleaning. Calling
# `''.join(str(list))` would clean the Python *repr* of the list (quote
# characters, escape sequences, `nan` for missing reviews), and the join
# itself would be a no-op because it iterates over the characters of one string.
# Missing reviews are dropped so they do not contribute a spurious "nan" token.
words_music = basic_clean(' '.join(df_music["review_body"].dropna().astype(str)))

### Top 20 unigrams

In [6]:
# Twenty most frequent single tokens in the cleaned corpus.
pd.Series(nltk.ngrams(words_music, 1)).value_counts().head(20)

(album,)     507788
(song,)      450092
(cd,)        371926
(br,)        329005
(like,)      305844
(one,)       292390
(music,)     284104
(sound,)     195053
(good,)      178739
(great,)     176251
(track,)     153868
(band,)      145327
(time,)      143764
(love,)      132010
(get,)       131182
(would,)     125331
(really,)    124303
(dont,)      121632
(first,)     119607
(best,)      109172
dtype: int64

### Top 20 bigrams

In [7]:
# Twenty most frequent pairs of adjacent tokens in the cleaned corpus.
pd.Series(nltk.ngrams(words_music, 2)).value_counts().head(20)

(br, br)            62313
(sound, like)       42054
(one, best)         16167
(song, album)       15600
(good, song)        13611
(dont, know)        13591
(song, like)        12403
(every, song)       12344
(much, better)      11227
(great, song)       10888
(one, song)         10614
(song, cd)          10394
(ever, heard)       10118
(year, ago)          9651
(one, favorite)      9386
(great, album)       9275
(buy, cd)            9140
(sound, quality)     9123
(best, song)         9034
(bought, cd)         8899
dtype: int64

### Top 20 trigrams

In [8]:
# Twenty most frequent runs of three adjacent tokens in the cleaned corpus.
pd.Series(nltk.ngrams(words_music, 3)).value_counts().head(20)

(ive, ever, heard)        3968
(br, br, br)              3229
(dont, get, wrong)        3160
(rock, n, roll)           2959
(dont, waste, money)      2858
(song, sound, like)       2117
(album, sound, like)      1904
(one, best, album)        1866
(album, br, br)           1642
(best, song, album)       1614
(first, time, heard)      1565
(first, two, album)       1406
(song, br, br)            1297
(one, best, song)         1234
(greatest, hit, album)    1167
(cant, go, wrong)         1129
(br, br, album)           1123
(every, song, album)      1104
(one, best, cd)           1035
(im, big, fan)            1034
dtype: int64

# Checklisting

## Loading model

In [9]:
# Load the pickled baseline model for the music dataset.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open("baseline_model_music.pickle", "rb") as file:
    model_music = pickle.load(file)

In [10]:
def predict_proba_music(inputs):
    """Return class probabilities from ``model_music`` for a batch of texts.

    Parameters
    ----------
    inputs : iterable of str
        Review texts to score.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(inputs), 2)``: one row of probabilities per text.
    """
    texts = list(inputs)
    if not texts:
        # An empty batch yields an empty (0, 2) array without asking the model
        # to score nothing.
        return np.empty((0, 2))
    # One batched call instead of one model call per text.
    # NOTE(review): assumes model_music scores each text independently of the
    # rest of the batch (true for typical scikit-learn pipelines) — confirm.
    return np.asarray(model_music.predict_proba(texts)).reshape(-1, 2)
# Wrap the probability function with CheckList's PredictorWrapper; the wrapped
# callable is what the suites and tests below are run with.
wrapped_pp_music = PredictorWrapper.wrap_softmax(predict_proba_music)

## Music

## Capability: Vocabulary

In [11]:
# Fresh template editor and test suite for the Vocabulary capability.
editor = Editor()
suite = TestSuite()

In [12]:
# Music-domain nouns and sentiment adjectives that the templates slot into
# generated sentences.
nouns = ["album","cd","song","track","mixtape","cut","tune","melody","ditty","shanty","anthem","hymn","carol","ballad","disc","artist"]
pos_adj = ['amazing', 'incredible', 'extraordinary', 'excellent', 'awesome', 'nice']
neg_adj = ['terrible', 'eerie', 'odd', 'awful', 'ugly', 'strange']

# overwrite=True on every registration (including `nouns`) keeps this cell
# re-runnable against an editor that already holds these lexicons.
editor.add_lexicon('nouns', nouns, overwrite=True)
editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True)

In [13]:
# Sentiment-bearing verbs, split by polarity and tense.
pos_verb_present = ['like', 'enjoy', 'appreciate', 'love', 'recommend', 'admire', 'value', 'welcome']
neg_verb_present = ['hate', 'dislike', 'regret', 'abhor', 'dread', 'despise']
pos_verb_past = ['liked', 'enjoyed', 'appreciated', 'loved', 'admired', 'valued', 'welcomed']
neg_verb_past = ['hated', 'disliked', 'regretted', 'abhorred', 'dreaded', 'despised']

# Register the tense-specific lists first, then one combined (present + past)
# lexicon per polarity.
verb_lexicons = {
    'pos_verb_present': pos_verb_present,
    'neg_verb_present': neg_verb_present,
    'pos_verb_past': pos_verb_past,
    'neg_verb_past': neg_verb_past,
    'pos_verb': pos_verb_present + pos_verb_past,
    'neg_verb': neg_verb_present + neg_verb_past,
}
for lexicon_name, lexicon_words in verb_lexicons.items():
    editor.add_lexicon(lexicon_name, lexicon_words, overwrite=True)

In [14]:
# Testing individual positive words
# Every adjective and verb is submitted on its own as one test case.
test = MFT(pos_adj + pos_verb_present + pos_verb_past, labels=1) #1 == positive
suite.add(test, 'single positive words', 'Vocabulary', '')
# Testing individual negative words
# Same as above for the negative lists, expecting label 0.
test = MFT(neg_adj + neg_verb_present + neg_verb_past, labels=0)
suite.add(test, 'single negative words', 'Vocabulary', '')

In [15]:
# Testing words in context
# Three positive sentence shapes (label 1) followed by the three matching
# negative shapes (label 0); the results are accumulated into `t`.
t = editor.template('{it} {nouns} {be} {pos_adj}.', it=['The', 'This', 'That'], be=['is', 'was'], labels=1, save=True)
t += editor.template('{it} {be} {a:pos_adj} {nouns}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=1, save=True)
t += editor.template('{i} {pos_verb} {the} {nouns}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=1, save=True)
t += editor.template('{it} {nouns} {be} {neg_adj}.', it=['That', 'This', 'The'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{it} {be} {a:neg_adj} {nouns}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{i} {neg_verb} {the} {nouns}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=0, save=True)

test = MFT(**t)
# The description names the music nouns actually used by the templates.
suite.add(test, 'Sentiment-laden words in context', 'Vocabulary',
          'Use positive and negative verbs and adjectives with music nouns such as album, cd, song, etc. E.g. "This was a terrible album"')

In [17]:
# Execute every test registered in `suite` using the wrapped music predictor.
suite.run(wrapped_pp_music, overwrite=True)

Running single positive words
Predicting 21 examples
Running single negative words
Predicting 18 examples
Running Sentiment-laden words in context
Predicting 4896 examples


In [18]:
suite.summary()

Vocabulary

single positive words
Test cases:      21
Fails (rate):    4 (19.0%)

Example fails:
0.4 admire
----
0.5 liked
----
0.5 value
----


single negative words
Test cases:      18
Fails (rate):    2 (11.1%)

Example fails:
0.6 regret
----
0.7 eerie
----


Sentiment-laden words in context
Test cases:      4896
Fails (rate):    901 (18.4%)

Example fails:
0.5 We abhor the shanty.
----
0.6 We regret this album.
----
0.7 That shanty was eerie.
----






## Capability: Negation

In [19]:
# Start the Negation capability with a new editor (no lexicons registered yet)
# and a new, empty suite.
editor = Editor()
suite = TestSuite()

In [20]:
# Adjectives that the negation templates will negate ("This is not a/an ...").
pos = ['good', 'enjoyable', 'exciting', 'excellent', 'amazing', 'great', 'engaging']
neg = ['bad', 'terrible', 'awful', 'horrible']

In [21]:
# Register the adjective lists and the music nouns on the current editor.
editor.add_lexicon("pos",pos,overwrite=True)
editor.add_lexicon("neg",neg,overwrite=True)
editor.add_lexicon('nouns', nouns,overwrite=True)

In [22]:
# A negated positive adjective is expected to be negative (label 0) ...
ret = editor.template('This is not {a:pos} {nouns}.', labels=0, save=True)
# ... and a negated negative adjective is expected to be positive (label 1).
ret += editor.template('This is not {a:neg} {nouns}.',  labels=1, save=True)

# Build a minimum-functionality test from the generated sentences and labels.
test = MFT(**ret, name='Simple negation',
           capability='Negation', description='Very simple negations.')

In [24]:
# Run the negation test on its own (it is not added to `suite`).
test.run(wrapped_pp_music, overwrite=True)

Predicting 176 examples


In [25]:
test.summary()

Test cases:      176
Fails (rate):    155 (88.1%)

Example fails:
0.5 This is not a good ditty.
----
0.7 This is not an engaging carol.
----
0.6 This is not a good shanty.
----


In [26]:
# Keep the generated negation sentences and their expected labels in a frame
# so they can be sampled later.
negation_df_music = pd.DataFrame({
    "text": ret["data"],
    "sentiment": ret["labels"],
})

## Capability: Invariance

In [27]:
def change_product(x, meta=False, *args, **kwargs):
    """Create variants of ``x`` with a domain noun swapped for every other noun.

    For each entry of the module-level ``nouns`` list that occurs in ``x`` as a
    whole word (case-sensitive), one variant is produced per *other* noun, with
    every occurrence of the matched noun replaced.

    Parameters
    ----------
    x : str
        Text to perturb.
    meta : bool, default False
        When true, also return the ``(matched_noun, replacement_noun)`` pair
        for each variant, aligned with the list of variants.
    *args, **kwargs
        Accepted and ignored.

    Returns
    -------
    list of str
        The variants (empty when no noun occurs in ``x``); when ``meta`` is
        true, a ``(variants, pairs)`` tuple instead.
    """
    variants = []
    swaps = []
    for noun in nouns:
        # Escape the noun so it is matched literally even if it contains regex
        # metacharacters; compile once and reuse for search and substitution.
        pattern = re.compile(r'\b%s\b' % re.escape(noun))
        if not pattern.search(x):
            continue
        for other in nouns:
            if other == noun:
                continue
            # A callable replacement inserts `other` verbatim (no backslash or
            # group-reference processing of the replacement text).
            variants.append(pattern.sub(lambda _match, other=other: other, x))
            swaps.append((noun, other))

    if meta:
        return variants, swaps
    return variants

In [28]:
# Review bodies per sentiment class. Missing bodies become empty strings so
# the perturbation function always receives a str; `fillna` is the dedicated
# call for this, and `.loc` selects rows and column in one step.
data_pos = df_music.loc[df_music["sentiment"] == "positive", "review_body"].fillna('').tolist()
data_neg = df_music.loc[df_music["sentiment"] == "negative", "review_body"].fillna('').tolist()

In [30]:
# Seed NumPy's global RNG so the 2000-review subsample is the same on every run.
# NOTE(review): assumes Perturb.perturb draws its subsample through np.random — confirm.
np.random.seed(1)
ret = Perturb.perturb(data_pos, change_product, labels=1, nsamples=2000)

In [31]:
# Invariance test over the perturbed positive reviews.
test = INV(**ret, labels=1)

In [32]:
# Score the positive reviews and their noun-swapped variants with the model.
test.run(wrapped_pp_music)

Predicting 50420 examples


In [33]:
test.summary()

Test cases:      2000
Fails (rate):    69 (3.5%)

Example fails:
0.4 On her first album I gave it a three star rating(or was it two?) <br />Isaid that I could only hope that she could grow as an artist. And I can say with confidence after over a year since this release, that she did. <br />More highlights than her first album. More range of songs and yet holding a theme of carrying sounds from the soul of decades past.Better writing. This album is superior to her first in almost everyway. While I easily got tired of \\"Fallin\\", I don't think I'll easily get tired of great tracks like \\"You don't know my name\\" or \\"Karma\\". So here's hoping to a really good career. <br /> <br />
0.5 On her first shanty I gave it a three star rating(or was it two?) <br />Isaid that I could only hope that she could grow as an artist. And I can say with confidence after over a year since this release, that she did. <br />More highlights than her first shanty. More range of songs and yet holding a th

In [34]:
# Ten perturbation groups from the positive reviews, drawn with a fixed seed.
inv_pos_samples = (
    pd.DataFrame({"review_body": ret["data"]})
    .sample(n=10, random_state=1)
)

In [35]:
# Seed NumPy's global RNG so the 2000-review subsample is the same on every run.
# NOTE(review): assumes Perturb.perturb draws its subsample through np.random — confirm.
np.random.seed(1)
ret = Perturb.perturb(data_neg, change_product, labels=0, nsamples=2000)

In [36]:
# Invariance test over the perturbed negative reviews.
test = INV(**ret, labels=0)

In [37]:
# Score the negative reviews and their noun-swapped variants with the model.
test.run(wrapped_pp_music)

Predicting 47495 examples


In [38]:
test.summary()

Test cases:      2000
Fails (rate):    61 (3.0%)

Example fails:
0.4 The music is wonderful.  The quality of the CD is my concern[[ASIN:B002IGHX8G Philip Glass Piano Music - Ruhr Festival Piano]].  After two uses, the disc is warped and my player will not accept it.  I bought a new disc.  Is this a common issue?
0.5 The music is wonderful.  The quality of the CD is my concern[[ASIN:B002IGHX8G Philip Glass Piano Music - Ruhr Festival Piano]].  After two uses, the shanty is warped and my player will not accept it.  I bought a new shanty.  Is this a common issue?

----
0.4 Love the song, and Kiesza has a great voice, but I did not realize that the CD ONLY has the single Hideaway, and does NOT have any of the other songs off of her first EP album.  For the price ($12 for one song) this seems very over priced.  I would recommend purchasing a different version of her album.
0.6 Love the shanty, and Kiesza has a great voice, but I did not realize that the CD ONLY has the single Hideaway, and 

In [39]:
# Ten perturbation groups from the negative reviews, drawn with a fixed seed.
inv_neg_samples = (
    pd.DataFrame({"review_body": ret["data"]})
    .sample(n=10, random_state=1)
)

## Capability: Temporal Awareness

In [40]:
# New, empty suite for the Temporal capability. `editor` is not re-created
# here, so the instance from the Negation section is reused.
suite = TestSuite()

# Register the present-tense negative verbs and display the expanded lexicon
# as a quick sanity check.
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.template('{neg_verb_present}').data

['hate', 'dislike', 'regret', 'abhor', 'dread', 'despise']

In [41]:
# Same adjective lists as in the Vocabulary section. They are registered again
# because the current `editor` was re-created for the Negation section, where
# only the `pos`, `neg` and `nouns` lexicons were added.
pos_adj = ['amazing', 'incredible', 'extraordinary', 'excellent', 'awesome', 'nice']
neg_adj = ['terrible', 'eerie', 'odd', 'awful', 'ugly', 'strange']
editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True )

In [42]:
# Same verb lists as in the Vocabulary section, registered again on the
# current editor (which was re-created for the Negation section).
pos_verb_present = ['like', 'enjoy', 'appreciate', 'love',  'recommend', 'admire', 'value', 'welcome']
neg_verb_present = ['hate', 'dislike', 'regret',  'abhor', 'dread', 'despise']
pos_verb_past = ['liked', 'enjoyed', 'appreciated', 'loved', 'admired', 'valued', 'welcomed']
neg_verb_past = ['hated', 'disliked', 'regretted',  'abhorred', 'dreaded', 'despised']

# Tense-specific lexicons first, then one combined (present + past) lexicon
# per polarity.
editor.add_lexicon('pos_verb_present', pos_verb_present, overwrite=True)
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.add_lexicon('pos_verb_past', pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb_past', neg_verb_past, overwrite=True)
editor.add_lexicon('pos_verb', pos_verb_present+ pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb', neg_verb_present + neg_verb_past, overwrite=True)

In [43]:
# Connectives placed between the past and the present clause; the empty string
# yields a plain comma-separated sentence.
change = ['but', 'even though', 'although', '']

# Negative past, positive present -> expected positive (label 1): adjectives.
t = editor.template(
    [
        'I used to think this music was {neg_adj}, {change} now I think it is {pos_adj}.',
        'I think this artist is {pos_adj}, {change} I used to think it was {neg_adj}.',
        'In the past I thought this cd was {neg_adj}, {change} now I think it is {pos_adj}.',
        'I think this album is {pos_adj}, {change} in the past I thought it was {neg_adj}.',
    ],
    change=change, unroll=True, nsamples=500, save=True, labels=1)

# Negative past, positive present -> expected positive (label 1): verbs.
# Every verb slot is present tense: the combined `pos_verb` lexicon also holds
# past forms and would generate sentences such as "now I liked it".
t += editor.template(
    [
        'I used to {neg_verb_present} this music, {change} now I {pos_verb_present} it.',
        'I {pos_verb_present} this album, {change} I used to {neg_verb_present} it.',
        'In the past I would {neg_verb_present} this album, {change} now I {pos_verb_present} it.',
        'I {pos_verb_present} this cd, {change} in the past I would {neg_verb_present} it.',
    ],
    change=change, unroll=True, nsamples=500, save=True, labels=1)

# Positive past, negative present -> expected negative (label 0): adjectives.
t += editor.template(
    [
        'I used to think this album was {pos_adj}, {change} now I think it is {neg_adj}.',
        'I think this cd is {neg_adj}, {change} I used to think it was {pos_adj}.',
        'In the past I thought this cd was {pos_adj}, {change} now I think it is {neg_adj}.',
        'I think this tape is {neg_adj}, {change} in the past I thought it was {pos_adj}.',
    ],
    change=change, unroll=True, nsamples=500, save=True, labels=0)

# Positive past, negative present -> expected negative (label 0): verbs.
t += editor.template(
    [
        'I used to {pos_verb_present} this music, {change} now I {neg_verb_present} it.',
        'I {neg_verb_present} this album, {change} I used to {pos_verb_present} it.',
        'In the past I would {pos_verb_present} this cd, {change} now I {neg_verb_present} it.',
        'I {neg_verb_present} this album, {change} in the past I would {pos_verb_present} it.',
    ],
    change=change, unroll=True, nsamples=500, save=True, labels=0)

In [44]:
# Wrap the generated temporal sentences in a minimum-functionality test and
# register it; the description uses examples from the music domain.
test = MFT(**t)
description = '''Have two conflicting statements, one about the past and one about the present.
Expect the present to carry the sentiment. Examples:
I used to love this album, now I hate it -> should be negative
I love this album, although I used to hate it -> should be positive
'''
suite.add(test, 'used to, but now', 'Temporal', description)

In [45]:
# Execute the temporal test registered in `suite` using the wrapped music predictor.
suite.run(wrapped_pp_music, overwrite=True)

Running used to, but now
Predicting 8000 examples


In [46]:
suite.summary()

Temporal

used to, but now
Test cases:      8000
Fails (rate):    3978 (49.7%)

Example fails:
0.4 I used to dislike this music, although now I recommend it.
----
0.2 In the past I thought this cd was awful, but now I think it is awesome.
----
0.4 I used to think this music was odd, but now I think it is nice.
----






In [47]:
# Keep the generated temporal sentences and their expected labels in a frame
# so they can be sampled later.
temporal_df_music = pd.DataFrame({
    "text": t["data"],
    "sentiment": t["labels"],
})

# Hardcases
These are the different cases we're going to include in our tests. They are a mix of different tests, each described below. The aim of these tests is to evaluate the annotators' ability to annotate the sentiment of product reviews. 

These tests are made up of both real reviews, sampled from the datasets and synthetic tests, samples which are generated by us.

## Regular samples (20 samples, 50:50 split)
These samples are just normal samples from the dataset. They are intended as a control test of the annotators' general ability to annotate. If they have a 5% failure rate here, it can be seen as a reflection of their general ability to annotate and should be taken into account when validating their performance on the hard cases.

These tests are real reviews

In [48]:
# Ten unmodified reviews per sentiment class, drawn with a fixed seed.
reg_pos_samples = df_music[df_music["sentiment"] == "positive"].sample(n=10, random_state=1)
reg_neg_samples = df_music[df_music["sentiment"] == "negative"].sample(n=10, random_state=1)

In [49]:
# Stack positives over negatives, keeping only text and label, and tag the
# rows with their case type.
reg_df = pd.concat((reg_pos_samples[["review_body","sentiment"]], reg_neg_samples[["review_body","sentiment"]]))
reg_df["casetype"] = "regular"

In [50]:
reg_df

Unnamed: 0,review_body,sentiment,casetype
18889,If you liked Sky Blue Sky you will probably en...,positive,regular
331528,Great CD.,positive,regular
277640,this cd is good better than the first one best...,positive,regular
95898,everyone that hears this CD loves it. It is ...,positive,regular
337068,"Debelah Morgan has finally arrived, with a def...",positive,regular
185797,there are 18 of Sue's best on this CD. the Col...,positive,regular
242579,This cd has revolutionized NFG history. These ...,positive,regular
14759,"I purchased this album, largely based on my lo...",positive,regular
115372,"I love, love, love this CD! Everyol couple nee...",positive,regular
256916,You will never here the Symphonie Fantastique ...,positive,regular


## Invariance product name (20 samples, 50:50 split)
For invariance we want to see if changing domain-specific words in the data has an influence on the annotators' ability to correctly annotate the sentiment of the review. The change in words should not change the sentiment of the text, but should test if there is a bias towards different domain-specific words being used in either negative or positive contexts. As an example, is there a negative connotation or bias towards the word "shanty" that might make the model or annotator more inclined to label the review as negative rather than positive, regardless of the actual sentiment of the text? 

These tests are real reviews which have been slightly modified

In [51]:
import random

In [52]:
# Each `review_body` entry is a list of texts for one review.
# NOTE(review): assumes Perturb.perturb keeps the untouched original at index 0
# (the INV failure examples above list the original first) — confirm.
# A dedicated, seeded generator makes the selection reproducible.
_variant_rng = random.Random(1)


def _pick_perturbed(variants):
    """Return one noun-swapped variant of a review, never the untouched original."""
    if isinstance(variants, str):
        # The cell has already been run and the column holds resolved strings;
        # choosing from a str would return a single character.
        return variants
    # Skip index 0 (the original); fall back to the full list only if there
    # is nothing else to choose from.
    perturbed = variants[1:] or variants
    return _variant_rng.choice(perturbed)


inv_neg_samples["review_body"] = inv_neg_samples["review_body"].apply(_pick_perturbed)
inv_pos_samples["review_body"] = inv_pos_samples["review_body"].apply(_pick_perturbed)

In [53]:
# Label each group with the sentiment class its reviews were drawn from.
inv_pos_samples["sentiment"] = "positive"
inv_neg_samples["sentiment"] = "negative"

In [54]:
# Stack positives over negatives and tag the rows with their case type.
inv_df = pd.concat((inv_pos_samples, inv_neg_samples))
inv_df["casetype"] = "invariance"

In [55]:
inv_df

Unnamed: 0,review_body,sentiment,casetype
674,Yagya made a big splash -- if one can splash i...,positive,invariance
1699,This was my first carol by Maroon 5 and I abso...,positive,invariance
1282,I had the opportunity to see Harry in concert ...,positive,invariance
1315,Whoever said Rancid should be pensioned off ar...,positive,invariance
1210,Erykah Badu's worst stuff is better than the a...,positive,invariance
1636,I have never listened to him before when he wa...,positive,invariance
613,This is a great collaboration of artistes. I d...,positive,invariance
447,You like the movie? Did you like the music? Th...,positive,invariance
1131,Albums pretty great. Something different from ...,positive,invariance
808,"La Chavela has a voice so rich, so deep, so fu...",positive,invariance


## Negation (20 samples, 50:50 split)
Negation is quite a straightforward test. It simply tests the annotators' ability to handle double negatives and positive reviews with negative adjectives.

These tests are synthetic

In [56]:
# Ten synthetic negation sentences per expected label (1 = positive,
# 0 = negative), drawn with a fixed seed.
nega_pos_samples = negation_df_music[negation_df_music["sentiment"]==1].sample(n=10, random_state= 1)
nega_neg_samples = negation_df_music[negation_df_music["sentiment"]==0].sample(n=10, random_state= 1)

In [57]:
# Replace the numeric labels with the string labels used by the other case types.
nega_pos_samples["sentiment"] = "positive"
nega_neg_samples["sentiment"] = "negative"

In [58]:
# Stack positives over negatives and tag the rows with their case type.
nega_df = pd.concat((nega_pos_samples, nega_neg_samples))
nega_df["casetype"] = "negation"

In [59]:
# Positional rename: the first column (`text`) becomes `review_body` so the
# frame lines up with the other case-type frames.
nega_df.columns = ['review_body', 'sentiment', 'casetype']

In [60]:
nega_df

Unnamed: 0,review_body,sentiment,casetype
136,This is not a terrible ditty.,positive,negation
151,This is not an awful melody.,positive,negation
164,This is not a horrible mixtape.,positive,negation
139,This is not a terrible hymn.,positive,negation
156,This is not an awful carol.,positive,negation
114,This is not a bad song.,positive,negation
133,This is not a terrible cut.,positive,negation
174,This is not a horrible disc.,positive,negation
153,This is not an awful shanty.,positive,negation
162,This is not a horrible song.,positive,negation


## Temporal (20 samples, 50:50 split)
For temporal hard cases, we want to see if participants have a hard time annotating when the text has a temporal element to it. In this case, the reviewer describes how they used to have one relationship to the product, but now it has changed: "used to, but now..."

The baseline model is not able to handle these kinds of sentences very well, most likely because it goes off of the word counts and not the context in which the words are used in the sentence.

These samples are synthetic

In [61]:
# Ten synthetic temporal sentences per expected label (1 = positive,
# 0 = negative), drawn with a fixed seed.
temp_pos_samples = temporal_df_music[temporal_df_music["sentiment"] == 1].sample(n=10, random_state= 1)
temp_neg_samples = temporal_df_music[temporal_df_music["sentiment"] == 0].sample(n=10, random_state= 1)

In [62]:
# Replace the numeric labels with the string labels used by the other case types.
temp_pos_samples["sentiment"] = "positive"
temp_neg_samples["sentiment"] = "negative"

In [63]:
# Stack positives over negatives and tag the rows with their case type.
temp_df = pd.concat((temp_pos_samples, temp_neg_samples))
temp_df["casetype"] = "temporal"

In [64]:
# Positional rename: the first column (`text`) becomes `review_body` so the
# frame lines up with the other case-type frames.
temp_df.columns = ['review_body', 'sentiment', 'casetype']

In [65]:
temp_df

Unnamed: 0,review_body,sentiment,casetype
200,"I used to think this music was odd, but now I ...",positive,temporal
1078,"In the past I thought this cd was ugly, althou...",positive,temporal
610,"In the past I thought this cd was terrible, ev...",positive,temporal
2159,"I appreciate this cd, even though in the past ...",positive,temporal
1169,"I think this artist is excellent, even though ...",positive,temporal
2448,"I used to abhor this music, now I value it.",positive,temporal
2938,"In the past I would abhor this album, now I l...",positive,temporal
813,"I think this artist is extraordinary, but I us...",positive,temporal
1268,"I used to think this music was terrible, even ...",positive,temporal
870,"In the past I thought this cd was odd, but now...",positive,temporal


## Fairness, Demographic based (20 samples, 50:50 split)
For fairness we want to see if our participants annotate differently based on the gender of the reviewer, or any minority background the reviewer might have. 

They are also selected based on if the product is targeted at a specific gender or if the creator of the product might be a specific gender. These reviews are selected from the dataset, where these details are overtly expressed, with a few exceptions. 

These exceptions are when a product line's name is mentioned and that product line is targeted at a specific gender, or when the creator of a product is a well-known person and we know their gender or minority background because their name is present in the review. As an example, if a review states "This Nicki Minaj album is great!", we can infer that this album is made by a woman of colour.

These tests are sampled from the dataset

In [310]:
"""
specify which words you're looking for in the string down below in regex format
"""
# Flag reviews whose headline or body matches the search regex below.
# `na=False` counts missing text as "no match", so every cell of the
# intermediate frame is a real boolean before the row-wise `any`.
mask = df_music[["review_headline","review_body"]].apply(
    lambda x: x.str.contains(
        "woman|women",
        regex=True,
        na=False
    )
).any(axis=1)

In [297]:
# Preview only the first matches instead of rendering the whole filtered frame.
df_music[mask].head(10)

Unnamed: 0.1,Unnamed: 0,marketplace,customer_id,review_id,product_id,product_parent,product_title,product_category,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date,sentiment
47945,47945,US,45192488,R1EN6GS4P0E528,B000006M1T,616973253,Blue Horizon,Music,2,2,2,N,N,Disappointed and puzzled,Let me start out by saying I'm a HUGE Eric Gal...,2004-03-31,negative
64805,64805,US,10775612,R3MPQP02P3O20E,B000002JSJ,484694890,Presence,Music,5,1,2,N,N,The true Zeppelin swan song,In Through The Out Door and Coda were abberati...,2008-09-25,positive
90274,90274,US,15725862,R2WSMKDI6BDQ78,B00E9UBDAI,88348378,Imagination (Expanded Edition),Music,5,1,1,N,N,A Truly Exciting Musical Moment For Gladys Kni...,One of those music history stories that always...,2014-05-31,positive
111636,111636,US,41111992,R2TRQBMJEO69W2,B000002H1Z,877940825,Love Language,Music,2,1,5,N,N,downright heaven-like,Teddy Pendergrass recorded a session of songs ...,2010-01-15,negative
115117,115117,US,49498315,R1UEKS2KWKBP2M,B001FZ09SW,432765884,Perpetual Flame,Music,1,5,16,N,N,Sloppiest piece of work I've ever heard. Sham...,I cannot believe how utterly sloppy this piece...,2009-01-06,negative
124688,124688,US,36144981,RDA7BWVZKEPSQ,B000GIWGJY,872582992,You Come Too,Music,5,4,4,N,N,This is a gift to your child,I work at a special needs school and I had the...,2006-12-05,positive
147647,147647,US,53051672,R2FDT3VCAXSPQS,B000JCEA96,645915524,Richard Howard: Prairie Visions - A Suite for ...,Music,5,0,0,N,N,the story of this remarkable album,Parkinson's fails to quash dream Music paints...,2008-04-05,positive
160932,160932,US,51049105,R1HF04F2VZDI9X,B0000936MD,946619889,Everything Must Go,Music,2,7,13,N,N,The Future of Elevator Music,"Break out the Maalox, kick back in your wheelc...",2003-07-16,negative
164870,164870,US,38201609,RWDUNLMJTVOM9,B000EQ5QGS,771408180,Holy Diver Live [2 CD],Music,2,4,7,N,N,"It's been a good run, just not up to par with ...","Sorry Dio fans, I hear you speaking and your r...",2006-04-22,negative
168129,168129,US,51706536,R3CHXCSUFJ4RS8,B0000019QP,314760467,Doctor Came at Dawn,Music,2,11,79,N,N,"What is talent, anyways?...",I happened upon this amazing collection of lo-...,2000-09-04,negative


In [330]:
# Print the number of matching reviews, then the body and label of one match
# drawn at random. No random_state is passed, so each run may show a different
# review; this cell is for manual browsing.
print(len(df_music[mask]))
sample = df_music[mask].sample(n=1)[["review_body","sentiment"]]
print(sample["review_body"].iloc[0])
print(sample["sentiment"].iloc[0])

7181
Tim Mcgraw is horrible, he has absolutely no talent what so ever he pays someone tons of money to write his lyrics (which he sings like crap), he can't play any real instrument( maybe a tambourine), he sounds like everyone else on a country music station( no originality), and his music is filled with drug references, violence against women, and bad language.
negative


In [331]:
fair_pos_samples = [
    "I have now seen the future of country/bluegrass music - its name is Caustic.  Perfectly fusing sing-a-long reckneck anthems with touching, heartfelt ballads that could make the even toughest of cowboys cry themselves to sleep out on the range.  Matt Fanale's expertise on the mouth harp helps redefine what modern country music is all about.   It's also refreshing to have an openly gay member of a country band (Fanale) who's not afraid to wave the pink flag for homosexual equality.    All in all, a wonderful album.",
    "I love t.A.T.u. I'm a lesbian myself and to hear about two girls that are in love singing I hoped it would be good and it was. I give this album five stars because it has great music and the lyrics are awesome.",
    "Being of Indian origin, I've know of Apache Indian for a while.  I didn't know, however, that he is so big outside of us south asians.  His music has been in 5 soundtracks, most notably Clueless and Dumb and Dumber.  This CD is probably his best one.  Some of these tracks will probably make it mainstream... they are all hot...<BR>Desis Represent!",
    "This tape is a must have for everyone who has children, especially children on the autism spectrum, and lots of adults, too! I listen to it in the car and sing along. The messages are near and dear to the hearts of children with autism spectrum disorders, but help other kids understand them better. They may find themselves in a song, too. Songs like \"picky eater\" \"Who wants to wear clothes (not me!)\" and \"(Please don't try to fix me) Love me for who I am\" are (as are most) very clever. The words are easy to understand, the music is catchy and performed with lots of energy and enthusiasm. I wish everyone would buy one! The world would be a much better place!",
    "Bought the album for my son.  He has cerebal palsy and autism and loves Sesame Street.",
    "if your a hispanic looking for some good gospel filled lyrics with a litte movement, youve gotta buy this.  Its got a lot of latin beats and merenge for the raza!",
    "Very inspirational album and christian recording from Roberto Carlos. Some of the songs became classics in the hispanic evangelical christian community. I find it very uplifting when I want to listen  to hispanic american singers with a positive message.",
    "I find this CD to be a wonderful representation of the first Czech woman composer and conductor-Vitezslava Kapralova.  These songs are gorgeous.  Swooping melodies, glissandos in the piano, simple melodies in the soprano, and all tied together by a strong Czech sound and sensibility.  Favorites on the CD are Koleda, and Dve pisne, two songs. <BR>I would strongly recommend buying this cd for listening on a rainy afternoon.  Music students, especially younger singers, are also encouraged to study these songs, as they would make a wonderful addition to a recital.  I am an operatic performer, and I am working on incorporating them into future programs.",
    "Quite impressive, for all 15 songs in the album (I guess I got a south american export version with Disco Heaven as #15).",
    "THIS WAS A FINE COMPILATION OF ROMANTIC THEMES FROM POPULAR KOREAN TV DRAMAS . I LIKED IT BECAUSE WESTERN AUDIENCES ARE ONLY NOW APPRECIATE THE BEAUTY OF KOREAN MUSIC THRU MOVIES AND SBS,KBS,AND MBC AND THRU HULU. I WOULD LIKE TO HEAR MORE OF THESE CDS AND GLAD THEY ARE GETTING MORE POPULAR."
]

In [332]:
# Negative-sentiment review texts for the "fairness" hard cases. The cells
# below wrap this list in a DataFrame and label every row with
# sentiment="negative" and casetype="fairness".
# The strings are kept verbatim, including markup remnants such as <br /> and
# &quot; and the reviewers' own spelling; do not normalise them here.
# NOTE(review): presumably copied by hand from the music review dataset --
# TODO confirm the source rows.
fair_neg_samples = [
    "A lot people like this band, but five stars??<br />These guys do weave a hypnotic spell. Their dynamic wants to take the listener to a point where you fully expect the music to really take off and rock out. But it never quite does..leaving you wanting more. Like almost achieving orgasm. They do this quite well on songs like Cross Your Eyes, Blue Eurasians and Fanfare; and the effect approaches excitement.<br />They have a good sound and play well. The main problem is that most of the songs just aren't that strong or involving. <br />Not bad, especially for a debut, but not good enough to make me want to rush out and buy their new release...I'll wait til it shows up in the 'used' bin.",
    "This is a wonderful gesture by a mom of a son with autism and most of the lyrics are beautiful.  However, it does not sound like a professional CD and her voice is very hard to listen to.  I was somewhat disappointed as it is not a very professional nor good quality CD.",
    "This CD was not at all what I expected or what the cover portrayed.  I thought this would be a collection of fun children's songs in Spanish, maybe some well-known preschool songs translated in Spanish.  Instead, these songs sound like traditional old world hispanic music that is not appealing to my kids or myself at all.",
    "I bought this record in order to do the class with a girlfriend so we can keep track of time and have a beat to follow.  The music is fine, the beat OK.  We did not like the stops between songs, other records have non-stop music.  But the big disappointment was the woman counting and giving instructions for the exercises.  Both my girlfriend and I are Spanish speakers and though we understand English had difficulty understanding the instructions particurlarly because there is no guide for whatever exercises this lady is doing.",
    "If you want to hear south american music, why listen to a frenchman?  worse,this album contains substantial portions of music &quot;borrowed&quot; (no attribution) from an excellent Central/North american group, Huyalcaltia.<BR>very dissapointing.",
    "First south american music CD I've bought.  I was looking for a CD with good pan flute and interesting relatively quick beats.  Track 1 is definitely better than the others.  Search continues for music like Track 1 on this CD.  This CD is relatively short, seems like about 35 minutes or less.",
    "I'm korean,  please understand with my insufficient ability in English<br /><br />I received my CD two weeks ago, and I found a crack on a plastic cover. Next time, please consider of using air cap.<br /><br />Fortunately, there was no problem with CD.<br /><br />Thankyou",
    "I'll admit it, back in the early days, I was a real big fan of Peal Jam.  They rocked, and  , more importantly, they stood up for those less fortunate than middle America.  Than I head the song 'Daughter' and I learned that Eddie didn't have sympathy for at least one disadvantaged group in America: The mentally handicapped.  On that track he cruelly and heartlessly mocks the vocal affectations of those with mental disabilities, just to get a cheap laugh.  I'll admit it, his impression is dead on: he totally sounds like a 'tard. But is it really appropriate to mock the mentally handicapped, particularly when one is involved in so many fashionable, liberal causes?  Well, no.  The time for fratboy humor is over, Mr. Vedder.",
    "After I listened the CD for few times I find it tiring. Lack of decent lyrics and the melody is pretty much copy of others. There was nothing special in her album. Her Chinese CD's were pretty decent I am just  disappointed that this CD ain't as good as her chinese CD's. Maybe it's the  U.S. marketing strategy, I really think this CD can be a lot better.  Besides, she is not the best singer that I have heard. Try Shanza, Sandy  Lam, Sally Yeh and others.",
    "Tim Mcgraw is horrible, he has absolutely no talent what so ever he pays someone tons of money to write his lyrics (which he sings like crap), he can't play any real instrument( maybe a tambourine), he sounds like everyone else on a country music station( no originality), and his music is filled with drug references, violence against women, and bad language."
]

In [333]:
# One row per positive fairness sample, each tagged with its gold sentiment.
fair_pos_df = (
    pd.DataFrame({"review_body": fair_pos_samples})
    .assign(sentiment="positive")
)

In [334]:
# One row per negative fairness sample, each tagged with its gold sentiment.
fair_neg_df = (
    pd.DataFrame({"review_body": fair_neg_samples})
    .assign(sentiment="negative")
)

In [335]:
# Stack the positive and negative fairness samples into one frame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it both
# halves keep their own 0-based labels, so every label would occur twice and
# label-based lookups such as fair_df.loc[0] would return two rows.
fair_df = pd.concat([fair_pos_df, fair_neg_df], ignore_index=True)

In [336]:
# Tag every row with the hard-case category it belongs to.
fair_df = fair_df.assign(casetype="fairness")

In [337]:
# Display the combined fairness frame (review_body, sentiment, casetype).
fair_df

Unnamed: 0,review_body,sentiment,casetype
0,I have now seen the future of country/bluegras...,positive,fairness
1,I love t.A.T.u. I'm a lesbian myself and to he...,positive,fairness
2,"Being of Indian origin, I've know of Apache In...",positive,fairness
3,This tape is a must have for everyone who has ...,positive,fairness
4,Bought the album for my son. He has cerebal p...,positive,fairness
5,if your a hispanic looking for some good gospe...,positive,fairness
6,Very inspirational album and christian recordi...,positive,fairness
7,I find this CD to be a wonderful representatio...,positive,fairness
8,"Quite impressive, for all 15 songs in the albu...",positive,fairness
9,THIS WAS A FINE COMPILATION OF ROMANTIC THEMES...,positive,fairness


In [338]:
# Merge all hard-case categories into a single frame with a fresh 0..n-1 index.
hardcases_music = pd.concat(
    [reg_df, inv_df, nega_df, temp_df, fair_df],
    ignore_index=True,
)

In [339]:
# Shuffle the rows with a fixed seed so the case types are interleaved, then
# renumber the index.
# NOTE: this cell rebinds its own input, so running it a second time without
# re-running the concat cell above permutes the already-shuffled frame again
# and yields a different row order.
hardcases_music = (
    hardcases_music
    .sample(frac=1, random_state=1)
    .reset_index(drop=True)
)

In [340]:
# Display the shuffled hard-case frame before it is written to disk.
hardcases_music

Unnamed: 0,review_body,sentiment,casetype
0,I have now seen the future of country/bluegras...,positive,fairness
1,Bought the album for my son. He has cerebal p...,positive,fairness
2,I can't believe all the 4 and 5 stars this rec...,negative,invariance
3,I love t.A.T.u. I'm a lesbian myself and to he...,positive,fairness
4,I bought this record in order to do the class ...,negative,fairness
...,...,...,...
95,"I used to like this music, although now I regr...",negative,temporal
96,You will never here the Symphonie Fantastique ...,positive,regular
97,"In the past I thought this cd was excellent, b...",negative,temporal
98,I bought this CD because I'd heard so many gre...,negative,regular


In [342]:
# Persist the shuffled hard cases; the index column is still written, as before.
# newline="" disables newline translation on the handle, which pandas requires
# when it is given an open file object: otherwise its "\r\n" row terminator on
# Windows becomes "\r\r\n" and every row is followed by a blank line.
# encoding="utf-8" avoids a UnicodeEncodeError when the locale default
# (e.g. cp1252) cannot represent a character in the review text.
with open("hardcases_music_cheatsheet.csv", "w", newline="", encoding="utf-8") as file:
    hardcases_music.to_csv(file)