In [46]:
#Importing 
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import nltk  
from nltk.sentiment import vader
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import spacy
# Shared VADER analyzer; run_vader() calls its polarity_scores() method.
vader_model = SentimentIntensityAnalyzer()
# Shared spaCy pipeline; run_vader() uses it for sentence splitting, lemmas and POS tags.
# NOTE(review): the 'en' shortcut name is only resolvable on spaCy 2.x installs;
# spaCy 3 expects a full package name such as 'en_core_web_sm' - confirm the installed version.
nlp = spacy.load('en')

In [47]:
# Load the dataset from the working directory; later cells read its
# "comment", "parent_comment" and "label" columns.
data = pd.read_csv(filepath_or_buffer='train-balanced-sarcasm.csv')

In [49]:
# Remove rows whose "comment" value is missing, because later cells pass each
# comment string straight to run_vader().
# `empty_comments` holds the index labels of the removed rows; `data` is rebound
# to the filtered frame rather than mutated with inplace=True, so the cell
# reads as a single explicit transformation and stays safe to re-run.
empty_comments = data.loc[data["comment"].isna()].index
data = data.drop(index=empty_comments)

In [51]:
def run_vader(textual_unit,
              lemmatize=False,
              parts_of_speech_to_consider=set(),
              verbose=0):
    """
    Score a piece of text with VADER after tokenizing it with spaCy.

    The text is parsed with the module-level ``nlp`` pipeline. Its tokens are
    collected sentence by sentence (by looping over ``doc.sents``), optionally
    replaced by their lemmas and optionally filtered by part of speech, then
    joined with single spaces and passed to ``vader_model.polarity_scores``.

    :param str textual_unit: a textual unit, e.g., sentence, sentences (one string)
    :param bool lemmatize: If True, provide lemmas to VADER instead of words
    (a lemma equal to '-PRON-' is replaced by the original token text)
    :param set parts_of_speech_to_consider:
    -empty set -> all parts of speech are provided
    -non-empty set: only tokens whose ``pos_`` is in the set are considered
    (the default set is only read, never modified)
    :param int verbose: if >= 1, information is printed
    about input and output

    :rtype: dict
    :return: vader output dict
    """
    doc = nlp(textual_unit)

    input_to_vader = []

    for sent in doc.sents:
        for token in sent:

            # Skip tokens outside the requested parts of speech (an empty
            # selection means "keep everything").
            if parts_of_speech_to_consider and token.pos_ not in parts_of_speech_to_consider:
                continue

            to_add = token.text

            # Use the lemma when requested, except for the '-PRON-'
            # placeholder lemma, where the surface form is kept instead.
            if lemmatize and token.lemma_ != '-PRON-':
                to_add = token.lemma_

            input_to_vader.append(to_add)

    scores = vader_model.polarity_scores(' '.join(input_to_vader))

    if verbose >= 1:
        print()
        # Print the whole input rather than the loop variable `sent`, which
        # only held the last sentence and was undefined for sentence-less input.
        print('INPUT SENTENCE', textual_unit)
        print('INPUT TO VADER', input_to_vader)
        print('VADER OUTPUT', scores)

    return scores

In [52]:
def vader_output_to_label(vader_output):
    """
    Turn a VADER result dict into a coarse sentiment label.

    Only the 'compound' entry is inspected, e.g. for
    {'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.4215}
    the compound value 0.4215 decides the label:
    a) compound > 0.0 -> 'positive'
    b) compound == 0.0 -> 'neutral'
    c) compound < 0.0 -> 'negative'

    :param dict vader_output: output dict from vader (must contain 'compound')

    :rtype: str
    :return: 'negative' | 'neutral' | 'positive'
    (None if the compound value satisfies none of the comparisons, e.g. NaN)
    """
    score = vader_output['compound']

    if score > 0.0:
        return 'positive'
    if score < 0.0:
        return 'negative'
    if score == 0.0:
        return 'neutral'

### **Sentiment analysis of sarcastic comments**

#### Parent comments:

In [54]:
# First five parent comments of the sarcastic rows (label == 1).
# run_vader() is called once per comment and its result reused for both the
# label and the raw score dict; the original parsed every comment twice with
# identical arguments.
for comment in data.loc[data["label"] == 1, "parent_comment"].iloc[:5]:
    scores = run_vader(comment, lemmatize=False)
    print(comment, "=>", vader_output_to_label(scores))
    print(scores)
    print()

The dumb thing is, they are risking their seller account, too. => negative
{'neg': 0.384, 'neu': 0.616, 'pos': 0.0, 'compound': -0.6808}

Clinton campaign accuses FBI of 'blatant double standard' => negative
{'neg': 0.255, 'neu': 0.745, 'pos': 0.0, 'compound': -0.34}

Anyone else think that it was interesting the way she mentioned that she had a tape recorder in her hand while he was assaulting her... => positive
{'neg': 0.106, 'neu': 0.705, 'pos': 0.189, 'compound': 0.3818}

Here's what happens when Obama gives up his Twitter account => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Monday night Drinking thread Brought to You by Infuriating capitalization Inconsistencies => negative
{'neg': 0.254, 'neu': 0.746, 'pos': 0.0, 'compound': -0.5267}



#### comments:

In [55]:
# Sentiment analysis of the first five sarcastic comments (label == 1).
# run_vader() is called once per comment and its result reused for both the
# label and the raw score dict; the original parsed every comment twice with
# identical arguments.
for comment in data.loc[data["label"] == 1, "comment"].iloc[:5]:
    scores = run_vader(comment, lemmatize=False)
    print(comment, "=>", vader_output_to_label(scores))
    print(scores)
    print()

But they'll have all those reviews! => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

wow it is totally unreasonable to assume that the agency that covered up Bush war crimes because "muh republican party" would be partisan as fuck => negative
{'neg': 0.212, 'neu': 0.602, 'pos': 0.186, 'compound': -0.2263}

Ho ho ho... But Melania said that there is no way it could have happened because she didn't know the woman! => negative
{'neg': 0.102, 'neu': 0.898, 'pos': 0.0, 'compound': -0.3595}

I can't wait until @potus starts a twitter war against Morning Joe. => negative
{'neg': 0.281, 'neu': 0.719, 'pos': 0.0, 'compound': -0.5994}

gotta love the teachers who give exams on the day after halloween => positive
{'neg': 0.0, 'neu': 0.741, 'pos': 0.259, 'compound': 0.6369}



In [56]:
# First five sarcastic comments (label == 1): the label is computed from the
# original tokens, the printed score dict from the lemmatized tokens.
for comment in data.loc[data["label"] == 1, "comment"].iloc[:5]:
    surface_label = vader_output_to_label(run_vader(comment, lemmatize=False))
    print(comment, "=>", surface_label)
    lemma_scores = run_vader(comment, lemmatize=True)
    print(lemma_scores)
    print()

But they'll have all those reviews! => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

wow it is totally unreasonable to assume that the agency that covered up Bush war crimes because "muh republican party" would be partisan as fuck => negative
{'neg': 0.291, 'neu': 0.535, 'pos': 0.174, 'compound': -0.6597}

Ho ho ho... But Melania said that there is no way it could have happened because she didn't know the woman! => negative
{'neg': 0.123, 'neu': 0.877, 'pos': 0.0, 'compound': -0.4753}

I can't wait until @potus starts a twitter war against Morning Joe. => negative
{'neg': 0.281, 'neu': 0.719, 'pos': 0.0, 'compound': -0.5994}

gotta love the teachers who give exams on the day after halloween => positive
{'neg': 0.0, 'neu': 0.741, 'pos': 0.259, 'compound': 0.6369}



In [57]:
# First five sarcastic comments (label == 1): the label is computed from the
# full text, the printed score dict only from tokens whose POS tag is NOUN.
for comment in data.loc[data["label"] == 1, "comment"].iloc[:5]:
    full_text_label = vader_output_to_label(run_vader(comment, lemmatize=False))
    print(comment, "=>", full_text_label)
    noun_scores = run_vader(comment, lemmatize=False, parts_of_speech_to_consider={'NOUN'})
    print(noun_scores)
    print()

But they'll have all those reviews! => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

wow it is totally unreasonable to assume that the agency that covered up Bush war crimes because "muh republican party" would be partisan as fuck => negative
{'neg': 0.787, 'neu': 0.213, 'pos': 0.0, 'compound': -0.8126}

Ho ho ho... But Melania said that there is no way it could have happened because she didn't know the woman! => negative
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

I can't wait until @potus starts a twitter war against Morning Joe. => negative
{'neg': 1.0, 'neu': 0.0, 'pos': 0.0, 'compound': -0.5994}

gotta love the teachers who give exams on the day after halloween => positive
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}



In [58]:
# First five sarcastic comments (label == 1): the label is computed from the
# full text, the printed score dict only from tokens whose POS tag is ADJ.
for comment in data.loc[data["label"] == 1, "comment"].iloc[:5]:
    full_text_label = vader_output_to_label(run_vader(comment, lemmatize=False))
    print(comment, "=>", full_text_label)
    adjective_scores = run_vader(comment, lemmatize=False, parts_of_speech_to_consider={'ADJ'})
    print(adjective_scores)
    print()

But they'll have all those reviews! => neutral
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}

wow it is totally unreasonable to assume that the agency that covered up Bush war crimes because "muh republican party" would be partisan as fuck => negative
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Ho ho ho... But Melania said that there is no way it could have happened because she didn't know the woman! => negative
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}

I can't wait until @potus starts a twitter war against Morning Joe. => negative
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

gotta love the teachers who give exams on the day after halloween => positive
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}



In [59]:
# First five sarcastic comments (label == 1): the label is computed from the
# full text, the printed score dict only from tokens whose POS tag is VERB.
for comment in data.loc[data["label"] == 1, "comment"].iloc[:5]:
    full_text_label = vader_output_to_label(run_vader(comment, lemmatize=False))
    print(comment, "=>", full_text_label)
    verb_scores = run_vader(comment, lemmatize=False, parts_of_speech_to_consider={'VERB'})
    print(verb_scores)
    print()

But they'll have all those reviews! => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

wow it is totally unreasonable to assume that the agency that covered up Bush war crimes because "muh republican party" would be partisan as fuck => negative
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Ho ho ho... But Melania said that there is no way it could have happened because she didn't know the woman! => negative
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

I can't wait until @potus starts a twitter war against Morning Joe. => negative
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

gotta love the teachers who give exams on the day after halloween => positive
{'neg': 0.0, 'neu': 0.323, 'pos': 0.677, 'compound': 0.6369}



### **Sentiment analysis of non-sarcastic comments**

#### parent comments:

In [62]:
# First five parent comments of the non-sarcastic rows (label == 0).
# run_vader() is called once per comment and its result reused for both the
# label and the raw score dict; the original parsed every comment twice with
# identical arguments.
for comment in data.loc[data["label"] == 0, "parent_comment"].iloc[:5]:
    scores = run_vader(comment, lemmatize=False)
    print(comment, "=>", vader_output_to_label(scores))
    print(scores)
    print()

Yeah, I get that argument. At this point, I'd prefer is she lived in NC as well. => positive
{'neg': 0.126, 'neu': 0.657, 'pos': 0.217, 'compound': 0.2023}

The blazers and Mavericks (The wests 5 and 6 seed) did not even carry a good enough record to make the playoffs in the east last year. => negative
{'neg': 0.095, 'neu': 0.905, 'pos': 0.0, 'compound': -0.3412}

They're favored to win. => positive
{'neg': 0.0, 'neu': 0.312, 'pos': 0.688, 'compound': 0.765}

deadass don't kill my buzz => positive
{'neg': 0.0, 'neu': 0.572, 'pos': 0.428, 'compound': 0.5773}

Yep can confirm I saw the tool they use for that. It was made by our boy EASports_MUT => positive
{'neg': 0.0, 'neu': 0.879, 'pos': 0.121, 'compound': 0.296}



#### comments:

In [60]:
# First five non-sarcastic comments (label == 0).
# run_vader() is called once per comment and its result reused for both the
# label and the raw score dict; the original parsed every comment twice with
# identical arguments.
for comment in data.loc[data["label"] == 0, "comment"].iloc[:5]:
    scores = run_vader(comment, lemmatize=False)
    print(comment, "=>", vader_output_to_label(scores))
    print(scores)
    print()

NC and NH. => positive
{'neg': 0.0, 'neu': 0.337, 'pos': 0.663, 'compound': 0.6037}

You do know west teams play against west teams more than east teams right? => positive
{'neg': 0.0, 'neu': 0.844, 'pos': 0.156, 'compound': 0.34}

They were underdogs earlier today, but since Gronk's announcement this afternoon, the Vegas line has moved to patriots -1 => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

This meme isn't funny none of the "new york nigga" ones are. => negative
{'neg': 0.167, 'neu': 0.833, 'pos': 0.0, 'compound': -0.3412}

I could use one of those tools. => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}



In [61]:
# First five non-sarcastic comments (label == 0): the label is computed from
# the original tokens, the printed score dict from the lemmatized tokens.
for comment in data.loc[data["label"] == 0, "comment"].iloc[0:5]:
    surface_label = vader_output_to_label(run_vader(comment, lemmatize=False))
    print(comment, "=>", surface_label)
    lemma_scores = run_vader(comment, lemmatize=True)
    print(lemma_scores)
    print()

NC and NH. => positive
{'neg': 0.0, 'neu': 0.337, 'pos': 0.663, 'compound': 0.6037}

You do know west teams play against west teams more than east teams right? => positive
{'neg': 0.0, 'neu': 0.844, 'pos': 0.156, 'compound': 0.34}

They were underdogs earlier today, but since Gronk's announcement this afternoon, the Vegas line has moved to patriots -1 => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

This meme isn't funny none of the "new york nigga" ones are. => negative
{'neg': 0.167, 'neu': 0.833, 'pos': 0.0, 'compound': -0.3412}

I could use one of those tools. => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}



In [43]:
# First twenty non-sarcastic comments (label == 0): the label is computed from
# the full text, the printed score dict only from tokens whose POS tag is ADJ.
for comment in data.loc[data["label"] == 0, "comment"].iloc[0:20]:
    full_text_label = vader_output_to_label(run_vader(comment, lemmatize=False))
    print(comment, "=>", full_text_label)
    adjective_scores = run_vader(comment, lemmatize=False, parts_of_speech_to_consider={'ADJ'})
    print(adjective_scores)
    print()

NC and NH. => positive
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}

You do know west teams play against west teams more than east teams right? => positive
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

They were underdogs earlier today, but since Gronk's announcement this afternoon, the Vegas line has moved to patriots -1 => neutral
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}

This meme isn't funny none of the "new york nigga" ones are. => negative
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.4404}

I could use one of those tools. => neutral
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}

I don't pay attention to her, but as long as she's legal I wouldn't kick her out of bed (before she took a load) => positive
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.128}

Trick or treating in general is just weird... => negative
{'neg': 0.63, 'neu': 0.37, 'pos': 0.0, 'compound': -0.1779}

Blade Mastery+Masamune or GTFO! => neutral
{'neg': 0.0, 'neu':

In [44]:
# First twenty non-sarcastic comments (label == 0): the label is computed from
# the full text, the printed score dict only from tokens whose POS tag is NOUN.
for comment in data.loc[data["label"] == 0, "comment"].iloc[:20]:
    full_text_label = vader_output_to_label(run_vader(comment, lemmatize=False))
    print(comment, "=>", full_text_label)
    noun_scores = run_vader(comment, lemmatize=False, parts_of_speech_to_consider={'NOUN'})
    print(noun_scores)
    print()

NC and NH. => positive
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}

You do know west teams play against west teams more than east teams right? => positive
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

They were underdogs earlier today, but since Gronk's announcement this afternoon, the Vegas line has moved to patriots -1 => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

This meme isn't funny none of the "new york nigga" ones are. => negative
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

I could use one of those tools. => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

I don't pay attention to her, but as long as she's legal I wouldn't kick her out of bed (before she took a load) => positive
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Trick or treating in general is just weird... => negative
{'neg': 1.0, 'neu': 0.0, 'pos': 0.0, 'compound': -0.0516}

Blade Mastery+Masamune or GTFO! => neutral
{'neg': 0.0, 'neu': 0.0, '

In [45]:
# First twenty non-sarcastic comments (label == 0): the label is computed from
# the full text, the printed score dict only from tokens whose POS tag is VERB.
for comment in data.loc[data["label"] == 0, "comment"].iloc[:20]:
    full_text_label = vader_output_to_label(run_vader(comment, lemmatize=False))
    print(comment, "=>", full_text_label)
    verb_scores = run_vader(comment, lemmatize=False, parts_of_speech_to_consider={'VERB'})
    print(verb_scores)
    print()

NC and NH. => positive
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}

You do know west teams play against west teams more than east teams right? => positive
{'neg': 0.0, 'neu': 0.294, 'pos': 0.706, 'compound': 0.34}

They were underdogs earlier today, but since Gronk's announcement this afternoon, the Vegas line has moved to patriots -1 => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

This meme isn't funny none of the "new york nigga" ones are. => negative
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}

I could use one of those tools. => neutral
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

I don't pay attention to her, but as long as she's legal I wouldn't kick her out of bed (before she took a load) => positive
{'neg': 0.318, 'neu': 0.682, 'pos': 0.0, 'compound': -0.1027}

Trick or treating in general is just weird... => negative
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Blade Mastery+Masamune or GTFO! => neutral
{'neg': 0.0, 'neu