In [14]:
#Importing 
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import nltk  
from nltk.sentiment import vader
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import spacy
# Shared spaCy pipeline; run_vader calls it to split text into sentences and
# tokens and reads each token's text, lemma_ and pos_.
# NOTE(review): 'en' is a shortcut model name -- presumably this targets a
# spaCy 2.x install; confirm the installed spaCy version can resolve it.
nlp = spacy.load('en')
# Single VADER analyzer instance reused by every run_vader call.
vader_model = SentimentIntensityAnalyzer()

In [15]:
# Load the dataset from a CSV in the working directory; later cells read its
# "comment" and "label" columns.
data = pd.read_csv('train-balanced-sarcasm.csv')

In [16]:
# Find the index labels of rows whose "comment" is missing (NaN) and drop
# those rows from `data` in place; `empty_comments` keeps the dropped labels.
empty_comments = data.index[data["comment"].isna().values]
data.drop(index=empty_comments, inplace=True)

In [17]:
def run_vader(textual_unit,
              lemmatize=False,
              parts_of_speech_to_consider=set(),
              verbose=0):
    """
    Score a piece of text with VADER after tokenising it with spaCy.

    The text is parsed with the module-level ``nlp`` pipeline, the selected
    tokens are joined with single spaces, and that string is handed to the
    module-level ``vader_model``.

    :param str textual_unit: a textual unit, e.g., sentence, sentences (one string)
    :param bool lemmatize: If True, provide lemmas to VADER instead of words
        (a token whose lemma is '-PRON-' keeps its original text)
    :param set parts_of_speech_to_consider:
        -empty set -> all parts of speech are provided
        -non-empty set: only tokens whose ``pos_`` is in the set are provided
    :param int verbose: if >= 1, information is printed
        about input and output

    :rtype: dict
    :return: vader output dict
    """
    doc = nlp(textual_unit)

    # The default set is only ever read, never mutated, so sharing it across
    # calls is safe.
    input_to_vader = []
    for sent in doc.sents:
        for token in sent:
            # Skip tokens outside the requested parts of speech (if any).
            if parts_of_speech_to_consider and token.pos_ not in parts_of_speech_to_consider:
                continue

            to_add = token.text
            # '-PRON-' is a placeholder lemma; fall back to the surface form.
            if lemmatize and token.lemma_ != '-PRON-':
                to_add = token.lemma_

            input_to_vader.append(to_add)

    scores = vader_model.polarity_scores(' '.join(input_to_vader))

    if verbose >= 1:
        print()
        # Print the whole input rather than the loop variable `sent`: that
        # variable only holds the last sentence and is undefined when the
        # parsed text contains no sentences (e.g. an empty string).
        print('INPUT SENTENCE', textual_unit)
        print('INPUT TO VADER', input_to_vader)
        print('VADER OUTPUT', scores)

    return scores

In [24]:
# SARCASTIC sentiment analysis: print VADER scores for the comments at
# positions 200-249 among the rows labelled 1.
sarcastic_comments = data.loc[data["label"] == 1, "comment"]
for comment in sarcastic_comments.iloc[200:250]:
    vader_scores = run_vader(comment, lemmatize=False)
    print(comment)
    print(vader_scores)
    print()

Yeah, because his Native American family will do so much better with Trump as President.
{'neg': 0.0, 'neu': 0.707, 'pos': 0.293, 'compound': 0.6573}

Wow hubby sounds nice
{'neg': 0.0, 'neu': 0.233, 'pos': 0.767, 'compound': 0.765}

she's the one trump supporter
{'neg': 0.0, 'neu': 0.704, 'pos': 0.296, 'compound': 0.2732}

Yeah not as cool though
{'neg': 0.274, 'neu': 0.419, 'pos': 0.307, 'compound': 0.0613}

Ahhh, but weren't those emails doctored by Russia?
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

WOW what a huge surprise
{'neg': 0.0, 'neu': 0.101, 'pos': 0.899, 'compound': 0.8374}

Just put bad cards into your deck to tech against it.
{'neg': 0.259, 'neu': 0.741, 'pos': 0.0, 'compound': -0.5423}

_[Bismillahs]_ **straps C4 around waist** #ALLAHU AKBAR
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

I really hope that alot of this stuff is
{'neg': 0.0, 'neu': 0.687, 'pos': 0.313, 'compound': 0.4927}

That's why you went to USC out of HS.
{'neg': 0.0, 'neu': 1.0,

In [26]:
# NOT SARCASTIC sentiment analysis: print VADER scores for the comments at
# positions 200-249 among the rows labelled 0.
non_sarcastic_comments = data.loc[data["label"] == 0, "comment"]
for comment in non_sarcastic_comments.iloc[200:250]:
    vader_scores = run_vader(comment, lemmatize=False)
    print(comment)
    print(vader_scores)
    print()

The Cupertino Combination Headphone and Charger Ports
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Sympathy: I have none.
{'neg': 0.0, 'neu': 0.444, 'pos': 0.556, 'compound': 0.3612}

Anyone know what part of the VOD this would be from?
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

You can't get mad at the racism in the title and then tell someone who is offended by the word "faggot" to fuck off because it doesn't mean what they think it means.
{'neg': 0.3, 'neu': 0.643, 'pos': 0.056, 'compound': -0.9076}

I think he misspelled "Speed"
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Will this have any effect on valuation of Rupee against Dollar?
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Isn't that like illegal?
{'neg': 0.263, 'neu': 0.373, 'pos': 0.364, 'compound': 0.2057}

You can't expect the Knicks to make the playoffs let alone a top 5 seed.
{'neg': 0.127, 'neu': 0.759, 'pos': 0.114, 'compound': -0.0516}

So goblins retargeting is a hiccup?
{'