In [12]:
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords
import nltk
from nltk import pos_tag, word_tokenize
from nltk.corpus import wordnet
import pandas as pd
from nltk.stem import WordNetLemmatizer
! pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
nltk.download('punkt')
nltk.download('wordnet')

nltk.download('averaged_perceptron_tagger')
path = "data/posts5months.pkl"
senti_df = pd.read_pickle(path)
senti_df = senti_df.head(100)
senti_df['text'] = senti_df.title + ' ' + senti_df.selftext
original_df = senti_df.drop(['title', 'selftext'], axis = 1)
print(senti_df.text.head())

def get_wordnet_pos(pos_tag):
    if pos_tag.startswith('J'):
        return wordnet.ADJ
    elif pos_tag.startswith('V'):
        return wordnet.VERB
    elif pos_tag.startswith('N'):
        return wordnet.NOUN
    elif pos_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN

for text in senti_df['text']:
    tokenized = nltk.word_tokenize(text)
    pos_tags = pos_tag(tokenized)
    text = [WordNetLemmatizer().lemmatize(t[0], get_wordnet_pos(t[1])) for t in pos_tags]
    text = [t for t in text if len(t) > 1]
print(senti_df.head())

def get_sentiment_score(sentence):
    analyser = SentimentIntensityAnalyzer()
    score = analyser.polarity_scores(sentence)
    return score['compound']

# Return new DataFrame with sentiment score
def calculate_sentiment_score(df):
    def sentiment_score(row):
        score = get_sentiment_score(row['text'])

        scaled_score = score * row['score']
        return score

    senti_df['sentiment'] = senti_df.apply(lambda x: sentiment_score(x), axis=1)
    return df

print('Calculating sentiment score')
df = calculate_sentiment_score(senti_df)

# df.to_csv('results.csv')
normalized_result = df.sentiment.mean()

print('Completed!\n')
print('Average sentiment:', normalized_result)
print('Range: +1 is positive and -1 is negative')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
0    Looking for a mathematical proof or any furthe...
1    A question about elliptic curves I just stumbl...
2    Finally accepted math as my passion Transfer C...
3    Stuck between two departments and don’t know h...
4    AMD Polaris 12nm refresh coming in October and...
Name: text, dtype: object
   created_utc  ...                                               text
0   1538871394  ...  Looking for a mathematical proof or any furthe...
1   1538881895  ...  A question about elliptic curves I just stumbl...
2   1538887632  ...  Finally accepted math as my passion Transfer C...
3

In [26]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


In [39]:
print(df.sentiment[85:])
senti_df['title'][85:]

85    0.8296
86    0.8857
87    0.9533
88    0.9890
89    0.9978
90    0.9321
91   -0.8224
92    0.9743
93   -0.9626
94    0.9509
95    0.9812
96    0.9842
97    0.9650
98    0.9422
99    0.9654
Name: sentiment, dtype: float64


85    I posted a question on here last night and my ...
86    Have a new girlfriend and didn't realize how a...
87    a friend of a friend groped me in my “sleep” l...
88    Sex with my (25M) girlfriend (21F) never lasts...
89    My husband won’t tongue kiss me. It really hurts.
90                    Any tips for going down on a girl
91    FWB is upset and feels disrespected because I ...
92    Sony showed the first 35 min of “Spider-Man: I...
93    What are some craziest Revenge movies like Old...
94    Why does almost everyone dislike John Carpente...
95    Unofficial Discussion: Johnny English Strikes ...
96              Where can I see Mandy (2018) in the UK?
97               "The Star Wars Holiday Special" (1978)
98    "The Hate U Give" was surprisingly heavy and p...
99    What was the best film you watched this week? ...
Name: title, dtype: object