In [1]:
import pandas as pd

In [4]:
# Work on a 100-row sample of the labelled texts, keeping only the id, label and text columns.
df = pd.read_csv(
    "../data/Sentiment Dataset.csv",
    usecols=['ItemID', 'Sentiment', 'SentimentText'],
    nrows=100,
)

In [5]:
df.head()

Unnamed: 0,ItemID,Sentiment,SentimentText
0,1,0,is so sad for my APL frie...
1,2,0,I missed the New Moon trail...
2,3,1,omg its already 7:30 :O
3,4,0,.. Omgaga. Im sooo im gunna CRy. I'...
4,5,0,i think mi bf is cheating on me!!! ...


In [6]:
from afinn import Afinn

In [7]:
# Build the AFINN scorer once; its .score() method is what gets applied to each text below.
affin = Afinn()

In [8]:
# Score every text with AFINN. Only the text column is needed, so the scorer is
# applied to that Series directly instead of building a row object per record
# with DataFrame.apply(axis=1).
df['afinn'] = df['SentimentText'].apply(affin.score)

In [9]:
df.columns

Index(['ItemID', 'Sentiment', 'SentimentText', 'afinn'], dtype='object')

In [10]:
df[['SentimentText', 'Sentiment','afinn']].head(10)

Unnamed: 0,SentimentText,Sentiment,afinn
0,is so sad for my APL frie...,0,-1.0
1,I missed the New Moon trail...,0,-2.0
2,omg its already 7:30 :O,1,0.0
3,.. Omgaga. Im sooo im gunna CRy. I'...,0,-1.0
4,i think mi bf is cheating on me!!! ...,0,-3.0
5,or i just worry too much?,0,-3.0
6,Juuuuuuuuuuuuuuuuussssst Chillin!!,1,0.0
7,Sunny Again Work Tomorrow :-| ...,0,0.0
8,handed in my uniform today . i miss you ...,1,-2.0
9,hmmmm.... i wonder how she my number @-),1,0.0


In [12]:
# Show up to 100 characters of each cell before pandas truncates it in displayed frames.
pd.options.display.max_colwidth = 100

In [13]:
# Prerequisite (run once, installs into the kernel's environment): %pip install vaderSentiment

In [14]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [15]:
_VADER_ANALYZER = None  # created on first use, then shared by every vader_sentiment call


def vader_sentiment(sentences):
    """Return the VADER polarity scores of a text with prefixed keys.

    Parameters
    ----------
    sentences : str
        Text handed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns
    -------
    dict
        The analyzer's score dict with every key prefixed by
        ``'vaderSentiment_'`` (for example ``'vaderSentiment_compound'``).
    """
    global _VADER_ANALYZER
    # Build the analyzer lazily and reuse it: this function is called once per
    # row, and constructing a new analyzer for every text repeats its set-up
    # work without changing the scores.
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    scores = _VADER_ANALYZER.polarity_scores(sentences)
    return {'vaderSentiment_' + key: value for key, value in scores.items()}


In [16]:
# One dict of prefixed VADER scores per text. Only the text column is needed, so
# the function is applied to that Series rather than row-wise over the whole frame.
vaderSentiments = df['SentimentText'].apply(vader_sentiment)

In [17]:
# Expand the per-row score dicts into columns, keeping the Series' own index so
# rows are matched by label instead of assuming a default 0..n-1 index.
vader_scores = pd.DataFrame(vaderSentiments.tolist(), index=vaderSentiments.index)
# Drop score columns left over from an earlier run of this cell first, so that
# re-running it does not create suffixed duplicates (_x / _y).
df = df.drop(columns=vader_scores.columns, errors='ignore').join(vader_scores)

In [18]:
df.columns

Index(['ItemID', 'Sentiment', 'SentimentText', 'afinn',
       'vaderSentiment_compound', 'vaderSentiment_neg', 'vaderSentiment_neu',
       'vaderSentiment_pos'],
      dtype='object')

In [19]:
df[['SentimentText', 'Sentiment', 'afinn','vaderSentiment_compound', 'vaderSentiment_neg', 'vaderSentiment_neu',
    'vaderSentiment_pos']].head(10)

Unnamed: 0,SentimentText,Sentiment,afinn,vaderSentiment_compound,vaderSentiment_neg,vaderSentiment_neu,vaderSentiment_pos
0,is so sad for my APL friend.............,0,-1.0,-0.5256,0.361,0.639,0.0
1,I missed the New Moon trailer...,0,-2.0,-0.296,0.355,0.645,0.0
2,omg its already 7:30 :O,1,0.0,-0.2808,0.348,0.652,0.0
3,.. Omgaga. Im sooo im gunna CRy. I've been at this dentist since 11.. I was suposed 2...,0,-1.0,-0.4767,0.129,0.871,0.0
4,i think mi bf is cheating on me!!! T_T,0,-3.0,-0.6679,0.39,0.61,0.0
5,or i just worry too much?,0,-3.0,-0.4404,0.42,0.58,0.0
6,Juuuuuuuuuuuuuuuuussssst Chillin!!,1,0.0,0.0,0.0,1.0,0.0
7,Sunny Again Work Tomorrow :-| TV Tonight,0,0.0,0.2732,0.179,0.526,0.295
8,handed in my uniform today . i miss you already,1,-2.0,-0.1531,0.186,0.814,0.0
9,hmmmm.... i wonder how she my number @-),1,0.0,0.0772,0.0,0.822,0.178


# SentiWordNet

In [20]:
from nltk.corpus import sentiwordnet as swn

In [21]:
# senti_synsets hands back a one-shot iterator; materialise it so the result can
# be displayed (or the display cell re-run) more than once without coming up empty.
slow = list(swn.senti_synsets('happy'))

In [22]:
list(slow)

[SentiSynset('happy.a.01'),
 SentiSynset('felicitous.s.02'),
 SentiSynset('glad.s.02'),
 SentiSynset('happy.s.04')]

In [23]:
# senti_synsets hands back a one-shot iterator; materialise it so the result can
# be displayed (or the display cell re-run) more than once without coming up empty.
slow = list(swn.senti_synsets('sad'))

In [24]:
list(slow)

[SentiSynset('sad.a.01'),
 SentiSynset('sad.s.02'),
 SentiSynset('deplorable.s.01')]

---

# Load inquirer_basic lexicon

In [26]:
# Load the General Inquirer lexicon through a project-relative path, like the
# other data files in this notebook, instead of a user-specific absolute path.
# NOTE(review): assumes the notebook lives one level below the project root, so
# that ../lexicons_external/ is the same folder as before — confirm.
# low_memory=False makes pandas infer each column's dtype from the whole column,
# which avoids the mixed-dtype warning raised by chunk-wise inference.
general_inquiere = pd.read_csv("../lexicons_external/inquirerbasic.csv", low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [27]:
general_inquiere.shape

(11788, 186)

In [28]:
general_inquiere.head()

Unnamed: 0,Entry,Source,Positiv,Negativ,Pstv,Affil,Ngtv,Hostile,Strong,Power,...,Anomie,NegAff,PosAff,SureLw,If,NotLw,TimeSpc,FormLw,Othtags,Defined
0,A,H4Lvd,,,,,,,,,...,,,,,,,,,DET ART,| article: Indefinite singular article--some or any one
1,ABANDON,H4Lvd,,Negativ,,,Ngtv,,,,...,,,,,,,,,SUPV,|
2,ABANDONMENT,H4,,Negativ,,,,,,,...,,,,,,,,,Noun,|
3,ABATE,H4Lvd,,Negativ,,,,,,,...,,,,,,,,,SUPV,|
4,ABATEMENT,Lvd,,,,,,,,,...,,,,,,,,,Noun,


# Load emo_sentic_net lexicon

In [29]:
# Read the emo_sentic_net lexicon from the project's lexicons folder.
emo_lexicon = pd.read_csv(
    '../lexicons/emo_sentic_net_new.csv',
)

In [31]:
emo_lexicon.head()

Unnamed: 0,Concepts,Positive,Negative
0,weltschmerz,1,0
1,detachment,0,1
2,unrest,0,1
3,unfriendliness,0,1
4,unconcern,0,1


# Eval new features

In [32]:
df.head(5)

Unnamed: 0,ItemID,Sentiment,SentimentText,afinn,vaderSentiment_compound,vaderSentiment_neg,vaderSentiment_neu,vaderSentiment_pos
0,1,0,is so sad for my APL friend.............,-1.0,-0.5256,0.361,0.639,0.0
1,2,0,I missed the New Moon trailer...,-2.0,-0.296,0.355,0.645,0.0
2,3,1,omg its already 7:30 :O,0.0,-0.2808,0.348,0.652,0.0
3,4,0,.. Omgaga. Im sooo im gunna CRy. I've been at this dentist since 11.. I was suposed 2...,-1.0,-0.4767,0.129,0.871,0.0
4,5,0,i think mi bf is cheating on me!!! T_T,-3.0,-0.6679,0.39,0.61,0.0


In [33]:
# Keep the text, its label and the lexicon-based scores; ItemID is left out.
new_df = df.loc[:, [
    'SentimentText',
    'Sentiment',
    'afinn',
    'vaderSentiment_compound',
    'vaderSentiment_neg',
    'vaderSentiment_neu',
    'vaderSentiment_pos',
]]

In [34]:
# Execute the helper script so that the names it defines become available in this notebook.
# NOTE(review): `evaluate`, called in the following cells, is presumably defined by this script — confirm.
%run ../dataprep/symbolic_approach.py

In [35]:
# Spot-check the symbolic scorer on the row labelled 1, with every '..' pair removed
# and surrounding whitespace trimmed.
# NOTE(review): this call passes a raw string, while the loop further down passes a
# token list — confirm which form evaluate expects.
evaluate(
    new_df['SentimentText'].loc[1]
    .replace('..', '')
    .strip()
)

-1

In [36]:
import nltk

In [37]:
# Tokenize the first ten texts (after removing every '.') and print the symbolic
# score next to the tokens it was computed from.
for text in new_df['SentimentText'].iloc[:10]:
    cleaned = text.replace('.', '').strip()
    tokenized = nltk.word_tokenize(cleaned)
    print('Value: ', evaluate(tokenized), 'Tokents: ', tokenized)

Value:  -1 Tokents:  ['is', 'so', 'sad', 'for', 'my', 'APL', 'friend']
Value:  -1 Tokents:  ['I', 'missed', 'the', 'New', 'Moon', 'trailer']
Value:  -1 Tokents:  ['omg', 'its', 'already', '7:30', ':', 'O']
Value:  -1 Tokents:  ['Omgaga', 'Im', 'sooo', 'im', 'gunna', 'CRy', 'I', "'ve", 'been', 'at', 'this', 'dentist', 'since', '11', 'I', 'was', 'suposed', '2', 'just', 'get', 'a', 'crown', 'put', 'on', '(', '30mins', ')']
Value:  -1 Tokents:  ['i', 'think', 'mi', 'bf', 'is', 'cheating', 'on', 'me', '!', '!', '!', 'T_T']
Value:  -1 Tokents:  ['or', 'i', 'just', 'worry', 'too', 'much', '?']
Value:  -1 Tokents:  ['Juuuuuuuuuuuuuuuuussssst', 'Chillin', '!', '!']
Value:  -1 Tokents:  ['Sunny', 'Again', 'Work', 'Tomorrow', ':', '-|', 'TV', 'Tonight']
Value:  -1 Tokents:  ['handed', 'in', 'my', 'uniform', 'today', 'i', 'miss', 'you', 'already']
Value:  -1 Tokents:  ['hmmmm', 'i', 'wonder', 'how', 'she', 'my', 'number', '@', '-', ')']


In [38]:
emo_lexicon.shape

(13189, 3)

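# Score ranges of the different lexicons:
    afinn - [-5; 5] per word (the sentence score is the sum over the matched words)
    vader - neg / neu / pos in [0; 1], compound in [-1; 1]
    emo - pol [0-2] neg [0-4]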