### Sentiment Lexicon Features

In [1]:
import pandas as pd
import numpy as np

### AFINN scores

In [2]:
# Read the tab-separated AFINN lexicon file into a DataFrame.
df = pd.read_csv('lexicons/afinn.txt', sep='\t')

In [3]:
# Build the AFINN lookup table directly from the "word" and "score" columns
# instead of walking the frame row by row with iterrows().
# If a word occurs more than once, the last occurrence wins.
affin_scores = df["score"].tolist()
affin_map = dict(zip(df["word"].tolist(), affin_scores))

# Smallest and largest raw scores; get_affin_score uses them to rescale
# a raw score. min()/max() raise ValueError if the lexicon is empty.
affin_min = min(affin_scores)
affin_max = max(affin_scores)

In [4]:
def get_affin_score(word):
    """Return the AFINN score of ``word`` rescaled to the range [-1, 1].

    The raw score stored in ``affin_map`` is min-max normalised with
    ``affin_min`` / ``affin_max`` so that the smallest raw score maps to -1
    and the largest to 1.  Words that are not in the map yield 0.
    """
    raw = affin_map.get(word)
    if raw is None:
        # Unknown word: treat as neutral.
        return 0
    return 2 * ((raw - affin_min)/(affin_max - affin_min)) - 1

In [5]:
# Spot check on a single word; the recorded output below is -0.4.
get_affin_score("abandoned")

-0.4

### General Inquirer scores

In [6]:
# Load the General Inquirer spreadsheet and expose it as a plain 2-D array
# so the next cell can address cells by position (row[0], row[2], row[3]).
# DataFrame.as_matrix() was deprecated and later removed from pandas;
# the .values attribute returns the same array on old and new versions.
raw_data = pd.read_excel('lexicons/inquirerbasic.xls').values

  


In [7]:
# Build the General Inquirer polarity lookup: lower-cased word -> 1 / -1 / 0.
gi_map = dict()
gi_scores = list()
for row in raw_data:
    # Column 0 holds the entry, columns 2 and 3 the positive / negative tags.
    word, positive, negative = row[0], row[2], row[3]
    if positive == "Positiv":
        score = 1
    elif negative == "Negativ":
        score = -1
    else:
        score = 0
    # Keep only string entries: boolean cells (True/False) are skipped, and so
    # is any other non-string cell (e.g. NaN), on which .lower() would fail.
    if isinstance(word, str):
        gi_map[word.lower()] = score
        gi_scores.append(score)

In [8]:
def get_gi_score(word):
    """Return the General Inquirer polarity stored for ``word``.

    Looks ``word`` up in ``gi_map``; words that are absent (or whose stored
    value is ``None``) get the neutral score 0.
    """
    polarity = gi_map.get(word)
    return 0 if polarity is None else polarity

In [9]:
# Spot check on a single word; the recorded output below is 1.
get_gi_score("ability")

1

### MPQA scores

In [10]:
# Read the MPQA lexicon with pandas' default CSV settings and preview the first rows.
df_mpqa = pd.read_csv('lexicons/MPQA.txt')
df_mpqa.head()

Unnamed: 0,mpqa
0,type=weaksubj len=1 word1=abandoned pos1=adj s...
1,type=weaksubj len=1 word1=abandonment pos1=nou...
2,type=weaksubj len=1 word1=abandon pos1=verb st...
3,type=strongsubj len=1 word1=abase pos1=verb st...
4,type=strongsubj len=1 word1=abasement pos1=any...


In [11]:
# Build the MPQA polarity lookup: word -> 1 (positive) / -1 (negative) / 0 (other).
mpqa_map = dict()
mpqa_scores = list()
# Each entry is one space-separated line of key=value fields. Iterate the first
# column by position (.iloc) rather than via row[0], which relies on the
# deprecated positional fallback of Series.__getitem__.
for entry in df_mpqa.iloc[:, 0]:
    fields = entry.split(' ')
    # The third field carries the word, the last field the polarity label.
    word = fields[2].split('=')[1]
    polarity = fields[-1].split('=')[1]
    if polarity == 'positive':
        score = 1
    elif polarity == 'negative':
        score = -1
    else:
        score = 0
    # A word that appears on several lines keeps the score of its last line.
    mpqa_map[word] = score
    mpqa_scores.append(score)

In [12]:
def get_mpqa_score(word):
    """Return the MPQA polarity stored for ``word``.

    Looks ``word`` up in ``mpqa_map``; words that are absent (or whose stored
    value is ``None``) get the neutral score 0.
    """
    polarity = mpqa_map.get(word)
    return 0 if polarity is None else polarity

In [13]:
# Spot check on a single word; the recorded output below is 1.
get_mpqa_score("happy")

1

### Liu’s scores

In [14]:
# Load Liu's positive and negative word lists into separate frames.
df_liu_pos = pd.read_csv('lexicons/liu-positive-words.txt')
df_liu_neg = pd.read_csv('lexicons/liu-negative-words.txt')

In [15]:
# Preview the first rows of the positive word list.
df_liu_pos.head()

Unnamed: 0,words
0,a+
1,abound
2,abounds
3,abundance
4,abundant


In [16]:
# Build Liu's polarity lookup: word -> 1 (positive list) / -1 (negative list).
liu_map = dict()
liu_scores = list()
# The positive list is processed first, so a word present in both lists ends
# up with the negative score.
for frame, polarity in ((df_liu_pos, 1), (df_liu_neg, -1)):
    for word in frame["words"]:
        liu_map[word] = polarity
        # Record the same polarity that went into the map; negative words
        # were previously appended as 1.
        liu_scores.append(polarity)

In [17]:
def get_liu_score(word):
    """Return Liu's polarity stored for ``word``.

    Looks ``word`` up in ``liu_map``; words that are absent (or whose stored
    value is ``None``) get the neutral score 0.
    """
    polarity = liu_map.get(word)
    return 0 if polarity is None else polarity

In [18]:
# Spot check on a single word; the recorded output below is 1.
get_liu_score("doubtless")

1

### NRC Emotion Lexicon

In [36]:
# Read the tab-separated NRC lexicon file into a DataFrame.
df_nrc = pd.read_csv('lexicons/NRC.txt', sep='\t')

In [37]:
# Preview the first ten rows.
df_nrc.head(10)

Unnamed: 0,word,emotions,score
0,aback,anger,0
1,aback,anticipation,0
2,aback,disgust,0
3,aback,fear,0
4,aback,joy,0
5,aback,negative,0
6,aback,positive,0
7,aback,sadness,0
8,aback,surprise,0
9,aback,trust,0


In [50]:
# Build the NRC polarity lookup: word -> 1 / -1 / 0.
# The lexicon holds one row per (word, emotion) pair. The "negative" and
# "positive" rows are selected by their label in the "emotions" column rather
# than by counting rows, so a missing or reordered row for one word cannot
# shift the scores of every word after it.
nrc_map = dict()
nrc_scores = list()

neg_by_word = dict()
pos_by_word = dict()
for word, emotion, score in zip(df_nrc["word"], df_nrc["emotions"], df_nrc["score"]):
    if emotion == "negative":
        neg_by_word[word] = score
    elif emotion == "positive":
        pos_by_word[word] = score

for word, pos_score in pos_by_word.items():
    # A polarity is only assigned when both flags are available for the word.
    if word not in neg_by_word:
        continue
    neg_score = neg_by_word[word]
    if pos_score != 0 and neg_score == 0:
        polarity = 1
    elif pos_score == 0 and neg_score != 0:
        polarity = -1
    else:
        # Neither flag set, or both set: treat the word as neutral.
        polarity = 0
    nrc_map[word] = polarity
    # Keep the score list in step with the map, as the other lexicon cells do.
    nrc_scores.append(polarity)

In [52]:
def get_nrc_score(word):
    """Return the NRC polarity stored for ``word``.

    Looks ``word`` up in ``nrc_map``; words that are absent (or whose stored
    value is ``None``) get the neutral score 0.
    """
    polarity = nrc_map.get(word)
    return 0 if polarity is None else polarity

In [54]:
# Spot check on a single word; the recorded output below is -1.
get_nrc_score("abyss")

-1