In [23]:
import pandas as pd
from finvader import finvader
data = pd.read_csv('testing_data.csv',index_col=None)

In [24]:
# use the classifier
import numpy as np
data['finvader'] = data.text.apply(finvader,use_sentibignomics = True, use_henry = True, indicator="compound")

In [25]:
# discretize compound indicator into 3 scores
conditions = [
    (data['finvader'] > 0.33),
    (data['finvader']> - 0.33) & (data['finvader'] < 0.33),
    (data['finvader'] < - 0.33)
]
values = [1, 0, -1]
data['finvader'] = np.select(conditions, values)

In [26]:
# calculate true positives (TP), true negatives (TN), false positives (FP) and false negatives (FN)
data['finvader_correct'] = np.where((data['label'] == data['finvader']), 1, 0)
data['finvader_TP'] = np.where((data['label'] == 1) & (data['finvader'] == 1), 1, 0)
data['finvader_TN'] = np.where((data['label'] == -1) & (data['finvader'] == -1), 1, 0)
data['finvader_FP'] = np.where((data['label'] != 1) & (data['finvader'] == 1), 1, 0)
data['finvader_FN'] = np.where((data['label'] != -1) & (data['finvader'] == -1), 1, 0)

In [27]:
# print accuracy, precision, recall, F1 score
print("FinVADER accuracy is:",  round((((data['finvader_correct'].sum()) / len(data))) * 100, 2), "%")
print("FinVADER precision is:",  round((( data['finvader_TP'].sum() / ((data['finvader_TP'].sum() + data['finvader_FP'].sum())))) * 100, 2), "%")
print("FinVADER recall is:",  round((( data['finvader_TP'].sum() / ((data['finvader_TP'].sum() + data['finvader_FN'].sum())))) * 100, 2), "%")
print("FinVADER F1 score is:",  2 * round(
    (((data['finvader_TP'].sum() / ((data['finvader_TP'].sum() + data['finvader_FP'].sum()))) *
      (((data['finvader_TP'].sum() / ((data['finvader_TP'].sum() + data['finvader_FN'].sum())))))) /
     (((data['finvader_TP'].sum() / ((data['finvader_TP'].sum() + data['finvader_FP'].sum()))) +
       (((data['finvader_TP'].sum() / ((data['finvader_TP'].sum() + data['finvader_FN'].sum())))))))
     )* 100, 2), "%")

FinVADER accuracy is: 69.36 %
FinVADER precision is: 60.57 %
FinVADER recall is: 85.45 %
FinVADER F1 score is: 70.88 %


In [28]:
# baseline VADER
# discretize compound indicator into 3 scores
from nltk.sentiment.vader import SentimentIntensityAnalyzer
baseline = SentimentIntensityAnalyzer()

def sentiment_vader(sentence):

    sentiment_dict = baseline.polarity_scores(sentence)

    compound = sentiment_dict['compound']

    if sentiment_dict['compound'] > 0.33 :
        overall_sentiment = 1

    elif sentiment_dict['compound'] < - 0.33 :
        overall_sentiment = - 1

    else :
        overall_sentiment = 0

    return overall_sentiment

In [29]:
# use the classifier
data['vader'] = data.text.apply(sentiment_vader)

In [31]:
# calculate true positives (TP), true negatives (TN), false positives (FP) and false negatives (FN)
data['vader_correct'] = np.where((data['label'] == data['vader']), 1, 0)
data['vader_TP'] = np.where((data['label'] == 1) & (data['vader'] == 1), 1, 0)
data['vader_TN'] = np.where((data['label'] == -1) & (data['vader'] == -1), 1, 0)
data['vader_FP'] = np.where((data['label'] != 1) & (data['vader'] == 1), 1, 0)
data['vader_FN'] = np.where((data['label'] != -1) & (data['vader'] == -1), 1, 0)

In [32]:
# print accuracy, precision, recall, F1 score
print("VADER accuracy is:",  round((((data['vader_correct'].sum()) / len(data))) * 100, 2), "%")
print("VADER precision is:",  round((( data['vader_TP'].sum() / ((data['vader_TP'].sum() + data['vader_FP'].sum())))) * 100, 2), "%")
print("VADER recall is:",  round((( data['vader_TP'].sum() / ((data['vader_TP'].sum() + data['vader_FN'].sum())))) * 100, 2), "%")
print("VADER F1 score is:",  2 * round(
    (((data['vader_TP'].sum() / ((data['vader_TP'].sum() + data['vader_FP'].sum()))) *
      (((data['vader_TP'].sum() / ((data['vader_TP'].sum() + data['vader_FN'].sum())))))) /
     (((data['vader_TP'].sum() / ((data['vader_TP'].sum() + data['vader_FP'].sum()))) +
       (((data['vader_TP'].sum() / ((data['vader_TP'].sum() + data['vader_FN'].sum())))))))
     )* 100, 2), "%")

VADER accuracy is: 58.75 %
VADER precision is: 44.41 %
VADER recall is: 88.28 %
VADER F1 score is: 59.1 %
