In [4]:
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support

# Read the annotated sentiment dataset. The cells below compare its
# 'sentiment_annotator' column against the model-label columns.
df = pd.read_csv(filepath_or_buffer='ds_sentiment/annotation_set_sentiment_extra.csv')

In [5]:
# Keep only the rows that carry an annotator label. According to the original
# note, a missing "sentiment_annotator" value means the annotators did not agree
# on a sentiment, so those rows cannot be used as ground truth.
#
# dropna(subset=...) removes exactly the rows whose label is null and returns a
# new frame instead of mutating the loaded one in place. Unlike dropping by index
# label, it cannot remove extra rows if the index ever contains duplicates.
# The index of the surviving rows is left unchanged.
df = df.dropna(subset=['sentiment_annotator'])

In [6]:
# Number of rows left after removing the rows without an annotator label.
print(len(df.index))

442


## Accuracy

#### textblob

In [7]:
# Accuracy (in percent) of the TextBlob labels against the annotator labels.
# A row is a hit only when both columns hold the same one of the three classes.
total = df.shape[0]

num_positive = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_textblob'].eq('positive')
).sum())
num_neutral = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_textblob'].eq('neutral')
).sum())
num_negative = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_textblob'].eq('negative')
).sum())

total_matching = sum((num_positive, num_neutral, num_negative))

# Ratio first, then scale to percent (keeps the printed float unchanged).
accuracy = (total_matching / total) * 100
print(accuracy)

40.95022624434389


#### textblob v2

In [8]:
# Accuracy (in percent) of the TextBlob v2 labels against the annotator labels.
# A row is a hit only when both columns hold the same one of the three classes.
total = df.shape[0]

num_positive = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_textblob-v2'].eq('positive')
).sum())
num_neutral = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_textblob-v2'].eq('neutral')
).sum())
num_negative = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_textblob-v2'].eq('negative')
).sum())

total_matching = sum((num_positive, num_neutral, num_negative))

# Ratio first, then scale to percent (keeps the printed float unchanged).
accuracy = (total_matching / total) * 100
print(accuracy)

42.081447963800905


#### vader

In [9]:
# Accuracy (in percent) of the VADER labels against the annotator labels.
# A row is a hit only when both columns hold the same one of the three classes.
total = df.shape[0]

num_positive = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_vader'].eq('positive')
).sum())
num_neutral = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_vader'].eq('neutral')
).sum())
num_negative = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_vader'].eq('negative')
).sum())

total_matching = sum((num_positive, num_neutral, num_negative))

# Ratio first, then scale to percent (keeps the printed float unchanged).
accuracy = (total_matching / total) * 100
print(accuracy)

52.26244343891403


## Precision

#### textblob

In [10]:
# Per-class precision of TextBlob: TP / (TP + FP).
#   TP = rows where TextBlob and the annotators both chose the class
#   FP = rows where TextBlob chose the class but the annotators did not
positive_tp = int((
    df['sentiment_textblob'].eq('positive') & df['sentiment_annotator'].eq('positive')
).sum())
positive_fp = int((
    df['sentiment_textblob'].eq('positive') & df['sentiment_annotator'].ne('positive')
).sum())
precision_positive = positive_tp / (positive_tp + positive_fp)

neutral_tp = int((
    df['sentiment_textblob'].eq('neutral') & df['sentiment_annotator'].eq('neutral')
).sum())
neutral_fp = int((
    df['sentiment_textblob'].eq('neutral') & df['sentiment_annotator'].ne('neutral')
).sum())
precision_neutral = neutral_tp / (neutral_tp + neutral_fp)

negative_tp = int((
    df['sentiment_textblob'].eq('negative') & df['sentiment_annotator'].eq('negative')
).sum())
negative_fp = int((
    df['sentiment_textblob'].eq('negative') & df['sentiment_annotator'].ne('negative')
).sum())
precision_negative = negative_tp / (negative_tp + negative_fp)

print('precision-positive:', precision_positive)
print('precision-neutral:', precision_neutral)
print('precision-negative:', precision_negative)

# Macro average: the plain mean of the three per-class precisions, so every
# class weighs the same regardless of how many rows it has.
total_precision = (precision_positive + precision_neutral + precision_negative) / 3
print('final precision score:', total_precision)

precision-positive: 0.22924901185770752
precision-neutral: 0.34285714285714286
precision-negative: 0.8319327731092437
final precision score: 0.4680129759413647


#### vader

In [11]:
# Per-class precision of VADER: TP / (TP + FP).
#   TP = rows where VADER and the annotators both chose the class
#   FP = rows where VADER chose the class but the annotators did not
positive_tp = int((
    df['sentiment_vader'].eq('positive') & df['sentiment_annotator'].eq('positive')
).sum())
positive_fp = int((
    df['sentiment_vader'].eq('positive') & df['sentiment_annotator'].ne('positive')
).sum())
precision_positive = positive_tp / (positive_tp + positive_fp)

neutral_tp = int((
    df['sentiment_vader'].eq('neutral') & df['sentiment_annotator'].eq('neutral')
).sum())
neutral_fp = int((
    df['sentiment_vader'].eq('neutral') & df['sentiment_annotator'].ne('neutral')
).sum())
precision_neutral = neutral_tp / (neutral_tp + neutral_fp)

negative_tp = int((
    df['sentiment_vader'].eq('negative') & df['sentiment_annotator'].eq('negative')
).sum())
negative_fp = int((
    df['sentiment_vader'].eq('negative') & df['sentiment_annotator'].ne('negative')
).sum())
precision_negative = negative_tp / (negative_tp + negative_fp)

print('precision-positive:', precision_positive)
print('precision-neutral:', precision_neutral)
print('precision-negative:', precision_negative)

# Macro average: the plain mean of the three per-class precisions, so every
# class weighs the same regardless of how many rows it has.
total_precision = (precision_positive + precision_neutral + precision_negative) / 3
print('final precision score:', total_precision)

precision-positive: 0.2568306010928962
precision-neutral: 0.35
precision-negative: 0.8190954773869347
final precision score: 0.4753086928266102


Option:
**TODO: Show all precision numbers. Explain that the low neutral (and positive) precision scores bring the averaged precision down by a lot, and find a resource that explains why this is.**

## Recall

#### textblob

In [12]:
# Per-class recall of TextBlob: TP / (TP + FN).
#   TP = rows where the annotators and TextBlob both chose the class
#   FN = rows where the annotators chose the class but TextBlob did not
positive_tp = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_textblob'].eq('positive')
).sum())
positive_fn = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_textblob'].ne('positive')
).sum())
recall_positive = positive_tp / (positive_tp + positive_fn)

neutral_tp = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_textblob'].eq('neutral')
).sum())
neutral_fn = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_textblob'].ne('neutral')
).sum())
recall_neutral = neutral_tp / (neutral_tp + neutral_fn)

negative_tp = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_textblob'].eq('negative')
).sum())
negative_fn = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_textblob'].ne('negative')
).sum())
recall_negative = negative_tp / (negative_tp + negative_fn)

print('recall-positive:', recall_positive)
print('recall-neutral:', recall_neutral)
print('recall-negative:', recall_negative)

# Macro average: the plain mean of the three per-class recalls, so every
# class weighs the same regardless of how many rows it has.
total_recall = (recall_positive + recall_neutral + recall_negative) / 3
print('final recall score:', total_recall)

recall-positive: 0.7945205479452054
recall-neutral: 0.38095238095238093
recall-negative: 0.3235294117647059
final recall score: 0.49966744688743076


#### vader

In [13]:
# Per-class recall of VADER: TP / (TP + FN).
#   TP = rows where the annotators and VADER both chose the class
#   FN = rows where the annotators chose the class but VADER did not
positive_tp = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_vader'].eq('positive')
).sum())
positive_fn = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_vader'].ne('positive')
).sum())
recall_positive = positive_tp / (positive_tp + positive_fn)

neutral_tp = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_vader'].eq('neutral')
).sum())
neutral_fn = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_vader'].ne('neutral')
).sum())
recall_neutral = neutral_tp / (neutral_tp + neutral_fn)

negative_tp = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_vader'].eq('negative')
).sum())
negative_fn = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_vader'].ne('negative')
).sum())
recall_negative = negative_tp / (negative_tp + negative_fn)

print('recall-positive:', recall_positive)
print('recall-neutral:', recall_neutral)
print('recall-negative:', recall_negative)

# Macro average: the plain mean of the three per-class recalls, so every
# class weighs the same regardless of how many rows it has.
total_recall = (recall_positive + recall_neutral + recall_negative) / 3
print('final recall score:', total_recall)

recall-positive: 0.6438356164383562
recall-neutral: 0.3333333333333333
recall-negative: 0.5326797385620915
final recall score: 0.5032828961112603


## F1 score

#### textblob

In [14]:
# Per-class F1 of TextBlob, computed straight from the counts:
#   F1 = 2*TP / (2*TP + FP + FN)
# TP: annotators and TextBlob agree on the class.
# FN: annotators chose the class, TextBlob did not.
# FP: TextBlob chose the class, annotators did not.
positive_tp = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_textblob'].eq('positive')
).sum())
positive_fn = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_textblob'].ne('positive')
).sum())
positive_fp = int((
    df['sentiment_textblob'].eq('positive') & df['sentiment_annotator'].ne('positive')
).sum())
f1_positive = 2 * positive_tp / (2 * positive_tp + positive_fp + positive_fn)

neutral_tp = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_textblob'].eq('neutral')
).sum())
neutral_fn = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_textblob'].ne('neutral')
).sum())
neutral_fp = int((
    df['sentiment_textblob'].eq('neutral') & df['sentiment_annotator'].ne('neutral')
).sum())
f1_neutral = 2 * neutral_tp / (2 * neutral_tp + neutral_fp + neutral_fn)

negative_tp = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_textblob'].eq('negative')
).sum())
negative_fn = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_textblob'].ne('negative')
).sum())
negative_fp = int((
    df['sentiment_textblob'].eq('negative') & df['sentiment_annotator'].ne('negative')
).sum())
f1_negative = 2 * negative_tp / (2 * negative_tp + negative_fp + negative_fn)

print('f1-positive:', f1_positive)
print('f1-neutral:', f1_neutral)
print('f1-negative:', f1_negative)

# Macro average: the plain mean of the three per-class F1 scores, so every
# class weighs the same regardless of how many rows it has.
total_f1 = (f1_positive + f1_neutral + f1_negative) / 3
print('final f1 score:', total_f1)


f1-positive: 0.3558282208588957
f1-neutral: 0.3609022556390977
f1-negative: 0.46588235294117647
final f1 score: 0.3942042764797233


#### vader

In [15]:
# Per-class F1 of VADER, computed straight from the counts:
#   F1 = 2*TP / (2*TP + FP + FN)
# TP: annotators and VADER agree on the class.
# FN: annotators chose the class, VADER did not.
# FP: VADER chose the class, annotators did not.
positive_tp = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_vader'].eq('positive')
).sum())
positive_fn = int((
    df['sentiment_annotator'].eq('positive') & df['sentiment_vader'].ne('positive')
).sum())
positive_fp = int((
    df['sentiment_vader'].eq('positive') & df['sentiment_annotator'].ne('positive')
).sum())
f1_positive = 2 * positive_tp / (2 * positive_tp + positive_fp + positive_fn)

neutral_tp = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_vader'].eq('neutral')
).sum())
neutral_fn = int((
    df['sentiment_annotator'].eq('neutral') & df['sentiment_vader'].ne('neutral')
).sum())
neutral_fp = int((
    df['sentiment_vader'].eq('neutral') & df['sentiment_annotator'].ne('neutral')
).sum())
f1_neutral = 2 * neutral_tp / (2 * neutral_tp + neutral_fp + neutral_fn)

negative_tp = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_vader'].eq('negative')
).sum())
negative_fn = int((
    df['sentiment_annotator'].eq('negative') & df['sentiment_vader'].ne('negative')
).sum())
negative_fp = int((
    df['sentiment_vader'].eq('negative') & df['sentiment_annotator'].ne('negative')
).sum())
f1_negative = 2 * negative_tp / (2 * negative_tp + negative_fp + negative_fn)

print('f1-positive:', f1_positive)
print('f1-neutral:', f1_neutral)
print('f1-negative:', f1_negative)

# Macro average: the plain mean of the three per-class F1 scores, so every
# class weighs the same regardless of how many rows it has.
total_f1 = (f1_positive + f1_neutral + f1_negative) / 3
print('final f1 score:', total_f1)


f1-positive: 0.3671875
f1-neutral: 0.34146341463414637
f1-negative: 0.6455445544554456
final f1 score: 0.4513984896965306


## Calculate using scikit-learn metrics

In [16]:
# extract the true sentiment labels and model predictions
y_true = df['sentiment_annotator']
y_textblob = df['sentiment_textblob']
y_textblob2 = df['sentiment_textblob-v2']
y_vader = df['sentiment_vader']



#### textblob comparison

In [17]:
# TextBlob: precision, recall and F1 averaged over the three classes, each
# class weighted by its number of true rows (average='weighted').
# The fourth return value (support) is not used.
precision_textblob, recall_textblob, f1_score_textblob, _ = precision_recall_fscore_support(
    y_true=y_true,
    y_pred=y_textblob,
    labels=['negative', 'neutral', 'positive'],
    average='weighted',
)
print("TextBlob precision:", precision_textblob)
print("TextBlob recall:", recall_textblob)
print("TextBlob f1 score:", f1_score_textblob)



TextBlob precision: 0.662684629948057
TextBlob recall: 0.4095022624434389
TextBlob f1 score: 0.4327427652216347


In [18]:
# TextBlob v2: precision, recall and F1 averaged over the three classes, each
# class weighted by its number of true rows (average='weighted').
# NOTE: the results are stored in the same *_textblob variables as the TextBlob
# (v1) cell, so they overwrite those values.
precision_textblob, recall_textblob, f1_score_textblob, _ = precision_recall_fscore_support(
    y_true=y_true,
    y_pred=y_textblob2,
    labels=['negative', 'neutral', 'positive'],
    average='weighted',
)
print("TextBlob 2 precision:", precision_textblob)
print("TextBlob 2 recall:", recall_textblob)
print("TextBlob 2 f1 score:", f1_score_textblob)



TextBlob 2 precision: 0.6592799216218422
TextBlob 2 recall: 0.42081447963800905
TextBlob 2 f1 score: 0.44922121601839377


In [19]:
# TextBlob: per-class precision, recall and F1 (average=None). Each result is an
# array with one entry per class, in the order given by `labels`:
# negative, neutral, positive.
precision_textblob, recall_textblob, f1_score_textblob, _ = precision_recall_fscore_support(
    y_true=y_true,
    y_pred=y_textblob,
    labels=['negative', 'neutral', 'positive'],
    average=None,
)
print("TextBlob precision:", precision_textblob)
print("TextBlob recall:", recall_textblob)
print("TextBlob f1 score:", f1_score_textblob)



TextBlob precision: [0.83193277 0.34285714 0.22924901]
TextBlob recall: [0.32352941 0.38095238 0.79452055]
TextBlob f1 score: [0.46588235 0.36090226 0.35582822]


In [20]:
# TextBlob v2: per-class precision, recall and F1 (average=None). Each result is
# an array with one entry per class, in the order given by `labels`:
# negative, neutral, positive.
# NOTE: the results are stored in the same *_textblob variables as the TextBlob
# (v1) cells, so they overwrite those values.
precision_textblob, recall_textblob, f1_score_textblob, _ = precision_recall_fscore_support(
    y_true=y_true,
    y_pred=y_textblob2,
    labels=['negative', 'neutral', 'positive'],
    average=None,
)
print("TextBlob 2 precision:", precision_textblob)
print("TextBlob 2 recall:", recall_textblob)
print("TextBlob 2 f1 score:", f1_score_textblob)



TextBlob 2 precision: [0.828125   0.34177215 0.22553191]
TextBlob 2 recall: [0.34640523 0.42857143 0.7260274 ]
TextBlob 2 f1 score: [0.48847926 0.38028169 0.34415584]


#### Vader scores

In [25]:
# VADER: per-class precision, recall and F1 (average=None). Each result is an
# array with one entry per class, in the order given by `labels`:
# negative, neutral, positive.
precision_vader, recall_vader, f1_score_vader, _ = precision_recall_fscore_support(
    y_true=y_true,
    y_pred=y_vader,
    labels=['negative', 'neutral', 'positive'],
    average=None,
)
print("Vader precision:", precision_vader)
print("Vader recall:", recall_vader)
print("Vader f1 score:", f1_score_vader)

Vader precision: [0.81909548 0.35       0.2568306 ]
Vader recall: [0.53267974 0.33333333 0.64383562]
Vader f1 score: [0.64554455 0.34146341 0.3671875 ]


In [19]:
# VADER: precision, recall and F1 averaged over the three classes, each class
# weighted by its number of true rows (average='weighted').
# NOTE: this reuses the *_vader names, replacing the per-class arrays from the
# previous cell with single averaged values.
precision_vader, recall_vader, f1_score_vader, _ = precision_recall_fscore_support(
    y_true=y_true,
    y_pred=y_vader,
    labels=['negative', 'neutral', 'positive'],
    average='weighted',
)
print("Vader precision:", precision_vader)
print("Vader recall:", recall_vader)
print("Vader f1 score:", f1_score_vader)

Vader precision: 0.6593707012673834
Vader recall: 0.5226244343891403
Vader f1 score: 0.5562296748536596
