In [54]:
import pandas as pd
import numpy as np
from afinn import Afinn
import nltk
from nltk.corpus import sentiwordnet as swn
from normalization import normalize_accented_characters, html_parser, strip_html
from utils import display_evaluation_metrics, display_confusion_matrix, display_classification_report
nltk.download('averaged_perceptron_tagger')
nltk.download('sentiwordnet')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package sentiwordnet to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!


True

## Train/test set separation

In [74]:
# Read the movie-review corpus and keep only its first 1000 rows.
dataset = pd.read_csv(r'datasets/movie_reviews.csv').iloc[:1000]

# Positional split: the first 950 rows are the training portion,
# everything after them is held out for testing.
train_data, test_data = dataset.iloc[:950], dataset.iloc[950:]

# Held-out review texts and their labelled sentiments as NumPy arrays.
test_sentiments = np.array(test_data['sentiment'])
test_reviews = np.array(test_data['review'])

## EMOTICONS
# AFINN scorer with emoticon support switched on.
afn = Afinn(emoticons=True)

## Evaluation for unsupervised Lexicon sentiment tagging

#### Compare against the sentence-level sentiment tags (already provided in the dataset)

In [79]:
### BORROWED FROM THE AR_SARKAR METRIC
def analyze_sentiment_sentiwordnet_lexicon(review,
                                           verbose=False):
    """Label a review 'positive' or 'negative' using SentiWordNet scores.

    The review is HTML-unescaped, stripped of markup, tokenized and
    POS-tagged. For each token tagged as a noun, verb, adjective or adverb,
    the first SentiWordNet entry for that word and part of speech is looked
    up and its positive, negative and objective scores are accumulated.
    The overall score is (positive - negative) divided by the number of
    tokens that had an entry, rounded to two decimals.

    Parameters
    ----------
    review : str
        Raw review text; may contain HTML entities and tags.
    verbose : bool, optional
        When True, print a one-row table with the predicted sentiment and
        the normalized objectivity / positive / negative / overall scores.

    Returns
    -------
    str
        'positive' when the normalized overall score is >= 0, otherwise
        'negative'. A review in which no token could be scored has an
        overall score of 0 and is therefore reported as 'positive'.
    """
    review = html_parser.unescape(review)
    review = strip_html(review)

    text_tokens = nltk.word_tokenize(review)
    tagged_text = nltk.pos_tag(text_tokens)
    pos_score = neg_score = obj_score = 0.0
    token_count = 0

    # POS-tag fragment -> WordNet part of speech, tested in this order
    # (noun, verb, adjective, adverb); the first fragment found in the tag wins.
    tag_to_wordnet_pos = (('NN', 'n'), ('VB', 'v'), ('JJ', 'a'), ('RB', 'r'))

    for word, tag in tagged_text:
        wordnet_pos = next(
            (wn_pos for fragment, wn_pos in tag_to_wordnet_pos if fragment in tag),
            None,
        )
        if wordnet_pos is None:
            continue

        # senti_synsets may hand back a lazy iterable (always truthy), so take
        # the first entry explicitly instead of testing the return value.
        first_sense = next(iter(swn.senti_synsets(word, wordnet_pos)), None)
        if first_sense is None:
            continue

        pos_score += first_sense.pos_score()
        neg_score += first_sense.neg_score()
        obj_score += first_sense.obj_score()
        token_count += 1

    # With no scored token every total is 0; dividing by 1 keeps the scores
    # at 0.0 instead of raising ZeroDivisionError.
    divisor = token_count or 1

    final_score = pos_score - neg_score
    norm_final_score = round(float(final_score) / divisor, 2)
    final_sentiment = 'positive' if norm_final_score >= 0 else 'negative'

    if verbose:
        norm_obj_score = round(float(obj_score) / divisor, 2)
        norm_pos_score = round(float(pos_score) / divisor, 2)
        norm_neg_score = round(float(neg_score) / divisor, 2)

        # from_product builds the same two-level header without the
        # `labels=` keyword, which newer pandas releases no longer accept.
        stat_columns = pd.MultiIndex.from_product(
            [['SENTIMENT STATS:'],
             ['Predicted Sentiment', 'Objectivity',
              'Positive', 'Negative', 'Overall']])
        sentiment_frame = pd.DataFrame([[final_sentiment, norm_obj_score,
                                         norm_pos_score, norm_neg_score,
                                         norm_final_score]],
                                       columns=stat_columns)
        print(sentiment_frame)
    return final_sentiment
            
                                                               

# Predict a sentiment label for every held-out review, in order.
sentiwordnet_predictions = list(map(analyze_sentiment_sentiwordnet_lexicon, test_reviews))

                                           

#### Baseline lexicon

In [80]:

# Compare the SentiWordNet predictions with the sentiment labels that ship
# with the dataset: headline metrics first, then the confusion matrix, then
# the per-class report.
print('Performance metrics:')
display_evaluation_metrics(
    true_labels=test_sentiments,
    predicted_labels=sentiwordnet_predictions,
    positive_class='positive',
)

print('\nConfusion Matrix:')
display_confusion_matrix(
    true_labels=test_sentiments,
    predicted_labels=sentiwordnet_predictions,
    classes=['positive', 'negative'],
)

print('\nClassification report:')
display_classification_report(
    true_labels=test_sentiments,
    predicted_labels=sentiwordnet_predictions,
    classes=['positive', 'negative'],
)

Performance metrics:
Accuracy: 0.66
Precision: 0.59
Recall: 0.96
F1 Score: 0.73

Confusion Matrix:
                 Predicted:         
                   positive negative
Actual: positive         23        1
        negative         16       10

Classification report:
              precision    recall  f1-score   support

    positive       0.59      0.96      0.73        24
    negative       0.91      0.38      0.54        26

   micro avg       0.66      0.66      0.66        50
   macro avg       0.75      0.67      0.64        50
weighted avg       0.76      0.66      0.63        50



### EVALUATION IS BASED ON COMPARING THE LEXICON GENERATED METRIC TO THE USER GIVEN SENTIMENT.

### true_labels : already tagged sentiment
### predicted_labels : a single sentiment score per review, produced by summation over the lexicon entries.

### Here we can make improvements using two techniques: Google, Stanford and NUS papers.
