# Unsupervised sentiment analysis of movie reviews
## Based on 50,000 labeled IMDb movie reviews [dataset](http://ai.stanford.edu/~amaas/data/sentiment/)

# Import necessary dependencies

In [24]:
import pandas as pd
import numpy as np

# Python files with functions
import text_normalizer as tn
import model_evaluation_utils as meu
# ~Python files

# Keep printed NumPy arrays compact: two decimals, lines wrapped at 80 characters
np.set_printoptions(linewidth=80, precision=2)

# Load and normalize data

In [25]:
# Read the labeled reviews from disk
dataset = pd.read_csv('movie_reviews.csv')

# Pull the text and label columns out as NumPy arrays
reviews, sentiments = (np.array(dataset[column])
                       for column in ('review', 'sentiment'))

# Hold out the trailing reviews (and their labels) for model evaluation
test_size = 500
test_reviews, test_sentiments = reviews[-test_size:], sentiments[-test_size:]

In [67]:
# A few hand-picked positions inside the test slice; each model's prediction
# for these reviews is inspected in detail further down.
# Previously tried: [7626, 3533, 13010]
sample_review_ids = [442, 154, 15]

In [27]:
# Normalize the held-out reviews with the project's text_normalizer helper.
# The normalized text feeds the SentiWordNet model, while AFINN and VADER
# score the raw reviews.
norm_test_reviews = tn.normalize_corpus(test_reviews)

# Sentiment analysis with AFINN

In [28]:
# AFINN is used from a local clone of its repository:
# !git clone https://github.com/fnielsen/afinn/
import sys
import os

# Build the path with os.path.join instead of hard-coded backslashes so the
# clone is found on POSIX systems as well as on Windows.
module_path = os.path.abspath(os.path.join('afinn', 'afinn'))
if module_path not in sys.path:
    # Append only once so re-running the cell does not grow sys.path
    sys.path.append(module_path)
from afinn import Afinn

In [29]:
# Shared AFINN scorer, created with the emoticons option switched on
afn = Afinn(emoticons=True)

## Predict sentiment for sample reviews

In [68]:
# Show the raw review, its true label and the AFINN score for each sample
for idx in sample_review_ids:
    review = test_reviews[idx]
    sentiment = test_sentiments[idx]
    print('Review: ', review)
    print('Actual sentiment: ', sentiment)
    print('Predicted sentiment polarity: ', afn.score(review))
    print('-' * 60)

Review:  The movie is not that bad, Ringo Lam sucks. I hate when Van Damme has love in his movies, van Damme is good only when he doesn't have love in his movies.
Actual sentiment:  negative
Predicted sentiment polarity:  0.0
------------------------------------------------------------
Review:  This film is one of Michael Keaton's best. Throughout the film he is 'on'. With co-stars like Ms. Henner, Joe Piscopo and Danny DeVito, you can't go wrong. Great laughs, great fun for everyone.
Actual sentiment:  positive
Predicted sentiment polarity:  14.0
------------------------------------------------------------
Review:  A quite easy to watch tale of 2 thieves, with that love/hate type relationship between them. Chrisopher Walken stars and is very good as the silent rogue with a scam bigger than he's letting on.
Actual sentiment:  positive
Predicted sentiment polarity:  2.0
------------------------------------------------------------


+ the text is used in its raw form because AFINN takes emoticons, exclamation marks, etc. into account

## Predict sentiment for test dataset

In [33]:
# AFINN polarity score of every raw test review
sentiment_polarity = list(map(afn.score, test_reviews))

# Scores of at least 1.0 count as positive, everything else as negative
predicted_sentiments = []
for score in sentiment_polarity:
    if score >= 1.:
        predicted_sentiments.append('positive')
    else:
        predicted_sentiments.append('negative')

+ the threshold should be chosen by analyzing the corpus

# Evaluate AFINN model performance

In [34]:
# Compare the AFINN predictions with the true labels of the test reviews
meu.display_model_performance_metrics(
    true_labels=test_sentiments,
    predicted_labels=predicted_sentiments,
    classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.704
Precision: 0.7127
Recall: 0.704
F1 Score: 0.7005

Model Classification report:
------------------------------
             precision    recall  f1-score   support

   positive       0.67      0.81      0.73       252
   negative       0.76      0.60      0.67       248

avg / total       0.71      0.70      0.70       500


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive        204       48
        negative        100      148


+ many negative reviews were classified as positive; fine-tuning the threshold may be useful

# Sentiment analysis with SentiWordNet

In [35]:
from nltk.corpus import sentiwordnet as swn

awesome = list(swn.senti_synsets('awesome', 'a'))[0]
print('Positive polarity score: ', awesome.pos_score())
print('Negative polarity score: ', awesome.neg_score())
print('Objective score: ', awesome.obj_score())

Positive polarity score:  0.875
Negative polarity score:  0.125
Objective score:  0.0


### [senti_synsets description](https://stackoverflow.com/a/38263475)

# Build SentiWordNet model

In [57]:
# POS-tag fragment -> WordNet part-of-speech letter, checked in this order
_SWN_POS_BY_TAG = (('NN', 'n'), ('VB', 'v'), ('JJ', 'a'), ('RB', 'r'))


def _first_senti_synset(word, tag):
    """Return the first SentiWordNet synset matching the token's tag, or None."""
    for tag_fragment, wn_pos in _SWN_POS_BY_TAG:
        if tag_fragment in tag:
            # Look the word up once and reuse the result
            synsets = list(swn.senti_synsets(word, wn_pos))
            if synsets:
                return synsets[0]
    return None


def analyze_sentiment_sentiwordnet_lexicon(review, verbose=False):
    """Classify a review as 'positive' or 'negative' using SentiWordNet.

    Every token of ``review`` is POS-tagged with ``tn.nlp``; for nouns, verbs,
    adjectives and adverbs the first matching SentiWordNet synset contributes
    its positive, negative and objective scores. The scores are averaged over
    the tokens that had a synset.

    Args:
        review: Text of the review to score.
        verbose: When True, print a one-row table with the predicted
            sentiment and the averaged objectivity, positive, negative and
            overall scores.

    Returns:
        'positive' when the averaged (positive - negative) score is >= 0,
        otherwise 'negative'. A review without any scorable token averages
        to 0.0 and is therefore reported as 'positive'.
    """
    pos_score = neg_score = obj_score = 0.
    token_count = 0

    # Accumulate scores over all tokens for which a synset was found
    for token in tn.nlp(review):
        ss_set = _first_senti_synset(token.text, token.tag_)
        if ss_set is None:
            continue
        pos_score += ss_set.pos_score()
        neg_score += ss_set.neg_score()
        obj_score += ss_set.obj_score()
        token_count += 1

    def normalize(total):
        # Guard against reviews with no scorable token (e.g. empty text),
        # which would otherwise raise ZeroDivisionError
        if not token_count:
            return 0.0
        return round(float(total) / token_count, 2)

    # Aggregate final scores
    norm_final_score = normalize(pos_score - neg_score)
    final_statement = 'positive' if norm_final_score >= 0 else 'negative'

    if verbose:
        # Display the results. from_product replaces the MultiIndex(labels=...)
        # form, whose `labels` keyword no longer exists in current pandas.
        df = pd.DataFrame(
            [[
                final_statement, normalize(obj_score), normalize(pos_score),
                normalize(neg_score), norm_final_score
            ]],
            columns=pd.MultiIndex.from_product([
                ['Sentiment stats'],
                [
                    'Prediced sentiment', 'Objectivity', 'Positive',
                    'Negative', 'Overall'
                ],
            ]))
        print(df)

    return final_statement

## Predict sentiment for sample reviews

In [69]:
# Show the normalized review, its true label and the detailed SentiWordNet
# statistics for each sample
for idx in sample_review_ids:
    review = norm_test_reviews[idx]
    sentiment = test_sentiments[idx]
    print('Review: ', review)
    print('Actual sentiment: ', sentiment)
    pred_sentiment = analyze_sentiment_sentiwordnet_lexicon(
        review, verbose=True)
    print('-' * 60)

Review:  movie not bad ringo lam suck hate van damme love movie van damme good not love movie
Actual sentiment:  negative
     Sentiment stats                                      
  Prediced sentiment Objectivity Positive Negative Overall
0           negative         0.7     0.14     0.16   -0.02
------------------------------------------------------------
Review:  film one michael keaton best throughout film co star like ms henner joe piscopo danny devito not go wrong great laugh great fun everyone
Actual sentiment:  positive
     Sentiment stats                                      
  Prediced sentiment Objectivity Positive Negative Overall
0           positive        0.86     0.09     0.05    0.04
------------------------------------------------------------
Review:  quite easy watch tale 2 thief love hate type relationship chrisopher walken star good silent rogue scam big let
Actual sentiment:  positive
     Sentiment stats                                      
  Prediced sentiment

## Predict sentiment for test dataset

In [59]:
# SentiWordNet label for every normalized test review
predicted_sentiments = list(
    map(analyze_sentiment_sentiwordnet_lexicon, norm_test_reviews))

## Evaluate SentiWordNet model performance

In [60]:
# Compare the SentiWordNet predictions with the true labels of the test reviews
meu.display_model_performance_metrics(
    true_labels=test_sentiments,
    predicted_labels=predicted_sentiments,
    classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.67
Precision: 0.6747
Recall: 0.67
F1 Score: 0.6673

Model Classification report:
------------------------------
             precision    recall  f1-score   support

   positive       0.65      0.76      0.70       252
   negative       0.70      0.58      0.64       248

avg / total       0.67      0.67      0.67       500


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive        191       61
        negative        104      144


# Sentiment analysis with VADER

In [70]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Build VADER model

In [72]:
# Shared analyzer instance, created lazily by _get_vader_analyzer()
_VADER_ANALYZER = None


def _get_vader_analyzer():
    """Return a shared SentimentIntensityAnalyzer, creating it on first use."""
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER


def _as_percent(fraction):
    """Format a fraction such as 0.56 as the percentage string '56.0%'."""
    # Formatting with a fixed precision hides float noise like 56.00000000000001
    return '{:.1f}%'.format(round(fraction, 2) * 100)


def analyze_sentiment_vader_model(review, threshold=0.1, verbose=False):
    """Classify a review as 'positive' or 'negative' using VADER.

    Args:
        review: Raw text of the review; HTML tags, accented characters and
            contractions are handled by the ``tn`` helpers before scoring.
        threshold: Minimum compound score for a review to count as positive.
        verbose: When True, print a one-row table with the predicted
            sentiment, the compound score and the positive / negative /
            neutral percentages.

    Returns:
        'positive' when the compound score is >= ``threshold``, otherwise
        'negative'.
    """
    # Pre-process text
    review = tn.strip_html_tags(review)
    review = tn.remove_accented_chars(review)
    review = tn.expand_contractions(review)

    # Analyze the sentiment for the review, reusing one analyzer across calls
    # instead of constructing a new one per review
    scores = _get_vader_analyzer().polarity_scores(review)

    # Get the aggregate score and the final sentiment
    agg_score = scores['compound']
    final_sentiment = 'positive' if agg_score >= threshold else 'negative'

    if verbose:
        # Display detailed sentiment statistics. from_product replaces the
        # MultiIndex(labels=...) form, whose `labels` keyword no longer exists
        # in current pandas.
        df = pd.DataFrame(
            [[
                final_sentiment, round(agg_score, 2),
                _as_percent(scores['pos']), _as_percent(scores['neg']),
                _as_percent(scores['neu'])
            ]],
            columns=pd.MultiIndex.from_product([
                ['Sentiment stats:'],
                [
                    'Predicted sentiment', 'Polarity score', 'Positive',
                    'Negative', 'Neutral'
                ],
            ]))
        print(df)
    return final_sentiment

+ during preprocessing, punctuation and emoticons are kept intact
### Recommendations for threshold value
+ for positive > 0.5
+ for neutral [-0.5, 0.5]
+ for negative < -0.5

## Predict sentiment for sample reviews

In [73]:
# Show the raw review, its true label and the detailed VADER statistics
# for each sample
for idx in sample_review_ids:
    review = test_reviews[idx]
    sentiment = test_sentiments[idx]
    print('Review: ', review)
    print('Actual sentiment: ', sentiment)
    pred_sentiment = analyze_sentiment_vader_model(review, verbose=True)

Review:  The movie is not that bad, Ringo Lam sucks. I hate when Van Damme has love in his movies, van Damme is good only when he doesn't have love in his movies.
Actual sentiment:  negative
     Sentiment stats:                                                     
  Predicted sentiment Polarity score Positive Negative             Neutral
0            positive           0.84    30.0%    13.0%  56.00000000000001%
Review:  This film is one of Michael Keaton's best. Throughout the film he is 'on'. With co-stars like Ms. Henner, Joe Piscopo and Danny DeVito, you can't go wrong. Great laughs, great fun for everyone.
Actual sentiment:  positive
     Sentiment stats:                                                    
  Predicted sentiment Polarity score Positive            Negative Neutral
0            positive           0.95    40.0%  7.000000000000001%   54.0%
Review:  A quite easy to watch tale of 2 thieves, with that love/hate type relationship between them. Chrisopher Walken stars and i

## Predict sentiment for test dataset

In [74]:
# VADER label (default threshold) for every raw test review
predicted_sentiments = list(map(analyze_sentiment_vader_model, test_reviews))

## Evaluate VADER model performance

In [75]:
# Compare the VADER predictions with the true labels of the test reviews
meu.display_model_performance_metrics(
    true_labels=test_sentiments,
    predicted_labels=predicted_sentiments,
    classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.698
Precision: 0.7068
Recall: 0.698
F1 Score: 0.6943

Model Classification report:
------------------------------
             precision    recall  f1-score   support

   positive       0.67      0.81      0.73       252
   negative       0.75      0.59      0.66       248

avg / total       0.71      0.70      0.69       500


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive        203       49
        negative        102      146


# As a result, the best model is AFINN (accuracy 0.704, vs. 0.698 for VADER and 0.67 for SentiWordNet)