In [2]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

### Sentiment Analysis using NLTK Sentiment Intensity Analyzer

In [3]:
# Sample review sentences used as input for the sentiment scoring below.
hotel_review = [
    "Great resort to stay in when you visit the Dominican Republic.",
    "Rooms were under renovation when I visited, so the availability was limited.",
    "Love the ocean breeze and the food.",
    "The food is delicious but not over the top.",
    "Service - Little slow, probably because of too many people.",
    "The place is not easy to find.",
    "Prawns cooked in local specialty sauce were tasty.",
]

In [4]:
# Score every review sentence and print the sentence followed by one line
# holding all of its polarity scores (neg / neu / pos / compound).
sent_analyzer = SentimentIntensityAnalyzer()
for sentence in hotel_review:
    print(sentence)
    scores = sent_analyzer.polarity_scores(sentence)
    # Each "name: value" pair keeps its trailing ", " so the printed line is
    # identical to emitting the pairs one by one with end=''.
    print(''.join('{0}: {1}, '.format(name, value) for name, value in scores.items()))

Great resort to stay in when you visit the Dominican Republic.
neg: 0.0, neu: 0.709, pos: 0.291, compound: 0.6249, 
Rooms were under renovation when I visited, so the availability was limited.
neg: 0.16, neu: 0.84, pos: 0.0, compound: -0.2263, 
Love the ocean breeze and the food.
neg: 0.0, neu: 0.588, pos: 0.412, compound: 0.6369, 
The food is delicious but not over the top.
neg: 0.168, neu: 0.623, pos: 0.209, compound: 0.1184, 
Service - Little slow, probably because of too many people.
neg: 0.0, neu: 1.0, pos: 0.0, compound: 0.0, 
The place is not easy to find.
neg: 0.286, neu: 0.714, pos: 0.0, compound: -0.3412, 
Prawns cooked in local specialty sauce were tasty.
neg: 0.0, neu: 1.0, pos: 0.0, compound: 0.0, 


In [5]:
def normalize(score, alpha=15):
    """
    Normalize the score to be between -1 and 1 using an alpha that
    approximates the max expected value.

    Computes ``score / sqrt(score**2 + alpha)``. The sign of ``score`` is
    preserved, ``0`` maps to ``0.0`` and, for ``alpha > 0``, the magnitude of
    the result stays strictly below 1.

    Parameters
    ----------
    score : int or float
        Raw, unbounded score.
    alpha : int or float, optional
        Damping constant added under the square root (default 15). Larger
        values pull results closer to 0.

    Returns
    -------
    float
        The normalized score.

    Raises
    ------
    ZeroDivisionError
        If ``score`` and ``alpha`` are both zero.
    ValueError
        If ``score**2 + alpha`` is negative.
    """
    # Imported locally because no visible cell of this notebook imports
    # `math`; without it the function fails with NameError on a fresh kernel.
    import math

    norm_score = score/math.sqrt((score*score) + alpha)
    return norm_score

### Sentiment Analysis using Naive Bayes Classifier (training and validation)

In [17]:
from nltk.tokenize import word_tokenize

# Step 1 – hand-labelled training sentences as (text, label) pairs
training_set = [("Great resort to stay in when you visit the Dominican Republic.","pos"),
                ("Rooms were under renovation when I visited, so the availability was limited.","neg"),
                ("Love the ocean breeze and the food.","pos"),
                ("The food is delicious but not over the top.","neg"),
                ("Service - Little slow, probably because of too many people.","neg"),
                ("The place is not easy to find.","neg"),
                ("V interesting!","pos"),
                ("V much interesting!","neg"),
                ("Does that add value?!","neg"),
                ("Prawns cooked in a local specialty sauce were tasty.", "pos")]


def tokenize_lower(text):
    """Return the set of lower-cased word tokens found in `text`."""
    return {token.lower() for token in word_tokenize(text)}


# Tokenise each training sentence exactly once. The vocabulary is lower-cased,
# so the tokens it is compared against must be lower-cased too; otherwise
# capitalised words ("Great", "Love", "V") never register as present during
# training even though they can at prediction time.
tokenized_training = [(tokenize_lower(text), label) for text, label in training_set]

# Step 2 – vocabulary: every distinct lower-cased token in the training data
dictionary = set().union(*(tokens for tokens, _ in tokenized_training))

# Step 3 – one {word: is_present} feature dict per training sentence
t = [({word: (word in tokens) for word in dictionary}, label)
     for tokens, label in tokenized_training]

# Step 4 – the classifier is trained with sample data
classifier = nltk.NaiveBayesClassifier.train(t)

# Classify a sentence using the same feature representation as in training.
test_data = "Service - Little slow, probably because of too many people"
test_tokens = tokenize_lower(test_data)
test_data_features = {word: (word in test_tokens) for word in dictionary}

print(classifier.classify(test_data_features))

neg


In [18]:
test_data = "Just way V interesting."
# Tokenise the sentence once up front; tokenising inside the comprehension
# repeated the same work for every word in the vocabulary.
test_tokens = set(word_tokenize(test_data.lower()))
test_data_features = {word.lower(): (word in test_tokens) for word in dictionary}

print(classifier.classify(test_data_features))

pos


### Subjectivity analysis using NLTK

In [8]:
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.classify import NaiveBayesClassifier

#### Obtain a set of 100 subjective and 100 objective sentences from the NLTK subjectivity corpus

In [9]:
# Number of sentences to take from each category of the subjectivity corpus.
n_instances = 100

# Take the first `n_instances` sentences per category ...
subj_sents = subjectivity.sents(categories='subj')[:n_instances]
obj_sents = subjectivity.sents(categories='obj')[:n_instances]

# ... and pair each one with its category label.
subj_docs = [(tokens, 'subj') for tokens in subj_sents]
obj_docs = [(tokens, 'obj') for tokens in obj_sents]
len(subj_docs), len(obj_docs)

(100, 100)

In [10]:
obj_docs[1]  # each document is a (tokens, label) pair: the sentence as a list of strings, plus its label ('subj' or 'obj')

(['emerging',
  'from',
  'the',
  'human',
  'psyche',
  'and',
  'showing',
  'characteristics',
  'of',
  'abstract',
  'expressionism',
  ',',
  'minimalism',
  'and',
  'russian',
  'constructivism',
  ',',
  'graffiti',
  'removal',
  'has',
  'secured',
  'its',
  'place',
  'in',
  'the',
  'history',
  'of',
  'modern',
  'art',
  'while',
  'being',
  'created',
  'by',
  'artists',
  'who',
  'are',
  'unconscious',
  'of',
  'their',
  'artistic',
  'achievements',
  '.'],
 'obj')

#### Split into training and testing sets

In [11]:
# Share of each category used for training; the remainder is held out for
# testing. Deriving the split point from the list lengths keeps the split
# correct if the number of sampled sentences changes (80/20 for 100 docs).
TRAIN_PERCENT = 80
n_train_subj = len(subj_docs) * TRAIN_PERCENT // 100
n_train_obj = len(obj_docs) * TRAIN_PERCENT // 100

train_subj_docs = subj_docs[:n_train_subj]
test_subj_docs = subj_docs[n_train_subj:]
train_obj_docs = obj_docs[:n_train_obj]
test_obj_docs = obj_docs[n_train_obj:]

# Subjective documents come first, objective documents second, in both sets.
training_docs = train_subj_docs+train_obj_docs
testing_docs = test_subj_docs+test_obj_docs

In [12]:
sentim_analyzer = SentimentAnalyzer()

# Apply NLTK's mark_negation to every training document first, then gather
# the words of the marked documents into a single collection.
negation_marked_docs = [mark_negation(doc) for doc in training_docs]
all_words_neg = sentim_analyzer.all_words(negation_marked_docs)

#### Use simple unigram word features, handling negation:

In [13]:
# Frequency threshold passed to unigram_word_feats as `min_freq`.
MIN_UNIGRAM_FREQ = 4

unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=MIN_UNIGRAM_FREQ)
# Register the unigram extractor on the analyzer with the selected unigrams.
# NOTE(review): re-running this cell presumably registers it a second time - confirm.
sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
print(len(unigram_feats))

83


#### Apply features to obtain a feature-value representation of the datasets

In [14]:
# Convert the labelled documents into the feature-value representation that
# the classifier is trained and evaluated on.
# NOTE: this rebinds `training_set`, which the Naive Bayes cell earlier in the
# notebook uses for its hand-labelled (text, label) list; re-running that
# cell after this one overwrites the value assigned here.
training_set = sentim_analyzer.apply_features(training_docs)
test_set = sentim_analyzer.apply_features(testing_docs)

#### Train classifier and output evaluation results

In [15]:
# Train a Naive Bayes classifier through the analyzer, then print every
# evaluation metric in alphabetical order, one "name: value" line each.
trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, training_set)
evaluation = sentim_analyzer.evaluate(test_set)
for metric in sorted(evaluation):
    print('{0}: {1}'.format(metric, evaluation[metric]))

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.8
F-measure [obj]: 0.8
F-measure [subj]: 0.8
Precision [obj]: 0.8
Precision [subj]: 0.8
Recall [obj]: 0.8
Recall [subj]: 0.8
