<a href="https://colab.research.google.com/github/RahimMirani/Bidirectional-LSTM-for-NLP/blob/main/Bayes_Sentiment_Analysis_Classifier_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
import nltk
# Fetch the NLTK resources used below; packages already present are reported as up to date.
nltk.download('subjectivity')   # corpus read through nltk.corpus.subjectivity
nltk.download('punkt_tab')      # presumably the sentence-tokenizer data used by sent_tokenize -- TODO confirm
nltk.download('vader_lexicon')  # presumably the lexicon behind SentimentIntensityAnalyzer -- TODO confirm

[nltk_data] Downloading package subjectivity to /root/nltk_data...
[nltk_data]   Package subjectivity is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [26]:
# Number of sentences taken from each category of the subjectivity corpus.
n_instances = 100

# Pair every tokenized sentence with its category label so each document is
# a (tokens, label) tuple.
subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]

# Final expression of the cell, so the notebook displays the *computed* sample
# sizes. (A pasted "(100, 100)" literal used to follow this line and was what
# actually got displayed, regardless of the real lengths.)
len(subj_docs), len(obj_docs)

(100, 100)

In [27]:
# Peek at the first subjective sample: a (token list, 'subj') pair.
subj_docs[0]

(['smart',
  'and',
  'alert',
  ',',
  'thirteen',
  'conversations',
  'about',
  'one',
  'thing',
  'is',
  'a',
  'small',
  'gem',
  '.'],
 'subj')

In [28]:
# Per category: the first 80 documents train the classifier, documents 80-99 test it.
train_subj_docs, test_subj_docs = subj_docs[:80], subj_docs[80:100]
train_obj_docs, test_obj_docs = obj_docs[:80], obj_docs[80:100]

# Combined splits, subjective documents first, then objective ones.
training_docs = [*train_subj_docs, *train_obj_docs]
testing_docs = [*test_subj_docs, *test_obj_docs]

In [29]:
# Analyzer object reused by the following cells for feature extraction,
# training and evaluation.
sentim_analyzer = SentimentAnalyzer()

# Apply negation marking to every training document, then gather their words.
all_words_neg = sentim_analyzer.all_words(list(map(mark_negation, training_docs)))

In [30]:
# Unigram features selected from the negation-marked training words (min_freq=4).
unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)

# Register the unigram extractor so apply_features() can use it.
sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

# Final expression of the cell, so the feature count is actually displayed.
# (It used to sit mid-cell, where its value was discarded, followed by a
# pasted "83" output literal.)
len(unigram_feats)

In [31]:
# Run the analyzer's registered feature extractors over the training split
# first, then over the test split.
training_set, test_set = map(
    sentim_analyzer.apply_features,
    (training_docs, testing_docs),
)

In [32]:
# Train a Naive Bayes classifier through the analyzer.
trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, training_set)

# Evaluate on the held-out set and print one "metric: score" line per entry,
# ordered by metric name.
metrics = sentim_analyzer.evaluate(test_set)
for key in sorted(metrics):
    value = metrics[key]
    print('{0}: {1}'.format(key, value))

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.8
F-measure [obj]: 0.8
F-measure [subj]: 0.8
Precision [obj]: 0.8
Precision [subj]: 0.8
Recall [obj]: 0.8
Recall [subj]: 0.8


In [33]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Example inputs for the VADER analyzer; each group notes what the sentences exercise.
sentences = [
    # Baseline positive sentence, then the same sentence with increasing emphasis:
    # exclamation mark, booster word, ALL CAPS, and combinations of those signals.
    "Paris is beautiful, romantic, and vibrant.",
    "Paris is beautiful, romantic, and vibrant!",
    "Paris is absolutely beautiful, romantic, and vibrant.",
    "Paris is ABSOLUTELY BEAUTIFUL, romantic, and VIBRANT.",
    "Paris is ABSOLUTELY BEAUTIFUL, romantic, and VIBRANT!!!",
    "Paris is ABSOLUTELY BEAUTIFUL, truly romantic, and INCREDIBLY VIBRANT!!!",

    # Plain positive sentence, then a qualified ("kind of") version of it.
    "The pizza was delicious.",
    "The pizza was kind of delicious.",

    # Mixed sentiment joined by a contrastive "but".
    "The crust was crispy, but the cheese was bland and the sauce was too sweet.",

    # Negative sentence with a booster word.
    "A terribly dry, flavorless sandwich.",

    # Negated negative sentence using a contraction ("wasn't").
    "At least it wasn't the worst sandwich ever.",

    # Emoticon and emoji input.
    ":) and 🍕",

    # Empty-string edge case.
    "",

    # Negative slang: plain, with an exclamation mark, and in ALL CAPS.
    "This hotel sucks",
    "This hotel sucks!",
    "THIS HOTEL SUCKS!",

    # Mixed sentiment with slang and a contrastive "But".
    "This hotel is kinda bad! But the view from the balcony is amazing, lol"
]

# Multi-sentence review, written as adjacent string literals that Python joins
# into a single one-line string.
paragraph = (
    "It was one of the worst dining experiences I've had, despite the high ratings. "
    "Unbelievably slow service!! Cold food. VERY disappointing ambiance. "
    "The restaurant was bad. Very bad restaurant. VERY bad restaurant. VERY BAD restaurant. VERY BAD restaurant!"
)

In [34]:
from nltk import tokenize
# Split the review paragraph into individual sentences and append them to the
# list of inputs to score (this mutates `sentences` in place).
lines_list = tokenize.sent_tokenize(paragraph)
sentences += lines_list

In [35]:
# Harder inputs: comparisons, negation, hedging, contrastive "but", and
# sentence fragments.
#
# Entries that span two source lines use implicit concatenation of adjacent
# string literals. A backslash continuation *inside* a string literal would
# embed the next line's leading indentation in the sentence itself.
tricky_sentences = [
    "Most online restaurant reviews are garbage.",
    "Michelin-starred dining is the best.",
    "Food reviews have never been reliable.",
    "Food reviews on this app have never been this reliable.",
    "Gordon Ramsay has never been so entertaining.",
    ("I won't say that the restaurant is spectacular, and I wouldn't claim that "
     "the restaurant is too mediocre either."),
    "I like to complain about expensive restaurants, but I couldn't find fault with this one.",
    "I like to complain about expensive restaurants, BUT I couldn't help but criticize this one.",
    ("It's one thing to try street food, but another thing entirely "
     "to get food poisoning from it."),
    "The dessert was too good.",
    "This dish was actually neither that flavorful nor very well presented.",
    # ASCII apostrophe, consistent with every other contraction in this list
    # (this entry previously used U+2019). NOTE(review): VADER's negation
    # matching appears to rely on ASCII "n't" -- confirm against nltk.sentiment.vader.
    ("This restaurant doesn't care about quality, service, or any other kind of "
     "fine dining experience."),
    "Those who see flaws in authentic cuisine are missing out on a true culinary experience.",
    ("There are bland and uninspired dishes, BUT the chef adds just enough creativity to "
     "keep it exciting."),
    "The menu is not extraordinary, but the service is excellent and the ambiance is PERFECT!",
    "Le Bernardin is one of the most unforgettable fine dining experiences.",
    "Le Bernardin is one of the least remarkable fine dining experiences.",
    "Le Bernardin is at least enjoyable as a fine dining experience.",
    "People fall in love with a dish...",
    "but then it tastes different the next time.",
    "usually around the time they start recommending it to their friends.",
    "The Eiffel Tower lights went off tonight.",
    ("However, Chef Pierre solemnly argues, his team prepared the dish "
     "exactly as the customer ordered it—in the 'least offensive way possible.'"),
]
# Append the tricky cases to the inputs. NOTE: this mutates `sentences` in
# place, so re-running this cell without re-running the cell that defines
# `sentences` appends the same entries again.
sentences.extend(tricky_sentences)

# Build the analyzer once and reuse it for every sentence; constructing it
# inside the loop repeated the same setup work on each iteration.
sid = SentimentIntensityAnalyzer()

for sentence in sentences:
    print(sentence)
    ss = sid.polarity_scores(sentence)
    # Print every score on one line as "name: value, " in sorted key order.
    for k in sorted(ss):
        print('{0}: {1}, '.format(k, ss[k]), end='')
    print()

Paris is beautiful, romantic, and vibrant.
compound: 0.875, neg: 0.0, neu: 0.231, pos: 0.769, 
Paris is beautiful, romantic, and vibrant!
compound: 0.8832, neg: 0.0, neu: 0.226, pos: 0.774, 
Paris is absolutely beautiful, romantic, and vibrant.
compound: 0.8903, neg: 0.0, neu: 0.275, pos: 0.725, 
Paris is ABSOLUTELY BEAUTIFUL, romantic, and VIBRANT.
compound: 0.9379, neg: 0.0, neu: 0.229, pos: 0.771, 
Paris is ABSOLUTELY BEAUTIFUL, romantic, and VIBRANT!!!
compound: 0.9464, neg: 0.0, neu: 0.218, pos: 0.782, 
Paris is ABSOLUTELY BEAUTIFUL, truly romantic, and INCREDIBLY VIBRANT!!!
compound: 0.969, neg: 0.0, neu: 0.207, pos: 0.793, 
The pizza was delicious.
compound: 0.5719, neg: 0.0, neu: 0.448, pos: 0.552, 
The pizza was kind of delicious.
compound: 0.5279, neg: 0.0, neu: 0.595, pos: 0.405, 
The crust was crispy, but the cheese was bland and the sauce was too sweet.
compound: 0.6124, neg: 0.0, neu: 0.778, pos: 0.222, 
A terribly dry, flavorless sandwich.
compound: -0.5574, neg: 0.545, 