In [1]:
#!pip install spacy

In [2]:
#!python -m spacy download en_core_web_sm

In [3]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [4]:
sentences = [
  'The food we had yesterday was delicious',
  'My time in Italy was very enjoyable',
  'I found the meal to be tasty',
  'The internet was slow.',
  'Our experience was suboptimal'
]

### We are going to split our sentences in such a way as to obtain the aspect (ex: food) and its expression (ex: delicious)

For each token inside our sentences, we can see the dependency through spacy's dependency analysis and POS (Part-Of-Speech)tags
https://spacy.io/usage/linguistic-features

In [5]:
for sentence in sentences:
    doc = nlp(sentence)
    for token in doc:
        print(token.text, token.dep_, token.head.text, token.head.pos_,token.pos_,[child for child in token.children])

The det food NOUN DET []
food nsubj was AUX NOUN [The, had]
we nsubj had VERB PRON []
had relcl food NOUN VERB [we, yesterday]
yesterday npadvmod had VERB NOUN []
was ROOT was AUX AUX [food, delicious]
delicious acomp was AUX ADJ []
My poss time NOUN PRON []
time nsubj was AUX NOUN [My, in]
in prep time NOUN ADP [Italy]
Italy pobj in ADP PROPN []
was ROOT was AUX AUX [time, enjoyable]
very advmod enjoyable ADJ ADV []
enjoyable acomp was AUX ADJ [very]
I nsubj found VERB PRON []
found ROOT found VERB VERB [I, be]
the det meal NOUN DET []
meal nsubj be AUX NOUN [the]
to aux be AUX PART []
be ccomp found VERB AUX [meal, to, tasty]
tasty acomp be AUX ADJ []
The det internet NOUN DET []
internet nsubj was AUX NOUN [The]
was ROOT was AUX AUX [internet, slow, .]
slow acomp was AUX ADJ []
. punct was AUX PUNCT []
Our poss experience NOUN PRON []
experience nsubj was AUX NOUN [Our]
was ROOT was AUX AUX [experience, suboptimal]
suboptimal acomp was AUX ADJ []


Below is an example of dependency visualization in a sentence:

https://spacy.io/usage/visualizers

In [6]:
import spacy
from spacy import displacy


doc = nlp("The food we had yesterday was delicious")
displacy.serve(doc, style="ent")




Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


By using the linguistic characteristics and in particular the POS, we will extract the adjectives as expression of sentiment 

In [7]:
for sentence in sentences:
    doc = nlp(sentence)
    descriptive_term = ''
    for token in doc:
        #print(token)
        if token.pos_ == 'ADJ':
            descriptive_term = token
    print(sentence)
    print(descriptive_term)

The food we had yesterday was delicious
delicious
My time in Italy was very enjoyable
enjoyable
I found the meal to be tasty
tasty
The internet was slow.
slow
Our experience was suboptimal
suboptimal


As you can see, what's missing are intensifiers like "very" (we'll avoid adverbs). we will extract them using the children property.  

In [8]:
for sentence in sentences:
    doc = nlp(sentence)
    descriptive_term = ''
    for token in doc:
        if token.pos_ == 'ADJ':
            prepend = ''
            for child in token.children:
                #print(child)
                if child.pos_ != 'ADV':
                    continue
                prepend += child.text + ' '
            descriptive_term = prepend + token.text
    #print(sentence)
    print(descriptive_term)

delicious
very enjoyable
tasty
slow
suboptimal


We'll put that in a dictionary list

In [9]:
aspects = []
for sentence in sentences:
    doc = nlp(sentence)
    descriptive_term = ''
    target = ''
    for token in doc:
        if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
            target = token.text
        if token.pos_ == 'ADJ':
            prepend = ''
            for child in token.children:
                if child.pos_ != 'ADV':
                    continue
                prepend += child.text + ' '
            descriptive_term = prepend + token.text  
    
    aspects.append({'aspect': target,'description': descriptive_term})
print(aspects)

[{'aspect': 'food', 'description': 'delicious'}, {'aspect': 'time', 'description': 'very enjoyable'}, {'aspect': 'meal', 'description': 'tasty'}, {'aspect': 'internet', 'description': 'slow'}, {'aspect': 'experience', 'description': 'suboptimal'}]


### using TextBlob for sentiment extraction

In [15]:
!pip install TextBlob

Collecting TextBlob
  Downloading textblob-0.17.1-py2.py3-none-any.whl (636 kB)
     ---------------------------------------- 0.0/636.8 kB ? eta -:--:--
     ------ ------------------------------- 112.6/636.8 kB 6.8 MB/s eta 0:00:01
     ------------------------ ------------- 409.6/636.8 kB 5.1 MB/s eta 0:00:01
     -------------------------------------  634.9/636.8 kB 5.7 MB/s eta 0:00:01
     -------------------------------------- 636.8/636.8 kB 5.0 MB/s eta 0:00:00
Installing collected packages: TextBlob
Successfully installed TextBlob-0.17.1


TextBlob is a library that offers out-of-the-box sentiment analysis. It has a bag of words approach, which means it has a list of words such as “good”, “bad” and “excellent” that have a sentiment score attached to them. It is also able to select modifiers (such as “not”) and intensifiers (such as “very”) that affect the sentiment score. 

In [10]:
from textblob import TextBlob
for aspect in aspects:
    aspect['sentiment'] = TextBlob(aspect['description']).sentiment
print(aspects)

[{'aspect': 'food', 'description': 'delicious', 'sentiment': Sentiment(polarity=1.0, subjectivity=1.0)}, {'aspect': 'time', 'description': 'very enjoyable', 'sentiment': Sentiment(polarity=0.65, subjectivity=0.78)}, {'aspect': 'meal', 'description': 'tasty', 'sentiment': Sentiment(polarity=0.0, subjectivity=0.0)}, {'aspect': 'internet', 'description': 'slow', 'sentiment': Sentiment(polarity=-0.30000000000000004, subjectivity=0.39999999999999997)}, {'aspect': 'experience', 'description': 'suboptimal', 'sentiment': Sentiment(polarity=0.0, subjectivity=0.0)}]


looking at the results we can notice that the adjectives "tasty" and "suboptimal" are considered neutral. It looks like they are not part of TextBlob's dictionary and therefore not picked up.

TextBlob allows us to train a NaiveBayesClassifier using a very simple and easy-to-understand syntax for everyone, which we will use to improve our sentiment analysis. 

Thus, we will perform a Corpus-Based Sentiment Lexicon Acquisition using TextBlob 

In [20]:
#!python -m textblob.download_corpora

In [11]:
from textblob.classifiers import NaiveBayesClassifier
# We train the NaivesBayesClassifier
train = [
  ('Slow internet.', 'negative'),
  ('Delicious food', 'positive'),
  ('Suboptimal experience', 'negative'),
  ('Very enjoyable time', 'positive'),
  ('delicious food.', 'negative')
]
cl = NaiveBayesClassifier(train)# And then we try to classify some sample sentences.
blob = TextBlob("Delicious food. Very Slow internet. Suboptimal experience. Enjoyable food.", classifier=cl)
for s in blob.sentences:
    print(s)
    print(s.classify())

Delicious food.
positive
Very Slow internet.
negative
Suboptimal experience.
negative
Enjoyable food.
negative


We will now redo our classification using the trainer model

In [12]:
from textblob import TextBlob
for aspect in aspects:
    blob = TextBlob(aspect['description'], classifier=cl)  
    aspect['sentiment'] = blob.classify()
print(aspects)

[{'aspect': 'food', 'description': 'delicious', 'sentiment': 'negative'}, {'aspect': 'time', 'description': 'very enjoyable', 'sentiment': 'positive'}, {'aspect': 'meal', 'description': 'tasty', 'sentiment': 'negative'}, {'aspect': 'internet', 'description': 'slow', 'sentiment': 'negative'}, {'aspect': 'experience', 'description': 'suboptimal', 'sentiment': 'negative'}]


# To DO:

1. Try on other sentences using the classifier 

In [13]:
# We will try the Decision Tree classifier
from textblob.classifiers import DecisionTreeClassifier

train = [
     ('I love this sandwich.', 'positive'),
     ('this is an amazing place!', 'positive'),
     ('I feel very good about these beers.', 'positive'),
     ('this is my best work.', 'positive'),
     ("what an awesome view", 'positive'),
     ('I do not like this restaurant', 'negative'),
     ('my boss is horrible.', 'negative')
]

cl = DecisionTreeClassifier(train)

blob = TextBlob("The beer was good . But the hangover is horrible.", classifier=cl)
for s in blob.sentences:
    print(s)
    print(s.classify())

The beer was good .
positive
But the hangover is horrible.
negative


In [14]:
test = [
     ('the beer was good.', 'positive'),
     ('I do not enjoy my job', 'negative'),
     ("I ain't feeling dandy today.", 'negative'),
     ("I feel amazing!", 'positive'),
]

In [15]:
# The accuracy
cl.accuracy(test)

0.5

In [16]:
from textblob import TextBlob

for aspect in aspects:
    blob = TextBlob(aspect['description'], classifier=cl)  
    aspect['sentiment'] = blob.classify()
for asp in aspects:
    print(asp)

{'aspect': 'food', 'description': 'delicious', 'sentiment': 'positive'}
{'aspect': 'time', 'description': 'very enjoyable', 'sentiment': 'positive'}
{'aspect': 'meal', 'description': 'tasty', 'sentiment': 'positive'}
{'aspect': 'internet', 'description': 'slow', 'sentiment': 'positive'}
{'aspect': 'experience', 'description': 'suboptimal', 'sentiment': 'positive'}


In [18]:
# We will try MaxEntClassifier
from textblob.classifiers import MaxEntClassifier

train = [
     ('I love this sandwich.', 'positive'),
     ('this is an amazing place!', 'positive'),
     ('I feel very good about these beers.', 'positive'),
     ('this is my best work.', 'positive'),
     ("what an awesome view", 'positive'),
     ('I do not like this restaurant', 'negative'),
     ('my boss is horrible.', 'negative')
]

cl = MaxEntClassifier(train)

blob = TextBlob("The beer was good . But the hangover is horrible.", classifier=cl)
for s in blob.sentences:
    print(s)
    print(s.classify())

The beer was good .
  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -0.69315        0.714
             2          -0.48107        0.714
             3          -0.43877        0.714
             4          -0.40176        0.714
             5          -0.36933        0.714
             6          -0.34093        0.857
             7          -0.31598        0.857
             8          -0.29400        1.000
             9          -0.27454        1.000
            10          -0.25724        1.000
            11          -0.24179        1.000
            12          -0.22793        1.000
            13          -0.21544        1.000
            14          -0.20415        1.000
            15          -0.19389        1.000
            16          -0.18454        1.000
            17          -0.17599        1.000
            18          -0.16815        1.000
            19          -0.

In [21]:
# The accuracy of the model MaxEnt Classifier
cl.accuracy(test)

0.75

In [20]:
from textblob import TextBlob

for aspect in aspects:
    blob = TextBlob(aspect['description'], classifier=cl)  
    aspect['sentiment'] = blob.classify()
for asp in aspects:
    print(asp)

{'aspect': 'food', 'description': 'delicious', 'sentiment': 'positive'}
{'aspect': 'time', 'description': 'very enjoyable', 'sentiment': 'positive'}
{'aspect': 'meal', 'description': 'tasty', 'sentiment': 'positive'}
{'aspect': 'internet', 'description': 'slow', 'sentiment': 'positive'}
{'aspect': 'experience', 'description': 'suboptimal', 'sentiment': 'positive'}
