In [1]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [193]:
# sentences = [
#   'The food we had yesterday was delicious',
#   'My time in Italy was very enjoyable',
#   'I found the meal to be tasty',
#   'The internet was slow.',
#   'Our experience was suboptimal',
#   'The T-Shirt was dirty.'
# ] # Sample sentences

In [194]:
from datasets import load_dataset
reviews = load_dataset("tomaarsen/setfit-absa-semeval-restaurants", split="train[:128]")
df = reviews.to_pandas()

sentences = df['text']

In [195]:
sentences

0                   But the staff was so horrible to us.
1      To be completely fair, the only redeeming fact...
2      The food is uniformly exceptional, with a very...
3      The food is uniformly exceptional, with a very...
4      The food is uniformly exceptional, with a very...
                             ...                        
123    Right off the L in Brooklyn this is a nice coz...
124    Right off the L in Brooklyn this is a nice coz...
125    Le Pere Pinard has a $15 pre-theater menu that...
126    I also ordered for delivery and the restaurant...
127    I also ordered for delivery and the restaurant...
Name: text, Length: 128, dtype: object

In [197]:
for sentence in sentences:
  doc = nlp(sentence)
  for token in doc:
    print(token.text, token.dep_, token.head.text, token.head.pos_, token.pos_, [child for child in token.children])

But cc was AUX CCONJ []
the det staff NOUN DET []
staff nsubj was AUX NOUN [the]
was ROOT was AUX AUX [But, staff, horrible, .]
so advmod horrible ADJ ADV []
horrible acomp was AUX ADJ [so, to]
to prep horrible ADJ ADP [us]
us pobj to ADP PRON []
. punct was AUX PUNCT []
To aux be AUX PART []
be advcl was AUX AUX [To, fair]
completely advmod fair ADJ ADV []
fair acomp be AUX ADJ [completely]
, punct was AUX PUNCT []
the det factor NOUN DET []
only amod factor NOUN ADJ []
redeeming amod factor NOUN VERB []
factor nsubj was AUX NOUN [the, only, redeeming]
was ROOT was AUX AUX [be, ,, factor, food, ,, but, make, .]
the det food NOUN DET []
food attr was AUX NOUN [the, ,, was]
, punct food NOUN PUNCT []
which nsubj was AUX PRON []
was relcl food NOUN AUX [which, above]
above prep was AUX ADP [average]
average pobj above ADP ADJ []
, punct was AUX PUNCT []
but cc was AUX CCONJ []
could aux make VERB AUX []
n't neg make VERB PART []
make conj was AUX VERB [could, n't, up, for]
up prt make VE

In [198]:
for sentence in sentences:
  doc = nlp(sentence)
  descriptive_term = ''
  for token in doc:
    if token.pos_ == 'ADJ':
      descriptive_term = token
  print(sentence)
  print(descriptive_term)

But the staff was so horrible to us.
horrible
To be completely fair, the only redeeming factor was the food, which was above average, but couldn't make up for all the other deficiencies of Teodora.
other
The food is uniformly exceptional, with a very capable kitchen which will proudly whip up whatever you feel like eating, whether it's on the menu or not.
capable
The food is uniformly exceptional, with a very capable kitchen which will proudly whip up whatever you feel like eating, whether it's on the menu or not.
capable
The food is uniformly exceptional, with a very capable kitchen which will proudly whip up whatever you feel like eating, whether it's on the menu or not.
capable
Not only was the food outstanding, but the little 'perks' were great.
great
Not only was the food outstanding, but the little 'perks' were great.
great
Our agreed favorite is the orrechiete with sausage and chicken (usually the waiters are kind enough to split the dish in half so you get to sample both meats)

In [199]:
#assumption: features of products and services are mostly nouns/noun compounds
#step 1: extract the aspect candiates
aspects = []
for sentence in sentences:
  doc = nlp(sentence)
  descriptive_term = ''
  target = ''
  for token in doc:
    if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
        target = token.text
    if token.dep_ == "compound":
        target = token.text+token.head.text
    if token.pos_ == 'ADJ':
      descriptive_term = token.text
  aspects.append({'aspect': target,'description': descriptive_term})

In [None]:
aspects

In [201]:
#considering reviews pertaining to only food and service
#step 2: classifying if "aspect" or "non-aspect" i.e., considering only the aspect candidates of the lot.
pizza_reviews = []
service_reviews = []
for aspect in aspects:
    if aspect['aspect'] == "pizza" or aspect['aspect'] == "dish" or aspect['aspect'] == "food" or aspect['aspect'] == "desert" or aspect['aspect'] == "PerePinard":
        pizza_reviews.append(aspect)
    elif aspect['aspect'] == "waiters" or aspect['aspect'] == "service" or aspect['aspect'] == "menu":
        service_reviews.append(aspect)
    else:
        continue

In [202]:
pizza_reviews

[{'aspect': 'food', 'description': 'capable'},
 {'aspect': 'food', 'description': 'capable'},
 {'aspect': 'food', 'description': 'capable'},
 {'aspect': 'food', 'description': 'good'},
 {'aspect': 'pizza', 'description': 'thin'},
 {'aspect': 'pizza', 'description': 'thin'},
 {'aspect': 'food', 'description': 'REASONABLE'},
 {'aspect': 'food', 'description': 'REASONABLE'},
 {'aspect': 'food', 'description': 'empty'},
 {'aspect': 'desert', 'description': 'good'},
 {'aspect': 'desert', 'description': 'good'},
 {'aspect': 'desert', 'description': 'good'},
 {'aspect': 'food', 'description': 'outstanding'},
 {'aspect': 'pizza', 'description': 'huge'},
 {'aspect': 'dish', 'description': 'small'},
 {'aspect': 'food', 'description': 'better'},
 {'aspect': 'food', 'description': 'better'},
 {'aspect': 'PerePinard', 'description': 'outstanding'}]

In [203]:
restaurant_reviews = [dict(t) for t in {tuple(aspect.items()) for aspect in pizza_reviews}]

In [204]:
restaurant_reviews.extend([dict(t) for t in {tuple(aspect.items()) for aspect in service_reviews}])

In [205]:
restaurant_reviews

[{'aspect': 'pizza', 'description': 'huge'},
 {'aspect': 'PerePinard', 'description': 'outstanding'},
 {'aspect': 'food', 'description': 'good'},
 {'aspect': 'desert', 'description': 'good'},
 {'aspect': 'dish', 'description': 'small'},
 {'aspect': 'food', 'description': 'outstanding'},
 {'aspect': 'pizza', 'description': 'thin'},
 {'aspect': 'food', 'description': 'capable'},
 {'aspect': 'food', 'description': 'REASONABLE'},
 {'aspect': 'food', 'description': 'better'},
 {'aspect': 'food', 'description': 'empty'},
 {'aspect': 'service', 'description': 'fast'},
 {'aspect': 'service', 'description': 'curtious'},
 {'aspect': 'service', 'description': 'poor'},
 {'aspect': 'menu', 'description': 'excellent'},
 {'aspect': 'menu', 'description': 'limited'},
 {'aspect': 'waiters', 'description': 'enough'}]

In [206]:
# step 3: associate a polarity score to each extracted aspect. (here using TextBlob)
from textblob import TextBlob
map_dict = restaurant_reviews.copy()
for aspect in restaurant_reviews:
  aspect['sentiment'] = TextBlob(aspect['description']).sentiment
restaurant_reviews

[{'aspect': 'pizza',
  'description': 'huge',
  'sentiment': Sentiment(polarity=0.39999999999999997, subjectivity=0.9)},
 {'aspect': 'PerePinard',
  'description': 'outstanding',
  'sentiment': Sentiment(polarity=0.5, subjectivity=0.875)},
 {'aspect': 'food',
  'description': 'good',
  'sentiment': Sentiment(polarity=0.7, subjectivity=0.6000000000000001)},
 {'aspect': 'desert',
  'description': 'good',
  'sentiment': Sentiment(polarity=0.7, subjectivity=0.6000000000000001)},
 {'aspect': 'dish',
  'description': 'small',
  'sentiment': Sentiment(polarity=-0.25, subjectivity=0.4)},
 {'aspect': 'food',
  'description': 'outstanding',
  'sentiment': Sentiment(polarity=0.5, subjectivity=0.875)},
 {'aspect': 'pizza',
  'description': 'thin',
  'sentiment': Sentiment(polarity=-0.4, subjectivity=0.8500000000000001)},
 {'aspect': 'food',
  'description': 'capable',
  'sentiment': Sentiment(polarity=0.2, subjectivity=0.4)},
 {'aspect': 'food',
  'description': 'REASONABLE',
  'sentiment': Sentim

In [207]:
# step 3: associate a polarity score to each extracted aspect. (here using VADER)
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
def get_sentiment(text):
    scores = analyzer.polarity_scores(text)
    sentiment = "positive" if scores['pos'] >= 0.5 else "negative"
    return sentiment

def get_score(text):
    scores = analyzer.polarity_scores(text)
    score = scores['pos'] if scores['pos'] >= 0.5 else scores['neg']
    return score

In [208]:
for aspect in restaurant_reviews:
    aspect['VADER sentiment'] = get_sentiment(aspect['description'])
    aspect['VADER score'] = get_score(aspect['description'])
restaurant_reviews

[{'aspect': 'pizza',
  'description': 'huge',
  'sentiment': Sentiment(polarity=0.39999999999999997, subjectivity=0.9),
  'VADER sentiment': 'positive',
  'VADER score': 1.0},
 {'aspect': 'PerePinard',
  'description': 'outstanding',
  'sentiment': Sentiment(polarity=0.5, subjectivity=0.875),
  'VADER sentiment': 'positive',
  'VADER score': 1.0},
 {'aspect': 'food',
  'description': 'good',
  'sentiment': Sentiment(polarity=0.7, subjectivity=0.6000000000000001),
  'VADER sentiment': 'positive',
  'VADER score': 1.0},
 {'aspect': 'desert',
  'description': 'good',
  'sentiment': Sentiment(polarity=0.7, subjectivity=0.6000000000000001),
  'VADER sentiment': 'positive',
  'VADER score': 1.0},
 {'aspect': 'dish',
  'description': 'small',
  'sentiment': Sentiment(polarity=-0.25, subjectivity=0.4),
  'VADER sentiment': 'negative',
  'VADER score': 0.0},
 {'aspect': 'food',
  'description': 'outstanding',
  'sentiment': Sentiment(polarity=0.5, subjectivity=0.875),
  'VADER sentiment': 'posi