# Library imports

In [21]:
# NLTK
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
# Fetch the NLTK stopword corpus into the local nltk_data directory.
nltk.download('stopwords')

# spaCy
import spacy
# Shared spaCy pipeline loaded through the 'en' shortcut name; the testing
# cell uses it for noun chunks, part-of-speech tags and lemmas.
nlp = spacy.load('en')

# Other
import re
import json
import string
import numpy as np
import pandas as pd
import warnings
# Silences every warning for the rest of the session.
# NOTE(review): this also hides warnings raised by pandas and Keras - consider narrowing the filter.
warnings.filterwarnings('ignore')
from sklearn.preprocessing import LabelEncoder

# Keras
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Data loading and cleaning

In [22]:
#load data
# pandas is already imported as `pd` in the import cell, so it is not re-imported here.
# Download link of the labelled reviews CSV (columns: review, aspect, sentiment).
REVIEWS_CSV_URL = "https://drive.google.com/uc?export=download&id=1InpAOdyHowB805CFVEWg-HHiXXh0lpBS"

# Every column is cast to str right after reading.
reviews_train = pd.read_csv(REVIEWS_CSV_URL).astype(str)

#show first 5 records
reviews_train.head()

Unnamed: 0,review,aspect,sentiment
0,It has be my TD branch for the past 15 years. ...,staff,positive
1,It has be my TD branch for the past 15 years. ...,service,positive
2,So rude castumer service,service,negative
3,Staff were very welcoming and friendly. I had ...,staff,positive
4,Here are 3 reasons TD is the worst bank for sm...,staff,negative


In [23]:
# Data size as (rows, columns) right after loading, before any rows are filtered out
reviews_train.shape

(90, 3)

In [0]:
#fix typos
# Each misspelled or space-padded label is mapped to its canonical spelling.
sentiment_typos = {'netrual': 'neutral', 'positive ': 'positive', 'negative ': 'negative'}
aspect_typos = {'servicve': 'service', 'line ': 'line'}

# Replace whole values and assign the column back to the frame. This avoids
# chained assignment (`frame.column[mask] = value`), which pandas does not
# guarantee to write through to the underlying DataFrame.
reviews_train['sentiment'] = reviews_train['sentiment'].replace(sentiment_typos)
reviews_train['aspect'] = reviews_train['aspect'].replace(aspect_typos)

In [0]:
#aspect name change
# Give the short aspect labels their descriptive names. Only exact matches are
# replaced, so re-running the cell leaves already-renamed labels untouched.
# Assigning the replaced column back avoids chained assignment
# (`frame.column[mask] = value`), which may silently fail to update the frame.
aspect_display_names = {'service': 'service quality', 'staff': 'staff friendliness'}
reviews_train['aspect'] = reviews_train['aspect'].replace(aspect_display_names)

In [26]:
#drop neutral records to reduce imbalance for now
# .copy() makes the filtered result an independent frame, so later cells can
# assign into it without pandas treating it as a slice of the unfiltered data.
reviews_train = reviews_train[reviews_train['sentiment'] != 'neutral'].copy()

# Remaining class balance.
reviews_train.groupby('sentiment').size()

sentiment
negative    44
positive    40
dtype: int64

In [27]:
#fix typos
# Safety net for aspect typos that might still be present at this point.
# The aspect labels were renamed earlier ('service' -> 'service quality'), so a
# leftover 'servicve' has to map to the renamed label; mapping it to plain
# 'service' would introduce a fourth aspect class.
# .assign() returns a new frame, which avoids chained assignment on a filtered slice.
late_aspect_typos = {'servicve': 'service quality', 'line ': 'line'}
reviews_train = reviews_train.assign(aspect=reviews_train['aspect'].replace(late_aspect_typos))

#drop rows whose aspect is 'na' to reduce imbalance for now
reviews_train = reviews_train[reviews_train['aspect'] != 'na'].copy()

# Remaining rows per aspect.
reviews_train.groupby('aspect').size()

aspect
line                  10
service quality       24
staff friendliness    48
dtype: int64

In [28]:
# Size after filtering: 90 rows were loaded and 82 remain, so 8 rows were dropped in total
reviews_train.shape

(82, 3)

# Aspect analyzer


In [0]:
#aspect classifier NN structure, the output layer is 3 neurons corresponding to the three aspects
def build_dense_classifier(n_classes, input_dim=6000):
    """Build and compile a fully connected softmax classifier.

    Args:
        n_classes: number of output neurons, one per class.
        input_dim: width of each input row. It has to equal the number of
            columns of the feature matrix fed to ``fit``/``predict``; the
            reviews are encoded with ``Tokenizer(num_words=6000)``, whose
            ``texts_to_matrix`` output has 6000 columns, hence the default.

    Returns:
        A compiled ``Sequential`` model: Dense 512 -> 256 -> 128 (ReLU)
        followed by a ``n_classes``-wide softmax layer.
    """
    model = Sequential()
    model.add(Dense(512, input_shape=(input_dim,), activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    #compile model
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
    return model


# The input width must match the 6000-column review matrix; a 3000-wide input
# layer cannot be fitted on it.
aspect_model = build_dense_classifier(n_classes=3)

In [0]:
#aspect classifier NN structure, the output layer is 3 neurons corresponding to the three aspects
# Layers are listed input to output; the first one fixes the input width at 6000 features.
aspect_layers = [
    Dense(512, input_shape=(6000,), activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax'),
]
aspect_model = Sequential(aspect_layers)

#compile model
aspect_model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
#tokenize review text
vocab_size = 6000  # maximum vocabulary size; also the number of columns in the encoded matrix
tokenizer = Tokenizer(num_words=vocab_size)

# Fit the word index on the training reviews, then encode the same reviews
# as a binary bag-of-words matrix ('binary' is the default mode).
review_texts = reviews_train['review']
tokenizer.fit_on_texts(review_texts)
reviews_tokenized = pd.DataFrame(tokenizer.texts_to_matrix(review_texts, mode='binary'))

In [32]:
# Inspect the encoded reviews: one row per review, one column per token index (0 to vocab_size - 1)
reviews_tokenized

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999
0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
79,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
80,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
#encode aspect classes: label strings -> integer ids -> one-hot rows
label_encoder = LabelEncoder()
label_encoder.fit(reviews_train['aspect'])
integer_category = label_encoder.transform(reviews_train['aspect'])
encoded_aspect = to_categorical(integer_category)

In [34]:
#fit aspect classifier on the encoded reviews and the one-hot aspect labels
aspect_model.fit(
    x=reviews_tokenized,
    y=encoded_aspect,
    epochs=100,
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7fec0e975198>

# Sentiment analyzer


In [0]:
#sentiment classifier NN structure, the output layer is 2 neurons corresponding to the two sentiments
# Same dense stack as the aspect classifier, with a 2-way softmax on top.
sentiment_model = Sequential([
    Dense(512, input_shape=(6000,), activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(2, activation='softmax'),
])

#compile model
sentiment_model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
#tokenize review text
vocab_size = 6000  # maximum vocabulary size
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(reviews_train['review'])
review_matrix = tokenizer.texts_to_matrix(reviews_train['review'])
reviews_tokenized = pd.DataFrame(review_matrix)

#encode sentiment classes: label strings -> integer ids -> one-hot rows
label_encoder_2 = LabelEncoder()
label_encoder_2.fit(reviews_train['sentiment'])
integer_sentiment = label_encoder_2.transform(reviews_train['sentiment'])
encoded_sentiment = to_categorical(integer_sentiment)

In [37]:
#fit sentiment classifier on the encoded reviews and the one-hot sentiment labels
sentiment_model.fit(
    x=reviews_tokenized,
    y=encoded_sentiment,
    epochs=100,
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7febca57be10>

# Testing

In [0]:
# Reviews that the trained aspect and sentiment classifiers are applied to in the rest of this section.
test_reviews = [
    "Good, fast service.",
    "The teller was very pleasant.",
    "The line is way too long and staffs are not so friendly",
    "Rude & unprofessional/inefficient staff (including Randy, the manager on site). Time to switch banks.",
    "The line here is way too long"
]



# A second set of longer, multi-sentence reviews.
# NOTE(review): nothing in the visible notebook reads test_reviews2 - the preprocessing
# below only uses test_reviews; confirm whether this list is still needed.
test_reviews2 = ["One young lady with nice eye-make up offered me service by uncomfortable facial expression. I wanted to change my account to student account,she told me I need to wait for the email,do it step by step,upload my new study permit etc... I told her this is not the first time I need to change my account,last time I met one staff,he did everything in 5 mins..she checked something on computer and used her index finger (again)showed me where I have to go to ask for my issue. Her service was no hello/no smile/no eye contact/and react like there was a long line-up behind me! I always get good experiences in this brand except TODAY! However,I would like to say thank you to the Lady in the information desk,she did e very good job.. Updated: I came back the next day,bring my new study permit as they said.I met one lady at information desk and she said I should line up to ask!!! Each person said different way for my case! I also saw nice-make up lady,her attitude was the same for other customer. I decided that I will withdraw all my money and use other bank.",
"There is this one specific person in this branch. ‘He’ has got so much attitude. He needs to learn customer service before he can even work there. He seems to be on a higher level of employment who has no sense professionalism.",
'''Worst customer service I've ever received from a TD bank branch... and it was from the MANAGER herself. I deposited my paycheck from work and she gave me a judgemental look, asking me what this paycheck was for... like duh, my job?! She then continued to deposit the check anyways and after she typed everything up and stored the check, she told me my account would be put on hold. I asked her what exactly that meant and all she said was "the funds won't be available to you until next week." I came back later in the day and received better service from one of the tellers who explained to me what was going on. That's when I found out that the lady who put my account on hold was the manager... what a shame. She's the manager and couldn't even be professional enough to explain the reasoning behind the hold. Zero stars.''',
"I don't know what to say. The service is depend on how much money you have in the account. And they are prejudicial.",
"Confirmation about online booked appointment is not firmed. You still might have to wait 20-30 minutes in branch . Be careful with your tight schedule.",
"Staff are complete novices who are not capable of completing daily transactions in person. This branches first instinct is to give up.",
"Happy to have found this branch just as I needed it! The queue wasn't bad at all it moved fast. Staff were helpful and I'm glad the floors weren't slippery",
"I regularly deal with this branch and I must say that this branch is my favorite of all. The customer service and professionalism is just outstanding. Every time I go there, I feel like they treated me like I am special customer. This level of service has hold true regardless of the employee had the pleasure of dealing with. My hat's off to all the employees, specially Simrat Teja who's been very pleasant, caring and friendly. He always come through when I need something. He explains everything very clearly and is ready to go above and beyond always.Overall ALL EXCELLENT!",
"The wait here is way too long"
]


# Aspect preprocessing
# Lower-case every review up front; this lower-cased list is what gets parsed here.
test_reviews = [text.lower() for text in test_reviews]

#chunking method for keywords extraction
# For each parsed review keep the head word of every noun chunk whose head is
# tagged NOUN, and join those heads into one space-separated string.
test_aspect_terms = [
    ' '.join(
        noun_chunk.root.text
        for noun_chunk in doc.noun_chunks
        if noun_chunk.root.pos_ == 'NOUN'
    )
    for doc in nlp.pipe(test_reviews)
]

# Encode the extracted terms with the already-fitted tokenizer.
test_aspect_terms = pd.DataFrame(tokenizer.texts_to_matrix(test_aspect_terms))

# Sentiment preprocessing
# For each parsed review keep the lemmas of adjectives and verbs that are
# neither stop words nor punctuation; a review without a parse contributes ''.
test_sentiment_terms = [
    ' '.join(
        token.lemma_
        for token in doc
        if token.pos_ in ("ADJ", "VERB") and not (token.is_stop or token.is_punct)
    ) if doc.is_parsed else ''
    for doc in nlp.pipe(test_reviews)
]

# Encode the extracted terms with the already-fitted tokenizer.
test_sentiment_terms = pd.DataFrame(tokenizer.texts_to_matrix(test_sentiment_terms))


In [40]:
#get test results
# The index of the largest softmax output is the predicted class. Taking the
# argmax of predict() gives the same result as Sequential.predict_classes and
# also works on Keras versions that no longer provide that helper.
aspect_class_ids = np.argmax(aspect_model.predict(test_aspect_terms), axis=-1)
sentiment_class_ids = np.argmax(sentiment_model.predict(test_sentiment_terms), axis=-1)

# Map class indices back to the original label strings.
test_aspect_categories = label_encoder.inverse_transform(aspect_class_ids)
test_sentiment = label_encoder_2.inverse_transform(sentiment_class_ids)

#print results
# Reviews are numbered from 1. Separating spaces are included after the colon
# and after "about" so the label does not run into the preceding word.
for review_number, review_text in enumerate(test_reviews, start=1):
    position = review_number - 1
    print("Review " + str(review_number) + ": " + review_text)
    print("Review " + str(review_number) + " is expressing " + test_sentiment[position]
          + " opinion about " + test_aspect_categories[position] + '\n')

Review 1:good, fast service.
Review 1 is expressing positive opinion aboutservice quality

Review 2:the teller was very pleasant.
Review 2 is expressing positive opinion aboutstaff friendliness

Review 3:the line is way too long and staffs are not so friendly
Review 3 is expressing positive opinion aboutline

Review 4:rude & unprofessional/inefficient staff (including randy, the manager on site). time to switch banks.
Review 4 is expressing negative opinion aboutstaff friendliness

Review 5:the line here is way too long
Review 5 is expressing negative opinion aboutline

