# Importing Libraries

In [2]:
import spacy
import csv
import pandas as pd
from collections import Counter
from spacytextblob.spacytextblob import SpacyTextBlob
import numpy as np
import matplotlib.pyplot as plt
import pickle
from wordcloud import WordCloud
import itertools
import seaborn as sns
import math

# Importing Dataset

In [3]:
# Load the scraped review data; the CSV path is resolved relative to the working directory.
bank_df = pd.read_csv(filepath_or_buffer='bank_scraped_data.csv')
# Preview the first rows (DataFrame.head defaults to n=5).
bank_df.head()

Unnamed: 0,company,rating,date,title,text,company_reply,user_url,user_num_reviews,user_geo_location
0,Monzo,Rated 5 out of 5 stars,2022-06-10T09:22:08.000Z,One of the first to make good use of…,One of the first to make good use of technolog...,,/users/5e8c6423b539f395afdff62d,12,GB
1,Monzo,Rated 1 out of 5 stars,2022-06-10T09:20:08.000Z,Stop texting my 11 year old requesting money,"While I'm not a customer, my 11 year old daugh...",,/users/62a2f09f0ca7ed001272a232,1,GB
2,Monzo,Rated 1 out of 5 stars,2022-06-10T00:35:34.000Z,I am shocked by the ignorance of the…,I am shocked by the ignorance of the customer ...,,/users/5e10fbe5361d61898f0d07be,7,CZ
3,Monzo,Rated 1 out of 5 stars,2022-06-09T21:45:13.000Z,Terrible Support,Complete waste of time. Try and get some suppo...,,/users/5ec53f30ee8f29789fd5c151,2,GB
4,Monzo,Rated 5 out of 5 stars,2022-06-09T21:25:10.000Z,Monzo is the first bank we use in UK,Monzo is the first bank we use in UK. Great! T...,,/users/62514fec30e1fa0012d654be,4,GB


# Splitting Dataframe by Company

In [4]:
# One dataframe per company: keep that company's rows whose "date" is present,
# then renumber the index from 0.
company_col = bank_df['company']
date_present = bank_df["date"].notna()

monzo_df = bank_df[(company_col == "Monzo") & date_present].reset_index(drop=True)
sb_df = bank_df[(company_col == "Starling Bank") & date_present].reset_index(drop=True)
wise_df = bank_df[(company_col == "Wise (formerly TransferWise)") & date_present].reset_index(drop=True)
rev_df = bank_df[(company_col == "Revolut") & date_present].reset_index(drop=True)

# Report how many reviews each company ended up with.
print("Monzo Dataframe Length = {}".format(monzo_df.shape[0]))
print("Starling Bank Dataframe Length = {}".format(sb_df.shape[0]))
print("Wise Dataframe Length = {}".format(wise_df.shape[0]))
print("Revolut Dataframe Length = {}".format(rev_df.shape[0]))

Monzo Dataframe Length = 20486
Starling Bank Dataframe Length = 27619
Wise Dataframe Length = 129503
Revolut Dataframe Length = 86474


In [5]:
# Pull the rating / title / text / date columns out of each company's dataframe
# as standalone Series for the EDA below.

# monzo
monzo_review_rating, monzo_review_title, monzo_review_text, monzo_review_date = (
    monzo_df[field] for field in ("rating", "title", "text", "date")
)

# starling bank
sb_review_rating, sb_review_title, sb_review_text, sb_review_date = (
    sb_df[field] for field in ("rating", "title", "text", "date")
)

# wise
wise_review_rating, wise_review_title, wise_review_text, wise_review_date = (
    wise_df[field] for field in ("rating", "title", "text", "date")
)

# revolut
rev_review_rating, rev_review_title, rev_review_text, rev_review_date = (
    rev_df[field] for field in ("rating", "title", "text", "date")
)

# Granular Analysis - Filtering and Stopword Removal

## Splitting by Poor and Good Ratings

## Monzo

In [6]:
# Ratings of 1-3 stars form the "poor" bucket; 4-5 stars form the "good" bucket.
monzo_df_poor = monzo_df.loc[monzo_df['rating'].isin(
    ['Rated 1 out of 5 stars', 'Rated 2 out of 5 stars', 'Rated 3 out of 5 stars']
)]
monzo_df_good = monzo_df.loc[monzo_df['rating'].isin(
    ['Rated 4 out of 5 stars', 'Rated 5 out of 5 stars']
)]

print("Number of Poor Reviews = {}".format(monzo_df_poor.shape[0]))
print("Number of Good Reviews = {}".format(monzo_df_good.shape[0]))

Number of Poor Reviews = 2367
Number of Good Reviews = 18119


## Starling Bank

In [7]:
# Ratings of 1-3 stars form the "poor" bucket; 4-5 stars form the "good" bucket.
sb_df_poor = sb_df.loc[sb_df['rating'].isin(
    ['Rated 1 out of 5 stars', 'Rated 2 out of 5 stars', 'Rated 3 out of 5 stars']
)]
sb_df_good = sb_df.loc[sb_df['rating'].isin(
    ['Rated 4 out of 5 stars', 'Rated 5 out of 5 stars']
)]

print("Number of Poor Reviews = {}".format(sb_df_poor.shape[0]))
print("Number of Good Reviews = {}".format(sb_df_good.shape[0]))

Number of Poor Reviews = 3306
Number of Good Reviews = 24313


## Wise

In [8]:
# Ratings of 1-3 stars form the "poor" bucket; 4-5 stars form the "good" bucket.
wise_df_poor = wise_df.loc[wise_df['rating'].isin(
    ['Rated 1 out of 5 stars', 'Rated 2 out of 5 stars', 'Rated 3 out of 5 stars']
)]
wise_df_good = wise_df.loc[wise_df['rating'].isin(
    ['Rated 4 out of 5 stars', 'Rated 5 out of 5 stars']
)]

print("Number of Poor Reviews = {}".format(wise_df_poor.shape[0]))
print("Number of Good Reviews = {}".format(wise_df_good.shape[0]))

Number of Poor Reviews = 8570
Number of Good Reviews = 120933


## Revolut

In [9]:
# Ratings of 1-3 stars form the "poor" bucket; 4-5 stars form the "good" bucket.
rev_df_poor = rev_df.loc[rev_df['rating'].isin(
    ['Rated 1 out of 5 stars', 'Rated 2 out of 5 stars', 'Rated 3 out of 5 stars']
)]
rev_df_good = rev_df.loc[rev_df['rating'].isin(
    ['Rated 4 out of 5 stars', 'Rated 5 out of 5 stars']
)]

print("Number of Poor Reviews = {}".format(rev_df_poor.shape[0]))
print("Number of Good Reviews = {}".format(rev_df_good.shape[0]))

Number of Poor Reviews = 8178
Number of Good Reviews = 78296


## Stopword Removal

In [10]:
# Build the spaCy pipeline from the English transformer model, then register the
# spacytextblob component so later cells can read sentiment through `doc._.blob`.
spacy_model_name = "en_core_web_trf"
nlp = spacy.load(spacy_model_name)
nlp.add_pipe('spacytextblob')

<spacytextblob.spacytextblob.SpacyTextBlob at 0x1d9ae301c10>

In [56]:
# Extra stop words on top of spaCy's defaults: pronoun-like words and generic verbs.
stopword_list_noun = ['i', 'it', 'you', 'they', 'me', 'that', 'them', 'which', 'this', 'it', 'what', 'we', 'who', 'this', 'all', 'us']
stopword_list_verb = ['use', 'have', 'make', 'get', 'go', 'need', 'set', 'pay', 'take', 'keep', 'give', 'say', 'see', 'think', 'know', 'want', 'try', 'be', 'ask', 'come', 'put', 'seem']

# Flag every listed word as a stop word in the shared pipeline vocabulary.
for stopword in stopword_list_noun + stopword_list_verb:
    nlp.vocab[stopword].is_stop = True

## Monzo

In [12]:
# Monzo review bodies ("text" column) for the poor and good rating buckets.
monzo_review_text_poor, monzo_review_text_good = monzo_df_poor["text"], monzo_df_good["text"]

In [14]:
# Stop words for the Monzo pass: the shared pronoun/verb lists plus brand-specific terms.
# 'nan' is listed because str() of a missing review yields the text 'nan'.
stopword_list_noun = ['i', 'it', 'you', 'they', 'me', 'that', 'them', 'which', 'this', 'it', 'what', 'we', 'who', 'this', 'all', 'us']
stopword_list_verb = ['use', 'have', 'make', 'get', 'go', 'need', 'set', 'pay', 'take', 'keep', 'give', 'say', 'see', 'think', 'know', 'want', 'try', 'be', 'ask', 'come', 'put', 'seem']
# NOTE(review): the two-word entry 'monzo bank' likely never equals a single token - confirm.
monzo_stopwords = ['monzo', 'monzo bank', 'bank', 'nan']

# Flag every listed word as a stop word in the shared pipeline vocabulary.
for stopword in stopword_list_noun + stopword_list_verb + monzo_stopwords:
    nlp.vocab[stopword].is_stop = True

# Lower-case each review, drop stop-word tokens and join the lemmas of what remains.
monzo_review_text_poor_cleaned, monzo_review_text_good_cleaned = (
    reviews.apply(lambda raw: " ".join(tok.lemma_ for tok in nlp(str(raw).lower()) if not tok.is_stop))
    for reviews in (monzo_review_text_poor, monzo_review_text_good)
)

In [15]:
# saving files
# NOTE(review): absolute, machine-specific directory; it must already exist on the machine running this.
FILEPATH = 'C:/Users/Ethan Chew/Desktop/Work/Individual Project/Internship/webscraping/pickled_files/'

# (file name, object) pairs: the first two hold the review text before stopword
# removal, the last two hold the text after stopword removal.
for pickle_name, payload in (
    ('monzo_review_text_poor_bin.pickle', monzo_review_text_poor),
    ('monzo_review_text_good_bin.pickle', monzo_review_text_good),
    ('monzo_review_text_poor_cleaned_bin.pickle', monzo_review_text_poor_cleaned),
    ('monzo_review_text_good_cleaned_bin.pickle', monzo_review_text_good_cleaned),
):
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

## Starling Bank

In [16]:
# Starling Bank review bodies ("text" column) for the poor and good rating buckets.
sb_review_text_poor, sb_review_text_good = sb_df_poor["text"], sb_df_good["text"]

In [17]:
# Stop words for the Starling Bank pass: the shared pronoun/verb lists plus brand-specific terms.
# 'nan' is listed because str() of a missing review yields the text 'nan'.
stopword_list_noun = ['i', 'it', 'you', 'they', 'me', 'that', 'them', 'which', 'this', 'it', 'what', 'we', 'who', 'this', 'all', 'us']
stopword_list_verb = ['use', 'have', 'make', 'get', 'go', 'need', 'set', 'pay', 'take', 'keep', 'give', 'say', 'see', 'think', 'know', 'want', 'try', 'be', 'ask', 'come', 'put', 'seem']
# NOTE(review): the two-word entry 'starling bank' likely never equals a single token - confirm.
sb_stopwords = ['starling', 'bank', 'starling bank', 'nan']

# Flag every listed word as a stop word in the shared pipeline vocabulary.
for stopword in stopword_list_noun + stopword_list_verb + sb_stopwords:
    nlp.vocab[stopword].is_stop = True

# Lower-case each review, drop stop-word tokens and join the lemmas of what remains.
sb_review_text_poor_cleaned, sb_review_text_good_cleaned = (
    reviews.apply(lambda raw: " ".join(tok.lemma_ for tok in nlp(str(raw).lower()) if not tok.is_stop))
    for reviews in (sb_review_text_poor, sb_review_text_good)
)

In [18]:
# saving files
# NOTE(review): absolute, machine-specific directory; it must already exist on the machine running this.
FILEPATH = 'C:/Users/Ethan Chew/Desktop/Work/Individual Project/Internship/webscraping/pickled_files/'

# (file name, object) pairs: the first two hold the review text before stopword
# removal, the last two hold the text after stopword removal.
for pickle_name, payload in (
    ('sb_review_text_poor_bin.pickle', sb_review_text_poor),
    ('sb_review_text_good_bin.pickle', sb_review_text_good),
    ('sb_review_text_poor_cleaned_bin.pickle', sb_review_text_poor_cleaned),
    ('sb_review_text_good_cleaned_bin.pickle', sb_review_text_good_cleaned),
):
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

## Wise

In [20]:
# Wise review bodies ("text" column) for the poor and good rating buckets.
wise_review_text_poor, wise_review_text_good = wise_df_poor["text"], wise_df_good["text"]

In [22]:
# Stop words for the Wise pass: the shared pronoun/verb lists plus brand-specific terms.
# 'nan' is listed because str() of a missing review yields the text 'nan'.
stopword_list_noun = ['i', 'it', 'you', 'they', 'me', 'that', 'them', 'which', 'this', 'it', 'what', 'we', 'who', 'this', 'all', 'us']
stopword_list_verb = ['use', 'have', 'make', 'get', 'go', 'need', 'set', 'pay', 'take', 'keep', 'give', 'say', 'see', 'think', 'know', 'want', 'try', 'be', 'ask', 'come', 'put', 'seem']
# Fix: a comma was missing between 'wise bank' and 'nan', so Python concatenated the
# adjacent literals into the single entry 'wise banknan' and 'nan' was never marked
# by this cell.
# NOTE(review): the two-word entry 'wise bank' likely never equals a single token - confirm.
wise_stopwords = ['wise', 'bank', 'wise bank', 'nan']

# Mark them as stop words in the shared pipeline vocabulary.
for w in stopword_list_noun + stopword_list_verb + wise_stopwords:
    nlp.vocab[w].is_stop = True

# removing stopwords: lower-case each review, drop stop-word tokens and join the
# lemmas of the remaining tokens with single spaces.
wise_review_text_poor_cleaned = wise_review_text_poor.apply(lambda text: " ".join(token.lemma_ for token in nlp(str(text).lower()) if not token.is_stop))
wise_review_text_good_cleaned = wise_review_text_good.apply(lambda text: " ".join(token.lemma_ for token in nlp(str(text).lower()) if not token.is_stop))

In [23]:
# saving files
# NOTE(review): absolute, machine-specific directory; it must already exist on the machine running this.
FILEPATH = 'C:/Users/Ethan Chew/Desktop/Work/Individual Project/Internship/webscraping/pickled_files/'

# (file name, object) pairs: the first two hold the review text before stopword
# removal, the last two hold the text after stopword removal.
for pickle_name, payload in (
    ('wise_review_text_poor_bin.pickle', wise_review_text_poor),
    ('wise_review_text_good_bin.pickle', wise_review_text_good),
    ('wise_review_text_poor_cleaned_bin.pickle', wise_review_text_poor_cleaned),
    ('wise_review_text_good_cleaned_bin.pickle', wise_review_text_good_cleaned),
):
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)


## Revolut

In [24]:
# Revolut review bodies ("text" column) for the poor and good rating buckets.
rev_review_text_poor, rev_review_text_good = rev_df_poor["text"], rev_df_good["text"]

In [25]:
# Stop words for the Revolut pass: the shared pronoun/verb lists plus brand-specific terms.
# 'nan' is listed because str() of a missing review yields the text 'nan'.
stopword_list_noun = ['i', 'it', 'you', 'they', 'me', 'that', 'them', 'which', 'this', 'it', 'what', 'we', 'who', 'this', 'all', 'us']
stopword_list_verb = ['use', 'have', 'make', 'get', 'go', 'need', 'set', 'pay', 'take', 'keep', 'give', 'say', 'see', 'think', 'know', 'want', 'try', 'be', 'ask', 'come', 'put', 'seem']
# NOTE(review): the two-word entry 'revolut bank' likely never equals a single token - confirm.
rev_stopwords = ['revolut', 'bank', 'revolut bank', 'nan']

# Flag every listed word as a stop word in the shared pipeline vocabulary.
for stopword in stopword_list_noun + stopword_list_verb + rev_stopwords:
    nlp.vocab[stopword].is_stop = True

# Lower-case each review, drop stop-word tokens and join the lemmas of what remains.
rev_review_text_poor_cleaned, rev_review_text_good_cleaned = (
    reviews.apply(lambda raw: " ".join(tok.lemma_ for tok in nlp(str(raw).lower()) if not tok.is_stop))
    for reviews in (rev_review_text_poor, rev_review_text_good)
)

In [26]:
# saving files
# NOTE(review): absolute, machine-specific directory; it must already exist on the machine running this.
FILEPATH = 'C:/Users/Ethan Chew/Desktop/Work/Individual Project/Internship/webscraping/pickled_files/'

# (file name, object) pairs: the first two hold the review text before stopword
# removal, the last two hold the text after stopword removal.
for pickle_name, payload in (
    ('rev_review_text_poor_bin.pickle', rev_review_text_poor),
    ('rev_review_text_good_bin.pickle', rev_review_text_good),
    ('rev_review_text_poor_cleaned_bin.pickle', rev_review_text_poor_cleaned),
    ('rev_review_text_good_cleaned_bin.pickle', rev_review_text_good_cleaned),
):
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

## EDA Post-Stopword Removal

## Monzo

In [27]:
# Per-review feature lists for Monzo. Each list gets one entry per cleaned review:
# noun chunks, verb / proper-noun / adjective lemmas, and TextBlob polarity and subjectivity.
monzo_nouns_poor, monzo_verbs_poor, monzo_propn_poor, monzo_adj_poor = [], [], [], []
monzo_polarity_poor, monzo_subjectivity_poor = [], []
monzo_nouns_good, monzo_verbs_good, monzo_propn_good, monzo_adj_good = [], [], [], []
monzo_polarity_good, monzo_subjectivity_good = [], []

# Poor reviews are processed first, then good reviews, filling the matching lists.
for cleaned_reviews, nouns, verbs, propns, adjs, polarities, subjectivities in (
    (monzo_review_text_poor_cleaned, monzo_nouns_poor, monzo_verbs_poor, monzo_propn_poor,
     monzo_adj_poor, monzo_polarity_poor, monzo_subjectivity_poor),
    (monzo_review_text_good_cleaned, monzo_nouns_good, monzo_verbs_good, monzo_propn_good,
     monzo_adj_good, monzo_polarity_good, monzo_subjectivity_good),
):
    for review in cleaned_reviews:
        doc = nlp(str(review))
        nouns.append([chunk.text for chunk in doc.noun_chunks])
        # One lemma list per part-of-speech tag of interest.
        for pos_tag, lemma_lists in (("VERB", verbs), ("PROPN", propns), ("ADJ", adjs)):
            lemma_lists.append([tok.lemma_ for tok in doc if tok.pos_ == pos_tag])
        polarities.append(doc._.blob.polarity)
        subjectivities.append(doc._.blob.subjectivity)

In [28]:
# saving files

# nouns, verbs, propn, adj, polarity and subjectivity lists:
# the six "poor" lists are written first, then the six "good" lists.
for pickle_name, payload in (
    # Poor
    ('monzo_nouns_poor_bin.pickle', monzo_nouns_poor),
    ('monzo_verbs_poor_bin.pickle', monzo_verbs_poor),
    ('monzo_propn_poor_bin.pickle', monzo_propn_poor),
    ('monzo_adj_poor_bin.pickle', monzo_adj_poor),
    ('monzo_polarity_poor_bin.pickle', monzo_polarity_poor),
    ('monzo_subjectivity_poor_bin.pickle', monzo_subjectivity_poor),
    # Good
    ('monzo_nouns_good_bin.pickle', monzo_nouns_good),
    ('monzo_verbs_good_bin.pickle', monzo_verbs_good),
    ('monzo_propn_good_bin.pickle', monzo_propn_good),
    ('monzo_adj_good_bin.pickle', monzo_adj_good),
    ('monzo_polarity_good_bin.pickle', monzo_polarity_good),
    ('monzo_subjectivity_good_bin.pickle', monzo_subjectivity_good),
):
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

In [29]:
# Frequency tables for Monzo: each Counter flattens the per-review lists and tallies
# every item; the 50 most common entries are printed for each table.

############### most frequently used nouns ##############################
# Monzo (Poor Reviews)
cnt_noun_monzo_poor = Counter(itertools.chain.from_iterable(monzo_nouns_poor))
print("Monzo Nouns (Poor)")
print(cnt_noun_monzo_poor.most_common(50))

# Monzo (Good Reviews)
cnt_noun_monzo_good = Counter(itertools.chain.from_iterable(monzo_nouns_good))
print("\nMonzo Nouns (Good)")
print(cnt_noun_monzo_good.most_common(50))

########### most frequently used verbs ################################
# Monzo (Poor Reviews)
cnt_verb_monzo_poor = Counter(itertools.chain.from_iterable(monzo_verbs_poor))
print("\nMonzo Verbs (Poor)")
print(cnt_verb_monzo_poor.most_common(50))

# Monzo (Good Reviews)
cnt_verb_monzo_good = Counter(itertools.chain.from_iterable(monzo_verbs_good))
print("\nMonzo Verbs (Good)")
print(cnt_verb_monzo_good.most_common(50))

############## most frequently used adjectives ########################
# Monzo (Poor Reviews)
cnt_adj_monzo_poor = Counter(itertools.chain.from_iterable(monzo_adj_poor))
print("\nMonzo Adjectives (Poor)")
print(cnt_adj_monzo_poor.most_common(50))

# Monzo (Good Reviews)
cnt_adj_monzo_good = Counter(itertools.chain.from_iterable(monzo_adj_good))
print("\nMonzo Adjectives (Good)")
print(cnt_adj_monzo_good.most_common(50))

############## most frequently used proper nouns ########################
# Monzo (Poor Reviews)
cnt_propn_monzo_poor = Counter(itertools.chain.from_iterable(monzo_propn_poor))
print("\nMonzo Proper Noun (Poor)")
print(cnt_propn_monzo_poor.most_common(50))

# Monzo (Good Reviews)
cnt_propn_monzo_good = Counter(itertools.chain.from_iterable(monzo_propn_good))
print("\nMonzo Proper Noun (Good)")
print(cnt_propn_monzo_good.most_common(50))

Monzo Nouns (Poor)
[('account', 1016), ('money', 884), ('people', 175), ('customer service', 128), ('reason', 113), ('i', 112), ('card', 111), ('email', 103), ('app', 89), ('customer', 87), ('time', 81), ('money account', 71), ('phone', 63), ('issue', 60), ('payment', 60), ('message', 57), ('company', 55), ('complaint', 52), ('cost', 51), ('bank', 48), ('problem', 42), ('thing', 42), ('fund', 39), ('joint account', 38), ('day', 38), ('they', 37), ('information', 37), ('u', 37), ('fraud', 36), ('work', 35), ('response', 35), ('explanation', 35), ('question', 34), ('transaction', 33), ('close account', 33), ('detail', 31), ('help', 31), ('open account', 31), ('business account', 31), ('cash', 29), ('evidence', 28), ('friend', 27), ('personal account', 27), ('reply', 27), ('support', 26), ('refund', 26), ('service', 26), ('contact', 26), ('answer', 25), ('chat', 24)]

Monzo Nouns (Good)
[('money', 1900), ('account', 557), ('app', 499), ('card', 381), ('pot', 372), ('customer service', 288

## Starling Bank

In [30]:
# Per-review feature lists for Starling Bank. Each list gets one entry per cleaned review:
# noun chunks, verb / proper-noun / adjective lemmas, and TextBlob polarity and subjectivity.
sb_nouns_poor, sb_verbs_poor, sb_propn_poor, sb_adj_poor = [], [], [], []
sb_polarity_poor, sb_subjectivity_poor = [], []
sb_nouns_good, sb_verbs_good, sb_propn_good, sb_adj_good = [], [], [], []
sb_polarity_good, sb_subjectivity_good = [], []

# Poor reviews are processed first, then good reviews, filling the matching lists.
for cleaned_reviews, nouns, verbs, propns, adjs, polarities, subjectivities in (
    (sb_review_text_poor_cleaned, sb_nouns_poor, sb_verbs_poor, sb_propn_poor,
     sb_adj_poor, sb_polarity_poor, sb_subjectivity_poor),
    (sb_review_text_good_cleaned, sb_nouns_good, sb_verbs_good, sb_propn_good,
     sb_adj_good, sb_polarity_good, sb_subjectivity_good),
):
    for review in cleaned_reviews:
        doc = nlp(str(review))
        nouns.append([chunk.text for chunk in doc.noun_chunks])
        # One lemma list per part-of-speech tag of interest.
        for pos_tag, lemma_lists in (("VERB", verbs), ("PROPN", propns), ("ADJ", adjs)):
            lemma_lists.append([tok.lemma_ for tok in doc if tok.pos_ == pos_tag])
        polarities.append(doc._.blob.polarity)
        subjectivities.append(doc._.blob.subjectivity)

In [31]:
# saving files

# nouns, verbs, propn, adj, polarity and subjectivity lists:
# the six "poor" lists are written first, then the six "good" lists.
for pickle_name, payload in (
    # Poor
    ('sb_nouns_poor_bin.pickle', sb_nouns_poor),
    ('sb_verbs_poor_bin.pickle', sb_verbs_poor),
    ('sb_propn_poor_bin.pickle', sb_propn_poor),
    ('sb_adj_poor_bin.pickle', sb_adj_poor),
    ('sb_polarity_poor_bin.pickle', sb_polarity_poor),
    ('sb_subjectivity_poor_bin.pickle', sb_subjectivity_poor),
    # Good
    ('sb_nouns_good_bin.pickle', sb_nouns_good),
    ('sb_verbs_good_bin.pickle', sb_verbs_good),
    ('sb_propn_good_bin.pickle', sb_propn_good),
    ('sb_adj_good_bin.pickle', sb_adj_good),
    ('sb_polarity_good_bin.pickle', sb_polarity_good),
    ('sb_subjectivity_good_bin.pickle', sb_subjectivity_good),
):
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)


In [32]:
# Frequency tables for Starling Bank: each Counter flattens the per-review lists and
# tallies every item; the 50 most common entries are printed for each table.

# counter to count most frequently used nouns
# Starling Bank (Poor Reviews)
cnt_noun_sb_poor = Counter(itertools.chain.from_iterable(sb_nouns_poor))
print("Starling Bank Nouns (Poor)")
print(cnt_noun_sb_poor.most_common(50))

# Starling Bank (Good Reviews)
cnt_noun_sb_good = Counter(itertools.chain.from_iterable(sb_nouns_good))
print("\nStarling Bank Nouns (Good)")
print(cnt_noun_sb_good.most_common(50))

# counter to count most frequently used verbs
# Starling Bank (Poor Reviews)
cnt_verb_sb_poor = Counter(itertools.chain.from_iterable(sb_verbs_poor))
print("\nStarling Bank Verbs (Poor)")
print(cnt_verb_sb_poor.most_common(50))

# Starling Bank (Good Reviews)
cnt_verb_sb_good = Counter(itertools.chain.from_iterable(sb_verbs_good))
print("\nStarling Bank Verbs (Good)")
print(cnt_verb_sb_good.most_common(50))

############## counter to count most frequently used adjectives ########################
# Starling Bank (Poor Reviews)
cnt_adj_sb_poor = Counter(itertools.chain.from_iterable(sb_adj_poor))
print("\nStarling Bank Adjectives (Poor)")
print(cnt_adj_sb_poor.most_common(50))

# Starling Bank (Good Reviews)
# Fix: this previously iterated monzo_adj_good (copy-paste slip), so the table printed
# under the Starling Bank heading was actually Monzo's adjective counts.
cnt_adj_sb_good = Counter(itertools.chain.from_iterable(sb_adj_good))
print("\nStarling Bank Adjectives (Good)")
print(cnt_adj_sb_good.most_common(50))

############## counter to count most frequently used proper noun ########################
# Starling Bank (Poor Reviews)
cnt_propn_sb_poor = Counter(itertools.chain.from_iterable(sb_propn_poor))
print("\nStarling Bank Proper Noun (Poor)")
print(cnt_propn_sb_poor.most_common(50))

# Starling Bank (Good Reviews)
cnt_propn_sb_good = Counter(itertools.chain.from_iterable(sb_propn_good))
print("\nStarling Bank Proper Noun (Good)")
print(cnt_propn_sb_good.most_common(50))

Starling Bank Nouns (Poor)
[('account', 1090), ('money', 655), ('business account', 309), ('i', 254), ('customer service', 245), ('app', 216), ('time', 214), ('business', 173), ('people', 171), ('customer', 141), ('payment', 136), ('email', 125), ('card', 123), ('personal account', 119), ('information', 111), ('phone', 106), ('reason', 106), ('open account', 105), ('message', 96), ('issue', 92), ('application', 86), ('review', 82), ('bank', 76), ('problem', 73), ('money account', 67), ('day', 67), ('company', 63), ('document', 61), ('work', 58), ('thing', 57), ('cost', 56), ('detail', 54), ('fund', 54), ('complaint', 54), ('question', 53), ('service', 51), ('explanation', 51), ('transaction', 51), ('proof', 49), ('they', 49), ('open business account', 48), ('response', 47), ('help', 47), ('way', 46), ('sole trader account', 43), ('cheque', 39), ('overdraft', 39), ('cash', 39), ('support', 38), ('current account', 38)]

Starling Bank Nouns (Good)
[('money', 1947), ('account', 1662), ('a

## Wise

In [33]:
# Per-review feature lists for Wise. Each list gets one entry per cleaned review:
# noun chunks, verb / proper-noun / adjective lemmas, and TextBlob polarity and subjectivity.
wise_nouns_poor, wise_verbs_poor, wise_propn_poor, wise_adj_poor = [], [], [], []
wise_polarity_poor, wise_subjectivity_poor = [], []
wise_nouns_good, wise_verbs_good, wise_propn_good, wise_adj_good = [], [], [], []
wise_polarity_good, wise_subjectivity_good = [], []

# Poor reviews are processed first, then good reviews, filling the matching lists.
for cleaned_reviews, nouns, verbs, propns, adjs, polarities, subjectivities in (
    (wise_review_text_poor_cleaned, wise_nouns_poor, wise_verbs_poor, wise_propn_poor,
     wise_adj_poor, wise_polarity_poor, wise_subjectivity_poor),
    (wise_review_text_good_cleaned, wise_nouns_good, wise_verbs_good, wise_propn_good,
     wise_adj_good, wise_polarity_good, wise_subjectivity_good),
):
    for review in cleaned_reviews:
        doc = nlp(str(review))
        nouns.append([chunk.text for chunk in doc.noun_chunks])
        # One lemma list per part-of-speech tag of interest.
        for pos_tag, lemma_lists in (("VERB", verbs), ("PROPN", propns), ("ADJ", adjs)):
            lemma_lists.append([tok.lemma_ for tok in doc if tok.pos_ == pos_tag])
        polarities.append(doc._.blob.polarity)
        subjectivities.append(doc._.blob.subjectivity)


In [34]:
# saving files

# nouns, verbs, propn, adj, polarity and subjectivity lists:
# the six "poor" lists are written first, then the six "good" lists.
for pickle_name, payload in (
    # Poor
    ('wise_nouns_poor_bin.pickle', wise_nouns_poor),
    ('wise_verbs_poor_bin.pickle', wise_verbs_poor),
    ('wise_propn_poor_bin.pickle', wise_propn_poor),
    ('wise_adj_poor_bin.pickle', wise_adj_poor),
    ('wise_polarity_poor_bin.pickle', wise_polarity_poor),
    ('wise_subjectivity_poor_bin.pickle', wise_subjectivity_poor),
    # Good
    ('wise_nouns_good_bin.pickle', wise_nouns_good),
    ('wise_verbs_good_bin.pickle', wise_verbs_good),
    ('wise_propn_good_bin.pickle', wise_propn_good),
    ('wise_adj_good_bin.pickle', wise_adj_good),
    ('wise_polarity_good_bin.pickle', wise_polarity_good),
    ('wise_subjectivity_good_bin.pickle', wise_subjectivity_good),
):
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)



In [35]:
# Frequency tables for Wise: each Counter flattens the per-review lists and tallies
# every item; the 50 most common entries are printed for each table.

# counter to count most frequently used nouns
# Wise (Poor Reviews)
cnt_noun_wise_poor = Counter(itertools.chain.from_iterable(wise_nouns_poor))
print("Wise Nouns (Poor)")
print(cnt_noun_wise_poor.most_common(50))

# Wise (Good Reviews)
cnt_noun_wise_good = Counter(itertools.chain.from_iterable(wise_nouns_good))
print("\nWise Nouns (Good)")
print(cnt_noun_wise_good.most_common(50))

# counter to count most frequently used verbs
# Wise (Poor Reviews)
cnt_verb_wise_poor = Counter(itertools.chain.from_iterable(wise_verbs_poor))
print("\nWise Verbs (Poor)")
print(cnt_verb_wise_poor.most_common(50))

# Wise (Good Reviews)
cnt_verb_wise_good = Counter(itertools.chain.from_iterable(wise_verbs_good))
print("\nWise Verbs (Good)")
print(cnt_verb_wise_good.most_common(50))

############## counter to count most frequently used adjectives ########################
# Wise (Poor Reviews)
cnt_adj_wise_poor = Counter(itertools.chain.from_iterable(wise_adj_poor))
print("\nWise Adjectives (Poor)")
print(cnt_adj_wise_poor.most_common(50))

# Wise (Good Reviews)
# Fix: this previously iterated monzo_adj_good (copy-paste slip), so the table printed
# under the Wise heading was actually Monzo's adjective counts.
cnt_adj_wise_good = Counter(itertools.chain.from_iterable(wise_adj_good))
print("\nWise Adjectives (Good)")
print(cnt_adj_wise_good.most_common(50))

############## counter to count most frequently used proper noun ########################
# Wise (Poor Reviews)
cnt_propn_wise_poor = Counter(itertools.chain.from_iterable(wise_propn_poor))
print("\nWise Proper Noun (Poor)")
print(cnt_propn_wise_poor.most_common(50))

# Wise (Good Reviews)
cnt_propn_wise_good = Counter(itertools.chain.from_iterable(wise_propn_good))
print("\nWise Proper Noun (Good)")
print(cnt_propn_wise_good.most_common(50))

Wise Nouns (Poor)
[('money', 3190), ('account', 1552), ('transfer', 884), ('email', 541), ('i', 433), ('time', 415), ('service', 373), ('payment', 336), ('customer service', 302), ('transaction', 300), ('company', 252), ('people', 242), ('reason', 227), ('fund', 220), ('money account', 198), ('customer', 194), ('day', 188), ('problem', 171), ('information', 170), ('document', 154), ('fee', 154), ('recipient', 147), ('transferwise', 141), ('issue', 134), ('money transfer', 119), ('support', 116), ('card', 115), ('business', 112), ('they', 111), ('website', 110), ('refund', 109), ('detail', 108), ('work', 105), ('app', 102), ('customer support', 96), ('process', 95), ('thing', 94), ('message', 87), ('answer', 82), ('business account', 82), ('scam', 79), ('friend', 77), ('system', 77), ('help', 77), ('exchange rate', 77), ('review', 74), ('phone', 70), ('way', 69), ('person', 68), ('response', 68)]

Wise Nouns (Good)
[('money', 16474), ('service', 3461), ('transfer', 2918), ('excellent se

## Revolut

In [36]:
# Revolut (Poor Reviews): run every cleaned review through `nlp` (defined earlier in
# the notebook) and collect, per review: the text of its noun chunks, the lemmas of
# its VERB / PROPN / ADJ tokens, and the polarity / subjectivity read from doc._.blob.
rev_nouns_poor, rev_verbs_poor, rev_propn_poor, rev_adj_poor = [], [], [], []
rev_polarity_poor, rev_subjectivity_poor = [], []
for text in rev_review_text_poor_cleaned:
    doc = nlp(str(text))

    # One pass over the tokens, bucketing lemmas by the three POS tags of interest.
    pos_lemmas = {"VERB": [], "PROPN": [], "ADJ": []}
    for tok in doc:
        if tok.pos_ in pos_lemmas:
            pos_lemmas[tok.pos_].append(tok.lemma_)

    rev_nouns_poor.append([chunk.text for chunk in doc.noun_chunks])
    rev_verbs_poor.append(pos_lemmas["VERB"])
    rev_propn_poor.append(pos_lemmas["PROPN"])
    rev_adj_poor.append(pos_lemmas["ADJ"])

    sentiment = doc._.blob
    rev_polarity_poor.append(sentiment.polarity)
    rev_subjectivity_poor.append(sentiment.subjectivity)

# Revolut (Good Reviews): same extraction as for the poor reviews, applied to
# rev_review_text_good_cleaned. Each output list gets one entry per review.
rev_nouns_good, rev_verbs_good, rev_propn_good, rev_adj_good = [], [], [], []
rev_polarity_good, rev_subjectivity_good = [], []
for text in rev_review_text_good_cleaned:
    doc = nlp(str(text))

    # Lemmas of all tokens in this review carrying the given coarse POS tag.
    lemmas_with_pos = lambda pos: [tok.lemma_ for tok in doc if tok.pos_ == pos]

    rev_nouns_good.append([chunk.text for chunk in doc.noun_chunks])
    rev_verbs_good.append(lemmas_with_pos("VERB"))
    rev_propn_good.append(lemmas_with_pos("PROPN"))
    rev_adj_good.append(lemmas_with_pos("ADJ"))

    # Sentiment scores exposed on the doc's `blob` extension attribute.
    rev_polarity_good.append(doc._.blob.polarity)
    rev_subjectivity_good.append(doc._.blob.subjectivity)


In [37]:
# saving files

# nouns, verbs, propn, adj, polarity and subjectivity lists
# Poor
# Each (file name, object) pair is pickled to its own file under FILEPATH.
rev_poor_pickles = [
    ('rev_nouns_poor_bin.pickle', rev_nouns_poor),
    ('rev_verbs_poor_bin.pickle', rev_verbs_poor),
    ('rev_propn_poor_bin.pickle', rev_propn_poor),
    ('rev_adj_poor_bin.pickle', rev_adj_poor),
    ('rev_polarity_poor_bin.pickle', rev_polarity_poor),
    ('rev_subjectivity_poor_bin.pickle', rev_subjectivity_poor),
]
for pickle_name, payload in rev_poor_pickles:
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

# Good
# Map each output file name to the list it stores, then pickle them one by one
# into FILEPATH using the highest available pickle protocol.
rev_good_pickles = {
    'rev_nouns_good_bin.pickle': rev_nouns_good,
    'rev_verbs_good_bin.pickle': rev_verbs_good,
    'rev_propn_good_bin.pickle': rev_propn_good,
    'rev_adj_good_bin.pickle': rev_adj_good,
    'rev_polarity_good_bin.pickle': rev_polarity_good,
    'rev_subjectivity_good_bin.pickle': rev_subjectivity_good,
}
for pickle_name in rev_good_pickles:
    with open(FILEPATH + pickle_name, 'wb') as f:
        pickle.dump(rev_good_pickles[pickle_name], f, protocol=pickle.HIGHEST_PROTOCOL)


In [38]:
# counter to count most frequently used nouns
# Revolut (Poor Reviews)
# rev_nouns_poor holds one list of noun-chunk texts per review; count them all.
cnt_noun_rev_poor = Counter()
for noun_list in rev_nouns_poor:
    cnt_noun_rev_poor.update(noun_list)
print("Revolut Nouns (Poor)")
print(cnt_noun_rev_poor.most_common(50))

# Revolut (Good Reviews)
# Flatten the per-review noun-chunk lists and count every entry in one pass.
cnt_noun_rev_good = Counter(itertools.chain.from_iterable(rev_nouns_good))
print("\nRevolut Nouns (Good)")
print(cnt_noun_rev_good.most_common(50))

# counter to count most frequently used verbs
# Revolut (Poor Reviews)
# Count each verb lemma across all per-review lists of rev_verbs_poor.
cnt_verb_rev_poor = Counter(
    word
    for verb_list in rev_verbs_poor
    for word in verb_list
)
print("\nRevolut Verbs (Poor)")
print(cnt_verb_rev_poor.most_common(50))

# Revolut (Good Reviews)
# Accumulate verb-lemma counts one review at a time.
cnt_verb_rev_good = Counter()
for verb_list in rev_verbs_good:
    cnt_verb_rev_good.update(verb_list)
print("\nRevolut Verbs (Good)")
print(cnt_verb_rev_good.most_common(50))

############## counter to count most frequently used adjectives ########################
# Revolut (Poor Reviews)
# Flatten the per-review adjective-lemma lists and count every entry in one pass.
cnt_adj_rev_poor = Counter(itertools.chain.from_iterable(rev_adj_poor))
print("\nRevolut Adjectives (Poor)")
print(cnt_adj_rev_poor.most_common(50))

# Revolut (Good Reviews)
# Count adjective lemmas from Revolut's good reviews (rev_adj_good holds one list
# of lemmas per review).
# NOTE: this loop used to iterate monzo_adj_good (copy-paste slip), which made the
# "Revolut Adjectives (Good)" output report Monzo's counts instead of Revolut's.
cnt_adj_rev_good = Counter()
for adj_list in rev_adj_good:
    # Counter.update adds one count for each element of the list.
    cnt_adj_rev_good.update(adj_list)
print("\nRevolut Adjectives (Good)")
print(cnt_adj_rev_good.most_common(50))

############## counter to count most frequently used proper noun ########################
# Revolut (Poor Reviews)
# Count each proper-noun lemma across all per-review lists of rev_propn_poor.
cnt_propn_rev_poor = Counter(
    word
    for propn_list in rev_propn_poor
    for word in propn_list
)
print("\nRevolut Proper Noun (Poor)")
print(cnt_propn_rev_poor.most_common(50))

# Revolut (Good Reviews)
# Accumulate proper-noun lemma counts one review at a time.
cnt_propn_rev_good = Counter()
for propn_list in rev_propn_good:
    cnt_propn_rev_good.update(propn_list)
print("\nRevolut Proper Noun (Good)")
print(cnt_propn_rev_good.most_common(50))


Revolut Nouns (Poor)
[('money', 2261), ('account', 2181), ('card', 739), ('i', 658), ('app', 529), ('customer service', 511), ('time', 431), ('people', 404), ('problem', 330), ('issue', 320), ('company', 278), ('friend', 278), ('support', 274), ('fund', 260), ('payment', 254), ('service', 244), ('email', 241), ('message', 237), ('chat', 221), ('money account', 216), ('document', 206), ('information', 205), ('reason', 200), ('customer', 199), ('transaction', 199), ('transfer', 192), ('agent', 185), ('work', 156), ('help', 152), ('business account', 146), ('customer support', 141), ('day', 137), ('review', 136), ('thing', 135), ('response', 130), ('business', 126), ('answer', 122), ('way', 121), ('question', 118), ('phone', 115), ('person', 114), ('hour', 111), ('they', 105), ('contact', 102), ('detail', 93), ('live chat', 90), ('refund', 85), ('proof', 85), ('fee', 84), ('it', 84)]

Revolut Nouns (Good)
[('money', 6581), ('card', 2652), ('app', 2156), ('problem', 1708), ('account', 1646