### Trustpilot Lexicon Sentiment Analysis

In [4]:
import numpy as np
import pandas as pd

# Load the Trustpilot topic table from CSV: one row per topic, holding a
# Topic id followed by that topic's words in columns W1..W14.
trustpilot_lexicon_df = pd.read_csv("trust_pilot_lexicon_sentiment.csv")

In [5]:
trustpilot_lexicon_df.head()

Unnamed: 0,Topic,W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14
0,1,together,imagine,assure,fraudulent,procedure,unknown,violate,reality,polite,opportunity,specialist,unhappy,damage,utensil
1,2,whole,resolution,deposit,manager,base,evidence,nowhere,hide,shock,line,space,dog,submit,difficult
2,3,nature,narrative,political,perpetuate,partially,ounce,oppose,noncare,prime,implore,instantly,ideal,propose,incidence
3,4,reimburse,phishing,model,cut,act,super,tour,glad,identity,slow,expire,afraid,dreadful,scammed
4,5,guy,positive,item,learn,past,human,tiny,video,corona,prevent,general,trustpilot,basement,summer


In [6]:
import lbsa

In [7]:
# Fetch the English NRC sentiment-analysis ('sa') lexicon through lbsa.
# NOTE(review): nrc_sa_lexicon is not referenced again in the cells shown
# here — confirm it is still needed.
nrc_sa_lexicon = lbsa.get_lexicon('sa', language='english', source='nrc')

In [8]:
# Load the multilingual NRC table together with the list of its tag column
# names; both are displayed in the cells that follow.
nrc_all_languages, tag_names = lbsa.load_nrc_lexicon()

In [9]:
nrc_all_languages

Unnamed: 0,english,arabic,basque,bengali,catalan,chinese (simplified),chinese (traditional),danish,dutch,esperanto,...,positive,negative,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
0,aback,الى الوراء,aback,পশ্চাতে,darrere,吓了一跳,嚇了一跳,overrasket,verrast,surprizita,...,0,0,0,0,0,0,0,0,0,0
1,abacus,طبلية تاج,abako,গণনা-যন্ত্রবিশেষ,àbac,算盘,算盤,abacus,abacus,abako,...,0,0,0,0,0,0,0,0,0,1
2,abandon,تخلى,alde batera utzi,বর্জিত করা,abandonar,放弃,放棄,opgive,verlaten,forlasos,...,0,1,0,0,0,1,0,1,0,0
3,abandoned,مهجور,abandonatu,পরিত্যক্ত,abandonat,弃,棄,forladte,verlaten,forlasita,...,0,1,1,0,0,1,0,1,0,0
4,abandonment,هجر,abandono,বিসর্জন,abandonament,放弃,放棄,opgivelse,verlatenheid,forlasas,...,0,1,1,0,0,1,0,1,1,0
5,abate,انحسر,abate,প্রকোপ হ্রাস করা,disminuir,杀,殺,aftage,verminderen,Abate,...,0,0,0,0,0,0,0,0,0,0
6,abatement,انحسار,murrizteko,উপশম,disminució,减排,減排,nedskæring,vermindering,abatement,...,0,0,0,0,0,0,0,0,0,0
7,abba,أبا,abba,Abba,abba,ABBA,ABBA,abba,abba,ABBA,...,1,0,0,0,0,0,0,0,0,0
8,abbot,رئيس الدير,abade,মঠাধ্যক্ষ,abat,方丈,方丈,abbed,abt,abato,...,0,0,0,0,0,0,0,0,0,1
9,abbreviate,اختصر,laburtu,খাট করা,abreujar,简略,簡略,forkorte,afkorten,mallongigi,...,0,0,0,0,0,0,0,0,0,0


In [10]:
tag_names

['positive',
 'negative',
 'anger',
 'anticipation',
 'disgust',
 'fear',
 'joy',
 'sadness',
 'surprise',
 'trust']

### Using lexicon model
Example: scoring a single word with the NRC sentiment analysis lexicon.

print(nrc_lexicon_model.process('frustrate')):
1. {'anger': 1, 'anticipation': 0, 'disgust': 1, 'fear': 0, 'joy': 0, 'sadness': 1, 'surprise': 0, 'trust': 0}

print(extractor.process('frustrate')):
2. [1. 0. 1. 0. 0. 1. 0. 0.]

In [11]:
trustpilot_lexicon_df

Unnamed: 0,Topic,W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14
0,1,together,imagine,assure,fraudulent,procedure,unknown,violate,reality,polite,opportunity,specialist,unhappy,damage,utensil
1,2,whole,resolution,deposit,manager,base,evidence,nowhere,hide,shock,line,space,dog,submit,difficult
2,3,nature,narrative,political,perpetuate,partially,ounce,oppose,noncare,prime,implore,instantly,ideal,propose,incidence
3,4,reimburse,phishing,model,cut,act,super,tour,glad,identity,slow,expire,afraid,dreadful,scammed
4,5,guy,positive,item,learn,past,human,tiny,video,corona,prevent,general,trustpilot,basement,summer
5,6,airbnb,host,stay,get,refund,place,review,day,money,guest,book,use,take,leave
6,7,would,book,cancel,call,time,reservation,airbnb,try,ask,account,booking,say,tell,email
7,8,key,hard,appear,fire,law,sad,upset,attend,connect,sofa_be,violation,supply,deserve,remote
8,9,helpful,kind,bit,quite,conversation,stand,instruction,honestly,best,strongly,pandemic,manager,concerned,chain
9,10,card,fee,word,easy,access,text,traveller,policy,info,communicate,virus,similar,useless,log


In [35]:
# Build the English NRC lexicon model and wrap it in a feature extractor.
nrc_lexicon_model = lbsa.create_sa_lexicon(language='english', source='nrc')
extractor = lbsa.FeatureExtractor(nrc_lexicon_model)

# Show which features the extractor emits (heading, list, then a blank line).
print('Feature names:', f'{extractor.feature_names}\n', sep='\n')

Feature names:
['nrc_anger', 'nrc_anticipation', 'nrc_disgust', 'nrc_fear', 'nrc_joy', 'nrc_sadness', 'nrc_surprise', 'nrc_trust']



In [19]:
# Keep only the word columns: the Topic id is not part of the topic text.
df = trustpilot_lexicon_df.drop(columns='Topic')

In [30]:
# Collapse each row of words into one space-separated string per topic.
trust_pilot_topics = list(map(" ".join, df.values.tolist()))

In [31]:
# Key every topic string as 'topic1', 'topic2', ... following row order.
topic_dict = {
    'topic' + str(position): words
    for position, words in enumerate(trust_pilot_topics, start=1)
}

In [82]:
topic_dict

{'topic1': 'together imagine assure fraudulent procedure unknown violate reality polite opportunity specialist unhappy damage utensil',
 'topic2': 'whole resolution deposit manager base evidence nowhere hide shock line space dog submit difficult',
 'topic3': 'nature narrative political perpetuate partially ounce oppose noncare prime implore instantly ideal propose incidence',
 'topic4': 'reimburse phishing model cut act super tour glad identity slow expire afraid dreadful scammed',
 'topic5': 'guy positive item learn past human tiny video corona prevent general trustpilot basement summer',
 'topic6': 'airbnb host stay get refund place review day money guest book use take leave',
 'topic7': 'would book cancel call time reservation airbnb try ask account booking say tell email',
 'topic8': 'key hard appear fire law sad upset attend connect sofa_be violation supply deserve remote',
 'topic9': 'helpful kind bit quite conversation stand instruction honestly best strongly pandemic manager co

In [83]:
def get_lexicon_model(lexicon_dict, topics=None, model=None):
    """Sum the per-word NRC emotion scores of every word in one topic.

    Parameters
    ----------
    lexicon_dict : str
        Key of the topic to score, e.g. ``'topic1'``. The name is kept for
        backward compatibility; it is a topic key, not a dictionary.
    topics : dict, optional
        Mapping of topic key -> space-separated topic words. When omitted,
        the notebook-level ``topic_dict`` is used.
    model : object, optional
        Lexicon exposing ``process(word)`` that returns a dict with one score
        per emotion. When omitted, the notebook-level ``nrc_lexicon_model``
        is used.

    Returns
    -------
    dict
        Total score per emotion, with keys in the order anger, anticipation,
        disgust, fear, joy, sadness, surprise, trust.

    Raises
    ------
    KeyError
        If the topic key is not in ``topics`` or the lexicon result lacks one
        of the eight emotions.
    """
    emotions = ('anger', 'anticipation', 'disgust', 'fear',
                'joy', 'sadness', 'surprise', 'trust')

    # Fall back to the notebook globals so existing one-argument calls work.
    if topics is None:
        topics = topic_dict
    if model is None:
        model = nrc_lexicon_model

    totals = dict.fromkeys(emotions, 0)
    # split() without an argument skips repeated/leading/trailing whitespace,
    # so empty tokens are never handed to the lexicon.
    for word in topics[lexicon_dict].split():
        # Separate name: do not rebind the `lexicon_dict` parameter here.
        word_scores = model.process(word)
        for emotion in emotions:
            totals[emotion] += word_scores[emotion]

    return totals

In [118]:
# Score every topic that is actually present in topic_dict instead of
# assuming a fixed count of 14: a hard-coded range raises KeyError when there
# are fewer topics and silently skips any beyond the fourteenth.
# Dicts preserve insertion order, so results follow topic1, topic2, ...
j = []
for topic_name in topic_dict:
    new_dict = get_lexicon_model(topic_name)
    print(topic_name + ': ' + str(new_dict))
    j.append(new_dict)

topic1: {'anger': 3, 'anticipation': 2, 'disgust': 3, 'fear': 2, 'joy': 0, 'sadness': 2, 'surprise': 0, 'trust': 2}
topic2: {'anger': 1, 'anticipation': 1, 'disgust': 0, 'fear': 3, 'joy': 0, 'sadness': 0, 'surprise': 1, 'trust': 1}
topic3: {'anger': 0, 'anticipation': 1, 'disgust': 0, 'fear': 0, 'joy': 0, 'sadness': 0, 'surprise': 0, 'trust': 0}
topic4: {'anger': 1, 'anticipation': 2, 'disgust': 1, 'fear': 2, 'joy': 1, 'sadness': 2, 'surprise': 0, 'trust': 0}
topic5: {'anger': 0, 'anticipation': 0, 'disgust': 0, 'fear': 1, 'joy': 0, 'sadness': 0, 'surprise': 0, 'trust': 1}
topic6: {'anger': 1, 'anticipation': 1, 'disgust': 0, 'fear': 0, 'joy': 1, 'sadness': 1, 'surprise': 2, 'trust': 1}
topic7: {'anger': 0, 'anticipation': 1, 'disgust': 0, 'fear': 0, 'joy': 0, 'sadness': 1, 'surprise': 0, 'trust': 1}
topic8: {'anger': 3, 'anticipation': 1, 'disgust': 0, 'fear': 2, 'joy': 0, 'sadness': 2, 'surprise': 1, 'trust': 2}
topic9: {'anger': 0, 'anticipation': 0, 'disgust': 0, 'fear': 2, 'joy': 

In [119]:
j

[{'anger': 3,
  'anticipation': 2,
  'disgust': 3,
  'fear': 2,
  'joy': 0,
  'sadness': 2,
  'surprise': 0,
  'trust': 2},
 {'anger': 1,
  'anticipation': 1,
  'disgust': 0,
  'fear': 3,
  'joy': 0,
  'sadness': 0,
  'surprise': 1,
  'trust': 1},
 {'anger': 0,
  'anticipation': 1,
  'disgust': 0,
  'fear': 0,
  'joy': 0,
  'sadness': 0,
  'surprise': 0,
  'trust': 0},
 {'anger': 1,
  'anticipation': 2,
  'disgust': 1,
  'fear': 2,
  'joy': 1,
  'sadness': 2,
  'surprise': 0,
  'trust': 0},
 {'anger': 0,
  'anticipation': 0,
  'disgust': 0,
  'fear': 1,
  'joy': 0,
  'sadness': 0,
  'surprise': 0,
  'trust': 1},
 {'anger': 1,
  'anticipation': 1,
  'disgust': 0,
  'fear': 0,
  'joy': 1,
  'sadness': 1,
  'surprise': 2,
  'trust': 1},
 {'anger': 0,
  'anticipation': 1,
  'disgust': 0,
  'fear': 0,
  'joy': 0,
  'sadness': 1,
  'surprise': 0,
  'trust': 1},
 {'anger': 3,
  'anticipation': 1,
  'disgust': 0,
  'fear': 2,
  'joy': 0,
  'sadness': 2,
  'surprise': 1,
  'trust': 2},
 {'anger

In [121]:
# Attach the per-topic emotion totals as a new column, one dict per row.
# j must hold exactly one entry per row of the frame, in row order.
trustpilot_lexicon_df['lexicon_sentiment'] = j

In [122]:
trustpilot_lexicon_df.head()

Unnamed: 0,Topic,W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,lexicon_sentiment
0,1,together,imagine,assure,fraudulent,procedure,unknown,violate,reality,polite,opportunity,specialist,unhappy,damage,utensil,"{'anger': 3, 'anticipation': 2, 'disgust': 3, ..."
1,2,whole,resolution,deposit,manager,base,evidence,nowhere,hide,shock,line,space,dog,submit,difficult,"{'anger': 1, 'anticipation': 1, 'disgust': 0, ..."
2,3,nature,narrative,political,perpetuate,partially,ounce,oppose,noncare,prime,implore,instantly,ideal,propose,incidence,"{'anger': 0, 'anticipation': 1, 'disgust': 0, ..."
3,4,reimburse,phishing,model,cut,act,super,tour,glad,identity,slow,expire,afraid,dreadful,scammed,"{'anger': 1, 'anticipation': 2, 'disgust': 1, ..."
4,5,guy,positive,item,learn,past,human,tiny,video,corona,prevent,general,trustpilot,basement,summer,"{'anger': 0, 'anticipation': 0, 'disgust': 0, ..."


In [124]:
# Create the lexicon CSV and write its header row.
import csv
CsvHeader = ['Topic', 'W1', 'W2', 'W3', 'W4', 'W5', 'W6','W7','W8','W9','W10','W11','W12','W13','W14','lexicon_sentiment']
# mode='w' (not 'a'): re-running this cell must start a fresh file rather
# than append a second header row to a file left over from an earlier run.
with open('lexicon-sentiment2.csv', mode='w', newline='') as file_header:
    head_writer = csv.writer(file_header, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    head_writer.writerow(CsvHeader)

In [125]:
# Append one CSV row per topic below the header written by the previous cell.
# The file is opened (and the writer built) once for the whole loop instead of
# once per row. The trailing dict in each row is written as its text form.
with open('lexicon-sentiment2.csv', mode='a', newline='') as file_header:
    head_writer = csv.writer(file_header, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for word in trustpilot_lexicon_df.values.tolist():
        print(word)
        head_writer.writerow(word)

[1, 'together', 'imagine', 'assure', 'fraudulent', 'procedure', 'unknown', 'violate', 'reality', 'polite', 'opportunity', 'specialist', 'unhappy', 'damage', 'utensil', {'anger': 3, 'anticipation': 2, 'disgust': 3, 'fear': 2, 'joy': 0, 'sadness': 2, 'surprise': 0, 'trust': 2}]
[2, 'whole', 'resolution', 'deposit', 'manager', 'base', 'evidence', 'nowhere', 'hide', 'shock', 'line', 'space', 'dog', 'submit', 'difficult', {'anger': 1, 'anticipation': 1, 'disgust': 0, 'fear': 3, 'joy': 0, 'sadness': 0, 'surprise': 1, 'trust': 1}]
[3, 'nature', 'narrative', 'political', 'perpetuate', 'partially', 'ounce', 'oppose', 'noncare', 'prime', 'implore', 'instantly', 'ideal', 'propose', 'incidence', {'anger': 0, 'anticipation': 1, 'disgust': 0, 'fear': 0, 'joy': 0, 'sadness': 0, 'surprise': 0, 'trust': 0}]
[4, 'reimburse', 'phishing', 'model', 'cut', 'act', 'super', 'tour', 'glad', 'identity', 'slow', 'expire', 'afraid', 'dreadful', 'scammed', {'anger': 1, 'anticipation': 2, 'disgust': 1, 'fear': 2, '

In [126]:
# Read back the CSV written by the previous cells and preview its first rows.
# NOTE(review): lexicon_sentiment was written as the text form of a dict, so
# it presumably comes back as strings rather than dicts — confirm before
# indexing into it.
sentiment_lexicon_df = pd.read_csv("lexicon-sentiment2.csv")
sentiment_lexicon_df.head()

Unnamed: 0,Topic,W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,lexicon_sentiment
0,1,together,imagine,assure,fraudulent,procedure,unknown,violate,reality,polite,opportunity,specialist,unhappy,damage,utensil,"{'anger': 3, 'anticipation': 2, 'disgust': 3, ..."
1,2,whole,resolution,deposit,manager,base,evidence,nowhere,hide,shock,line,space,dog,submit,difficult,"{'anger': 1, 'anticipation': 1, 'disgust': 0, ..."
2,3,nature,narrative,political,perpetuate,partially,ounce,oppose,noncare,prime,implore,instantly,ideal,propose,incidence,"{'anger': 0, 'anticipation': 1, 'disgust': 0, ..."
3,4,reimburse,phishing,model,cut,act,super,tour,glad,identity,slow,expire,afraid,dreadful,scammed,"{'anger': 1, 'anticipation': 2, 'disgust': 1, ..."
4,5,guy,positive,item,learn,past,human,tiny,video,corona,prevent,general,trustpilot,basement,summer,"{'anger': 0, 'anticipation': 0, 'disgust': 0, ..."
