**Sentiment Analysis**

**I. Individual Analysis**

**Unified Data**

*General Imports*

In [1]:
import nltk
from random import shuffle
from sklearn.linear_model import LogisticRegression
from nltk.metrics.scores import (precision, recall, accuracy)
#from sklearn.tree import DecisionTreeClassifier
#from sklearn.ensemble import RandomForestClassifier

**Features Construction**

Useful functions to load the different features models

In [2]:
def load_bow(thefeatures,filename):
    '''
    Loads the reviews stored in filename and appends one (features, tag)
    tuple per review to the thefeatures list.

    Every non-blank line is split on single spaces. All fields but the
    last are "token:count" pairs and become the entries of the features
    dict (token -> int count). The last field must contain "#:"; the tag
    is the first three characters of the text that follows it.

    thefeatures: list that is extended in place.
    filename:    path of the review file to read.
    Returns None.
    '''
    # The context manager closes the file even if a line fails to parse.
    with open(filename, "r") as f:
        for line in f:
            # A blank line carries no review and no label field.
            if not line.strip():
                continue
            splited = line.split(' ')
            feats = {}
            for token in splited[:-1]:
                # Consecutive spaces produce empty tokens; ignore them.
                if not token:
                    continue
                # Split on the LAST colon so tokens that contain ':'
                # themselves still yield a valid count.
                theword = token.rsplit(":", 1)
                feats[theword[0]] = int(theword[1])
            tag = splited[-1].split("#:")[1][0:3]
            thefeatures.append((feats,tag))

In [3]:
def load_binary_bow(thefeatures,filename):
    '''
    Loads the reviews stored in filename and appends one (features, tag)
    tuple per review to the thefeatures list, using a binary Bag of Words:
    every token present in the review gets the value 1, whatever its count.

    Every non-blank line is split on single spaces. All fields but the
    last are "token:count" pairs. The last field must contain "#:"; the
    tag is the first three characters of the text that follows it.

    thefeatures: list that is extended in place.
    filename:    path of the review file to read.
    Returns None.
    '''
    # The context manager closes the file even if a line fails to parse.
    with open(filename, "r") as f:
        for line in f:
            # A blank line carries no review and no label field.
            if not line.strip():
                continue
            splited = line.split(' ')
            feats = {}
            for token in splited[:-1]:
                # Consecutive spaces produce empty tokens; ignore them.
                if not token:
                    continue
                # Drop only the trailing ":count" so a token containing ':'
                # keeps its full text as the feature name.
                feats[token.rsplit(":", 1)[0]] = 1
            tag = splited[-1].split("#:")[1][0:3]
            thefeatures.append((feats,tag))

In [4]:
def load_lexicon(thefeatures,filename,lexicon=None):
    '''
    Loads the reviews stored in filename and appends one (features, tag)
    tuple per review to the thefeatures list, where the features are
    computed from a sentiment lexicon (AFINN-111 by default).

    Each "token:count" field is split on "_" into sub-words; every
    sub-word found in the lexicon contributes its score times count.

    Features produced per review:
      point_pos - sum of (score * count) over positive-scored sub-words
      point_neg - the same over negative-scored sub-words, sign flipped
      count_pos - summed counts of positive-scored sub-words
      count_neg - summed counts of negative-scored sub-words
      size      - summed counts of all tokens
      porc_pos  - count_pos / size (0.0 when size is 0)
      porc_neg  - count_neg / size (0.0 when size is 0)

    thefeatures: list that is extended in place.
    filename:    path of the review file to read.
    lexicon:     optional dict word -> integer score; when omitted the
                 lexicon is read with load_affin().
    Returns None.
    '''
    lex = load_affin() if lexicon is None else lexicon
    # The context manager closes the file even if a line fails to parse.
    with open(filename, "r") as f:
        for line in f:
            # A blank line carries no review and no label field.
            if not line.strip():
                continue
            splited = line.split(' ')
            summa = 0
            negga = 0
            pos = 0
            neg = 0
            tot = 0

            for token in splited[:-1]:
                # Consecutive spaces produce empty tokens; ignore them.
                if not token:
                    continue
                # Split on the LAST colon so the count is always parsed.
                theword = token.rsplit(":", 1)
                count = int(theword[1])
                for lw in theword[0].split("_"):
                    score = lex.get(lw)
                    if score is None:
                        continue
                    if score > 0:
                        summa += score*count
                        pos += count
                    elif score < 0:
                        # A score of exactly 0 is neutral: counted in
                        # neither the positive nor the negative totals.
                        negga += score*count
                        neg += count
                tot += count

            feats1 = {}
            feats1["point_pos"] = summa
            feats1["point_neg"] = -negga
            feats1["count_pos"] = pos
            feats1["count_neg"] = neg
            feats1["size"] = tot
            # A review without tokens has size 0; avoid dividing by zero.
            feats1["porc_pos"] = pos/tot if tot else 0.0
            feats1["porc_neg"] = neg/tot if tot else 0.0
            tag = splited[-1].split("#:")[1][0:3]
            thefeatures.append((feats1,tag))

def load_affin(filename="AFINN-111.txt"):
    '''
    Loads the sentiment words of the AFINN-111 lexicon.

    The file is read line by line; each non-blank line holds a word (or
    phrase) and its integer score separated by a tab.

    filename: path of the lexicon file; defaults to "AFINN-111.txt" in
              the current working directory.
    Returns a dict mapping each word to its int score.
    '''
    affin = {}
    # The context manager guarantees the file is closed after reading.
    with open(filename, "r") as f:
        for line in f:
            # Skip blank lines (e.g. a trailing empty line).
            if not line.strip():
                continue
            splited = line.split('\t')
            affin[splited[0]] = int(splited[1])
    return affin

*Additional Function*

Shows the most informative features for the Logistic Regression models.

Taken from the answers to: https://stackoverflow.com/questions/11116697/how-to-get-most-informative-features-for-scikit-learn-classifiers Kudos to him.

In [5]:
def show_most_informative_features(vectorizer, clf, n=20):
    '''
    Prints the most informative features of a fitted linear classifier
    (used in this notebook with Logistic Regression models).

    vectorizer: fitted vectorizer exposing get_feature_names_out() or the
                legacy get_feature_names().
    clf:        fitted classifier; the first row of coef_ is paired with
                the feature names.
    n:          number of rows to print.

    Each row shows one of the n lowest coefficients (left) next to one of
    the n highest (right). When there are fewer than 2*n features the two
    columns overlap and features are repeated.
    '''
    # get_feature_names() was removed from recent scikit-learn releases in
    # favour of get_feature_names_out(); support both.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    coefs_with_fns = sorted(zip(clf.coef_[0], feature_names))
    # Lowest n from the front, highest n walking backwards from the end.
    top = zip(coefs_with_fns[:n], coefs_with_fns[:-(n + 1):-1])
    for (coef_1, fn_1), (coef_2, fn_2) in top:
        print ("\t%.4f\t%-15s\t\t%.4f\t%-15s" % (coef_1, fn_1, coef_2, fn_2))

**BoW**

Loads the data and creates the features used for training. Loads both the positive and the negative review files.

In [6]:
# Training set for the Bag of Words model: the positive reviews of every
# category are loaded first, followed by the negative ones.
BoWFeatures = []
for review_file in (
    "reviews/books/positive.review",
    "reviews/dvd/positive.review",
    "reviews/electronics/positive.review",
    "reviews/kitchen/positive.review",
    "reviews/books/negative.review",
    "reviews/dvd/negative.review",
    "reviews/electronics/negative.review",
    "reviews/kitchen/negative.review",
):
    load_bow(BoWFeatures, review_file)

print("Loaded",len(BoWFeatures),"reviews")

Loaded 8000 reviews


Loads the data and creates the feature to test the classifier. 

In [7]:
# Testing set for the Bag of Words model: one review file per category.
BoWTesting = []
for review_file in (
    "reviews/books/unlabeled.review",
    "reviews/dvd/unlabeled.review",
    "reviews/electronics/unlabeled.review",
    "reviews/kitchen/unlabeled.review",
):
    load_bow(BoWTesting, review_file)

print("Loaded",len(BoWTesting),"reviews")

Loaded 19677 reviews


**Binary BoW**

Loads the data and creates the feature to train. Loads and uses both review files. Filters them to change it to 1 for each existent element in the Bag of Words.

In [8]:
# Training set for the Binary Bag of Words model: the positive reviews of
# every category are loaded first, followed by the negative ones.
BinaryBoWFeatures = []
for review_file in (
    "reviews/books/positive.review",
    "reviews/dvd/positive.review",
    "reviews/electronics/positive.review",
    "reviews/kitchen/positive.review",
    "reviews/books/negative.review",
    "reviews/dvd/negative.review",
    "reviews/electronics/negative.review",
    "reviews/kitchen/negative.review",
):
    load_binary_bow(BinaryBoWFeatures, review_file)

print("Loaded",len(BinaryBoWFeatures),"reviews")

Loaded 8000 reviews


Loads the data and creates the feature to test the classifier. 

In [9]:
# Testing set for the Binary Bag of Words model: one review file per category.
BinaryBoWTesting = []
for review_file in (
    "reviews/books/unlabeled.review",
    "reviews/dvd/unlabeled.review",
    "reviews/electronics/unlabeled.review",
    "reviews/kitchen/unlabeled.review",
):
    load_binary_bow(BinaryBoWTesting, review_file)

print("Loaded",len(BinaryBoWTesting),"reviews")

Loaded 19677 reviews


**Lexicon Features**

Loads the data and creates the feature to train. Loads and uses both review files. Takes into use the following Features:

- Number of positive words
- Number of negative words
- Positive score following the AFINN-111 scores
- Negative score following the AFINN-111 scores
- Size (number of words) of the review
- Percentage of positive words
- Percentage of negative words

In [10]:
# Training set for the lexicon-based model: the positive reviews of every
# category are loaded first, followed by the negative ones.
LexiconFeatures = []
for review_file in (
    "reviews/books/positive.review",
    "reviews/dvd/positive.review",
    "reviews/electronics/positive.review",
    "reviews/kitchen/positive.review",
    "reviews/books/negative.review",
    "reviews/dvd/negative.review",
    "reviews/electronics/negative.review",
    "reviews/kitchen/negative.review",
):
    load_lexicon(LexiconFeatures, review_file)

print("Loaded",len(LexiconFeatures),"reviews")

Loaded 8000 reviews


Loads the data and creates the feature to test the classifier. 

In [11]:
# Testing set for the lexicon-based model: one review file per category.
LexiconTesting = []
for review_file in (
    "reviews/books/unlabeled.review",
    "reviews/dvd/unlabeled.review",
    "reviews/electronics/unlabeled.review",
    "reviews/kitchen/unlabeled.review",
):
    load_lexicon(LexiconTesting, review_file)

print("Loaded",len(LexiconTesting),"reviews")

Loaded 19677 reviews


**Testing Zone**

Now we test the six different combinations of feature models and classification algorithms.

**BoW - Naive Bayes**

Trains the dataset over a Naive Bayes with a Bag of Words Model.

**Training**

In [12]:
# Reorder the training list in place (random.shuffle, no seed is set).
shuffle(BoWFeatures)
# Fit an NLTK Naive Bayes classifier on the (features, tag) tuples.
# `classifier` is rebound by every training cell, so the evaluation cells
# that follow score whichever model was trained last.
classifier = nltk.NaiveBayesClassifier.train(BoWFeatures)
# Print the 10 most informative features of the trained model.
classifier.show_most_informative_features(10)

Most Informative Features
             don't_waste = 1                 neg : pos    =     70.3 : 1.0
              waste_your = 1                 neg : pos    =     40.1 : 1.0
                waste_of = 1                 neg : pos    =     33.2 : 1.0
           not_recommend = 1                 neg : pos    =     31.0 : 1.0
                  refund = 1                 neg : pos    =     30.2 : 1.0
                 of_junk = 1                 neg : pos    =     29.0 : 1.0
            poor_quality = 1                 neg : pos    =     25.7 : 1.0
               not_worth = 1                 neg : pos    =     23.7 : 1.0
             great_value = 1                 pos : neg    =     23.7 : 1.0
                 a_waste = 1                 neg : pos    =     23.6 : 1.0


**Validating**

In [13]:
# Gold tags and predicted tags, in dataset order.
# BoWFeatures is the list the classifier was trained on in the cell above,
# so these are scores on the training data.
real = [label for (_, label) in BoWFeatures]
result = [classifier.classify(feats) for (feats, _) in BoWFeatures]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For Validating set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For Validating set:
Accuracy: 0.99875
Precision: 0.9989994997498749
Recall: 0.9985
F1 Score: 0.9987496874218555


**Testing**

In [14]:
# Gold tags and predicted tags for the testing reviews, in dataset order.
real = [label for (_, label) in BoWTesting]
result = [classifier.classify(feats) for (feats, _) in BoWTesting]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For testing set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For testing set:
Accuracy: 0.868679168572445
Precision: 0.882574963304676
Recall: 0.8518518518518519
F1 Score: 0.866941297631308


**BoW - Logistic Regression**

Trains a Logistic Regression classifier on the dataset with a Bag of Words Model.

**Training**

In [15]:
# Reorder the training list in place (random.shuffle, no seed is set).
shuffle(BoWFeatures)
# Wrap a scikit-learn LogisticRegression (up to 1000 iterations) in NLTK's
# SklearnClassifier so it accepts the same (features, tag) tuples.
# `classifier` is rebound here, replacing the previously trained model.
classifier = nltk.classify.SklearnClassifier(LogisticRegression(max_iter=1000))
classifier.train(BoWFeatures)

<SklearnClassifier(LogisticRegression(max_iter=1000))>

**Validating**

In [16]:
# Gold tags and predicted tags, in dataset order.
# BoWFeatures is the list the classifier was trained on in the cell above,
# so these are scores on the training data.
real = [label for (_, label) in BoWFeatures]
result = [classifier.classify(feats) for (feats, _) in BoWFeatures]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For Validating set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For Validating set:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


**Testing**

In [17]:
# Gold tags and predicted tags for the testing reviews, in dataset order.
real = [label for (_, label) in BoWTesting]
result = [classifier.classify(feats) for (feats, _) in BoWTesting]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For testing set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For testing set:
Accuracy: 0.8744727346648371
Precision: 0.8734381297863765
Recall: 0.8771503744181339
F1 Score: 0.8752903160658386


In [18]:
# Print the 10 lowest and 10 highest coefficients of the model trained last.
# NOTE(review): _vectorizer and _clf are private attributes of the
# SklearnClassifier wrapper — confirm they exist in the installed NLTK version.
show_most_informative_features(classifier._vectorizer,classifier._clf,10)  

	-1.4868	disappointed   		1.7761	excellent      
	-1.3615	poor           		1.4369	great          
	-1.3520	disappointing  		1.2640	perfect        
	-1.2462	boring         		1.0984	best           
	-1.2190	bad            		0.9337	easy           
	-1.2162	terrible       		0.8940	love           
	-1.1844	waste          		0.8909	fantastic      
	-1.1553	worst          		0.8703	works          
	-1.1539	disappointment 		0.8244	a_must         
	-0.9744	not            		0.8161	wonderful      


**Binary BoW - Naive Bayes**

Trains the dataset over a Naive Bayes with a Binary Bag of Words Model.

**Training**

In [19]:
# Reorder the training list in place (random.shuffle, no seed is set).
shuffle(BinaryBoWFeatures)
# Fit an NLTK Naive Bayes classifier on the binary (features, tag) tuples.
# `classifier` is rebound here, replacing the previously trained model.
classifier = nltk.NaiveBayesClassifier.train(BinaryBoWFeatures)
# Print the 10 most informative features of the trained model.
classifier.show_most_informative_features(10)

Most Informative Features
             don't_waste = 1                 neg : pos    =     71.0 : 1.0
              waste_your = 1                 neg : pos    =     41.9 : 1.0
                waste_of = 1                 neg : pos    =     35.4 : 1.0
           not_recommend = 1                 neg : pos    =     32.2 : 1.0
                 of_junk = 1                 neg : pos    =     31.7 : 1.0
            poor_quality = 1                 neg : pos    =     31.0 : 1.0
               not_worth = 1                 neg : pos    =     24.3 : 1.0
                 a_waste = 1                 neg : pos    =     24.1 : 1.0
                  refund = 1                 neg : pos    =     23.9 : 1.0
              it_started = 1                 neg : pos    =     21.7 : 1.0


**Validating**

In [20]:
# Gold tags and predicted tags, in dataset order.
# BinaryBoWFeatures is the list the classifier was trained on in the cell
# above, so these are scores on the training data.
real = [label for (_, label) in BinaryBoWFeatures]
result = [classifier.classify(feats) for (feats, _) in BinaryBoWFeatures]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For Validating set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For Validating set:
Accuracy: 0.99875
Precision: 0.9992492492492493
Recall: 0.99825
F1 Score: 0.9987493746873436


**Testing**

In [21]:
# Gold tags and predicted tags for the testing reviews, in dataset order.
real = [label for (_, label) in BinaryBoWTesting]
result = [classifier.classify(feats) for (feats, _) in BinaryBoWTesting]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For testing set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For testing set:
Accuracy: 0.8688824515932306
Precision: 0.8844776748104465
Recall: 0.8499291641368144
F1 Score: 0.8668593250077407


**Binary BoW - Logistic Regression**

Trains a Logistic Regression classifier on the dataset with a Binary Bag of Words Model.

**Training**

In [22]:
# Reorder the training list in place (random.shuffle, no seed is set).
shuffle(BinaryBoWFeatures)
# Wrap a scikit-learn LogisticRegression (up to 1000 iterations) in NLTK's
# SklearnClassifier so it accepts the same (features, tag) tuples.
# `classifier` is rebound here, replacing the previously trained model.
classifier = nltk.classify.SklearnClassifier(LogisticRegression(max_iter=1000))
classifier.train(BinaryBoWFeatures)

<SklearnClassifier(LogisticRegression(max_iter=1000))>

**Validating**

In [23]:
# Gold tags and predicted tags, in dataset order.
# BinaryBoWFeatures is the list the classifier was trained on in the cell
# above, so these are scores on the training data.
real = [label for (_, label) in BinaryBoWFeatures]
result = [classifier.classify(feats) for (feats, _) in BinaryBoWFeatures]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For Validating set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For Validating set:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


**Testing**

In [24]:
# Gold tags and predicted tags for the testing reviews, in dataset order.
real = [label for (_, label) in BinaryBoWTesting]
result = [classifier.classify(feats) for (feats, _) in BinaryBoWTesting]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For testing set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For testing set:
Accuracy: 0.8759465365655333
Precision: 0.8772178850248403
Recall: 0.8755312689738919
F1 Score: 0.8763737655102558


In [25]:
# Print the 10 lowest and 10 highest coefficients of the model trained last.
# NOTE(review): _vectorizer and _clf are private attributes of the
# SklearnClassifier wrapper — confirm they exist in the installed NLTK version.
show_most_informative_features(classifier._vectorizer,classifier._clf,10)  

	-1.5098	disappointed   		1.8732	excellent      
	-1.4847	poor           		1.6654	great          
	-1.3389	disappointing  		1.2843	perfect        
	-1.2802	not            		1.1470	best           
	-1.2759	terrible       		0.9637	easy           
	-1.2590	bad            		0.9520	love           
	-1.2569	waste          		0.9388	the_best       
	-1.2490	worst          		0.9299	works          
	-1.2166	boring         		0.8940	fantastic      
	-1.1517	disappointment 		0.8776	awesome        


**Lexicon Features - Naive Bayes**

Trains the dataset over a Naive Bayes with a Lexicon Features Model.

**Training**

In [26]:
# Reorder the training list in place (random.shuffle, no seed is set).
shuffle(LexiconFeatures)
# Fit an NLTK Naive Bayes classifier on the lexicon (features, tag) tuples.
# `classifier` is rebound here, replacing the previously trained model.
classifier = nltk.NaiveBayesClassifier.train(LexiconFeatures)
# Print the 10 most informative features of the trained model.
classifier.show_most_informative_features(10)

Most Informative Features
               count_pos = 4                 pos : neg    =     27.0 : 1.0
               count_neg = 10                neg : pos    =     19.8 : 1.0
               count_neg = 13                neg : pos    =     17.0 : 1.0
               count_neg = 16                neg : pos    =     14.3 : 1.0
               count_neg = 7                 neg : pos    =     11.4 : 1.0
               point_neg = 37                neg : pos    =     11.0 : 1.0
                porc_pos = 0.04838709677419355    neg : pos    =      9.7 : 1.0
                porc_pos = 0.3333333333333333    pos : neg    =      9.7 : 1.0
                porc_neg = 0.039473684210526314    neg : pos    =      9.0 : 1.0
                porc_neg = 0.054878048780487805    neg : pos    =      9.0 : 1.0


**Validating**

In [27]:
# Gold tags and predicted tags, in dataset order.
# LexiconFeatures is the list the classifier was trained on in the cell
# above, so these are scores on the training data.
real = [label for (_, label) in LexiconFeatures]
result = [classifier.classify(feats) for (feats, _) in LexiconFeatures]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For Validating set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For Validating set:
Accuracy: 0.847625
Precision: 0.8505167632972019
Recall: 0.8435
F1 Score: 0.8469938496297226


**Testing**

In [28]:
# Gold tags and predicted tags for the testing reviews, in dataset order.
real = [label for (_, label) in LexiconTesting]
result = [classifier.classify(feats) for (feats, _) in LexiconTesting]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For testing set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For testing set:
Accuracy: 0.7138791482441429
Precision: 0.7154002026342452
Recall: 0.7145314713620724
F1 Score: 0.7149655731065209


**Lexicon Features - Logistic Regression**

Trains a Logistic Regression classifier on the dataset with a Lexicon Features Model.

**Training**

In [29]:
# Reorder the training list in place (random.shuffle, no seed is set).
shuffle(LexiconFeatures)
# Wrap a scikit-learn LogisticRegression (up to 1000 iterations) in NLTK's
# SklearnClassifier so it accepts the same (features, tag) tuples.
# `classifier` is rebound here, replacing the previously trained model.
classifier = nltk.classify.SklearnClassifier(LogisticRegression(max_iter=1000))
classifier.train(LexiconFeatures)

<SklearnClassifier(LogisticRegression(max_iter=1000))>

**Validating**

In [30]:
# Gold tags and predicted tags, in dataset order.
# LexiconFeatures is the list the classifier was trained on in the cell
# above, so these are scores on the training data.
real = [label for (_, label) in LexiconFeatures]
result = [classifier.classify(feats) for (feats, _) in LexiconFeatures]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For Validating set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For Validating set:
Accuracy: 0.7285
Precision: 0.7400210084033614
Recall: 0.7045
F1 Score: 0.7218237704918034


**Testing**

In [31]:
# Gold tags and predicted tags for the testing reviews, in dataset order.
real = [label for (_, label) in LexiconTesting]
result = [classifier.classify(feats) for (feats, _) in LexiconTesting]

# Review indices grouped by gold tag (refsets) and by predicted tag
# (testsets), as the set-based nltk metrics expect.
refsets = nltk.collections.defaultdict(set)
testsets = nltk.collections.defaultdict(set)
for i, (label, observed) in enumerate(zip(real, result)):
    refsets[label].add(i)
    testsets[observed].add(i)

pos_ref, pos_test = refsets['pos'], testsets['pos']
print( 'For testing set:')
print( 'Accuracy:', nltk.accuracy(real,result) )
print( 'Precision:', nltk.scores.precision(pos_ref, pos_test) )
print( 'Recall:', nltk.recall(pos_ref, pos_test) )
print( 'F1 Score:', nltk.f_measure(pos_ref, pos_test) )

For testing set:
Accuracy: 0.7294302993342481
Precision: 0.7325510204081632
Recall: 0.7264723740133576
F1 Score: 0.7294990346509501


In [32]:
# Print up to 10 lowest and 10 highest coefficients of the model trained last.
# NOTE(review): _vectorizer and _clf are private attributes of the
# SklearnClassifier wrapper — confirm they exist in the installed NLTK version.
show_most_informative_features(classifier._vectorizer,classifier._clf,10)  

	-7.1087	porc_neg       		9.1354	porc_pos       
	-0.0907	count_pos      		0.0542	point_pos      
	-0.0278	point_neg      		0.0018	size           
	0.0014	count_neg      		0.0014	count_neg      
	0.0018	size           		-0.0278	point_neg      
	0.0542	point_pos      		-0.0907	count_pos      
	9.1354	porc_pos       		-7.1087	porc_neg       


**Table of Results - Unified Data**

| Combination | Accuracy | Precision | Recall | F1 Score|
| --- | ----------- |----------- |----------- |----------- |
| **BOW - NB** | 0.868679168572445 | 0.882574963304676 | 0.8518518518518519 | 0.866941297631308 |
| **BOW - LR** | 0.8744727346648371 | 0.8734381297863765 | **0.8771503744181339** | 0.8752903160658386 |
| **BBOW - NB** | 0.8688824515932306 | **0.8844776748104465** | 0.8499291641368144 | 0.8668593250077407 |
| **BBOW - LR** | **0.8759465365655333** | 0.8772178850248403 | 0.8755312689738919 | **0.8763737655102558** |
| **Lexicon - NB** | 0.7138791482441429 | 0.7154002026342452 | 0.7145314713620724 | 0.7149655731065209 |
| **Lexicon - LR** | 0.7294302993342481 | 0.7325510204081632 | 0.7264723740133576 | 0.7294990346509501 |
