## Imports

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from ast import literal_eval
import seaborn as sns
from tqdm import tqdm

from keras.datasets import imdb
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix
from sklearn.cluster import KMeans
from keras.datasets import imdb

# Notebook-wide plot styling: seaborn's 'whitegrid' style and a default
# matplotlib font size of 24 for every figure drawn below.
sns.set_style('whitegrid')
plt.rcParams.update({'font.size': 24})

## Load data

In [None]:
# Read the stored results and discard the CSV's 'Unnamed: 0' column.
df = pd.read_csv('results.csv', index_col=False).drop(columns=['Unnamed: 0'])

# `target` is stored as text: parse every entry into a Python literal, stack
# the parsed rows into a 2-D array and keep the index of the largest entry
# of each row as the class label.
df['target'] = df['target'].apply(literal_eval)
parsed_targets = [np.array(v) for v in df['target'].values]
df['target'] = np.array(parsed_targets).argmax(1)

# Make sure the per-class statistics are plain float64 columns.
for col in ('mean0', 'std0', 'mean1', 'std1'):
    df[col] = df[col].astype('float64')
display(df.head())

In [None]:
def classify(x, threshold):
    '''
    Pick the class with the highest mean for one review.

    x must support lookup of 'mean0', 'mean1', 'std0' and 'std1'.
    The index of the larger mean (0 or 1; 0 on a tie) is returned
    when the std of that same class is at most `threshold`.
    Otherwise -1 is returned to mark the review as uncertain.
    '''
    means = np.array([x['mean0'], x['mean1']])
    stds = np.array([x['std0'], x['std1']])
    winner = means.argmax()
    # Written as "not <=" so that a NaN std is rejected as well.
    if not (stds[winner] <= threshold):
        return -1
    return winner

In [None]:
# Show the largest value in the `std1` column as the cell output.
# NOTE(review): only `std1` is inspected here; `std0` is not checked.
print('Max std')
df['std1'].max()

## Precision and recall trade off
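The cell below runs the `classify` function over a range of uncertainty thresholds and plots precision and recall as a function of the threshold. Here, precision is the accuracy on the reviews the model is certain about (those not classified as -1), and recall is the number of those reviews divided by 12,500.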

In [None]:
# Denominator used for "recall" below.
# NOTE(review): hard-coded; presumably the number of reviews in df — confirm.
N_REVIEWS = 12500

thresholds = list(np.arange(0, 0.2, 0.005))
precisions = []
recalls = []

for t in tqdm(thresholds):
    # Keep the per-threshold predictions in a local Series. The previous
    # version wrote them into a 'test' column through an alias of df
    # (`testdf = df` is not a copy), silently mutating the shared frame.
    preds = df.apply(classify, args=[t], axis=1)
    is_certain = preds != -1

    # Precision: share of correct predictions among the certain reviews
    # (NaN when no review passes the threshold).
    hits = df.loc[is_certain, 'target'] == preds[is_certain]
    precisions.append(hits.mean())

    # Recall: number of certain reviews relative to N_REVIEWS.
    recalls.append(int(is_certain.sum()) / N_REVIEWS)

# The first threshold (0) is left out of the plot.
datf = pd.DataFrame({'threshold': thresholds[1:],
                     'precision': precisions[1:],
                     'recall': recalls[1:]})

fig, ax = plt.subplots(figsize=(15, 10))
# Label each line explicitly: passing a bare list of names to legend() assigns
# them to artists in drawing order, which can include seaborn's error bands.
sns.lineplot(data=datf, x='threshold', y='precision', label='precision', ax=ax)
sns.lineplot(data=datf, x='threshold', y='recall', label='recall', ax=ax)
ax.set_title('Precision and Recall with uncertainty threshold')
ax.legend()
ax.set_xlabel('Uncertainty threshold')
ax.set_ylabel('Precision/recall')
plt.show()

Uncomment the lines below to sort the dataframe by `std0` (most certain reviews first) and keep only the reviews that the model misclassified.

In [None]:
# By default the qualitative analysis walks through df in its existing order.
# Note that `sortd` is another name for the same DataFrame object, not a copy.
sortd = df
# Optional (uncomment to enable): classify every review with threshold 1,
# sort by std0, keep only rows whose prediction differs from the target,
# and renumber the index.
# df['test'] = df.apply(classify, args=[1], axis=1)
# sortd = df.sort_values(['std0'])
# sortd = sortd[sortd['target'] != sortd['test']]
# sortd = sortd.reset_index(drop=True)
# sortd.head()

## Qualitative analysis
The cells below step through the reviews in the `sortd` dataframe, one review per run. Each run prints the review text and draws an error-bar plot of the predicted mean for each class, with error bars of two standard deviations. The ground-truth class is drawn in green and the other class in red.

In [None]:
# Position (in sortd) of the next review to show; the following cell
# increments it every time it is run. Re-run this cell to start over.
i = 0

In [None]:
# Show the review at position i together with its per-class prediction.
print('Index:',i)
s = sortd.iloc[i]
print('\nReview:\n')
print(s['text'].strip())

# The ground-truth class is drawn in green, the other class in red.
colors = ['green', 'red'] if s['target'] == 0 else ['red', 'green']

plt.figure(figsize=(4,8))
# One point per class at x = 0 / 1, with error bars of two standard deviations.
for pos, mean_col, std_col in ((0, 'mean0', 'std0'), (1, 'mean1', 'std1')):
    plt.errorbar([pos], [s[mean_col]], yerr=[s[std_col]*2],
                 fmt='o', capsize=8, color=colors[pos])

plt.xticks([0, 1], ['negative', 'positive'])
plt.ylim([0,1])
plt.xlim([-0.5, 1.5])
plt.xlabel('Class')
plt.ylabel('Probability')

# Advance so that re-running this cell shows the next review.
i += 1

## Confusion matrices

In [None]:
# A threshold of 1 acts as "no threshold" here.
# NOTE(review): this assumes no std value exceeds 1 — confirm.
df['y_pred'] = df.apply(classify, args=[1], axis=1)

# Leave out reviews that classify() marked as uncertain (-1).
tdf = df[df['y_pred'] != -1]

# Fix the label order so the matrix is always 2x2 (0 = negative,
# 1 = positive), even when one class is absent after filtering. Without
# `labels`, a missing class would shrink the matrix and break the labelled
# DataFrame built from it.
cm = confusion_matrix(tdf['target'].values, tdf['y_pred'].values, labels=[0, 1])

df_cm = pd.DataFrame(cm,
                     index=['negative', 'positive'],
                     columns=['predicted negative', 'predicted positive'])

fig, ax = plt.subplots(figsize=(15,10))
sns.heatmap(df_cm, annot=True, fmt='g', cmap='GnBu', ax=ax)
# Replace the row labels and centre them on the heatmap cells.
ax.set_yticks(np.arange(2)+0.5)
ax.set_yticklabels(['actual negative', 'actual positive'], va='center')
ax.set_title('Confusion matrix without an uncertainty threshold')
plt.show()

In [None]:
# Classify every review, rejecting those whose std exceeds 0.075.
df['y_pred'] = df.apply(classify, args=[0.075], axis=1)

# Leave out reviews that classify() marked as uncertain (-1).
tdf = df[df['y_pred'] != -1]

# Fix the label order so the matrix is always 2x2 (0 = negative,
# 1 = positive), even when one class is absent after filtering. Without
# `labels`, a missing class would shrink the matrix and break the labelled
# DataFrame built from it.
cm = confusion_matrix(tdf['target'].values, tdf['y_pred'].values, labels=[0, 1])

df_cm = pd.DataFrame(cm,
                     index=['negative', 'positive'],
                     columns=['predicted negative', 'predicted positive'])

fig, ax = plt.subplots(figsize=(15,10))
sns.heatmap(df_cm, annot=True, fmt='g', cmap='GnBu', ax=ax)
# Replace the row labels and centre them on the heatmap cells.
ax.set_yticks(np.arange(2)+0.5)
ax.set_yticklabels(['actual negative', 'actual positive'], va='center')
ax.set_title('Confusion matrix with an uncertainty threshold of 0.075')
plt.show()

## Naive symbolic approach

In [None]:
# sentiment lexicon
# !wget https://gist.githubusercontent.com/bastings/d6f99dcb6c82231b94b013031356ba05/raw/f80a0281eba8621b122012c89c8b5e2200b39fd6/sent_lexicon

In [None]:
# make a dictionary where {word1: sentiment1, word2: sentiment2}, extracted from the sentiment lexicon

def get_pripo_type():
    """Parse the ``sent_lexicon`` file into two word-keyed dictionaries.

    Every non-blank line must contain at least six single-space-separated
    ``key=value`` fields. The value of the third field is used as the word,
    the value of the sixth field is stored as that word's entry in the first
    dictionary, and the value of the first field as its entry in the second.

    Returns:
        tuple: ``(lex_pripo, lex_type)`` — two dicts keyed by word. If a word
        occurs on several lines, the last occurrence wins.

    Raises:
        FileNotFoundError: if ``sent_lexicon`` is not in the working directory.
        IndexError / ValueError: if a non-blank line does not have the
            expected field layout.
    """
    with open("sent_lexicon", mode="r", encoding="utf-8") as f:
        lex = f.readlines()

    lex_pripo = {}
    lex_type = {}

    for line in lex:
        # Strip the line terminator up front instead of slicing one character
        # off the sixth field: that slice corrupted the value whenever the
        # field was not directly followed by a newline (e.g. a final line
        # without a trailing newline).
        line = line.rstrip('\r\n')
        if not line.strip():
            # Blank lines carry no entry.
            continue

        holder = line.split(' ')

        _, word = holder[2].split('=')
        _, lex_pripo[word] = holder[5].split('=')
        _, lex_type[word] = holder[0].split('=')
    return lex_pripo, lex_type

# Build the word lookup used by the scoring functions below; the second
# dictionary returned by get_pripo_type() is not needed here.
lex_pripo, _ = get_pripo_type()

In [None]:
# count the sentiment score of a review
def count_lex(review, lexicon=None):
    """Return the lexicon-based sentiment score of a review.

    The review is split on whitespace; every token whose lexicon entry is
    'positive' adds 1, every token whose entry is 'negative' subtracts 1.
    Tokens that are missing from the lexicon, or that have any other entry,
    do not change the score.

    Args:
        review: the review text.
        lexicon: mapping from word to polarity string. Defaults to the
            notebook-level ``lex_pripo`` dictionary.

    Returns:
        int: number of positive tokens minus number of negative tokens.
    """
    if lexicon is None:
        lexicon = lex_pripo

    score = 0
    for word in review.split():
        # .get() replaces the former bare `except:`, which hid every error
        # and not only the expected missing-word lookup.
        polarity = lexicon.get(word)
        if polarity == 'positive':
            score += 1
        elif polarity == 'negative':
            score -= 1
    return score


# classify a review with a fixed threshold
def classify_review(review, threshold=3, lexicon=None):
    """Classify a review as positive (1) or negative (0) from its score.

    Args:
        review: the review text.
        threshold: the review is positive only when its score is strictly
            greater than this value (default 3).
        lexicon: mapping from word to polarity string, passed on to
            count_lex(). Defaults to the notebook-level ``lex_pripo``.

    Returns:
        int: 1 when count_lex(review) > threshold, otherwise 0.
    """
    # Reuse count_lex so both functions always agree on the score.
    return 1 if count_lex(review, lexicon) > threshold else 0

In [None]:
# normalize the dictionary values
def norm_dict(my_dict):
    """Rescale the values of `my_dict` in place so that they sum to 1.

    The same dictionary object is returned. A ZeroDivisionError is raised
    when the values sum to zero (including for an empty dictionary).
    """
    total = sum(my_dict.values())
    scale = 1.0 / total
    for key, value in my_dict.items():
        my_dict[key] = value * scale
    return my_dict

# used threshold for model uncertainty
threshold = 0.075


def _safe_mean(values):
    """Return the arithmetic mean of `values`, or NaN when it is empty."""
    return sum(values) / len(values) if len(values) else float('nan')


all_reviews = 0
symbolic = 0

# sentiment scores of the reviews on which the model was uncertain,
# split by whether the symbolic classifier matched the target
symbolic_uncertain_correct = []
symbolic_uncertain_incorrect = []

# sentiment scores of the reviews the model misclassified,
# split by whether the symbolic classifier matched the target
symbolic_missclass_correct = []
symbolic_missclass_incorrect = []

# sentiment scores split by model wrong / right
bad = []
good = []

# sentiment scores split by model certain / uncertain
certain = []
uncertain = []

all_list = []
# Raw number of reviews per sentiment score. The scores are counted as they
# occur instead of in a pre-sized dict with a hard-coded range, so a score
# outside that range can no longer raise a KeyError.
score_counts = {}

for index, row in df.iterrows():

    all_reviews += 1

    review = row['text']
    target = row['target']
    std = np.array([row['std0'], row['std1']])
    prediction = np.array([row['mean0'], row['mean1']]).argmax()

    sentiment = classify_review(review)
    sent_score = count_lex(review)

    score_counts[sent_score] = score_counts.get(sent_score, 0) + 1
    all_list.append(sent_score)

    symbolic_is_correct = sentiment == target
    if symbolic_is_correct:
        symbolic += 1

    # in the case that our model is uncertain:
    if std[prediction] > threshold:
        uncertain.append(sent_score)
        if symbolic_is_correct:
            symbolic_uncertain_correct.append(sent_score)
        else:
            symbolic_uncertain_incorrect.append(sent_score)
    # in the case that our model is certain:
    else:
        certain.append(sent_score)

    # in the case that our model makes a wrong prediction
    if prediction != target:
        bad.append(sent_score)
        if symbolic_is_correct:
            symbolic_missclass_correct.append(sent_score)
        else:
            symbolic_missclass_incorrect.append(sent_score)
    # in the case that our model makes a good prediction
    else:
        good.append(sent_score)

# Histogram over every integer score between the smallest and the largest
# observed one (scores that never occur get 0), normalised to sum to 1.
# This part assumes df holds at least one review.
all_dict = {score: score_counts.get(score, 0)
            for score in range(min(score_counts), max(score_counts) + 1)}
all_dict = norm_dict(all_dict)

# print the stats
# (_safe_mean keeps the report going when one of the buckets is empty)

print('mean sent score:', _safe_mean(all_list))
print('accuracy given by symbolic approach over whole test set:', symbolic/all_reviews)

print('-------------------------------')

print("when our model was correct we achieve an average sent score of:", _safe_mean(good), len(good))
print('when our model was incorrect we achieve an average sent score of:', _safe_mean(bad), len(bad))
print('std correct', np.array(good).std())
print('std incorrect', np.array(bad).std())

print('when our model was certain we achieve an average sent score of:', _safe_mean(certain), len(certain))
print('when our model was uncertain we achieve an average sent score of:', _safe_mean(uncertain), len(uncertain))
print('std certain', np.array(certain).std())
print('std uncertain', np.array(uncertain).std())

print('-------------------------------')

print('number of correclty classified by naive approach when model was wrong:', len(symbolic_missclass_correct))
print('mean sent score:', _safe_mean(symbolic_missclass_correct))
print('number of incorrectly classified by naive approach when model was wrong', len(symbolic_missclass_incorrect))
print('mean sent score', _safe_mean(symbolic_missclass_incorrect))

print('-------------------------------')

print('number of correclty classified by naive approach when model was uncertain', len(symbolic_uncertain_correct))
print('mean sent_score:', _safe_mean(symbolic_uncertain_correct))
print('number of uncorreclty classified by naive approach when model was uncertain', len(symbolic_uncertain_incorrect))
print('mean sent_score:', _safe_mean(symbolic_uncertain_incorrect))


# plot the distribution

fig, ax = plt.subplots(figsize=(15,10))

# Use the scores themselves as x positions, so ticks and labels come from one
# range and cannot disagree in length.
scores = list(all_dict.keys())
ax.bar(scores, list(all_dict.values()), align='center')
ax.set_xticks(range(min(scores), max(scores) + 1, 5))
ax.tick_params(axis='x', labelsize=16)

ax.set_xlabel('Sentiment score', fontsize=22)
# The bar heights sum to 1 after norm_dict, i.e. they are fractions.
ax.set_ylabel('Fraction of reviews', fontsize=22)
plt.show()