# SP Model Training

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from glove import *
from myLayers import CustomAttention, Projection, MaskSum, WordAspectFusion

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
import tensorflow as tf
from tensorflow import keras

## Load test dataset and set data types

In [3]:
reviews = pd.read_csv('../data/reviews_revision_test.csv', index_col=0)

reviews['ids'] = reviews['ids'].apply(lambda x: list(map(int, x[1:-1].split(', '))))
reviews['meta_review_pros'] = reviews['meta_review_pros'].apply(lambda x: x[2:-2].split('\', \''))
reviews['meta_review_so-so'] = reviews['meta_review_so-so'].apply(lambda x: x[2:-2].split('\', \''))
reviews['meta_review_cons'] = reviews['meta_review_cons'].apply(lambda x: x[2:-2].split('\', \''))
reviews['meta_review_labels'] = reviews['meta_review_labels'].apply(lambda x: x[2:-2].split('\', \''))

## Load GloVe model

In [4]:
glove_model = GloveModel.from_pretrained('../trained_models/el.glove.300.txt')

## Load SP model and SP model with attention output

In [5]:
custom_objects = {'MaskSum': MaskSum, 'WordAspectFusion': WordAspectFusion,
                  'CustomAttention': CustomAttention, 'Projection': Projection}
SP_model = keras.models.load_model('../trained_models/SP_model.h5',
                                   custom_objects=custom_objects)
SP_attention_model = keras.models.load_model('../trained_models/SP_attention_model.h5',
                                             custom_objects=custom_objects)

## Reformat dataset rows to have a query aspect and a target aspect sentiment

In [6]:
# Expand the list of labels to separate rows and build a labels df
labels = reviews['meta_review_labels'].apply(pd.Series).stack().rename('meta_review_labels').reset_index()

# Join the labels df to the original df
reviews = pd.merge(labels, reviews, left_on='level_0', right_index=True, suffixes=(['','_old']))[reviews.columns]

# Rename column
reviews = reviews.rename(columns={'meta_review_labels': 'aspect'})

# Add product type as a prefix to aspect
reviews['aspect_prefixed'] = reviews['meta_product_type'] + ' ' + reviews['aspect']

# If aspect is 'Σχέση ποιότητας τιμής' make prefix 'Γενικά'
reviews.loc[reviews['aspect'] == 'Σχέση ποιότητας τιμής', 'aspect_prefixed'] = 'Γενικά Σχέση ποιότητας τιμής'

## Read top labels (balanced labels with many instances) and drop the rest

In [7]:
with open('../data/top_labels.txt', 'r', encoding='utf-8') as f:
    f_lines = f.readlines()
    top_labels = []
    for i in f_lines:
        top_labels.append(i.strip().replace('#', ' ').replace('_', ' '))
        
# Drop unbalanced aspects
condition = False
for label in top_labels:
    condition |= (reviews['aspect_prefixed'] == label)
condition = ~condition
    
reviews.drop(index=reviews[condition].index.tolist(), inplace=True)
reviews.reset_index(inplace=True)

## Get aspect ids using GloVe model

In [8]:
reviews['aspect_ids'] = reviews['aspect_prefixed'].apply(glove_model.string_to_ids)

In [9]:
reviews.sample(5)

Unnamed: 0,index,text,meta_product_type,meta_review_pros,meta_review_so-so,meta_review_cons,aspect,ids,ids_length,aspect_prefixed,aspect_ids
23,28,παιδια μακρια απο αυτα διοτη εχουν τεραστιο πρ...,Gaming Headsets,"[Ποιότητα ήχου, Μπάσο]",[],"[Ποιότητα Μικροφώνου, Ποιότητα κατασκευής]",Ποιότητα Μικροφώνου,"[151, 921, 13, 99, 314596, 59, 2300, 328, 10, ...",59,Gaming Headsets Ποιότητα Μικροφώνου,"[10459, 143766, 751, 34972]"
105,125,3 μήνες λειτουργίας το κινητό..από την αρχή εί...,Κινητά Τηλέφωνα,"[Φωτογραφίες, Καταγραφή Video]",[],"[Ταχύτητα, Σχέση ποιότητας τιμής]",Φωτογραφίες,"[408, 560, 4, 2224, 2, 2, 13, 9, 316, 515, 337...",119,Κινητά Τηλέφωνα Φωτογραφίες,"[4367, 3730, 644]"
37,42,Η οθόνη και η μπαταρία είναι τα δυνατά του σημ...,Κινητά Τηλέφωνα,"[Οθόνη, Μπαταρία]","[Φωτογραφίες, Καταγραφή Video]",[],Καταγραφή Video,"[8, 1315, 3, 8, 3897, 17, 14, 2866, 5, 744, 40...",31,Κινητά Τηλέφωνα Καταγραφή Video,"[4367, 3730, 3379, 1421]"
84,102,κατά τη γνώμη μου το Xiaomi Mi band 3 αξίζει τ...,Activity Trackers,"[Ακρίβεια μετρήσεων, Πλήθος λειτουργιών, Σχέση...",[],[],Σχέση ποιότητας τιμής,"[56, 29, 1074, 54, 4, 14352, 15009, 15422, 919...",83,Γενικά Σχέση ποιότητας τιμής,"[783, 225, 757, 2251]"
150,184,Στην πρώτη μισή ώρα είχα ήδη απογοητευτεί. Δύσ...,PS4 Games,[],[],"[Αντοχή στο χρόνο, Gameplay]",Αντοχή στο χρόνο,"[23, 145, 3585, 194, 515, 223, 52768, 2, 3143,...",47,PS4 Games Αντοχή στο χρόνο,"[15070, 7333, 3763, 20, 199]"


## Pad ids of each entry to match max length

In [10]:
padded_ids = keras.preprocessing.sequence.pad_sequences(
    reviews['ids'].to_numpy(), padding="post", value=0, maxlen=SP_model.input_shape[0][-1]
)

padded_aspect_ids = keras.preprocessing.sequence.pad_sequences(
    reviews['aspect_ids'].to_numpy(), padding="post", maxlen=SP_model.input_shape[1][-1], value=0
)

## Transform target labels to one-hot encoding

In [11]:
def encode_category(df):
    return [1 if df['aspect'] in df['meta_review_cons'] else 0,
            1 if df['aspect'] in df['meta_review_so-so'] else 0,
            1 if df['aspect'] in df['meta_review_pros'] else 0]

In [12]:
target = np.array([elem for elem in reviews.apply(encode_category, axis='columns')])

assert all(np.sum(target, axis=1) == 1)

## Evaluate model

In [13]:
# Predict and convert one-hot to integer {0: negative, 1: neutral, 2: positive}
y_pred = np.argmax(SP_model.predict([padded_ids, padded_aspect_ids]), axis=1)
y_test = np.argmax(target, axis=1)

print(pd.DataFrame(metrics.confusion_matrix(y_test, y_pred),
                   index=['Actual Negative', 'Actual Neutral', 'Actual Positive'],
                   columns=['Predicted Negative', 'Neutral', 'Positive']))
print('\n')
print(metrics.classification_report(y_test, y_pred,
                                    target_names=['Negative', 'Neutral', 'Positive']))

                 Predicted Negative  Neutral  Positive
Actual Negative                 108        3         6
Actual Neutral                   21       29        15
Actual Positive                  11        9       146


              precision    recall  f1-score   support

    Negative       0.77      0.92      0.84       117
     Neutral       0.71      0.45      0.55        65
    Positive       0.87      0.88      0.88       166

    accuracy                           0.81       348
   macro avg       0.78      0.75      0.75       348
weighted avg       0.81      0.81      0.80       348



In [14]:
# Find unique aspects
unique_aspects = reviews['aspect_prefixed'].unique()

for aspect in unique_aspects:
    # Find logical indices corresponding to that aspect
    idx = reviews['aspect_prefixed'] == aspect
    
    print(f'Confusion matrix for aspect "{aspect}"\n')
    print(pd.DataFrame(metrics.confusion_matrix(y_test[idx], y_pred[idx], labels=[0, 1, 2]),
                       index=['Actual Negative', 'Actual Neutral', 'Actual Positive'],
                       columns=['Predicted Negative', 'Neutral', 'Positive']))
    print('\n')
    print(metrics.classification_report(y_test[idx], y_pred[idx], labels=[0, 1, 2],
                                        target_names=['Negative', 'Neutral', 'Positive']))
    
    print('\n======================================================\n')

Confusion matrix for aspect "PS4 Games Γραφικά"

                 Predicted Negative  Neutral  Positive
Actual Negative                   0        0         2
Actual Neutral                    0        0         1
Actual Positive                   0        1        13


              precision    recall  f1-score   support

    Negative       0.00      0.00      0.00         2
     Neutral       0.00      0.00      0.00         1
    Positive       0.81      0.93      0.87        14

    accuracy                           0.76        17
   macro avg       0.27      0.31      0.29        17
weighted avg       0.67      0.76      0.71        17



Confusion matrix for aspect "PS4 Games Gameplay"

                 Predicted Negative  Neutral  Positive
Actual Negative                   9        0         0
Actual Neutral                    1        1         1
Actual Positive                   0        0         4


              precision    recall  f1-score   support

    Negative       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
f1 = []
examples = []

for aspect in unique_aspects:
    # Find logical indices corresponding to that aspect
    idx = (reviews['aspect_prefixed'] == aspect)
    # Count number of examples
    examples.append(idx.astype('int').sum())
    
    f1.append(metrics.f1_score(y_test[idx], y_pred[idx], labels=[0, 1, 2], average='weighted'))
    
f1 = np.array(f1)
examples = np.array(examples)
aspects = reviews['aspect_prefixed'].unique()
sort_idx = np.argsort(f1)[-1::-1]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [16]:
for a, f, e in zip(aspects[sort_idx], f1[sort_idx], examples[sort_idx]):
    print(f'{a} & {f*100:.2f} & {e} \\\\')

PS4 Games Αντοχή στο χρόνο & 100.00 & 16 \\
PS4 Games Single Player & 88.89 & 18 \\
Κινητά Τηλέφωνα Καταγραφή Video & 84.95 & 27 \\
PS4 Games Gameplay & 84.89 & 16 \\
Κινητά Τηλέφωνα Φωτογραφίες & 84.16 & 33 \\
Κινητά Τηλέφωνα Οθόνη & 83.78 & 23 \\
Γενικά Σχέση ποιότητας τιμής & 83.10 & 61 \\
Gaming Headsets Ποιότητα κατασκευής & 82.16 & 12 \\
Activity Trackers Αυτονομία μπαταρίας & 81.82 & 11 \\
Κινητά Τηλέφωνα Ταχύτητα & 77.20 & 37 \\
Κινητά Τηλέφωνα Μπαταρία & 76.97 & 29 \\
Gaming Headsets Ποιότητα ήχου & 76.59 & 16 \\
PS4 Games Γραφικά & 71.37 & 17 \\
Activity Trackers Πλήθος λειτουργιών & 66.67 & 8 \\
Activity Trackers Ακρίβεια μετρήσεων & 58.47 & 15 \\
Gaming Headsets Ποιότητα Μικροφώνου & 53.54 & 9 \\


## Query a sentence and aspect and demonstrate attention

In [17]:
from IPython.display import display, Markdown

sentiment_list = ['Αρνητικό', 'Ουδέτερο', 'Θετικό']

In [19]:
index = np.random.randint(0, len(padded_ids))
X_query = padded_ids[index:(index+1)]
s_query = padded_aspect_ids[index:(index+1)]

condition = np.all(padded_ids == X_query, axis=1)
assert np.any(condition)
X_text = reviews['text'].iloc[np.argmax(condition)]

aspect = reviews['aspect'].iloc[np.argmax(np.all(padded_aspect_ids == s_query, axis=1))]

# calculate scores and sentiment
scores = np.squeeze(SP_attention_model.predict([X_query, s_query])[1])
scores /= np.max(scores)
sentiment = sentiment_list[np.argmax(np.squeeze(SP_model.predict([X_query, s_query])))]

display(Markdown('**Aspect: ' + aspect.strip() + '**'))
display(Markdown('**Συναίσθημα: ' + sentiment + '**'))

myUtils.print_scores(glove_model, X_text, scores)

**Aspect: Μπαταρία**

**Συναίσθημα: Αρνητικό**

<span style="background-color: rgba(0,180,0,0.0050278594717383385)">Μικρο</span> <span style="background-color: rgba(0,180,0,0.00242828787304461)">σχετικά</span> <span style="background-color: rgba(0,180,0,0.00024754105834290385)">σε</span> <span style="background-color: rgba(0,180,0,0.0015338179655373096)">μέγεθος</span> <span style="background-color: rgba(0,180,0,0.06288985162973404)">,</span> <span style="background-color: rgba(0,180,0,0.023512667044997215)">αλλά</span> <span style="background-color: rgba(0,180,0,0.01591234840452671)">μετά</span> <span style="background-color: rgba(0,180,0,0.009519084356725216)">από</span> <span style="background-color: rgba(0,180,0,0.004523775074630976)">χρήση</span> <span style="background-color: rgba(0,180,0,0.011331030167639256)">ενός</span> <span style="background-color: rgba(0,180,0,0.05135344713926315)">χρόνου</span> <span style="background-color: rgba(0,180,0,0.09084995836019516)">η</span> <span style="background-color: rgba(0,180,0,1.0)">μπαταρία</span> <span style="background-color: rgba(0,180,0,0.6004175543785095)">έχει</span> <span style="background-color: rgba(0,180,0,0.4037981927394867)">διάρκεια</span> <span style="background-color: rgba(0,180,0,0.2818520963191986)">περίπου</span> <span style="background-color: rgba(0,180,0,0.3373975455760956)">ωρών</span> <span style="background-color: rgba(0,180,0,0.10905608534812927)">σε</span> <span style="background-color: rgba(0,180,0,0.10440421104431152)">αναμονή</span> <span style="background-color: rgba(0,180,0,0.05878259241580963)">.</span> <span style="background-color: rgba(0,180,0,0.027285199612379074)">Μπορεί</span> <span style="background-color: rgba(0,180,0,0.014259514398872852)">φυσικά</span> <span style="background-color: rgba(0,180,0,0.008573067374527454)">να</span> <span style="background-color: rgba(0,180,0,0.0016095772152766585)">φταίει</span> <span style="background-color: rgba(0,180,0,0.0008868633885867894)">το</span> <span style="background-color: rgba(0,180,0,0.00023179445997811854)">δικό</span> <span style="background-color: rgba(0,180,0,7.545357948401943e-05)">μου</span> <span style="background-color: rgba(0,180,0,3.9391361497109756e-05)">.</span> <span style="background-color: rgba(0,180,0,0.00010012862185249105)">Παντως</span> <span style="background-color: rgba(0,180,0,0.000173582520801574)">ο</span> <span style="background-color: rgba(0,180,0,0.00027332830359227955)">καθημερινός</span> <span style="background-color: rgba(0,180,0,9.896057599689811e-05)">μου</span> <span style="background-color: rgba(0,180,0,0.0002660528989508748)">χρόνος</span> <span style="background-color: rgba(0,180,0,0.0005695822765119374)">ομιλίας</span> <span style="background-color: rgba(0,180,0,0.00020717416191473603)">δεν</span> <span style="background-color: rgba(0,180,0,0.00033817527582868934)">ξεπερνάει</span> <span style="background-color: rgba(0,180,0,0.0002638018049765378)">τις</span> <span style="background-color: rgba(0,180,0,0.0005999627755954862)">ώρες</span> <span style="background-color: rgba(0,180,0,0.00046149390982463956)">κατά</span> <span style="background-color: rgba(0,180,0,0.0007501100772060454)">τη</span> <span style="background-color: rgba(0,180,0,0.0014343529473990202)">διάρκεια</span> <span style="background-color: rgba(0,180,0,0.0031602054368704557)">της</span> <span style="background-color: rgba(0,180,0,0.05366229638457298)">ημέρας</span> <span style="background-color: rgba(0,180,0,0.12744128704071045)">.</span> <span style="background-color: rgba(0,180,0,0.09136446565389633)">Το</span> <span style="background-color: rgba(0,180,0,0.14779651165008545)">σώζει</span> <span style="background-color: rgba(0,180,0,0.20303651690483093)">η</span> <span style="background-color: rgba(0,180,0,0.2948908805847168)">.</span> 