In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

from Load import json_to_df
import tensorflow_hub as hub
from keras import backend as K
import keras.layers as layers
from keras.models import Model, load_model
from keras.engine import Layer

Using TensorFlow backend.


In [2]:
from sklearn.metrics import accuracy_score

In [3]:
import re
def cleanText(text):
    """Normalize one raw review string before it is fed to the model.

    Steps, in this order: trim surrounding whitespace and turn newline /
    carriage-return characters into spaces, expand contractions, replace
    link-like tokens with the word "link", drop digit characters, delete a
    fixed set of punctuation characters, and finally lowercase the result.

    NOTE(review): lowercasing happens last, so the case-sensitive literal
    patterns in replace_contraction do not see capitalised forms. This is
    left as-is because previously saved weights were presumably produced
    with this exact preprocessing -- confirm before changing the order.
    """
    flattened = text.strip()
    for line_break in ("\n", "\r"):
        flattened = flattened.replace(line_break, " ")

    expanded = replace_contraction(flattened)
    without_links = replace_links(expanded, "link")
    without_digits = remove_numbers(without_links)

    punctuation = re.compile(r'[,!@#$%^&*)(|/><";:.?\'\\}{]')
    return punctuation.sub("", without_digits).lower()

In [4]:
# Ordered (compiled pattern, replacement) pairs used by replace_contraction.
# Order matters: the literal forms ("won't", "can't", ...) must be rewritten
# before the generic "<word>n't" / "<word>'ll" rules get a chance to match.
# Replacement templates are raw strings so the group reference is passed to
# the regex engine verbatim instead of relying on an invalid string escape.
_CONTRACTION_PATTERNS = tuple(
    (re.compile(regex), repl)
    for regex, repl in (
        (r'won\'t', 'will not'),
        (r'can\'t', 'can not'),
        (r'i\'m', 'i am'),
        (r'ain\'t', 'is not'),
        (r'(\w+)\'ll', r'\g<1> will'),
        (r'(\w+)n\'t', r'\g<1> not'),
        (r'(\w+)\'ve', r'\g<1> have'),
        (r'(\w+)\'s', r'\g<1> is'),
        (r'(\w+)\'re', r'\g<1> are'),
        (r'(\w+)\'d', r'\g<1> would'),
        (r'&', 'and'),
        (r'dammit', 'damn it'),
        (r'dont', 'do not'),
        (r'wont', 'will not'),
    )
)


def replace_contraction(text):
    """Expand English contractions and a few informal spellings in ``text``.

    Every rule in ``_CONTRACTION_PATTERNS`` is applied once, in order, to the
    whole string. Matching is case-sensitive and the bare-word rules
    ("dont", "wont", "dammit") carry no word boundaries, so they also match
    inside longer words.

    Args:
        text: the string to rewrite.

    Returns:
        The string with every rule applied.
    """
    for pattern, repl in _CONTRACTION_PATTERNS:
        text = pattern.sub(repl, text)
    return text
def replace_links(text, filler=' '):
    """Substitute every link-like token in ``text`` with ``filler``.

    A token counts as link-like when it matches the pattern below: an
    optional http/https scheme, a run of URL characters, a dot followed by
    2-6 letters, and an optional trailing run of URL characters. The
    result is stripped of leading and trailing whitespace.
    """
    link_pattern = re.compile(
        r'((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*'
    )
    substituted = link_pattern.sub(filler, text)
    return substituted.strip()
def remove_numbers(text):
    """Return ``text`` without the characters for which ``str.isdigit`` is true."""
    kept_chars = []
    for char in text:
        if char.isdigit():
            continue
        kept_chars.append(char)
    return ''.join(kept_chars)

In [5]:
class ElmoEmbeddingLayer(layers.Layer):
    """Keras layer wrapping the TF-Hub ELMo v2 module (tfhub.dev/google/elmo/2).

    ``call`` casts the input to ``tf.string``, squeezes axis 1 and returns the
    ``'default'`` entry of the module's ``'default'`` signature output.
    ``compute_output_shape`` reports that output as ``(batch, self.dimensions)``.
    The class defines no ``get_config``.
    """
    def __init__(self, **kwargs):
        # Second dimension reported by compute_output_shape.
        # NOTE(review): presumably the width of the module's 'default' output -- confirm against the TF-Hub docs.
        self.dimensions = 1024
        # Forwarded to hub.Module in build().
        # NOTE(review): assigned before the base-class __init__ runs; whether the base class
        # overwrites it depends on the installed Keras version -- confirm.
        self.trainable = True
        super(ElmoEmbeddingLayer, self).__init__(**kwargs)
        
    def build(self, input_shape):
        """Instantiate the hub module and register its trainable variables on this layer."""
        # The module is named "<layer name>_module" so its variables can be selected by scope below.
        self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable, name="{}_module".format(self.name))
        # Append the TF trainable variables whose scope matches the module name to this layer's weights.
        self.trainable_weights += tf.trainable_variables(scope="^{}_module/.*".format(self.name))
        super(ElmoEmbeddingLayer, self).build(input_shape)
        
    def call(self, x, mask=None):
        """Run the hub module on ``x`` and return its 'default' output; ``mask`` is not used."""
        # Axis 1 is squeezed away before the strings are handed to the module
        # (build_model feeds inputs declared with shape=(1,)).
        result = self.elmo(K.squeeze(K.cast(x, tf.string), axis=1),
                      as_dict=True,
                      signature='default',
                      )['default']
        return result
    
    def compute_mask(self, inputs, mask=None):
        """Return an element-wise mask that is true where ``inputs`` differs from '--PAD--'."""
        return K.not_equal(inputs, '--PAD--')
    
    def compute_output_shape(self, input_shape):
        """Return ``(input_shape[0], self.dimensions)``."""
        return (input_shape[0], self.dimensions)

In [6]:
def build_model():
    """Assemble the classifier: string input -> ELMo embedding -> Dense(256, relu) -> Dense(1, sigmoid).

    Returns:
        An uncompiled Keras ``Model`` taking one string per sample
        (input shape ``(1,)``) and producing a single sigmoid output.
    """
    text_in = layers.Input(shape=(1,), dtype="string")

    # Layers are created in the same order as the network is wired.
    tensor = ElmoEmbeddingLayer()(text_in)
    for units, activation in ((256, 'relu'), (1, 'sigmoid')):
        tensor = layers.Dense(units, activation=activation)(tensor)

    return Model(inputs=[text_in], outputs=tensor)

In [7]:
def test_model(model_path, cheatsheet_path):
    """Evaluate saved weights on a hard-case cheatsheet and print accuracies.

    Builds a fresh model with ``build_model``, loads the weights stored at
    ``model_path``, cleans the ``review_body`` column of the CSV at
    ``cheatsheet_path`` with ``cleanText``, thresholds the predictions at
    0.5, and prints the overall accuracy followed by one accuracy line per
    distinct ``casetype`` value (in order of first appearance in the file).

    Args:
        model_path: path to a weights file accepted by ``Model.load_weights``.
        cheatsheet_path: path to a CSV with the columns ``review_body``,
            ``sentiment`` ("positive"/"negative") and ``casetype``.

    Returns:
        None; results are printed.
    """
    model = build_model()
    model.load_weights(model_path)

    cheatsheet = pd.read_csv(cheatsheet_path)
    # Map the labels on the label column only: a frame-wide replace would also
    # turn a review whose whole text is "positive"/"negative" into an int,
    # which cleanText cannot process.
    cheatsheet["sentiment"] = cheatsheet["sentiment"].replace({"positive": 1, "negative": 0})
    cheatsheet["review_body"] = cheatsheet["review_body"].apply(cleanText)

    # Collapse whitespace runs and shape the texts as a column of strings,
    # matching the model's (1,)-shaped string input.
    hard_text = np.array(
        [' '.join(t.split()) for t in cheatsheet["review_body"]], dtype=object
    )[:, np.newaxis]

    results = model.predict(hard_text)

    # One 0/1 label per row: 1 when the predicted value is >= 0.5.
    cheatsheet["preds"] = (np.asarray(results).reshape(-1) >= 0.5).astype(int)

    # accuracy_score is documented as (y_true, y_pred); accuracy is symmetric,
    # so the reported numbers are unaffected by the argument order.
    print("Overall accuracy: ", accuracy_score(cheatsheet["sentiment"], cheatsheet["preds"]))
    # groupby(sort=False) keeps file order, so the report is reproducible
    # across kernel restarts (iterating a set of strings is not).
    for case, case_df in cheatsheet.groupby("casetype", sort=False):
        print(case, " accuracy: ", accuracy_score(case_df["sentiment"], case_df["preds"]))


# Music

In [8]:
# Forward slash keeps this relative path valid on Windows as well as Linux/macOS.
test_model("model_org_.04-0.93.hdf5", "Experiment results/hardcases_music_cheatsheet.csv")

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.




















Overall accuracy:  0.69
negation  accuracy:  0.1
temporal  accuracy:  0.65
fairness  accuracy:  0.95
regular  accuracy:  0.95
invariance  accuracy:  0.8


# Beauty

In [9]:
# Forward slash keeps this relative path valid on Windows as well as Linux/macOS.
test_model("model_beauty.04-0.91.hdf5", "Experiment results/hardcases_beauty_cheatsheet.csv")

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Overall accuracy:  0.73
negation  accuracy:  0.5
temporal  accuracy:  0.5
fairness  accuracy:  0.8
regular  accuracy:  0.95
invariance  accuracy:  0.9


# Sports

In [10]:
# Forward slash keeps this relative path valid on Windows as well as Linux/macOS.
test_model("model_sports.05-0.92.hdf5", "Experiment results/hardcases_sports_cheatsheet.csv")

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Overall accuracy:  0.72
negation  accuracy:  0.5
temporal  accuracy:  0.5
fairness  accuracy:  0.9
regular  accuracy:  0.8
invariance  accuracy:  0.9
