In [3]:
#!/usr/bin/env python
# coding: utf-8
from collections import defaultdict

from tensorflow.keras.models import load_model
from lstm_network import LSTM_network
import tensorflow as tf
import numpy as np
from IPython.display import display, HTML
from heatmap import html_heatmap

# Load the trained Keras model and the word-vector dictionary.
# NOTE(review): an earlier version of this comment described a five-class Stanford movie-review
# model, but both variants below have one (sigmoid) or two (softmax) outputs -- confirm the dataset.
if True:
    # model trained with a sigmoid output and binary_crossentropy
    keras_model = load_model('model/mymodel.hdf5')
    n_classes = 1
else:
    # model trained with a softmax output and sparse_categorical_crossentropy
    keras_model = load_model('tmp/trainmodel.model')
    n_classes = 2

# dimensions of the lstm-lrp model
n_hidden = 60
embedding_dim = 60

# the max len of sentence
n_words_len = 40

# Word vectors: every non-empty line of the file is "<word> <v1> ... <v_embedding_dim>".
# A defaultdict without a default factory behaves like a plain dict, so unknown words
# still raise KeyError on lookup.
w2v = defaultdict()
with open("model/myvec.vector", mode="r", encoding="utf-8") as f:
    for line in f:
        fields = line.split()
        if not fields:
            # tolerate blank lines instead of failing with IndexError
            continue
        w2v[fields[0]] = [float(value) for value in fields[1:]]
        assert len(w2v[fields[0]]) == embedding_dim

# create the lstm-lrp model
weights = keras_model.get_weights()
# If this prints 7, the final dense layer probably has no bias; in that case you may need to
# execute "weights.append(np.zeros((n_classes,)))" before building the LRP model.
print(len(weights))

# Only needed when the keras model has no bias in the final dense layer: add a bias of zero.
# weights.append(np.zeros((n_classes,)))

lrp_model = LSTM_network(n_hidden, embedding_dim, n_classes, weights=weights)

def wordsToNum(words):
    """
    Look up the embedding of every word and zero-pad the result to n_words_len rows.

    :param words: iterable of tokens; each one must be a key of w2v (unknown tokens raise KeyError)
    :return: list of embedding vectors. Inputs shorter than n_words_len are padded with
             zero vectors of length embedding_dim; longer inputs are returned untruncated.
    """
    vectors = [w2v[token] for token in words]
    # range() over a non-positive count is empty, so only short inputs get padded
    missing = n_words_len - len(vectors)
    vectors.extend([0] * embedding_dim for _ in range(missing))
    return vectors

def readOneLine(line):
    """
    Convert one sentence into a batch of size one for the model.

    :param line: either a list of tokens or a string, which is split on whitespace
    :return: numpy array holding the single padded embedding sequence built by wordsToNum
    """
    # anything that is not exactly a list is treated as text and tokenised
    words = line if type(line) is list else line.split()
    return np.array([wordsToNum(words)])

def get_result(tokens, vecs, show_heatmap=True):
    """
    Print the predicted class of one sentence together with its LRP word relevances.

    :param tokens: the words of the sentence in order; used to label the relevance values
    :param vecs: model input for the sentence as produced by readOneLine (an array with a
                 leading batch axis of size one)
    :param show_heatmap: when True (the default, which matches the previously hard-coded
                         behaviour) the relevances are also rendered as an HTML heatmap
    :return: None; everything is printed or displayed
    """
    y_keras = keras_model.predict(vecs)
    if n_classes == 1:
        # single output between 0 and 1: threshold at 0.5
        print("the class of this sentence is %d" % (1 if y_keras[0][0] > 0.5 else 0))
    elif n_classes == 2:
        # two outputs: pick the larger one
        print("the class of this sentence is %d" % (0 if y_keras[0][0] > y_keras[0][1] else 1))

    # explain the classification
    eps = 1e-3
    bias_factor = 0.0
    # No y is passed, so (per the original notebook notes) the relevances are calculated for the
    # predicted class of the sample. If you are interested in the relevances towards the 1st
    # class instead, you could pass y = np.array([1]).
    explanation, Rest = lrp_model.lrp(vecs, eps=eps, bias_factor=bias_factor)
    print(explanation.shape)

    # LRP assigns each dimension in the embedding vector a relevance value. To get relevances
    # for each word we sum up these values over the embedding axis.
    word_relevances = tf.reduce_sum(explanation, axis=2)
    # zip stops at the shorter sequence, so padding positions beyond the tokens are not printed
    for word, relevance in zip(tokens, word_relevances[0]):
        print('{0:>13}:   {1:8.2f}'.format(word, relevance))

    if show_heatmap:
        display(HTML(html_heatmap(tokens, word_relevances[0][:len(tokens)])))

    # to check the correctness of the lrp pass we check if all relevance was preserved by using
    # a bias factor of 1.0
    print(
        "to check the correctness of the lrp pass we check if all relevance was preserved by using a bias factor of 1.0")
    eps = 0.0
    bias_factor = 1.0
    explanation, Rest = lrp_model.lrp(vecs, eps=eps, bias_factor=bias_factor)
    y_lrpnet, _, _ = lrp_model.full_pass(vecs)
    y_lrpnet = y_lrpnet.numpy()
    # the largest network output should equal the total relevance (inputs plus remainder)
    check = np.allclose(np.max(y_lrpnet), np.sum(explanation) + np.sum(Rest))
    print('LRP pass is {}.'.format('correct' if check else 'wrong'))

    # Tip: if all your input sequences have the same length you can process them batch-wise
    # efficiently, e.g. to explain 100 instances of length 10 at once:
    #   some_random_data = tf.constant(np.random.randn(100, 10, embedding_dim))
    #   relevances, _ = lrp_model.lrp(some_random_data)
    #   print(relevances.shape)



8


In [5]:
# Explain the first non-empty lines of the test file, one sentence per line.
with open("train_dt/test1.txt", mode="r", encoding="utf-8") as fread:
    fcont = fread.readlines()

i = 0  # number of non-empty lines processed so far
for line in fcont:
    line = line.strip()
    if line != "":
        i += 1
        print(i)
        tokens = line.split()
        if len(tokens) > n_words_len:
            # The input is longer than n_words_len, so explain it in consecutive chunks.
            # Stepping by n_words_len never yields an empty trailing chunk, and the chunk
            # offset uses its own name so the line counter `i` is not overwritten.
            for start in range(0, len(tokens), n_words_len):
                onceTokens = tokens[start:start + n_words_len]
                vecs = readOneLine(onceTokens)
                get_result(onceTokens, vecs)
        else:
            vecs = readOneLine(tokens)
            get_result(tokens, vecs)

        # stop after six non-empty lines
        if i > 5:
            break

1
the class of this sentence is 1
(1, 40, 60)
            i:      -0.98
       really:      -0.09
      enjoyed:      -1.58
         this:      -0.13
          one:      -1.81
            .:       0.63
          the:      -0.49
      premise:      -0.01
       behind:       0.81
           it:       1.88
          was:      -1.18
         well:       0.69
      thought:       0.31
          out:      -1.00
            .:       2.48
            i:       1.91
         look:      -0.56
      forward:      -1.75
           to:       2.29
          her:      -0.22
    finishing:       0.02
         this:       1.00
       series:      -0.90
            .:       2.00


to check the correctness of the lrp pass we check if all relevance was preserved by using a bias factor of 1.0
LRP pass is correct.
2
the class of this sentence is 1
(1, 40, 60)
            i:      -0.51
        found:      -0.56
         this:      -0.72
           to:      -0.49
           be:      -0.27
            a:      -0.53
         very:      -1.02
         good:      -0.75
         book:      -0.74
            .:      -0.54
        there:       0.86
          was:      -1.11
   excitement:       0.57
          and:       0.01
      romance:       1.31
          and:       0.42
         even:       1.35
       family:       1.83
       living:       2.17
            a:       2.32
       fairly:       0.63
    christian:       2.28
         life:       3.14
            .:       3.64


to check the correctness of the lrp pass we check if all relevance was preserved by using a bias factor of 1.0
LRP pass is correct.
3
the class of this sentence is 1
(1, 40, 60)
            i:      -0.18
       really:      -0.60
        loved:      -0.37
         this:      -0.30
         book:      -0.47
            .:      -0.52
           it:       0.10
          was:      -1.21
            a:       0.38
         true:       0.39
  page-turner:       0.22
            .:       0.88
            a:       1.76
        great:      -0.27
         read:      -0.04
          for:       1.18
          the:       0.75
     hopeless:       0.14
     romantic:       0.59
      wanting:      -0.60
            a:       1.92
        happy:       0.09
       ending:       0.07
            .:       1.10


to check the correctness of the lrp pass we check if all relevance was preserved by using a bias factor of 1.0
LRP pass is correct.
4
the class of this sentence is 1
(1, 40, 60)
           my:       0.88
      husband:       0.45
            ,:       1.29
          who:       0.23
 particularly:      -0.27
       enjoys:      -2.23
          dog:       0.80
      stories:       0.15
          and:       0.89
        could:      -0.42
          n't:       0.59
          put:      -1.78
          the:      -0.11
         book:       0.16
         down:      -0.68
        until:      -1.02
           it:       1.86
          was:      -0.26
     finished:      -0.14
            ,:       2.00
          has:      -0.69
        rated:       1.08
         this:       1.61
        story:       0.40
        three:       1.38
        stars:       2.20
            .:       2.05


to check the correctness of the lrp pass we check if all relevance was preserved by using a bias factor of 1.0
LRP pass is correct.
5
the class of this sentence is 1
(1, 40, 60)
         this:      -0.37
       writer:      -0.39
           is:      -0.55
        quite:      -0.54
   insightful:      -0.23
          and:       0.74
            i:      -0.36
         know:       0.25
         alot:      -0.10
           of:      -0.16
      parents:       0.85
          who:       0.84
         read:      -0.98
          and:       0.69
       really:       0.30
          got:       0.97
         what:      -1.05
         this:       1.42
       writer:       0.33
          was:      -0.02
       trying:       0.79
           to:       1.64
          get:       1.63
       across:       0.26
           to:       2.38
          her:       0.80
      readers:      -0.29
            .:       3.27


to check the correctness of the lrp pass we check if all relevance was preserved by using a bias factor of 1.0
LRP pass is correct.
6
the class of this sentence is 1
(1, 40, 60)
            i:      -0.92
         will:      -1.22
          not:       0.59
     hesitate:       0.21
           to:       0.09
         pick:       0.32
           up:      -0.78
     anything:       1.31
      written:      -3.59
           by:      -0.97
           ms:       0.13
        flynn:       0.26
            .:       0.29
          boy:       1.72
            ,:       1.98
          can:      -0.05
          she:       2.78
        write:       0.10
            !:      -0.99
          and:       1.95
         what:      -1.09
       twists:       2.46
            &:       2.55
        turns:       3.13
            !:      -0.75


to check the correctness of the lrp pass we check if all relevance was preserved by using a bias factor of 1.0
LRP pass is correct.
