# part1

In [None]:
!pip install numpy --upgrade
!pip install bert-embedding
!pip install mxnet-cu100
!pip install sentence-transformers

In [None]:
import re
import itertools
import mxnet as mx
import pandas as pd
import numpy as np
from bert_embedding import BertEmbedding
from sentence_transformers import SentenceTransformer
import tensorflow as tf
from keras import backend as K
import pickle
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
def mean(z):
    """Return the arithmetic mean of the numbers in ``z``.

    ``z`` must support both iteration and ``len()``. An empty ``z`` raises
    ``ZeroDivisionError``.
    """
    total = 0
    for value in z:
        total += value
    return total / len(z)
  

_BERT_EMBEDDER = None  # shared BertEmbedding instance, created on first use


def _get_bert_embedder():
    """Return the shared BertEmbedding model, constructing it only once."""
    global _BERT_EMBEDDER
    if _BERT_EMBEDDER is None:
        _BERT_EMBEDDER = BertEmbedding()
    return _BERT_EMBEDDER


def embeddToBERT(text):
    """Embed ``text`` as a single vector: the mean of all its BERT token vectors.

    The text is split into sentences on ``!``, ``?`` and ``.`` and empty
    fragments are dropped. The token embeddings returned by BertEmbedding for
    every sentence are pooled together, and each output value is the mean of
    one embedding dimension over all of those tokens.

    Args:
        text: The raw string to embed.

    Returns:
        A list with one averaged value per embedding dimension. The list is
        empty when no token vectors are produced, so callers that stack the
        results into an array should check for that case.
    """
    # Raw string: '\?' and '\.' are regex escapes, not valid Python string escapes.
    sentences = [fragment for fragment in re.split(r'!|\?|\.', text) if fragment]

    # 'avg' selects how BertEmbedding handles out-of-vocabulary tokens.
    # The embedder is reused across calls instead of being rebuilt per text.
    result = _get_bert_embedder()(sentences, 'avg')

    # Index 1 of each per-sentence result holds that sentence's token vectors.
    token_vectors = [vector for entry in result for vector in entry[1]]

    # zip(*...) regroups the vectors by dimension so each group can be averaged.
    return [mean(dimension) for dimension in zip(*token_vectors)]

def recall_m(y_true, y_pred):
    """Recall over the given tensors: true positives / (actual positives + epsilon).

    The product ``y_true * y_pred`` and ``y_true`` itself are each clipped to
    [0, 1] and rounded before being summed. ``K.epsilon()`` keeps the division
    defined when there are no positive labels.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: true positives / (predicted positives + epsilon).

    The product ``y_true * y_pred`` and ``y_pred`` itself are each clipped to
    [0, 1] and rounded before being summed. ``K.epsilon()`` keeps the division
    defined when nothing is predicted positive.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator avoids division by zero when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

In [None]:
# Load the training CSV, keep only the tweet text and its 'sarcastic' label,
# and drop rows whose tweet text is missing.
dataset_path = '/content/drive/MyDrive/isarcasm/isarcasm_datasets/train.En.csv'
dataset = (
    pd.read_csv(dataset_path)[["tweet", "sarcastic"]]
    .dropna(subset=["tweet"])
)

In [None]:
# Embed every tweet with embeddToBERT. Progress is printed as "i / total" and
# the trailing carriage return makes the next update overwrite the same line.
bert_word_training_features = []
tweet_count = len(dataset)
for position, tweet in enumerate(dataset['tweet']):
    print(position, "/", tweet_count, end='')
    bert_word_training_features.append(embeddToBERT(tweet))
    print('', end='\r')

In [None]:
# Persist the computed features. The context manager closes (and so flushes)
# the file even if pickling raises.
# NOTE(review): this writes test.pkl, while the loading cell reads
# bert_word_training_features.pkl -- confirm which file is intended.
with open("/content/drive/MyDrive/isarcasm/isarcasm_datasets/test.pkl", "wb") as f:
    pickle.dump(bert_word_training_features, f)

In [None]:
# Load the cached BERT features; the context manager closes the handle once
# the data has been read.
# NOTE: pickle.load can execute arbitrary code -- only load files you created.
with open("/content/drive/MyDrive/isarcasm/isarcasm_datasets/bert_word_training_features.pkl", "rb") as f:
    bert_word_training_features = pickle.load(f)

In [None]:
# Show only how many feature vectors were loaded; displaying the whole list
# would print every value of every vector.
len(bert_word_training_features)

In [None]:
# Convert the per-tweet feature vectors into a single NumPy array.
bert_word_training_features = np.array(bert_word_training_features)

# Last expression of the cell: displays the array's shape.
bert_word_training_features.shape

(3467, 768)

In [None]:
# Features: the BERT vectors as an array. Labels: the 'sarcastic' column.
# NOTE(review): X is reassigned to padded token ids in the tokenization cell,
# so whichever assignment ran last is what model_lstm.fit receives.
X = np.array(bert_word_training_features)
y = dataset['sarcastic']

In [None]:
# Raw tweet text and labels taken straight from the dataframe.
X_data = dataset.tweet
Y_data = dataset.sarcastic
vocab_size = 10000     # passed to Tokenizer as num_words
embedding_dim = 16     # not used in this cell
max_length = 150       # every sequence is padded/truncated to this length
trunc_type = 'post'    # longer sequences lose tokens from the end


# Fit the vocabulary on the tweets, then map each tweet to a list of token ids.
tokenizer = Tokenizer(num_words = vocab_size)
tokenizer.fit_on_texts(X_data)
sequences = tokenizer.texts_to_sequences(X_data)
# Padding side is left at pad_sequences' default; only truncation is configured.
padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)

# NOTE(review): this overwrites the X built from the BERT features earlier.
X = padded
Y = Y_data

In [None]:
# Size of the embedding table. It must be at least the Tokenizer's num_words
# (10000 in the tokenization cell): token ids beyond the table can make fit()
# fail with InvalidArgumentError.
vocab_size = 30000
embedding_dim = 64
input_length = 768  # not used by the layers below

# Embedding -> bidirectional LSTM -> small dense head with one sigmoid output.
model_lstm = tf.keras.Sequential([
    # Sized from vocab_size/embedding_dim so every token id has a row.
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(6, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_lstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])

model_lstm.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, None, 64)          64000     
                                                                 
 bidirectional_2 (Bidirectio  (None, 64)               24832     
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 6)                 390       
                                                                 
 dense_5 (Dense)             (None, 1)                 7         
                                                                 
Total params: 89,229
Trainable params: 89,229
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train on the current X / y using Keras' default fit settings.
model_lstm.fit(X,y)

InvalidArgumentError: ignored

# part2

In [None]:
!pip install bert-for-tf2
!pip install sentencepiece

In [None]:
try:
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers
import bert
import random
import math
import pandas as pd
import numpy as np
from keras import backend as K

In [None]:
def recall_m(y_true, y_pred):
    """Recall over the given tensors: true positives / (actual positives + epsilon).

    The product ``y_true * y_pred`` and ``y_true`` itself are each clipped to
    [0, 1] and rounded before being summed. ``K.epsilon()`` keeps the division
    defined when there are no positive labels.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: true positives / (predicted positives + epsilon).

    The product ``y_true * y_pred`` and ``y_pred`` itself are each clipped to
    [0, 1] and rounded before being summed. ``K.epsilon()`` keeps the division
    defined when nothing is predicted positive.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator avoids division by zero when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

In [None]:
# Training split: tweet text and its 'sarcastic' label; rows without text are dropped.
dataset_train_path = '/content/drive/MyDrive/isarcasm/isarcasm_datasets/train.En.csv'
dataset_train = (
    pd.read_csv(dataset_train_path)[["tweet", "sarcastic"]]
    .dropna(subset=["tweet"])
)

# Test split: same layout, but the text column is named 'text'.
dataset_test_path = '/content/drive/MyDrive/isarcasm/isarcasm_datasets/Test_Dataset.csv'
dataset_test = (
    pd.read_csv(dataset_test_path)[["text", "sarcastic"]]
    .dropna(subset=["text"])
)

In [None]:
# Per-class weights handed to fit(class_weight=...): label 1 is weighted 4x label 0.
class_weights = {1:4, 0:1}

In [None]:
# Build a BERT tokenizer from the vocabulary shipped with the TF Hub BERT model.
BertTokenizer = bert.bert_tokenization.FullTokenizer
# The hub layer is loaded frozen; in this cell it only supplies the vocab file
# and the lower-casing flag.
bert_layer = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
                            trainable=False)
vocabulary_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
to_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

In [None]:
def tokenize_tweet(tweet):
    """Tokenize ``tweet`` with the module-level ``tokenizer`` and return the token ids.

    No special tokens are added here; the ids are exactly those of the tokens
    the tokenizer produces for the text.
    """
    tokens = tokenizer.tokenize(tweet)
    return tokenizer.convert_tokens_to_ids(tokens)

In [None]:
# Token-id lists for every training tweet ('tweet' column).
tokenized_tweets_train = [tokenize_tweet(tweet) for tweet in dataset_train['tweet']]

# Token-id lists for every test tweet ('text' column).
tokenized_tweets_test = [tokenize_tweet(tweet) for tweet in dataset_test['text']]

In [None]:
# Build [token_ids, label, token_count] triples; the label is looked up by
# position, so the token list and the dataframe must be in the same order.
tweets_with_len_train = [[tweet, dataset_train['sarcastic'].iloc[i], len(tweet)] for i, tweet in enumerate(tokenized_tweets_train)]
# Shuffle with a fixed seed so the order is reproducible.
random.Random(42).shuffle(tweets_with_len_train)

# Same construction for the test split.
tweets_with_len_test = [[tweet, dataset_test['sarcastic'].iloc[i], len(tweet)] for i, tweet in enumerate(tokenized_tweets_test)]
random.Random(42).shuffle(tweets_with_len_test)

In [None]:
# Sort by token count (index 2) so neighbouring examples have similar lengths
# -- presumably to limit padding inside each batch.
tweets_with_len_train.sort(key=lambda x: x[2])
sorted_tweets_labels_train = [(tweet_lab[0], tweet_lab[1]) for tweet_lab in tweets_with_len_train] # remove tweet len
# Wrap the (token_ids, label) pairs as a tf.data.Dataset of int32 tensors.
processed_dataset_train = tf.data.Dataset.from_generator(lambda: sorted_tweets_labels_train, output_types=(tf.int32, tf.int32))

# Same steps for the test split.
tweets_with_len_test.sort(key=lambda x: x[2])
sorted_tweets_labels_test = [(tweet_lab[0], tweet_lab[1]) for tweet_lab in tweets_with_len_test] # remove tweet len
processed_dataset_test = tf.data.Dataset.from_generator(lambda: sorted_tweets_labels_test, output_types=(tf.int32, tf.int32))

In [None]:
#for train test split


# BATCH_SIZE = 32
# batched_dataset = processed_dataset.padded_batch(BATCH_SIZE, padded_shapes=((None, ), ()))

# TOTAL_BATCHES = math.ceil(len(sorted_tweets_labels) / BATCH_SIZE)
# TEST_BATCHES = TOTAL_BATCHES // 10
# batched_dataset.shuffle(TOTAL_BATCHES)
# test_data = batched_dataset.take(TEST_BATCHES)
# train_data = batched_dataset.skip(TEST_BATCHES)

In [None]:
# Number of examples per batch.
BATCH_SIZE = 32

# padded_shapes: token sequences (None,) are padded per batch; labels () are scalars.
train_data = processed_dataset_train.padded_batch(BATCH_SIZE, padded_shapes=((None, ), ()))

test_data = processed_dataset_test.padded_batch(BATCH_SIZE, padded_shapes=((None, ), ()))

# Bidirectional LSTM

In [None]:
# Three stacked bidirectional LSTMs; the first two return full sequences and
# are each followed by a per-timestep dense layer. Output is one sigmoid unit.
BDlstm = tf.keras.Sequential([
    # One embedding row per entry of the BERT tokenizer's vocabulary.
    tf.keras.layers.Embedding(len(tokenizer.vocab), 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(64, activation='relu')),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(64, activation='relu')),
    # The last recurrent layer returns only its final output.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Track accuracy and the custom f1_m metric alongside the loss.
BDlstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])

BDlstm.summary()

Model: "sequential_45"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_57 (Embedding)    (None, None, 64)          1953408   
                                                                 
 bidirectional_35 (Bidirecti  (None, None, 64)         24832     
 onal)                                                           
                                                                 
 time_distributed_49 (TimeDi  (None, None, 64)         4160      
 stributed)                                                      
                                                                 
 bidirectional_36 (Bidirecti  (None, None, 64)         24832     
 onal)                                                           
                                                                 
 time_distributed_50 (TimeDi  (None, None, 64)         4160      
 stributed)                                          

In [None]:
# Train for 10 epochs with class weighting; the test batches serve as validation data.
BDlstm.fit(train_data, epochs=10, validation_data = test_data, class_weight=class_weights)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb6a0226410>

In [None]:
# Evaluate on the test batches; the bare `result` displays the returned values.
result = BDlstm.evaluate(test_data)
result



[0.6929839849472046, 0.8571428656578064, 0.0]

# Simple LSTM

In [None]:
# Three stacked unidirectional LSTMs; the first two return full sequences and
# are each followed by a per-timestep dense layer. Output is one sigmoid unit.
lstm = tf.keras.Sequential([
    # One embedding row per entry of the BERT tokenizer's vocabulary.
    tf.keras.layers.Embedding(len(tokenizer.vocab), 32),
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(32, activation='relu')),
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(32, activation='relu')),
    # The last recurrent layer returns only its final output.
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Track accuracy and the custom f1_m metric alongside the loss.
lstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])

lstm.summary()

Model: "sequential_42"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_54 (Embedding)    (None, None, 32)          976704    
                                                                 
 lstm_88 (LSTM)              (None, None, 32)          8320      
                                                                 
 time_distributed_43 (TimeDi  (None, None, 32)         1056      
 stributed)                                                      
                                                                 
 lstm_89 (LSTM)              (None, None, 32)          8320      
                                                                 
 time_distributed_44 (TimeDi  (None, None, 32)         1056      
 stributed)                                                      
                                                                 
 lstm_90 (LSTM)              (None, 32)              

In [None]:
# Train for 10 epochs; the test batches serve as validation data. Uses the
# shared class_weights dict so every model is trained with the same weighting.
lstm.fit(train_data, epochs=10, validation_data=test_data, class_weight=class_weights)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb6942a9e10>

# CNN

In [None]:
class CNN_MODEL(tf.keras.Model):
    """Text classifier: embedding -> three parallel Conv1D branches -> dense head.

    The branches use kernel sizes 2, 3 and 4 over the embedded sequence. Each
    branch is globally max-pooled over time, the pooled vectors are
    concatenated, and a dense + dropout stack feeds a single sigmoid unit.
    """

    def __init__(self,
                 vocabulary_size,
                 embedding_dimensions=128,
                 cnn_filters=50,
                 dnn_units=512,
                 dropout_rate=0.1,
                 training=False,
                 name="cnn_model"):
        """Create the layers.

        Args:
            vocabulary_size: Number of rows in the embedding table.
            embedding_dimensions: Width of each embedding vector.
            cnn_filters: Number of filters in each convolution branch.
            dnn_units: Width of the hidden dense layer.
            dropout_rate: Dropout rate applied after the hidden dense layer.
            training: Unused; kept so existing callers keep working.
            name: Keras model name.
        """
        super(CNN_MODEL, self).__init__(name=name)

        def conv_branch(kernel_size):
            # All branches share every setting except the kernel width.
            return layers.Conv1D(filters=cnn_filters,
                                 kernel_size=kernel_size,
                                 padding="valid",
                                 activation="relu")

        self.embedding = layers.Embedding(vocabulary_size, embedding_dimensions)

        self.cnn_layer1 = conv_branch(2)
        self.cnn_layer2 = conv_branch(3)
        self.cnn_layer3 = conv_branch(4)
        # GlobalMaxPool1D has no weights, so one instance serves all branches.
        self.pool = layers.GlobalMaxPool1D()

        self.dense_1 = layers.Dense(units=dnn_units, activation="relu")
        self.dropout = layers.Dropout(rate=dropout_rate)

        self.last_dense = layers.Dense(units=1, activation="sigmoid")

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of token-id sequences fed to the embedding layer.
            training: Forwarded to the dropout layer. Defaults to ``None`` so
                the model can also be called without the argument.

        Returns:
            The sigmoid output of the final dense layer, one value per example.
        """
        embedded = self.embedding(inputs)
        pooled = [
            self.pool(conv(embedded))
            for conv in (self.cnn_layer1, self.cnn_layer2, self.cnn_layer3)
        ]

        merged = tf.concat(pooled, axis=-1)  # (batch_size, 3 * cnn_filters)
        hidden = self.dense_1(merged)
        hidden = self.dropout(hidden, training=training)
        return self.last_dense(hidden)

In [None]:
# Build the CNN with one embedding row per BERT vocabulary entry, then compile
# it with the same loss, optimizer and metrics as the other models.
cnn = CNN_MODEL(len(tokenizer.vocab))
cnn.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])

# Train for 10 epochs with class weighting; the test batches serve as validation data.
cnn.fit(train_data, epochs=10, validation_data = test_data, class_weight=class_weights)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb68d28a090>

In [None]:
# Evaluate on the test batches; the bare `result` displays the returned values.
result = cnn.evaluate(test_data)
result



[1.378892421722412, 0.5921428799629211, 0.17441131174564362]

# LSTM + CNN

In [None]:
class LSTM_CNN_MODEL(tf.keras.Model):
    """Text classifier: embedding -> stacked LSTMs -> parallel Conv1D branches -> dense head.

    Three sequence-returning LSTMs (with per-timestep dense layers after the
    first two) feed three Conv1D branches with kernel sizes 2, 3 and 4. Each
    branch is globally max-pooled over time, the pooled vectors are
    concatenated, and a dense + dropout stack feeds a single sigmoid unit.
    """

    def __init__(self,
                 vocabulary_size,
                 embedding_dimensions=32,
                 cnn_filters=50,
                 dnn_units=512,
                 dropout_rate=0.1,
                 training=False,
                 name="lstm_cnn_model"):
        """Create the layers.

        Args:
            vocabulary_size: Number of rows in the embedding table.
            embedding_dimensions: Width of each embedding vector.
            cnn_filters: Number of filters in each convolution branch.
            dnn_units: Width of the hidden dense layer.
            dropout_rate: Dropout rate applied after the hidden dense layer.
            training: Unused; kept so existing callers keep working.
            name: Keras model name.
        """
        super(LSTM_CNN_MODEL, self).__init__(name=name)

        def conv_branch(kernel_size):
            # All branches share every setting except the kernel width.
            return layers.Conv1D(filters=cnn_filters,
                                 kernel_size=kernel_size,
                                 padding="valid",
                                 activation="relu")

        self.embedding = layers.Embedding(vocabulary_size, embedding_dimensions)

        # Every LSTM returns the full sequence because the convolutions below
        # operate along the time axis.
        self.lstm1 = layers.LSTM(32, return_sequences=True)
        self.lstm2 = layers.LSTM(32, return_sequences=True)
        self.lstm3 = layers.LSTM(32, return_sequences=True)

        self.time1 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(32, activation='relu'))
        self.time2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(32, activation='relu'))

        self.cnn_layer1 = conv_branch(2)
        self.cnn_layer2 = conv_branch(3)
        self.cnn_layer3 = conv_branch(4)
        # GlobalMaxPool1D has no weights, so one instance serves all branches.
        self.pool = layers.GlobalMaxPool1D()

        self.dense_1 = layers.Dense(units=dnn_units, activation="relu")
        self.dropout = layers.Dropout(rate=dropout_rate)

        self.last_dense = layers.Dense(units=1, activation="sigmoid")

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of token-id sequences fed to the embedding layer.
            training: Forwarded to the dropout layer. Defaults to ``None`` so
                the model can also be called without the argument.

        Returns:
            The sigmoid output of the final dense layer, one value per example.
        """
        sequence = self.embedding(inputs)
        sequence = self.time1(self.lstm1(sequence))
        sequence = self.time2(self.lstm2(sequence))
        sequence = self.lstm3(sequence)

        pooled = [
            self.pool(conv(sequence))
            for conv in (self.cnn_layer1, self.cnn_layer2, self.cnn_layer3)
        ]

        merged = tf.concat(pooled, axis=-1)  # (batch_size, 3 * cnn_filters)
        hidden = self.dense_1(merged)
        hidden = self.dropout(hidden, training=training)
        return self.last_dense(hidden)

In [None]:
# Build the LSTM+CNN model with one embedding row per BERT vocabulary entry,
# then compile it with the same loss, optimizer and metrics as the other models.
lstm_cnn = LSTM_CNN_MODEL(len(tokenizer.vocab))
lstm_cnn.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])

# Train for 10 epochs with class weighting; the test batches serve as validation data.
lstm_cnn.fit(train_data, epochs=10, validation_data = test_data, class_weight=class_weights)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb6a74cead0>

In [None]:
# Evaluate on the test batches and display the returned values, matching the
# other evaluation cells.
result = lstm_cnn.evaluate(test_data)
result



# LSTM + ATTENTION

In [None]:
import os

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Lambda, Dot, Activation, Concatenate, Layer

# KERAS_ATTENTION_DEBUG: when set to 1, switches to debug mode.
# In debug mode the Attention class is no longer a Keras layer (it subclasses
# plain `object`), which in practice gives access to the internal values
# of each tensor. Without debug mode, Keras treats the object
# as a layer and only the final output is available.
# The variable is read once, here; when it is unset the flag is 0.
debug_flag = int(os.environ.get('KERAS_ATTENTION_DEBUG', 0))


class Attention(object if debug_flag else Layer):
    """Many-to-one attention over a sequence of hidden states.

    Subclasses the Keras ``Layer`` normally, or plain ``object`` when
    ``debug_flag`` is set. Takes a 3D input and produces a 2D output with
    ``units`` features (see ``call``).
    """

    def __init__(self, units=128, **kwargs):
        """Store the output width; ``kwargs`` are forwarded to the base class."""
        super(Attention, self).__init__(**kwargs)
        self.units = units

    # noinspection PyAttributeOutsideInit
    def build(self, input_shape):
        """Create the sub-layers, sized from the last dimension of ``input_shape``."""
        input_dim = int(input_shape[-1])
        with K.name_scope(self.name if not debug_flag else 'attention'):
            # Bias-free projection of every hidden state (the score weight matrix).
            self.attention_score_vec = Dense(input_dim, use_bias=False, name='attention_score_vec')
            # Selects the hidden state at the final time step.
            # NOTE(review): index -1 is used regardless of padding -- confirm
            # that end-padded batches are acceptable or masked upstream.
            self.h_t = Lambda(lambda x: x[:, -1, :], output_shape=(input_dim,), name='last_hidden_state')
            self.attention_score = Dot(axes=[1, 2], name='attention_score')
            self.attention_weight = Activation('softmax', name='attention_weight')
            self.context_vector = Dot(axes=[1, 1], name='context_vector')
            self.attention_output = Concatenate(name='attention_output')
            # Final projection to `units` features with tanh activation.
            self.attention_vector = Dense(self.units, use_bias=False, activation='tanh', name='attention_vector')
        if not debug_flag:
            # debug: the call to build() is done in call().
            super(Attention, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        """Return ``(batch dimension, units)``."""
        return input_shape[0], self.units

    def __call__(self, inputs, training=None, **kwargs):
        """Dispatch to ``call`` directly in debug mode, otherwise through Keras."""
        if debug_flag:
            return self.call(inputs, training, **kwargs)
        else:
            return super(Attention, self).__call__(inputs, training, **kwargs)

    # noinspection PyUnusedLocal
    def call(self, inputs, training=None, **kwargs):
        """
        Many-to-one attention mechanism for Keras.
        @param inputs: 3D tensor with shape (batch_size, time_steps, input_dim).
        @param training: not used in this layer.
        @return: 2D tensor with shape (batch_size, units)
        @author: felixhao28, philipperemy.
        """
        if debug_flag:
            # In debug mode the sub-layers are (re)created on every call.
            self.build(inputs.shape)
        # Inside dense layer
        #              hidden_states            dot               W            =>           score_first_part
        # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
        # W is the trainable weight matrix of attention Luong's multiplicative style score
        score_first_part = self.attention_score_vec(inputs)
        #            score_first_part           dot        last_hidden_state     => attention_weights
        # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
        h_t = self.h_t(inputs)
        score = self.attention_score([h_t, score_first_part])
        # Softmax turns the per-time-step scores into attention weights.
        attention_weights = self.attention_weight(score)
        # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
        context_vector = self.context_vector([inputs, attention_weights])
        # Concatenate the context vector with the last hidden state, then project.
        pre_activation = self.attention_output([context_vector, h_t])
        attention_vector = self.attention_vector(pre_activation)
        return attention_vector

    def get_config(self):
        """
        Returns the config of the layer. This is used for saving and loading from a model.
        :return: python dictionary with specs to rebuild layer
        """
        config = super(Attention, self).get_config()
        config.update({'units': self.units})
        return config

In [None]:
# Embedding -> sequence-returning LSTM -> Attention -> dropout -> one sigmoid unit.
att_lstm = tf.keras.Sequential([
    # One embedding row per entry of the BERT tokenizer's vocabulary.
    tf.keras.layers.Embedding(len(tokenizer.vocab), 32),
    # Attention needs the full sequence of hidden states, hence return_sequences=True.
    tf.keras.layers.LSTM(32, return_sequences=True),
    Attention(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
# Track accuracy and the custom f1_m metric alongside the loss.
att_lstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])

att_lstm.summary()

Model: "sequential_46"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_60 (Embedding)    (None, None, 32)          976704    
                                                                 
 lstm_103 (LSTM)             (None, None, 32)          8320      
                                                                 
 attention_14 (Attention)    (None, 128)               9216      
                                                                 
 dropout_14 (Dropout)        (None, 128)               0         
                                                                 
 dense_154 (Dense)           (None, 1)                 129       
                                                                 
Total params: 994,369
Trainable params: 994,369
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train for 10 epochs with class weighting; the test batches serve as validation data.
att_lstm.fit(train_data, epochs=10, validation_data = test_data, class_weight=class_weights)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb68a006910>

In [None]:
# Evaluate on the test batches and display the returned values, matching the
# other evaluation cells.
result = att_lstm.evaluate(test_data)
result



# Bidirectional LSTM + Attention

In [None]:
# Embedding -> sequence-returning bidirectional LSTM -> Attention -> dropout
# -> one sigmoid unit.
att_BD_lstm = tf.keras.Sequential([
    # One embedding row per entry of the BERT tokenizer's vocabulary.
    tf.keras.layers.Embedding(len(tokenizer.vocab), 64),
    # Attention needs the full sequence of hidden states, hence return_sequences=True.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    Attention(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
# Track accuracy and the custom f1_m metric alongside the loss.
att_BD_lstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])

att_BD_lstm.summary()

Model: "sequential_47"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_61 (Embedding)    (None, None, 64)          1953408   
                                                                 
 bidirectional_38 (Bidirecti  (None, None, 64)         24832     
 onal)                                                           
                                                                 
 attention_15 (Attention)    (None, 128)               20480     
                                                                 
 dropout_15 (Dropout)        (None, 128)               0         
                                                                 
 dense_155 (Dense)           (None, 1)                 129       
                                                                 
Total params: 1,998,849
Trainable params: 1,998,849
Non-trainable params: 0
___________________________________________

In [None]:
# Train for 10 epochs with class weighting; the test batches serve as validation data.
att_BD_lstm.fit(train_data, epochs=10, validation_data = test_data, class_weight=class_weights)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb68c167110>

In [None]:
# Evaluate on the test batches and display the returned values, matching the
# other evaluation cells.
result = att_BD_lstm.evaluate(test_data)
result

