In [1]:
import numpy as np
import pickle
import import_ipynb
import os
import pandas as pd
from data_helper import data_loader
import tensorflow as tf
import time
tf.enable_eager_execution()

importing Jupyter notebook from data_helper.ipynb


In [2]:
class attention():
    
    def __init__(self,
                 file_name,
                 ratio = 0.9,
                 shuffle = False,
                 encoding = True,
                 unites = 128,
                 BATCH_SIZE = 64,
                 optimizer = tf.train.AdamOptimizer(),
                 loss_object = tf.keras.losses.BinaryCrossentropy()):
        self.__load_processed_data(file_name,split_ratio = ratio,shuffle=shuffle,encoding = encoding)
        self.__units = unites
        self.__BATCH_SIZE = BATCH_SIZE
        self.__encoder = Encoder(self.__vocab_size, self.__units, self.__BATCH_SIZE)
        #sample_hidden = encoder.initialize_hidden_state()
        #sample_output, sample_hidden = encoder(train_message[:64], sample_hidden)
        self.__optimizer = optimizer
        self.__loss_object = loss_object
        self.__decoder = Decoder(1, self.__units, self.__BATCH_SIZE)
        
        self.__checkpoint_dir = os.getcwd()+r'/checkpoint/training_checkpoints'+"//"+file_name
        self.__checkpoint_prefix = os.path.join(self.__checkpoint_dir, "ckpt")
        self.__checkpoint = tf.train.Checkpoint(optimizer=self.__optimizer,
                                                encoder=self.__encoder,
                                                decoder=self.__decoder)
    
    
    
    def __load_processed_data(self, file_name,split_ratio = 0.9, shuffle = False, encoding = True):
        current_path = os.getcwd()
        pickle_data_folder = current_path+ r"/data/processed"
        file_path = pickle_data_folder + "//" + file_name
        with open(file_path,'rb') as f:
            self.__data = pickle.load(f)
        self.__dic = self.__data.word_dic
        self.__vocab_size = len(self.__dic)
        self.__train_message, self.__train_tag, self.__test_message, self.__test_tag = self.__data.data_split()
        self.__train_message = tf.convert_to_tensor(self.__train_message,dtype=np.float32)
        self.__train_tag = self.__train_tag.reshape(-1,1)
        self.__train_tag = tf.convert_to_tensor(self.__train_tag,dtype=np.float32)

        self.__test_message = tf.convert_to_tensor(self.__test_message,dtype=np.float32)
        #self.__test_tag=self.__test_tag.reshape(-1,1)
        #self.__test_tag = tf.convert_to_tensor(self.__test_tag,dtype=np.float32)
        
    def __loss_function(self, real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = self.__loss_object(real, pred)

        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask

        return tf.reduce_mean(loss_)
    
    def __batch_generator(self,message, target,batch_size):
        times = len(message)//batch_size
        remain = batch_size - len(message)%batch_size
    
        for n in range(times):
            yield message[n*batch_size: (n+1)*batch_size], target[n*batch_size: (n+1)*batch_size]
    
    def __train_steps(self, inp, targ, enc_hidden):
        loss = 0

        with tf.GradientTape() as tape:
            enc_output, enc_hidden = self.__encoder(inp, enc_hidden)

            dec_hidden = enc_hidden

            dec_input = tf.expand_dims([1.] * self.__BATCH_SIZE, 1)
            for t in range(targ.shape[1]):
                predictions, dec_hidden, _ = self.__decoder(dec_input, dec_hidden, enc_output)
                loss += self.__loss_function(targ, predictions)
                dec_input = tf.expand_dims(targ[:, t], 1)

        batch_loss = (loss / int(targ.shape[1]))

        variables = self.__encoder.trainable_variables + self.__decoder.trainable_variables
        gradients = tape.gradient(loss, variables)

        self.__optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss
    
    def train(self, EPOCHS = 10):
        EPOCHS = EPOCHS
        for epoch in range(EPOCHS):
            start = time.time()
            enc_hidden = self.__encoder.initialize_hidden_state()
            total_loss = 0
    
            for (batch, (inp, targ)) in enumerate(self.__batch_generator(self.__train_message, self.__train_tag, self.__BATCH_SIZE)):
                batch_loss = self.__train_steps(inp, targ, enc_hidden)
                total_loss += batch_loss
                #'''
                if batch % 10 == 0:
                    print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                                 batch,
                                                                 batch_loss.numpy()))
                #'''
            if (epoch + 1) % 5 == 0:
                self.__checkpoint.save(file_prefix = self.__checkpoint_prefix)
            print('Epoch {} Loss {:.4f}'.format(epoch + 1,total_loss / self.__BATCH_SIZE))
            print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
            
        
    def __evaluate(self,sentence):
        inputs = tf.convert_to_tensor([sentence],dtype=np.float32)
        result = None
        hidden = [tf.zeros((1, self.__units))]
        enc_out, enc_hidden = self.__encoder(inputs, hidden)
        dec_hidden = enc_hidden
        dec_input = tf.expand_dims([1.], 0)
        predictions, dec_hidden, attention_weights = self.__decoder(dec_input,
                                                                    dec_hidden,
                                                                    enc_out)
        result = 1 if float(predictions)>0.5 else 0
        return result
    
    def predict(self):
        self.__checkpoint.restore(tf.train.latest_checkpoint(self.__checkpoint_dir))
        results = []
        for n in range(len(self.__test_message)):
            result= self.__evaluate(self.__test_message[n])
            results.append(result)
            '''
            if n%20 == 0:
                print("processed {} sentences".format(n))
            '''
                
        count_wrong = 0
        for p,r in zip(results,self.__test_tag):
            if p != r:
                count_wrong += 1
                
        return results,1-count_wrong/len(results)
                
        

In [3]:
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, enc_units, batch_sz):
        super(Encoder,self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.gru = tf.keras.layers.GRU(self.enc_units, 
                                       return_sequences = True,
                                       return_state = True,
                                       recurrent_initializer='glorot_uniform')
        
    def call(self,x,hidden):
        output, state = self.gru(x, initial_state = hidden)
        return output, state
    
    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))

In [4]:
class BahdanauAttention(tf.keras.Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        hidden_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(
            self.W1(values) + self.W2(hidden_with_time_axis)))

        attention_weights = tf.nn.softmax(score, axis=1)

        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [5]:
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size,dec_units, batch_sz):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size,activation='sigmoid')

        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output):
        context_vector, attention_weights = self.attention(hidden, enc_output)
        x = tf.expand_dims(x,1)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        output, state = self.gru(x)

        output = tf.reshape(output, (-1, output.shape[2]))

        x = self.fc(output)

        return x, state, attention_weights


In [6]:
data_path = os.getcwd()+r"/data/processed"
files_name = []
for root,subFolders,files in os.walk(data_path):
    files_name += files
#proccessed = ["100d_processed_data.pkl","50d_lower_processed_data.pkl","25d_lower_processed_data.pkl","25d_processed_data.pkl","200d_processed_data.pkl","200d_lower_processed_data.pkl"]
for file in files_name:
    #if file in proccessed:
    #    continue
    print("========={}=========".format(file))
    cls = attention(file)
    print("Training:")
    cls.train(EPOCHS=10)
    print("Evaluation:")
    result, acc =cls.predict()
    print("accuracy:{}".format(acc))
    del(cls)
    print()

Training:
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1 Batch 0 Loss 0.1302
Epoch 1 Batch 10 Loss 0.0377
Epoch 1 Batch 20 Loss 0.0186
Epoch 1 Batch 30 Loss 0.0332
Epoch 1 Batch 40 Loss 0.0152
Epoch 1 Batch 50 Loss 0.0012
Epoch 1 Batch 60 Loss 0.0342
Epoch 1 Batch 70 Loss 0.0182
Epoch 1 Loss 0.0424
Time taken for 1 epoch 88.74951124191284 sec

Epoch 2 Batch 0 Loss 0.0173
Epoch 2 Batch 10 Loss 0.0151
Epoch 2 Batch 20 Loss 0.0051
Epoch 2 Batch 30 Loss 0.0108
Epoch 2 Batch 40 Loss 0.0036
Epoch 2 Batch 50 Loss 0.0005
Epoch 2 Batch 60 Loss 0.0211
Epoch 2 Batch 70 Loss 0.0168
Epoch 2 Loss 0.0106
Time taken for 1 epoch 88.43227648735046 sec

Epoch 3 Batch 0 Loss 0.0095
Epoch 3 Batch 10 Loss 0.0101
Epoch 3 Batch 20 Loss 0.0046
Epoch 3 Batch 30 Loss 0.0039
Epoch 3 Batch 40 Loss 0.0012
Epoch 3 Batch 50 Loss 0.0002
Epoch 3 Batch 60 Loss 0.0059
Epoch 3 Batch 70 Loss 0.0172
Epoch 3 Loss 0.0063
Time taken for 1 epoch 86

Epoch 6 Batch 0 Loss 0.0150
Epoch 6 Batch 10 Loss 0.0081
Epoch 6 Batch 20 Loss 0.0031
Epoch 6 Batch 30 Loss 0.0094
Epoch 6 Batch 40 Loss 0.0013
Epoch 6 Batch 50 Loss 0.0009
Epoch 6 Batch 60 Loss 0.0127
Epoch 6 Batch 70 Loss 0.0060
Epoch 6 Loss 0.0098
Time taken for 1 epoch 64.62760305404663 sec

Epoch 7 Batch 0 Loss 0.0118
Epoch 7 Batch 10 Loss 0.0046
Epoch 7 Batch 20 Loss 0.0031
Epoch 7 Batch 30 Loss 0.0026
Epoch 7 Batch 40 Loss 0.0011
Epoch 7 Batch 50 Loss 0.0020
Epoch 7 Batch 60 Loss 0.0106
Epoch 7 Batch 70 Loss 0.0032
Epoch 7 Loss 0.0088
Time taken for 1 epoch 64.62476539611816 sec

Epoch 8 Batch 0 Loss 0.0122
Epoch 8 Batch 10 Loss 0.0037
Epoch 8 Batch 20 Loss 0.0011
Epoch 8 Batch 30 Loss 0.0017
Epoch 8 Batch 40 Loss 0.0019
Epoch 8 Batch 50 Loss 0.0009
Epoch 8 Batch 60 Loss 0.0075
Epoch 8 Batch 70 Loss 0.0062
Epoch 8 Loss 0.0070
Time taken for 1 epoch 65.78391361236572 sec

Epoch 9 Batch 0 Loss 0.0095
Epoch 9 Batch 10 Loss 0.0018
Epoch 9 Batch 20 Loss 0.0027
Epoch 9 Batch 30 Loss 0

Epoch 2 Batch 60 Loss 0.0345
Epoch 2 Batch 70 Loss 0.0146
Epoch 2 Loss 0.0170
Time taken for 1 epoch 75.92698049545288 sec

Epoch 3 Batch 0 Loss 0.0230
Epoch 3 Batch 10 Loss 0.0160
Epoch 3 Batch 20 Loss 0.0016
Epoch 3 Batch 30 Loss 0.0017
Epoch 3 Batch 40 Loss 0.0014
Epoch 3 Batch 50 Loss 0.0012
Epoch 3 Batch 60 Loss 0.0072
Epoch 3 Batch 70 Loss 0.0067
Epoch 3 Loss 0.0086
Time taken for 1 epoch 74.88453555107117 sec

Epoch 4 Batch 0 Loss 0.0051
Epoch 4 Batch 10 Loss 0.0169
Epoch 4 Batch 20 Loss 0.0034
Epoch 4 Batch 30 Loss 0.0033
Epoch 4 Batch 40 Loss 0.0011
Epoch 4 Batch 50 Loss 0.0004
Epoch 4 Batch 60 Loss 0.0085
Epoch 4 Batch 70 Loss 0.0048
Epoch 4 Loss 0.0061
Time taken for 1 epoch 75.92317509651184 sec

Epoch 5 Batch 0 Loss 0.0010
Epoch 5 Batch 10 Loss 0.0131
Epoch 5 Batch 20 Loss 0.0006
Epoch 5 Batch 30 Loss 0.0007
Epoch 5 Batch 40 Loss 0.0005
Epoch 5 Batch 50 Loss 0.0029
Epoch 5 Batch 60 Loss 0.0060
Epoch 5 Batch 70 Loss 0.0018
Epoch 5 Loss 0.0043
Time taken for 1 epoch 74.65785

Epoch 9 Batch 60 Loss 0.0010
Epoch 9 Batch 70 Loss 0.0011
Epoch 9 Loss 0.0016
Time taken for 1 epoch 66.70258903503418 sec

Epoch 10 Batch 0 Loss 0.0001
Epoch 10 Batch 10 Loss 0.0001
Epoch 10 Batch 20 Loss 0.0000
Epoch 10 Batch 30 Loss 0.0002
Epoch 10 Batch 40 Loss 0.0003
Epoch 10 Batch 50 Loss 0.0003
Epoch 10 Batch 60 Loss 0.0006
Epoch 10 Batch 70 Loss 0.0012
Epoch 10 Loss 0.0013
Time taken for 1 epoch 69.93519806861877 sec

Evaluation:
accuracy:0.982078853046595

Training:
Epoch 1 Batch 0 Loss 0.1302
Epoch 1 Batch 10 Loss 0.0309
Epoch 1 Batch 20 Loss 0.0079
Epoch 1 Batch 30 Loss 0.0101
Epoch 1 Batch 40 Loss 0.0042
Epoch 1 Batch 50 Loss 0.0011
Epoch 1 Batch 60 Loss 0.0341
Epoch 1 Batch 70 Loss 0.0244
Epoch 1 Loss 0.0254
Time taken for 1 epoch 74.5204119682312 sec

Epoch 2 Batch 0 Loss 0.0308
Epoch 2 Batch 10 Loss 0.0065
Epoch 2 Batch 20 Loss 0.0070
Epoch 2 Batch 30 Loss 0.0081
Epoch 2 Batch 40 Loss 0.0010
Epoch 2 Batch 50 Loss 0.0003
Epoch 2 Batch 60 Loss 0.0159
Epoch 2 Batch 70 Loss 