In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

import sys
sys.path.append("../ToolBox/")
import DataTools_ver_02 as DataTools
print(tf.__version__)

import fasttext

def plotModel(model, model_name="model"):
    model_name = model_name + ".png"
    return tf.keras.utils.plot_model(model, model_name, show_shapes=True)

2.0.0-alpha0


In [2]:
class BiLSTM(tf.keras.Model):
    def __init__(self, units, vocab_size, embedding_dim):
        super(BiLSTM, self).__init__()
        self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.lstm_layer = tf.keras.layers.LSTM(units, return_sequences=True)
        
    def call(self, inputs):
        emb = self.embedding_layer(inputs)
        h = tf.keras.layers.Bidirectional(self.lstm_layer, merge_mode='concat')(emb)
        
        return h
    
class Attention(tf.keras.Model):
    def __init__(self, units):
        super(Attention, self).__init__()
        self.units = units
        self.w = tf.keras.layers.Dense(units)
        self.v = tf.keras.layers.Dense(1)
        
    def call(self, bilstm_ops):
        logits = self.w(bilstm_ops)
        score = self.v(tf.nn.tanh(logits))
        attention_weights =  tf.nn.softmax(score, axis=1)
        context_vec = bilstm_ops * attention_weights
        context_vec = tf.reduce_sum(context_vec, axis=1)
        
        return context_vec, attention_weights
    
class AttentionClassification(tf.keras.Model):
    def __init__(self, num_class, units_, units, vocab_size, embedding_dim):
        super(AttentionClassification, self).__init__()
        self.units_ = units_
        self.units = units
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.w = tf.keras.layers.Dense(num_class)
        
    def call(self, input_x):
        bilstm = BiLSTM(units, self.vocab_size, self.embedding_dim)
        attention = Attention(self.units_)
        h = bilstm(test_input_x)
        context_vec, attention_weights = attention(h)
        logits = self.w(context_vec)
        
        return logits, attention_weights

In [4]:
units_ = 10
units = 64
vocab_size = 50
embedding_dim = 20
num_class = 2

test_input_x = np.array(range(10)).reshape(1, 10)
classifier = AttentionClassification(num_class, units_, units, vocab_size, embedding_dim)
logits, attention_weights = classifier(test_input_x)
print(logits.numpy().flatten())
print(attention_weights.numpy().flatten())

[0.00069278 0.00225408]
[0.10003316 0.09983216 0.09964409 0.09992294 0.09965151 0.1003185
 0.09943189 0.10005135 0.10024296 0.10087139]


In [31]:
inputs = tf.keras.Input((None,))
outputs = classifier(inputs)[0]
model = tf.keras.Model(inputs, outputs)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_object = tf.keras.losses.CategoricalCrossentropy()
model.compile(optimizer=optimizer, loss=loss_object)
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
attention_classification_7 ( ((1, 2), (1, 10, 1))      258       
Total params: 258
Trainable params: 258
Non-trainable params: 0
_________________________________________________________________
