## load library

In [1]:
import tensorflow as tf
import numpy as np
import random
import numpy.ma as ma
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
tf.reset_default_graph()

## read data

In [2]:
train_data = open("dataset/atis-2.train.w-intent.iob", "r").readlines()
test_data = open("dataset/atis-2.dev.w-intent.iob", "r").readlines()

In [3]:
train_data[0]

'BOS i want to fly from baltimore to dallas round trip EOS\tO O O O O O B-fromloc.city_name O B-toloc.city_name B-round_trip I-round_trip atis_flight\n'

## preprocessing

### helper function

In [4]:
def data_pipeline(datas, max_len = 50):
    
    # split to word_sequence, slot_sequence, intent
    datas = [data.strip() for data in datas]
    datas = [[data.split('\t')[0].split()[1:-1], data.split('\t')[1].split()[:-1], data.split()[-1]] for data in datas]
    seq_words, seq_slots, intents = list(zip(*datas))
    
    # add special tokens
    seq_new_words, seq_new_slots = [],[]
    for num in range(len(seq_words)):
        temp = seq_words[num]
        
        if len(temp)<max_len:
            temp.append('<EOS>')
            while len(temp)<max_len:
                temp.append('<PAD>')
        else:
            temp = temp[:max_len]
            temp[-1] = '<EOS>'
        seq_new_words.append(temp)
        
        temp = seq_slots[num]
        if len(temp)<max_len:
            while len(temp)<max_len:
                temp.append('<PAD>')
        else:
            temp = temp[:max_len]
            
        seq_new_slots.append(temp)
        
    datas = list(zip(seq_new_words, seq_new_slots, intents))
    return datas

In [5]:
def get_mapping_dictionary(datas):
    
    seq_words, seq_slots, intents = list(zip(*datas))
    
    # flatten function
    flatten = lambda l:[item for seq in l for item in seq]
    
    # build word2id and id2word
    word2id = {"<PAD>":0, '<UNK>':1, '<SOS>':2, '<EOS>':3}
    for word in set(flatten(seq_words)):
        if word not in word2id:
            word2id[word] = len(word2id)
    id2word = {v:k for k,v in word2id.items()}
    
    # build slot2id and id2slot
    slot2id = {'<PAD>':0, '<UNK>':1, 'O':2}
    for slot in set(flatten(seq_slots)):
        if slot not in slot2id:
            slot2id[slot] = len(slot2id)
    id2slot = {v:k for k,v in slot2id.items()}
    
    # build intent2id and id2intent
    intent2id = {'<UNK>':0}
    for intent in set(intents):
        if intent not in intent2id:
            intent2id[intent] = len(intent2id)
    id2intent = {v:k for k,v in intent2id.items()}
    
    return word2id, id2word, slot2id, id2slot, intent2id, id2intent

In [6]:
def to_index(datas, word2id, slot2id, intent2id):
    
    data_with_id = []
    for seq_words, seq_slots, intent in datas:
        seq_words_id = list(map(lambda i:word2id.get(i, word2id['<UNK>']), seq_words))
        seq_len = seq_words.index('<EOS>')
        seq_slots_id = list(map(lambda i:slot2id.get(i, slot2id['<UNK>']), seq_slots))
        intents_id = intent2id.get(intent, intent2id['<UNK>'])
        data_with_id.append([seq_words_id, seq_len, seq_slots_id, intents_id])
    
    return data_with_id

In [7]:
train_data_ed = data_pipeline(train_data)
test_data_ed = data_pipeline(test_data)

word2id, id2word, slot2id, id2slot, intent2id, id2intent = get_mapping_dictionary(train_data_ed)
train_data_with_id = to_index(train_data_ed, word2id, slot2id, intent2id)
test_data_with_id = to_index(test_data_ed, word2id, slot2id, intent2id)

#### check result

In [8]:
print(train_data_ed[0])
print()
print(train_data_with_id[0])

(['i', 'want', 'to', 'fly', 'from', 'baltimore', 'to', 'dallas', 'round', 'trip', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'], ['O', 'O', 'O', 'O', 'O', 'O', 'B-fromloc.city_name', 'O', 'B-toloc.city_name', 'B-round_trip', 'I-round_trip', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'], 'atis_flight')

[[89, 659, 859, 190, 206, 164, 859, 616, 840, 812, 3, 0, 0, 0, 0, 0, 0, 0, 0,

## parameter/input/output/variable

### hyperparameters

In [9]:
epoch_num = 5

batch_size = 16

input_steps = 50

vocab_size = 871

embed_size = 64

hidden_size = 100

slot_size = 122

intent_size = 22

### Input/Output

In [10]:
encoder_input = tf.placeholder(tf.int32, [input_steps, batch_size], name = 'encoder_input')
encoder_len = tf.placeholder(tf.int32, [batch_size], name = 'encoder_len')
slot_target = tf.placeholder(tf.int32, [batch_size, input_steps], name = 'slot_target')
intent_target = tf.placeholder(tf.int32, [batch_size], name = 'intent_target')

### Variable

In [11]:
embeddings = tf.Variable(tf.random_uniform([vocab_size, embed_size], -1.0, 1.0), dtype = tf.float32, name = 'embeddings')
encoder_embed_input = tf.nn.embedding_lookup(embeddings, encoder_input)

In [12]:
slot_w = tf.Variable(tf.random_uniform([hidden_size*2, slot_size], -1.0, 1.0), dtype = tf.float32, name = 'slot_w')
slot_b = tf.zeros([slot_size], dtype = tf.float32, name = 'slot_b')
intent_w = tf.Variable(tf.random_uniform([hidden_size*2, intent_size], -1.0, 1.0), dtype = tf.float32, name = 'intent_w')
intent_b = tf.zeros([intent_size], dtype = tf.float32, name = 'intent_b')

### special token

In [13]:
sos_time_slice = tf.ones([batch_size], dtype = tf.int32)*2
sos_embed = tf.nn.embedding_lookup(embeddings, sos_time_slice, name = 'SOS')
pad_embed = tf.zeros([batch_size, hidden_size*2+embed_size], dtype = tf.float32, name = 'PAD')

#### check result

In [14]:
print('encoder_embed_input.shape:{}'.format(encoder_embed_input.shape))
print('sos_embed.shape:{}'.format(sos_embed.shape))

encoder_embed_input.shape:(50, 16, 64)
sos_embed.shape:(16, 64)


## model

### encoder

In [15]:
encoder_cell = tf.contrib.rnn.LSTMCell(hidden_size)
((encoder_fw_output, encoder_bw_output), 
 (encoder_fw_final_state, encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(cell_fw = encoder_cell,
                                                                                     cell_bw = encoder_cell,
                                                                                     inputs = encoder_embed_input,
                                                                                     sequence_length = encoder_len,
                                                                                     dtype = tf.float32,
                                                                                     time_major = True)

In [16]:
encoder_output = tf.concat((encoder_fw_output, encoder_bw_output), 2)
encoder_final_state_c = tf.concat((encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
encoder_final_state_h = tf.concat((encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
encoder_final_state = tf.contrib.rnn.LSTMStateTuple(c = encoder_final_state_c,
                                                    h = encoder_final_state_h)

#### check result

In [17]:
print('encoder_output.shape:{}'.format(encoder_output.shape))
print('encoder_final_state_c.shape:{}'.format(encoder_final_state_c.shape))
print('encoder_final_state_h.shape:{}'.format(encoder_final_state_h.shape))
print('encoder_final_state:{}'.format(encoder_final_state))

encoder_output.shape:(50, 16, 200)
encoder_final_state_c.shape:(16, 200)
encoder_final_state_h.shape:(16, 200)
encoder_final_state:LSTMStateTuple(c=<tf.Tensor 'concat_1:0' shape=(16, 200) dtype=float32>, h=<tf.Tensor 'concat_2:0' shape=(16, 200) dtype=float32>)


### decoder

In [18]:
decoder_len = encoder_len

#### build custom helper function

In [19]:
def initial_fn():
    
    finished = (0>=decoder_len)
    initial_input = tf.concat((sos_embed, encoder_output[0]), 1)
    
    return finished, initial_input

In [20]:
def get_sample_id(time, outputs, state):
    
    pred_id = tf.to_int32(tf.argmax(outputs, 1))
    
    return pred_id

In [21]:
def loop_translate_fn(time, outputs, state, sample_ids):
    
    def get_next_input():
        
        pred_embed = tf.nn.embedding_lookup(embeddings, sample_ids)
        
        return tf.concat((pred_embed, encoder_output[time]), 1)
    
    finished = (time>=decoder_len)
    all_finished = tf.reduce_all(finished)
    next_input = tf.cond(all_finished, lambda:pad_embed, get_next_input)
    next_state = state
    
    return finished, next_input, next_state

In [22]:
my_helper = tf.contrib.seq2seq.CustomHelper(initial_fn, get_sample_id, loop_translate_fn)

#### build decode flow

In [23]:
def decode(helper, scope, reuse = None):
    with tf.variable_scope(scope, reuse = reuse):
        
        # build attention mechanism
        memory = tf.transpose(encoder_output, [1, 0 ,2])
        attn_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units = hidden_size,
                                                              memory = memory,
                                                              memory_sequence_length = encoder_len)
        
        # build attention cell
        cell = tf.contrib.rnn.LSTMCell(hidden_size*2)
        attn_cell = tf.contrib.seq2seq.AttentionWrapper(cell = cell,
                                                        attention_mechanism = attn_mechanism,
                                                        attention_layer_size = hidden_size)
        
        # build output projection
        out_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = attn_cell,
                                                          output_size = slot_size,
                                                          reuse = reuse)
        
        # build decoder
        decoder = tf.contrib.seq2seq.BasicDecoder(cell = out_cell,
                                                  initial_state = out_cell.zero_state(batch_size = batch_size, dtype = tf.float32),
                                                  helper = helper)
        
        # run decoder
        (final_output, final_state, seq_len) = tf.contrib.seq2seq.dynamic_decode(decoder = decoder,
                                                                                 output_time_major = True,
                                                                                 impute_finished = True,
                                                                                 maximum_iterations = input_steps)
        
        return final_output

In [24]:
final_decoder_output = decode(my_helper, 'decode')

#### check result

In [25]:
print('pred slot id shape:{}'.format(final_decoder_output.sample_id.shape))
print('pred slot logit shape:{}'.format(final_decoder_output.rnn_output.shape))

pred slot id shape:(?, 16)
pred slot logit shape:(?, 16, 122)


### process slot prediction

In [26]:
slot_prediction_id = final_decoder_output.sample_id
slot_prediction_logits = final_decoder_output.rnn_output

In [27]:
(decoder_max_steps, decoder_batch_size, decoder_dim) = tf.unstack(tf.shape(slot_prediction_logits))
slot_target_with_time_major = tf.transpose(slot_target, [1, 0])
slot_target_with_time_major_and_true_len = slot_target_with_time_major[:decoder_max_steps]

In [28]:
mask = tf.to_float(tf.not_equal(slot_target_with_time_major_and_true_len, 0))

In [29]:
slot_loss = tf.contrib.seq2seq.sequence_loss(logits = slot_prediction_logits,
                                             targets = slot_target_with_time_major_and_true_len,
                                             weights = mask,
                                             name = 'slot_loss')

#### check result

In [30]:
print('slot_target_with_time_major_and_true_len.shape:{}'.format(slot_target_with_time_major_and_true_len.shape))
print('mask:{}'.format(mask))

slot_target_with_time_major_and_true_len.shape:(?, 16)
mask:Tensor("ToFloat:0", shape=(?, 16), dtype=float32)


### process intent prediction

In [31]:
intent_prediction_logits = tf.matmul(encoder_final_state_h, intent_w)+intent_b
intent_prediction_id = tf.argmax(intent_prediction_logits, 1)

In [32]:
intent_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = intent_prediction_logits,
                                                         labels = tf.one_hot(indices = intent_target,
                                                                             depth = intent_size,
                                                                             dtype = tf.float32)))

### loss

In [33]:
loss = slot_loss+intent_loss

### optimizer

In [34]:
optimizer = tf.train.AdamOptimizer()
grads, vars = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(grads, 5)
train_op = optimizer.apply_gradients(zip(grads, vars))

## Train

### build helper function

In [35]:
def get_batch(datas, batch_size):
    
    random.shuffle(datas)
    start_index = 0
    end_index = batch_size
    
    while end_index<len(datas):
        batch = datas[start_index:end_index]
        start_index, end_index = end_index, end_index+batch_size
        
        yield batch

In [36]:
seq_index2word = lambda seq, id2word:' '.join([id2word[word] for word in seq])
seq_index2slot = lambda seq, id2slot:' '.join([id2slot[slot] for slot in seq])

In [37]:
def accuracy_score(true_data, pred_data, true_len = None):
    
    true_data = np.array(true_data)
    pred_data = np.array(pred_data)

    assert true_data.shape == pred_data.shape
    
    if true_len is not None:
        val_num = np.sum(true_len)
        assert val_num!=0
        score = 0
        
        for i in range(true_data.shape[0]):
            score+=np.sum(true_data[i, :true_len[i]] == pred_data[i, :true_len[i]])
    else:
        val_num = np.prod(true_data.shape)
        score = np.sum(true_data == pred_data)
    
    accuracy_score = score/float(val_num)
    
    return accuracy_score

In [38]:
def get_data_from_seq_batch(true_batch, pred_batch, padding_token):

    true_ma = ma.masked_equal(true_batch, padding_token)
    pred_ma = ma.masked_array(pred_batch, true_ma.mask)
    true_ma = true_ma.flatten()
    pred_ma = pred_ma.flatten()
    true_ma = true_ma[~true_ma.mask]
    pred_ma = pred_ma[~pred_ma.mask]
    
    return true_ma, pred_ma

In [39]:
def f1_score_for_seq_batch(true_batch, pred_batch, average = 'micro', padding_token = 0):
    
    true_data, pred_data = get_data_from_seq_batch(true_batch, pred_batch, padding_token)
    labels = list(set(true_data))
    return f1_score(true_data, pred_data, labels = labels, average = average)

### train

In [40]:
try:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_loss, mean_loss = 0, 0
        
        for epoch in range(epoch_num):
            
            # training
            for i,training_data in enumerate(get_batch(train_data_with_id, batch_size)):
                training_data = list(zip(*training_data))
                run_target = [train_op, loss]
                fd = {
                    encoder_input:np.transpose(training_data[0], [1, 0]),
                    encoder_len:training_data[1],
                    slot_target:training_data[2],
                    intent_target:training_data[3]
                }

                _, loss_val = sess.run(run_target, feed_dict = fd)
                train_loss+=loss_val
                mean_loss+=loss_val
                
                if i%30 == 0:
                    mean_loss/=30.0
            train_loss/=(i+1)
            print("Epoch:{}, train_loss:{}".format(epoch, train_loss))
            
            # testing
            all_pred_slots = []
            for i,testing_data in enumerate(get_batch(test_data_with_id, batch_size)):
                testing_data = list(zip(*testing_data))
                run_target = [slot_prediction_id, intent_prediction_id]
                fd = {
                    encoder_input:np.transpose(testing_data[0], [1, 0]),
                    encoder_len:testing_data[1],
                }
                
                slot_prediction_batch, intent_prediction_batch = sess.run(run_target, feed_dict = fd)
                slot_prediction_batch = np.transpose(slot_prediction_batch, [1, 0])
                
                if i == 0:
                    random_index = random.choice(range(len(testing_data)))
                    print("Input Sentence:{}".format(seq_index2word(testing_data[0][random_index], id2word)))
                    print()
                    print("True Slots:{}".format(seq_index2slot(testing_data[2][random_index], id2slot)))
                    print()
                    print("Pred Slots:{}".format(seq_index2slot(slot_prediction_batch[random_index], id2slot)))
                    print()
                    print("True Intent:{}".format(id2intent.get(testing_data[3][random_index])))
                    print()
                    print("Pred Intent:{}".format(id2intent.get(intent_prediction_batch[random_index])))
                
                slot_true = np.array((testing_data[2]))
                
                slot_prediction_len = list(np.shape(slot_prediction_batch))[1]
                slot_true_len = np.array((testing_data[1]))
                
                # remove padding
                slot_true_with_true_len = slot_true[:, :slot_prediction_len]
                
                # add padding
                slot_prediction_padd = np.lib.pad(slot_prediction_batch, ((0,0), (0, input_steps - slot_prediction_len)), mode = 'constant', constant_values = 0)
                all_pred_slots.append(slot_prediction_padd)
                
                # without padding
                slot_accuracy = accuracy_score(slot_true_with_true_len, slot_prediction_batch, slot_true_len)
                intent_accuracy = accuracy_score(testing_data[3], intent_prediction_batch)
                print("slot accuracy: {}, intent accuracy: {}".format(slot_accuracy, intent_accuracy))
                
            all_pred_slots = np.vstack(all_pred_slots)
            all_true_slots = np.array(list(zip(*test_data_with_id))[2])[:all_pred_slots.shape[0]]
            print("F1 score for epoch {}: {}".format(epoch, f1_score_for_seq_batch(all_true_slots, all_pred_slots)))
            
except KeyboardInterrupt:
    print('Interrupt')

Epoch:0, train_loss:1.6559512047998366
Input Sentence:newark to cleveland daily <EOS> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>

True Slots:O B-fromloc.city_name O B-toloc.city_name B-flight_days <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>

Pred Slots:O B-fromloc.city_name O B-toloc.city_name I-toloc.city_name <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>

True Intent:atis_flight

Pred Intent:atis_flight
slot accuracy: 0.8579545454545454, intent accuracy: 1.0
slot accuracy: 0.910614525139664

slot accuracy: 0.9759036144578314, intent accuracy: 1.0
slot accuracy: 0.9375, intent accuracy: 1.0
slot accuracy: 0.9696969696969697, intent accuracy: 1.0
slot accuracy: 0.9693251533742331, intent accuracy: 0.9375
slot accuracy: 0.9761904761904762, intent accuracy: 1.0
slot accuracy: 0.9945054945054945, intent accuracy: 1.0
slot accuracy: 0.9595959595959596, intent accuracy: 1.0
slot accuracy: 0.9061032863849765, intent accuracy: 1.0
slot accuracy: 0.96875, intent accuracy: 1.0
slot accuracy: 0.946524064171123, intent accuracy: 1.0
slot accuracy: 0.9259259259259259, intent accuracy: 0.8125
slot accuracy: 0.9493670886075949, intent accuracy: 1.0
slot accuracy: 0.9712643678160919, intent accuracy: 1.0
slot accuracy: 0.9619565217391305, intent accuracy: 0.9375
slot accuracy: 0.9774011299435028, intent accuracy: 0.9375
slot accuracy: 0.9569377990430622, intent accuracy: 1.0
slot accuracy: 0.9934640522875817, intent accuracy: 0.9375
slot accuracy: 0.9252873563218391, intent accuracy: 0.937