## load library

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import numpy.ma as ma
from sklearn.metrics import f1_score
import random
random.seed(0)

## load data

In [2]:
df = pd.read_csv('model_training_data.csv')

In [3]:
df

Unnamed: 0,sentence,label,intent
0,round trip fares Warsaw to Istanbul,O O O origin O destination,other
1,if I am going to Berlin should I bring umbrella,O O O O O location O O O O,weather
2,what is the ground transportation from philade...,O O O O O O O O O O O O,other
3,okay and on may four i would like to go from S...,O O O O O O O O O O O origin O destination O O...,flight
4,do I need to bring umbrella to Seoul today,O O O O O O O location O,weather
5,temperature in Rome,O O location,weather
6,current weather in Florence,O O O location,weather
7,what 's the cheapest round trip flight on twa ...,O O O O O O O O O O origin O destination,flight
8,will Los Angeles be hot,O location location O O,weather
9,I want to know if it will rain in Buenos Aires,O O O O O O O O O location location,weather


## preprocessing

#### build helper function for preprocessing

In [4]:
def data_pipeline(df, max_len = 50):
    '''
    Split data to seq_words, seq_slots and intents. Moreover, this function adds special token for them.
    '''
    
    seq_words = df['sentence']
    seq_slots = df['label']
    intents = list(df['intent'])
    
    seq_new_words, seq_new_slots = [], []
    
    for seq_id in range(len(seq_words)):
        temp = seq_words[seq_id]
        temp = temp.split()
        if len(temp)<max_len:
            temp.append('<EOS>')
            
            while len(temp)<max_len:
                temp.append('<PAD>')
        else:
            temp = temp[:max_len]
            temp[-1] = '<EOS>'
    
        seq_new_words.append(temp)
    
        temp = seq_slots[seq_id]
        temp = temp.split()
        if len(temp)<max_len:
            while len(temp)<max_len:
                temp.append('<PAD>')
        else:
            temp = temp[:max_len]
        
        seq_new_slots.append(temp)
    
    datas = list(zip(seq_new_words, seq_new_slots, intents))
    
    return datas

In [5]:
def get_mapping_dictionary(datas):
    '''
    get mapping dictionary for words, slots, intents.
    '''
    
    seq_words, seq_slots, intents = list(zip(*datas))
    flatten = lambda l:[item for seq in l for item in seq]
    
    word2id = {'<PAD>':0, '<UNK>':1, '<SOS>':2, 'EOS':3}
    for word in set(flatten(seq_words)):
        if word not in word2id:
            word2id[word] = len(word2id)
    id2word = {v:k for k,v in word2id.items()}
    
    slot2id = {'<PAD>':0, '<UNK>':1, 'O':2}
    for slot in set(flatten(seq_slots)):
        if slot not in slot2id:
            slot2id[slot] = len(slot2id)
    id2slot = {v:k for k,v in slot2id.items()}
    
    intent2id = {'<UNK>':0}
    for intent in set(intents):
        if intent not in intent2id:
            intent2id[intent] = len(intent2id)
    id2intent = {v:k for k,v in intent2id.items()}
    
    return word2id, id2word, slot2id, id2slot, intent2id, id2intent

In [6]:
def to_index(datas, word2id, slot2id, intent2id):
    '''
    transfer to index.
    '''
    
    data_to_index = []
    
    for seq_words, seq_slots, intent in datas:
        seq_words_id = list(map(lambda i: word2id.get(i, slot2id['<UNK>']), seq_words))
        seq_len = seq_words.index('<EOS>')
        seq_slots_id = list(map(lambda i: slot2id.get(i, slot2id['<UNK>']), seq_slots))
        intent_id = intent2id.get(intent, intent2id['<UNK>'])
        
        data_to_index.append([seq_words_id, seq_len, seq_slots_id, intent_id])
    
    return data_to_index

In [7]:
def split_to_train_test_data(datas):
    '''
    split to training data and testing data.
    '''
    random.shuffle(datas)
    split_index = int(len(datas)*0.7)
    train_data_by_id = datas[:split_index]
    test_data_by_id = datas[split_index:]
    
    return train_data_by_id, test_data_by_id

In [8]:
data_ed = data_pipeline(df)
word2id, id2word, slot2id, id2slot, intent2id, id2intent = get_mapping_dictionary(data_ed)
data_by_id = to_index(data_ed, word2id, slot2id, intent2id)
train_data_by_id, test_data_by_id = split_to_train_test_data(data_by_id)

#### check result

In [9]:
print(data_by_id[0])

[[372, 44, 230, 623, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3, [5, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 1]


In [10]:
print(len(word2id))
print(len(slot2id))
print(len(intent2id))

708
6
4


## parameters/Input/Output/Variable

#### hyperparameters

In [11]:
batch_size = 16
vocab_size = 708
embed_size = 64
hidden_size = 100
slot_size = 6
intent_size = 4
input_steps = 50
epoch_num = 5

#### Input/Output

In [12]:
encoder_input = tf.placeholder(tf.int32, [input_steps, batch_size], name = 'encoder_input')
encoder_len = tf.placeholder(tf.int32, [batch_size], name = 'encoder_len')
slot_target = tf.placeholder(tf.int32, [batch_size, input_steps], name = 'slot_target')
intent_target = tf.placeholder(tf.int32, [batch_size], name = 'intent_target')

#### embeddings

In [13]:
embeddings = tf.Variable(tf.random_uniform([vocab_size, embed_size], -1.0, 1.0), dtype = tf.float32, name = 'embeddings')
encoder_embed_input = tf.nn.embedding_lookup(embeddings, encoder_input)

#### Variable

In [14]:
intent_w = tf.Variable(tf.random_uniform([hidden_size*2, intent_size], -1.0, 1.0), dtype = tf.float32, name = 'intent_w')
intent_b = tf.Variable(tf.zeros([intent_size]), dtype = tf.float32, name = 'intent_b')

#### Special Token Embeddings

In [15]:
sos_time_slice = tf.ones([batch_size], dtype = tf.int32)*2
sos_embed = tf.nn.embedding_lookup(embeddings, sos_time_slice, name = 'SOS')
pad_embed = tf.zeros([batch_size, embed_size+hidden_size*2], dtype = tf.float32, name = 'PAD')

## model

### encoder

In [16]:
encoder_cell = tf.contrib.rnn.LSTMCell(hidden_size)
((encoder_fw_output, encoder_bw_output), 
 (encoder_fw_final_state, encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(cell_fw = encoder_cell,
                                                                                     cell_bw = encoder_cell,
                                                                                     inputs = encoder_embed_input,
                                                                                     sequence_length = encoder_len,
                                                                                     dtype = tf.float32,
                                                                                     time_major = True)

In [17]:
encoder_output = tf.concat((encoder_fw_output, encoder_bw_output), 2)
encoder_final_state_h = tf.concat((encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
encoder_final_state_c = tf.concat((encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
encoder_final_state = tf.contrib.rnn.LSTMStateTuple(c = encoder_final_state_c,
                                                    h = encoder_final_state_h)

### decoder

In [18]:
decoder_len = encoder_len

#### build custom helper function for decode

In [19]:
def initial_fn():
    finished = (0>=decoder_len)
    initial_input = tf.concat((sos_embed, encoder_output[0]), 1)
    
    return finished, initial_input

In [20]:
def sample_ids(time, outputs, state):
    pred_id = tf.to_int32(tf.argmax(outputs, 1))
    
    return pred_id

In [21]:
def loop_translate_fn(time, outputs, state, sample_ids):
    def get_next_input():
        pred_embed = tf.nn.embedding_lookup(embeddings, sample_ids)
        return tf.concat((pred_embed, encoder_output[time]), 1)
    
    finished = (time>decoder_len)
    all_finished = tf.reduce_all(finished)
    next_input = tf.cond(all_finished, lambda:pad_embed, get_next_input)
    next_state = state
    
    return all_finished, next_input, next_state

In [22]:
my_helper = tf.contrib.seq2seq.CustomHelper(initial_fn, sample_ids, loop_translate_fn)

#### build decode flow

In [23]:
def decode(helper, scope, reuse = None):
    
    memory = tf.transpose(encoder_output, [1, 0, 2])
    attn_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units = hidden_size,
                                                          memory = memory,
                                                          memory_sequence_length = encoder_len)
    
    cell = tf.contrib.rnn.LSTMCell(hidden_size*2)
    attn_cell = tf.contrib.seq2seq.AttentionWrapper(cell = cell,
                                                    attention_mechanism = attn_mechanism,
                                                    attention_layer_size = hidden_size)
    
    out_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = attn_cell,
                                                      output_size = slot_size,
                                                      reuse = reuse)
    
    decoder = tf.contrib.seq2seq.BasicDecoder(cell = out_cell,
                                              helper = helper,
                                              initial_state = out_cell.zero_state(batch_size = batch_size,
                                                                                  dtype = tf.float32))
    
    final_output, final_state, seq_len = tf.contrib.seq2seq.dynamic_decode(decoder = decoder,
                                                                           output_time_major = True,
                                                                           impute_finished = True,
                                                                           maximum_iterations = input_steps)
    
    return final_output

In [24]:
decoder_output = decode(my_helper, 'decode')

#### processing slot

In [25]:
slot_prediction_labels = decoder_output.sample_id
slot_prediction_logits = decoder_output.rnn_output

In [26]:
(decoder_max_steps, decoder_batch_size, decoder_dim) = tf.unstack(tf.shape(slot_prediction_logits))
slot_target_with_time_major = tf.transpose(slot_target, [1, 0])
slot_target_with_time_major_and_true_len = slot_target_with_time_major[:decoder_max_steps]

In [27]:
mask = tf.to_float(tf.not_equal(slot_target_with_time_major_and_true_len, 0))

#### processing intent

In [28]:
intent_prediction_logits = tf.matmul(encoder_final_state_h, intent_w)+intent_b
intent_prediction_labels = tf.argmax(intent_prediction_logits, 1)

### loss

In [29]:
intent_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = intent_prediction_logits,
                                                                        labels = tf.one_hot(indices = intent_target,
                                                                                            depth = intent_size,
                                                                                            dtype = tf.float32)))

In [30]:
slot_loss = tf.contrib.seq2seq.sequence_loss(logits = slot_prediction_logits,
                                             targets = slot_target_with_time_major_and_true_len,
                                             weights = mask)

In [31]:
loss = slot_loss+intent_loss

### optimizer

In [32]:
optimizer = tf.train.AdamOptimizer()
grads, vars = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(grads, 5)
train_op = optimizer.apply_gradients(zip(grads, vars))

## Train

#### build helper function for training.

In [33]:
def get_batch(datas, batch_size):
    random.shuffle(datas)
    start_index = 0
    end_index = batch_size
    
    while end_index<len(datas):
        batch = datas[start_index:end_index]
        start_index, end_index = end_index, end_index+batch_size
        
        yield batch

In [34]:
seq_id2word = lambda seq,word2id:' '.join([word2id[word] for word in seq])
seq_id2slot = lambda seq,slot2id:' '.join([slot2id[slot] for slot in seq])

#### build helper function for validate

In [35]:
def accuracy_score(true_batch, pred_batch, true_len = None):
    
    true_batch = np.array(true_batch)
    pred_batch = np.array(pred_batch)
    assert len(true_batch) == len(pred_batch)
    
    if true_len is not None:
        target_num = np.sum(true_len)
        
        res = 0
        for i in range(len(true_batch)):
            res+=np.sum(true_batch[i, true_len[i]==pred_batch[i, true_len[i]]])
    else:
        target_num = np.prod(true_batch.shape)
        
        res = np.sum(true_batch==pred_batch)
    
    accuracy_score = res/float(target_num)
    
    return accuracy_score

In [36]:
def get_data_from_seq(true_data, pred_data, padding_token):
    
    true_ma = ma.masked_equal(true_data, padding_token)
    pred_ma = ma.masked_array(pred_data, true_ma.mask)
    true_ma = true_ma.flatten()
    pred_ma = pred_ma.flatten()
    true_ma = true_ma[~true_ma.mask]
    pred_ma = pred_ma[~pred_ma.mask]
    
    return true_ma, pred_ma

In [37]:
def f1_score_per_epoch(true_data, pred_data, padding_token = 0, average = 'micro'):
    
    true, pred = get_data_from_seq(true_data, pred_data, padding_token)
    labels = list(set(true))
    
    return f1_score(true, pred, labels = labels, average = average)

### run

In [38]:
try:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        for epoch in range(epoch_num):
            mean_loss, train_loss = 0,0
            
            # Train
            for i, batch in enumerate(get_batch(train_data_by_id, batch_size)):
                
                training_data = list(zip(*batch))
                run_target = [train_op, loss]
                fd = {
                    encoder_input:np.transpose(training_data[0], [1, 0]),
                    encoder_len:training_data[1],
                    slot_target:training_data[2],
                    intent_target:training_data[3]
                }
                
                _, loss_val = sess.run(run_target, feed_dict = fd)
                mean_loss+=loss_val
                train_loss+=loss_val
                
                if i%30 == 0:
                    mean_loss/=30.0
            train_loss/=(i+1)
            print("Epoch:{}, Train loss:{}".format(epoch, train_loss))
            
            # Test
            all_slot_pred = []
            for i, batch in enumerate(get_batch(test_data_by_id, batch_size)):
                
                testing_data = list(zip(*batch))
                run_target = [slot_prediction_labels ,intent_prediction_labels]
                fd = {
                    encoder_input:np.transpose(testing_data[0], [1, 0]),
                    encoder_len:testing_data[1]
                }
                slot_prediction_v, intent_prediction_v = sess.run(run_target, feed_dict = fd)
                slot_prediction_v = np.transpose(slot_prediction_v, [1, 0])
                
                if i == 0:
                    random_index = random.randint(0, batch_size-1)
                    print('Input Sentence:{}'.format(seq_id2word(np.transpose(testing_data[0], [1, 0])[random_index], id2word)))
                    print()
                    print('True Slot:{}'.format(seq_id2slot(testing_data[2][random_index], id2slot)))
                    print()
                    print('Pred Slot:{}'.format(seq_id2slot(slot_prediction_v[random_index], id2slot)))
                    print()
                    print('True Intent:{}'.format(id2intent.get(testing_data[3][random_index])))
                    print()
                    print('Pred Intent:{}'.format(id2intent.get(intent_prediction_v[random_index])))
                    print()
                
                slot_prediction_len = list(np.shape(slot_prediction_v))[1]
                slot_true_len = testing_data[1]
                
                slot_prediction_v_pad = np.lib.pad(slot_prediction_v, ((0, 0), (0, input_steps - slot_prediction_len)), mode = 'constant', constant_values = 0)
                slot_true_with_true_len = np.array(testing_data[2])[:, :slot_prediction_len]
                
                all_slot_pred.append(slot_prediction_v_pad)
                
                slot_accuracy = accuracy_score(slot_true_with_true_len, slot_prediction_v, slot_true_len)
                intent_accuracy = accuracy_score(intent_prediction_v, testing_data[3])
                print('slot_accuracy:{}, intent_accuracy:{}'.format(slot_accuracy, intent_accuracy))
                
            all_slot_pred = np.vstack(all_slot_pred)
            all_true_pred = np.array(list(zip(*test_data_by_id))[2])[: all_slot_pred.shape[0]]
            print('Epoch:{}, Test F1 Score:{}'.format(epoch, f1_score_per_epoch(all_true_pred, all_slot_pred)))
                
except KeyboardInterrupt:
    print('Interrupting')

Epoch:0, Train loss:0.7057383310073501
Input Sentence:bring <PAD> <PAD> bring Orlando <PAD> <EOS> <PAD> <PAD> <PAD> on <PAD> ticket bring <PAD> Kong

True Slot:O O O O O O location <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>

Pred Slot:O O O O O O location location location location O O O O O O

True Intent:weather

Pred Intent:weather

slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.08441558441558442, intent_accuracy:0.9375
slot_accuracy:0.08387096774193549, intent_accuracy:0.9375
slot_accuracy:0.04895104895104895, intent_accuracy:0.875
slot_accuracy:0.05185185185185185, intent_accuracy:0.8125
slot_accuracy:0.12422360248447205, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0.875
slot_accuracy:0.20689655172413793, intent_accuracy:1.0
slot_accuracy:0.0469798657

slot_accuracy:0.04216867469879518, intent_accuracy:1.0
slot_accuracy:0.04487179487179487, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.05185185185185185, intent_accuracy:0.9375
slot_accuracy:0.0, intent_accuracy:0.9375
slot_accuracy:0.0, intent_accuracy:0.875
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.08333333333333333, intent_accuracy:1.0
slot_accuracy:0.0457516339869281, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0.9375
slot_accuracy:0.05, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.044585987261146494, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0.9375
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.12345679012345678, intent_accuracy:0.875
slot_accuracy:0.11023622047244094, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0

slot_accuracy:0.0457516339869281, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0.875
slot_accuracy:0.05263157894736842, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0.9375
slot_accuracy:0.05426356589147287, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.043478260869565216, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0.9375
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0.9375
slot_accuracy:0.12574850299401197, intent_accuracy:0.9375
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:1.0
slot_accuracy:0.0, intent_accuracy:0.9375
slot_accuracy:0.051470588235294115, intent_accuracy:1.0
slot_accuracy:0.0958904109589041, intent_accuracy:1.0
slot_accuracy:0.05185185185185185, intent_accuracy:1.0
slot_accuracy:0.03825136612021858, intent_accuracy:1.0
slot_accuracy:0.040697674418604654, intent_accuracy:0.8125
