In [15]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib.crf import crf_log_likelihood
from tensorflow.contrib.crf import viterbi_decode
from tensorflow.contrib.layers.python.layers import initializers

import rnncell as rnn

In [20]:
class Model:
    """Input pipeline and bookkeeping state for a tagging network.

    Only the data-feeding part is active so far: a reinitializable
    ``tf.data`` iterator yields padded ``(chars, segs, tags)`` int32 batches,
    and per-example lengths are derived from the character ids. The network
    layers, loss, optimizer and saver are still work in progress and are kept
    as commented-out code at the end of ``__init__``.

    Typical usage: ``set_dataset(data, name)`` once per split, then run the op
    returned by ``make_dataset_init(name)`` in a session to point the shared
    iterator at that split.
    """

    def __init__(self, config):
        """Create bookkeeping variables, the shared iterator and input tensors.

        Args:
            config: mapping read for the keys ``'lr'``, ``'batch_size'``,
                ``'char_dim'``, ``'lstm_dim'``, ``'seg_dim'``, ``'num_tags'``
                and ``'num_chars'``.

        Raises:
            KeyError: if one of the keys above is missing from ``config``.
        """
        self.config = config
        self.lr = config['lr']
        # Configured batch size used when batching datasets. It is kept apart
        # from `self.batch_size`, which below becomes the *dynamic* batch
        # dimension of the current input tensor and must not be fed back into
        # `padded_batch` (wrong dtype, and it depends on the iterator itself).
        self.dataset_batch_size = config['batch_size']

        self.char_dim = config['char_dim']
        self.lstm_dim = config['lstm_dim']
        self.seg_dim = config["seg_dim"]

        self.num_tags = config['num_tags']
        self.num_chars = config['num_chars']
        self.num_segs = 4

        self.global_step = tf.Variable(0, trainable=False)
        self.best_dev_f1 = tf.Variable(0.0, trainable=False)
        self.best_test_f1 = tf.Variable(0.0, trainable=False)
        self.initializer = initializers.xavier_initializer()

        # Registered datasets, keyed by the name given to `set_dataset`.
        self.dataset = {}
        # A single structure-defined iterator shared by every dataset; it is
        # re-pointed with the initializer op from `make_dataset_init`.
        batch_types = (tf.int32, tf.int32, tf.int32)
        batch_shapes = (tf.TensorShape([None, None]),
                        tf.TensorShape([None, None]),
                        tf.TensorShape([None, None]))
        self.iterator = tf.data.Iterator.from_structure(batch_types, batch_shapes)

        # Model inputs: one padded batch of (chars, segs, tags).
        self.char_inputs, self.seg_inputs, self.targets = self.iterator.get_next()

        # Dropout keep probability, fed at run time.
        self.dropout = tf.placeholder(dtype=tf.float32, name="Dropout")

        # sign(abs(id)) is 0 for id 0 and 1 for any other id, so summing along
        # the step axis counts the non-zero (i.e. non-padding) positions of
        # each example. This relies on id 0 being reserved for padding.
        used_positions = tf.sign(tf.abs(self.char_inputs))
        self.lengths = tf.cast(tf.reduce_sum(used_positions, axis=1), tf.int32)
        # Dynamic dimensions of the current batch: [batch_size, num_steps].
        self.batch_size = tf.shape(self.char_inputs)[0]
        self.num_steps = tf.shape(self.char_inputs)[-1]

#         # neural layers:
#         # TODO:embeddings for chinese character and segmentation representation
#         embedding = self.embedding_layer(self.char_inputs, self.seg_inputs, config)

#         # TODO:apply dropout before feed to lstm layer
#         lstm_inputs = tf.nn.dropout(embedding, self.dropout)

#         # TODO:bi-directional lstm layer
#         lstm_outputs = self.biLSTM_layer(lstm_inputs, self.lstm_dim, self.lengths)

#         # TODO:logits for tags
#         self.logits = self.project_layer(lstm_outputs)

#         # TODO:add prediction operation if not using crf
#         if not config['crf']:
#             self.labels_pred = tf.cast(tf.argmax(self.logits, axis=-1), tf.int32)

#         # TODO:loss of the model
#         self.loss = self.loss_layer(self.logits, self.lengths)

#         with tf.variable_scope("optimizer"):
#             optimizer = self.config['optimizer']
#             if optimizer == "sgd":
#                 self.opt = tf.train.GradientDescentOptimizer(self.lr)
#             elif optimizer == "adam":
#                 self.opt = tf.train.AdamOptimizer(self.lr)
#             elif optimizer == "adgrad":
#                 self.opt = tf.train.AdagradOptimizer(self.lr)
#             else:
#                 raise KeyError

#             # apply grad clip to avoid gradient explosion
#             grads_vars = self.opt.compute_gradients(self.loss)
#             capped_grads_vars = [[tf.clip_by_value(g, -self.config['clip'], self.config['clip']), v]
#                                  for g, v in grads_vars]
#             self.train_op = self.opt.apply_gradients(capped_grads_vars, self.global_step)

#         # saver of the model
#         self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)

    def set_dataset(self, data, dataset_name):
        """Register ``data`` as an unbatched ``tf.data.Dataset`` under ``dataset_name``.

        Examples are ordered by ``len(example[0])`` (ascending, stable) and
        fields 1, 2 and 3 of each example are emitted as three int32 vectors.
        Field 0 is presumably the raw character sequence, with the same length
        as the id sequences -- confirm against the data loader.

        Args:
            data: iterable of indexable examples with at least four fields.
            dataset_name: key under which the dataset is stored in
                ``self.dataset``; an existing entry is replaced.
        """
        # Sort once here rather than inside the generator: the generator is
        # re-run each time the iterator is (re)initialised, so sorting there
        # would repeat the same work every pass over the data.
        ordered = sorted(data, key=lambda example: len(example[0]))

        def gen_data():
            for example in ordered:
                yield (example[1], example[2], example[3])

        self.dataset[dataset_name] = tf.data.Dataset.from_generator(
            gen_data,
            (tf.int32, tf.int32, tf.int32),
            (tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None])))

    def make_dataset_init(self, dataset_name, shuffle=0):
        """Return an op that points the shared iterator at a registered dataset.

        The dataset is cut into padded batches of the configured batch size.
        Shuffling is applied *after* batching, so whole batches are reordered
        while the examples inside each batch stay together.

        Args:
            dataset_name: name previously passed to ``set_dataset``.
            shuffle: shuffle buffer size counted in batches; ``0`` (or any
                falsy value) disables shuffling.

        Returns:
            The iterator initializer op for the batched dataset.

        Raises:
            KeyError: if ``dataset_name`` has not been registered.
        """
        if dataset_name not in self.dataset:
            raise KeyError(
                "unknown dataset %r; register it with set_dataset() first" % (dataset_name,))

        padded_shapes = (tf.TensorShape([None]),
                         tf.TensorShape([None]),
                         tf.TensorShape([None]))
        # Use the configured batch size, not `self.batch_size`: the latter is
        # an int32 tensor computed from the iterator output.
        dataset_batch = self.dataset[dataset_name].padded_batch(
            self.dataset_batch_size, padded_shapes=padded_shapes)
        if shuffle:
            dataset_batch = dataset_batch.shuffle(shuffle)
        return self.iterator.make_initializer(dataset_batch)
                