In [1]:
"""
    Named Entity Recognition 

        Author : Sangkeun Jung (2017)
        - using Tensorflow
"""

import sys, os, inspect

# add common to path
from pathlib import Path
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
common_path = str(Path(currentdir).parent.parent)
sys.path.append( common_path )

from common.nlp.vocab import Vocab
from common.nlp.data_loader import N2NTextData
from common.nlp.converter import N2NConverter

from dataset import NERDataset
from dataset import load_data
from common.ml.hparams import HParams

import numpy as np
import copy 
import time 
import tensorflow as tf
from tensorflow.python.ops import rnn
from tensorflow.contrib.layers.python.layers import linear
from tensorflow.python.ops import variable_scope
from tensorflow.contrib.seq2seq import sequence_loss#sequence의 loss의 평균값을 구할 수있음

from common.ml.tf.deploy import freeze_graph



print( "Tensorflow Version : ", tf.__version__)

class NER():
    def __init__(self, hps, mode="train"):
        self.hps = hps
        self.x = tf.placeholder(tf.int32,   [None, hps.num_steps], name="pl_tokens")
        self.y = tf.placeholder(tf.int32,   [None, hps.num_steps], name="pl_target")
        self.w = tf.placeholder(tf.float32, [None, hps.num_steps], name="pl_weight")
        self.keep_prob = tf.placeholder(tf.float32, [], name="pl_keep_prob")

        ### 4 blocks ###
        # 1) embedding
        # 2) dropout on input embedding
        # 3) sentence encoding using rnn
        # 4) bidirectional rnn's output to target classes
        # 5) loss calcaulation

        def _embedding(x):
            # character embedding 
            shape       = [hps.vocab_size, hps.emb_size]
            initializer = tf.initializers.variance_scaling(distribution="uniform", dtype=tf.float32)
            emb_mat     = tf.get_variable("emb", shape, initializer=initializer, dtype=tf.float32)
            input_emb   = tf.nn.embedding_lookup(emb_mat, x)   # [batch_size, sent_len, emb_dim]

            # split input_emb -> num_steps
            step_inputs = tf.unstack(input_emb, axis=1)
            return step_inputs

        def _sequence_dropout(step_inputs, keep_prob):
            # apply dropout to each input
            # input : a list of input tensor which shape is [None, input_dim]
            with tf.name_scope('sequence_dropout') as scope:
                step_outputs = []
                for t, input in enumerate(step_inputs):
                    step_outputs.append( tf.nn.dropout(input, keep_prob) )
            return step_outputs

        def sequence_encoding_n2n(step_inputs, seq_length, cell_size):
            # birnn based N2N encoding and output
            f_rnn_cell = tf.contrib.rnn.GRUCell(cell_size, reuse=False)
            b_rnn_cell = tf.contrib.rnn.GRUCell(cell_size, reuse=False)
            _inputs    = tf.stack(step_inputs, axis=1)

            # step_inputs = a list of [batch_size, emb_dim]
            # input = [batch_size, num_step, emb_dim]
            # np.stack( [a,b,c,] )
            outputs, states, = tf.nn.bidirectional_dynamic_rnn( f_rnn_cell,
                                                                b_rnn_cell,
                                                                _inputs,
                                                                sequence_length=tf.cast(seq_length, tf.int64),
                                                                time_major=False,
                                                                dtype=tf.float32,
                                                                scope='birnn',
                                                            )
            output_fw, output_bw = outputs
            states_fw, states_bw = states 

            output       = tf.concat([output_fw, output_bw], 2)
            step_outputs = tf.unstack(output, axis=1)

            final_state  = tf.concat([states_fw, states_bw], 1)
            return step_outputs # a list of [batch_size, enc_dim]

        def _to_class_n2n(step_inputs, num_class):
            T = len(step_inputs)
            step_output_logits = []
            for t in range(T):
                # encoder to linear(map)
                out = step_inputs[t]
                if t==0: out = linear(out, num_class, scope="Rnn2Target")
                else:    out = linear(out, num_class, scope="Rnn2Target", reuse=True)
                step_output_logits.append(out)
            return step_output_logits

        def _loss(step_outputs, step_refs, weights):
            # step_outputs : a list of [batch_size, num_class] float32 - unscaled logits
            # step_refs    : [batch_size, num_steps] int32
            # weights      : [batch_size, num_steps] float32
            # calculate sequence wise loss function using cross-entropy
            _batch_output_logits = tf.stack(step_outputs, axis=1)
            loss = sequence_loss(
                                    logits=_batch_output_logits,        
                                    targets=step_refs,
                                    weights=weights
                                )
            return loss
        
        seq_length    = tf.reduce_sum(self.w, 1) # [batch_size]

        step_inputs       = _embedding(self.x)
        step_inputs       = _sequence_dropout(step_inputs, self.keep_prob)
        step_enc_outputs  = sequence_encoding_n2n(step_inputs, seq_length, hps.enc_dim)
        step_outputs      = _to_class_n2n(step_enc_outputs, hps.num_target_class)

        self.loss = _loss(step_outputs, self.y, self.w)

        # step_preds and step_out_probs
        step_out_probs = []
        step_out_preds = []
        for _output in step_outputs:
            _out_probs  = tf.nn.softmax(_output)
            _out_pred   = tf.argmax(_out_probs, 1)

            step_out_probs.append(_out_probs)
            step_out_preds.append(_out_pred)

        # stack for interface
        self.step_out_probs = tf.stack(step_out_probs, axis=1, name="step_out_probs")
        self.step_out_preds = tf.stack(step_out_preds, axis=1, name="step_out_preds")

        self.global_step = tf.get_variable("global_step", [], tf.int32, initializer=tf.zeros_initializer, trainable=False)

        if mode == "train":
            optimizer       = tf.train.AdamOptimizer(hps.learning_rate)
            self.train_op   = optimizer.minimize(self.loss, global_step=self.global_step)
        else:
            self.train_op = tf.no_op()

        for v in tf.trainable_variables(): print(v.name)

    @staticmethod
    def get_default_hparams():
        return HParams(
            learning_rate     = 0.005,
            keep_prob         = 0.5,
        )


def train(train_id_data, num_vocabs, num_taget_class):
    #
    # train sentiment analysis using given train_id_data
    #
    max_epoch = 900
    model_dir = "./trained_models"
    hps = NER.get_default_hparams()
    hps.update(
                    batch_size= 100,
                    num_steps = 85,
                    emb_size  = 40,
                    enc_dim   = 100,
                    vocab_size=num_vocabs,
                    num_target_class=num_taget_class
               )

    with tf.variable_scope("model"):
        model = NER(hps, "train")

    sv = tf.train.Supervisor(is_chief=True,
                             logdir=model_dir,
                             summary_op=None,  
                             global_step=model.global_step)

    # tf assign compatible operators for gpu and cpu 
    tf_config = tf.ConfigProto(allow_soft_placement=True)

    with sv.managed_session(config=tf_config) as sess:
        local_step       = 0
        prev_global_step = sess.run(model.global_step)

        train_data_set = NERDataset(train_id_data, hps.batch_size, hps.num_steps)
        losses = []
        while not sv.should_stop():
            fetches = [model.global_step, model.loss, model.train_op]
            a_batch_data = next( train_data_set.iterator )
            y, x, w = a_batch_data
            fetched = sess.run(fetches, {
                                            model.x: x, 
                                            model.y: y, 
                                            model.w: w,

                                            model.keep_prob: hps.keep_prob,
                                        }
                              )

            local_step += 1

            _global_step = fetched[0]
            _loss        = fetched[1]
            losses.append( _loss )
            if local_step < 10 or local_step % 10 == 0:
                epoch = train_data_set.get_epoch_num()
                print("Epoch = {:3d} Step = {:7d} loss = {:5.3f}".format(epoch, _global_step, np.mean(losses)) )
                _loss = []                
                if epoch >= max_epoch : break 

        print("Training is done.")
    sv.stop()

    # model.out_pred, model.out_probs
    freeze_graph(model_dir, "model/step_out_preds,model/step_out_probs", "frozen_graph.tf.pb") ## freeze graph with params to probobuf format
    
from tensorflow.core.framework import graph_pb2
def predict(token_vocab, target_vocab, sent):
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force to use cpu only (prediction)
    model_dir = "./trained_models"

    # prepare sentence converting
    # to make raw sentence to id data easily
    pred_data     = N2NTextData(sent, mode='sentence')
    pred_id_data  = N2NConverter.convert(pred_data, target_vocab, token_vocab)
    pred_data_set = NERDataset(pred_id_data, 1, 85)
    #
    try:
        a_batch_data = next(pred_data_set.predict_iterator) # a result
        b_nes_id, b_token_ids, b_weight = a_batch_data
    except StopIteration:
        print(pred_data_set)

    # Restore graph
    # note that frozen_graph.tf.pb contains graph definition with parameter values in binary format
    _graph_fn =  os.path.join(model_dir, 'frozen_graph.tf.pb')
    with tf.gfile.GFile(_graph_fn, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)

    with tf.Session(graph=graph) as sess:
        # to check load graph
        #for n in tf.get_default_graph().as_graph_def().node: print(n.name)

        # make interface for input
        pl_token     = graph.get_tensor_by_name('import/model/pl_tokens:0')
        pl_weight    = graph.get_tensor_by_name('import/model/pl_weight:0')
        pl_keep_prob = graph.get_tensor_by_name('import/model/pl_keep_prob:0')

        # make interface for output
        step_out_preds = graph.get_tensor_by_name('import/model/step_out_preds:0')
        step_out_probs = graph.get_tensor_by_name('import/model/step_out_probs:0')
        

        # predict sentence 
        b_best_step_pred_indexs, b_step_pred_probs = sess.run([step_out_preds, step_out_probs], 
                                                              feed_dict={
                                                                            pl_token  : b_token_ids,
                                                                            pl_weight : b_weight,
                                                                            pl_keep_prob : 1.0,
                                                                        }
                                                             )
        best_step_pred_indexs = b_best_step_pred_indexs[0]
        step_pred_probs = b_step_pred_probs[0]

        step_best_targets = []
        step_best_target_probs = []
        for time_step, best_pred_index in enumerate(best_step_pred_indexs):
            _target_class = target_vocab.get_symbol(best_pred_index)
            step_best_targets.append( _target_class )
            _prob = step_pred_probs[time_step][best_pred_index]
            step_best_target_probs.append( _prob ) 
        return sent, step_best_targets



if __name__ == '__main__':
    train_id_data, token_vocab, target_vocab = load_data()
    num_vocabs       = token_vocab.get_num_tokens()
    num_target_class = target_vocab.get_num_targets()

    train_data_set = NERDataset(train_id_data, 5, 85)
    train(train_id_data, num_vocabs, num_target_class)
    
    #predict(token_vocab, target_vocab, '아프가니스탄의 장래를 더욱 불투명하게 하는 것은 강경파 헤즈비 이슬라미와 우즈베크 민병대의 대립이다.')


  from ._conv import register_converters as _register_converters


Tensorflow Version :  1.10.0
Instructions for updating:
seq_dim is deprecated, use seq_axis instead
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
model/emb:0
model/birnn/fw/gru_cell/gates/kernel:0
model/birnn/fw/gru_cell/gates/bias:0
model/birnn/fw/gru_cell/candidate/kernel:0
model/birnn/fw/gru_cell/candidate/bias:0
model/birnn/bw/gru_cell/gates/kernel:0
model/birnn/bw/gru_cell/gates/bias:0
model/birnn/bw/gru_cell/candidate/kernel:0
model/birnn/bw/gru_cell/candidate/bias:0
model/Rnn2Target/weights:0
model/Rnn2Target/biases:0
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path ./trained_models\model.ckpt
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:model/global_step/sec: 0
Epoch =   1 Step =       1 loss = 2.559
Epoch =   1 Step =       2 loss = 2.484
Epoc

Epoch = 177 Step =    1540 loss = 0.076
Epoch = 178 Step =    1550 loss = 0.075
Epoch = 179 Step =    1560 loss = 0.075
Epoch = 180 Step =    1570 loss = 0.074
Epoch = 181 Step =    1580 loss = 0.074
Epoch = 182 Step =    1590 loss = 0.074
Epoch = 184 Step =    1600 loss = 0.073
Epoch = 185 Step =    1610 loss = 0.073
Epoch = 186 Step =    1620 loss = 0.072
INFO:tensorflow:model/global_step/sec: 4.60003
Epoch = 187 Step =    1630 loss = 0.072
Epoch = 188 Step =    1640 loss = 0.072
Epoch = 189 Step =    1650 loss = 0.071
Epoch = 190 Step =    1660 loss = 0.071
Epoch = 192 Step =    1670 loss = 0.070
Epoch = 193 Step =    1680 loss = 0.070
Epoch = 194 Step =    1690 loss = 0.070
Epoch = 195 Step =    1700 loss = 0.069
Epoch = 196 Step =    1710 loss = 0.069
Epoch = 197 Step =    1720 loss = 0.069
Epoch = 198 Step =    1730 loss = 0.068
Epoch = 200 Step =    1740 loss = 0.068
Epoch = 201 Step =    1750 loss = 0.068
Epoch = 202 Step =    1760 loss = 0.067
Epoch = 203 Step =    1770 loss =

Epoch = 404 Step =    3530 loss = 0.036
Epoch = 406 Step =    3540 loss = 0.036
Epoch = 407 Step =    3550 loss = 0.036
Epoch = 408 Step =    3560 loss = 0.036
Epoch = 409 Step =    3570 loss = 0.036
Epoch = 410 Step =    3580 loss = 0.036
Epoch = 411 Step =    3590 loss = 0.036
Epoch = 412 Step =    3600 loss = 0.036
Epoch = 414 Step =    3610 loss = 0.036
Epoch = 415 Step =    3620 loss = 0.036
Epoch = 416 Step =    3630 loss = 0.036
Epoch = 417 Step =    3640 loss = 0.035
Epoch = 418 Step =    3650 loss = 0.035
Epoch = 419 Step =    3660 loss = 0.035
Epoch = 420 Step =    3670 loss = 0.035
Epoch = 422 Step =    3680 loss = 0.035
Epoch = 423 Step =    3690 loss = 0.035
Epoch = 424 Step =    3700 loss = 0.035
Epoch = 425 Step =    3710 loss = 0.035
Epoch = 426 Step =    3720 loss = 0.035
Epoch = 427 Step =    3730 loss = 0.035
Epoch = 428 Step =    3740 loss = 0.035
Epoch = 430 Step =    3750 loss = 0.035
Epoch = 431 Step =    3760 loss = 0.035
Epoch = 432 Step =    3770 loss = 0.034


INFO:tensorflow:Saving checkpoint to path ./trained_models\model.ckpt
Epoch = 636 Step =    5550 loss = 0.025
Epoch = 637 Step =    5560 loss = 0.025
Epoch = 638 Step =    5570 loss = 0.025
Epoch = 639 Step =    5580 loss = 0.025
Epoch = 640 Step =    5590 loss = 0.025
Epoch = 641 Step =    5600 loss = 0.025
Epoch = 642 Step =    5610 loss = 0.025
Epoch = 644 Step =    5620 loss = 0.025
Epoch = 645 Step =    5630 loss = 0.025
Epoch = 646 Step =    5640 loss = 0.025
Epoch = 647 Step =    5650 loss = 0.025
Epoch = 648 Step =    5660 loss = 0.025
Epoch = 649 Step =    5670 loss = 0.025
Epoch = 650 Step =    5680 loss = 0.025
Epoch = 652 Step =    5690 loss = 0.025
Epoch = 653 Step =    5700 loss = 0.025
Epoch = 654 Step =    5710 loss = 0.025
Epoch = 655 Step =    5720 loss = 0.025
Epoch = 656 Step =    5730 loss = 0.025
Epoch = 657 Step =    5740 loss = 0.025
Epoch = 658 Step =    5750 loss = 0.025
Epoch = 660 Step =    5760 loss = 0.025
Epoch = 661 Step =    5770 loss = 0.025
Epoch = 66

Epoch = 869 Step =    7590 loss = 0.020
Epoch = 870 Step =    7600 loss = 0.020
Epoch = 871 Step =    7610 loss = 0.020
Epoch = 872 Step =    7620 loss = 0.020
Epoch = 873 Step =    7630 loss = 0.020
Epoch = 875 Step =    7640 loss = 0.020
Epoch = 876 Step =    7650 loss = 0.020
Epoch = 877 Step =    7660 loss = 0.020
Epoch = 878 Step =    7670 loss = 0.020
Epoch = 879 Step =    7680 loss = 0.020
Epoch = 880 Step =    7690 loss = 0.020
Epoch = 882 Step =    7700 loss = 0.020
Epoch = 883 Step =    7710 loss = 0.020
Epoch = 884 Step =    7720 loss = 0.020
Epoch = 885 Step =    7730 loss = 0.020
Epoch = 886 Step =    7740 loss = 0.020
Epoch = 887 Step =    7750 loss = 0.020
Epoch = 888 Step =    7760 loss = 0.020
Epoch = 890 Step =    7770 loss = 0.020
Epoch = 891 Step =    7780 loss = 0.020
Epoch = 892 Step =    7790 loss = 0.020
Epoch = 893 Step =    7800 loss = 0.020
Epoch = 894 Step =    7810 loss = 0.020
Epoch = 895 Step =    7820 loss = 0.020
Epoch = 896 Step =    7830 loss = 0.020


In [None]:
if __name__ == '__main__':
    with open('./data/trip.test.txt',encoding='utf8') as f:
        lines = f.readlines()
        for line in lines:
            sent, targets = predict(token_vocab, target_vocab,line)
            print(sent)
            print(list(targets))