In [1]:
import os
from datetime import datetime
import tensorflow.compat.v1 as tf
import numpy as np
import json
from sklearn.model_selection import train_test_split

In [2]:
# Directory the preprocessed NSMC arrays and the preprocessing config are read from.
DATA_IN_PATH = './naver_sentiment_movie_corpus/'
# Companion output directory (not referenced by any cell visible in this notebook).
DATA_OUT_PATH = './naver_sentiment_movie_corpus_out/'

# File names; they are concatenated onto DATA_IN_PATH when loading.
TRAIN_INPUT_DATA, TRAIN_LABEL_DATA = 'nsmc_train_input.npy', 'nsmc_train_label.npy'
DATA_CONFIGS = 'data_configs.json'

In [3]:
# Load the preprocessed training inputs, labels and the preprocessing config.
# Each file is opened in a `with` block so its handle is closed as soon as the
# contents have been read, instead of staying open for the life of the kernel.
with open(DATA_IN_PATH + TRAIN_INPUT_DATA, 'rb') as input_file:
    input_data = np.load(input_file)
with open(DATA_IN_PATH + TRAIN_LABEL_DATA, 'rb') as label_file:
    label_data = np.load(label_file)
with open(DATA_IN_PATH + DATA_CONFIGS, 'r') as config_file:
    prepro_configs = json.load(config_file)

In [4]:
# Training hyper-parameters.
BATCH_SIZE = 16                              # examples per training batch
NUM_EPOCHS = 10                              # repeat count of the training dataset
VOCAB_SIZE = prepro_configs['vocab_size']    # read from the preprocessing config
EMB_SIZE = 128                               # second argument of the Embedding layer

# Hold out 10% of the training data for evaluation; the fixed random_state
# keeps the split identical from run to run.
(input_train, input_eval,
 label_train, label_eval) = train_test_split(
    input_data,
    label_data,
    test_size=0.1,
    random_state=13371447,
)

In [5]:
def mapping_fn(X, Y):
    """Package an (input, label) pair in the structure the model function reads.

    Args:
        X: Input value(s); placed unchanged under the ``'x'`` key.
        Y: Label value(s); returned unchanged.

    Returns:
        A ``(features, label)`` tuple where ``features`` is ``{'x': X}``.
    """
    features = {'x': X}
    return features, Y

def train_input_fn():
    """Build the training input pipeline and return its next-element tensors.

    The training split is shuffled with a buffer covering the whole split,
    grouped into batches of ``BATCH_SIZE``, mapped through ``mapping_fn`` and
    repeated ``NUM_EPOCHS`` times.

    Returns:
        The result of ``iterator.get_next()``: a ``(features, labels)`` pair
        whose ``features`` is a dict with the single key ``'x'``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((input_train, label_train))
    dataset = dataset.shuffle(buffer_size=len(input_train))
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.map(mapping_fn)
    dataset = dataset.repeat(count=NUM_EPOCHS)
    # Use the module-level helper rather than the deprecated
    # Dataset.make_one_shot_iterator() method (which logs a deprecation notice).
    iterator = tf.data.make_one_shot_iterator(dataset)

    return iterator.get_next()

def eval_input_fn():
    """Build the evaluation input pipeline and return its next-element tensors.

    The held-out split is batched with ``BATCH_SIZE`` and mapped through
    ``mapping_fn``. It is read once, in order: evaluation does not benefit
    from shuffling, and a fixed order keeps this function consistent with
    ``test_input_fn``.

    Returns:
        The result of ``iterator.get_next()``: a ``(features, labels)`` pair
        whose ``features`` is a dict with the single key ``'x'``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((input_eval, label_eval))
    # Shared constant instead of a repeated literal batch size.
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.map(mapping_fn)
    # Use the module-level helper rather than the deprecated
    # Dataset.make_one_shot_iterator() method (which logs a deprecation notice).
    iterator = tf.data.make_one_shot_iterator(dataset)

    return iterator.get_next()

In [6]:
# Display the vocabulary size stored in the preprocessing config.
prepro_configs['vocab_size']

43473

In [7]:
# Read the vocabulary size from the preprocessing config instead of pasting the
# number in, so the value used for the embedding layer cannot drift away from
# the data if preprocessing is re-run.
VOCAB_SIZE = prepro_configs['vocab_size']

In [8]:
def model_fn(features, labels, mode, params):
    """Estimator model function for a 1-D convolutional binary classifier.

    Network: Embedding -> Dropout -> Conv1D (32 filters, kernel 3, ReLU)
    -> GlobalMaxPool1D -> Dense(250, ReLU) -> Dropout -> Dense(1) logits.

    Args:
        features: Dict holding the model input under the key ``'x'``
            (presumably padded integer token ids of shape (batch, seq_len)
            -- TODO confirm against the preprocessing step).
        labels: Label tensor, or ``None``. When given it is reshaped to
            ``[-1, 1]`` to line up with the single-unit logits.
        mode: A ``tf.estimator.ModeKeys`` value selecting which spec to build.
        params: Accepted for the Estimator signature; not read here.

    Returns:
        A ``tf.estimator.EstimatorSpec``:
        * TRAIN: sigmoid cross-entropy ``loss`` and an Adam ``train_op``.
        * EVAL: ``loss`` plus the ``'acc'`` accuracy metric.
        * PREDICT: ``predictions={'prob': sigmoid(logits)}``.
        Falls through (returning ``None``) for any other ``mode``.
    """
    TRAIN = mode == tf.estimator.ModeKeys.TRAIN
    EVAL = mode == tf.estimator.ModeKeys.EVAL
    PREDICT = mode == tf.estimator.ModeKeys.PREDICT

    # NOTE: layers are created in the same order as before and without explicit
    # names, so the automatically generated variable names are unchanged.
    embedding_layer = tf.keras.layers.Embedding(
                    VOCAB_SIZE,
                    EMB_SIZE)(features['x'])

    # Pass the training flag explicitly so this dropout is tied to the
    # Estimator mode, exactly like `dropout_hidden` below.
    dropout_emb = tf.keras.layers.Dropout(rate=0.2)(embedding_layer, training=TRAIN)

    conv = tf.keras.layers.Conv1D(
           filters=32,
           kernel_size=3,
           padding='same',
           activation=tf.nn.relu)(dropout_emb)

    # Collapse the sequence axis by taking the max over positions per filter.
    pool = tf.keras.layers.GlobalMaxPool1D()(conv)

    hidden = tf.keras.layers.Dense(units=250, activation=tf.nn.relu)(pool)

    dropout_hidden = tf.keras.layers.Dropout(rate=0.2)(hidden, training=TRAIN)
    # Single raw logit per example; sigmoid is applied where probabilities are needed.
    logits = tf.keras.layers.Dense(units=1)(dropout_hidden)

    if labels is not None:
        # Match the (batch, 1) shape of the logits.
        labels = tf.reshape(labels, [-1, 1])

    if TRAIN:
        global_step = tf.train.get_global_step()
        loss = tf.losses.sigmoid_cross_entropy(labels, logits)
        train_op = tf.train.AdamOptimizer(0.001).minimize(loss, global_step)

        return tf.estimator.EstimatorSpec(mode=mode, train_op=train_op, loss=loss)

    elif EVAL:
        loss = tf.losses.sigmoid_cross_entropy(labels, logits)
        pred = tf.nn.sigmoid(logits)
        # Rounding the probability gives the 0/1 class decision at threshold 0.5.
        accuracy = tf.metrics.accuracy(labels, tf.round(pred))
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops={'acc': accuracy})

    elif PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                'prob': tf.nn.sigmoid(logits),
            }
        )

In [9]:
# Hyper-parameters, assigned again right before the Estimator is built
# (same values as the earlier configuration cell).
BATCH_SIZE, NUM_EPOCHS = 16, 10
VOCAB_SIZE = prepro_configs['vocab_size']
EMB_SIZE = 128

In [10]:
# Wrap model_fn in an Estimator; checkpoints and summaries go to model_dir.
est = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="data_out/checkpoint/cnn_model",
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'data_out/checkpoint/cnn_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [11]:
# Train the estimator and report wall-clock timing for the run.
time_start = datetime.utcnow()
print("Experiment startd at {}".format(time_start.strftime("%H%M%S")))
print(".......................................")

est.train(train_input_fn)

time_end = datetime.utcnow()
print("........................................")
# Report the END timestamp here; the original repeated the start time.
print("Experiment finished at {}".format(time_end.strftime("%H%M%S")))
print()
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))

Experiment startd at 070148
.......................................
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
This is a deprecated API that should only be used in TF 1 graph mode and legacy TF 2 graph mode available through `tf.compat.v1`. In all other situations -- namely, eager mode and inside `tf.function` -- you can consume dataset elements using `for elem in dataset: ...` or by explicitly creating iterator via `iterator = iter(dataset)` and fetching its elements via `values = next(iterator)`. Furthermore, this API is not available in TF 2. During the transition from TF 1 to TF 2 you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)` to create a TF 1 graph mode style iterator for a dataset created through TF 2 APIs. Note that this should be a transient state of your code base as there are in general no guarantees about the interoperability

INFO:tensorflow:loss = 0.4716058, step = 6300 (4.207 sec)
INFO:tensorflow:global_step/sec: 23.6948
INFO:tensorflow:loss = 0.6196131, step = 6400 (4.219 sec)
INFO:tensorflow:global_step/sec: 24.0233
INFO:tensorflow:loss = 0.5167574, step = 6500 (4.164 sec)
INFO:tensorflow:global_step/sec: 24.0664
INFO:tensorflow:loss = 0.34406418, step = 6600 (4.156 sec)
INFO:tensorflow:global_step/sec: 24.0478
INFO:tensorflow:loss = 0.2974472, step = 6700 (4.157 sec)
INFO:tensorflow:global_step/sec: 23.889
INFO:tensorflow:loss = 0.30701646, step = 6800 (4.186 sec)
INFO:tensorflow:global_step/sec: 23.7655
INFO:tensorflow:loss = 0.5894432, step = 6900 (4.208 sec)
INFO:tensorflow:global_step/sec: 23.9328
INFO:tensorflow:loss = 0.27901274, step = 7000 (4.178 sec)
INFO:tensorflow:global_step/sec: 23.7866
INFO:tensorflow:loss = 0.3607431, step = 7100 (4.204 sec)
INFO:tensorflow:global_step/sec: 23.7951
INFO:tensorflow:loss = 0.71623135, step = 7200 (4.203 sec)
INFO:tensorflow:global_step/sec: 24.0702
INFO:te

INFO:tensorflow:global_step/sec: 22.6728
INFO:tensorflow:loss = 0.5004051, step = 14300 (4.411 sec)
INFO:tensorflow:global_step/sec: 23.9092
INFO:tensorflow:loss = 0.21192107, step = 14400 (4.184 sec)
INFO:tensorflow:global_step/sec: 23.4984
INFO:tensorflow:loss = 0.26545197, step = 14500 (4.255 sec)
INFO:tensorflow:global_step/sec: 23.8224
INFO:tensorflow:loss = 0.3881058, step = 14600 (4.198 sec)
INFO:tensorflow:global_step/sec: 23.6854
INFO:tensorflow:loss = 0.23997921, step = 14700 (4.222 sec)
INFO:tensorflow:global_step/sec: 24.0037
INFO:tensorflow:loss = 0.57647765, step = 14800 (4.166 sec)
INFO:tensorflow:global_step/sec: 23.9678
INFO:tensorflow:loss = 0.5757001, step = 14900 (4.172 sec)
INFO:tensorflow:global_step/sec: 23.7114
INFO:tensorflow:loss = 0.4998163, step = 15000 (4.217 sec)
INFO:tensorflow:global_step/sec: 23.7299
INFO:tensorflow:loss = 0.24232295, step = 15100 (4.214 sec)
INFO:tensorflow:global_step/sec: 23.5403
INFO:tensorflow:loss = 0.34589407, step = 15200 (4.248

INFO:tensorflow:loss = 0.12827139, step = 22400 (4.202 sec)
INFO:tensorflow:global_step/sec: 23.768
INFO:tensorflow:loss = 0.12967287, step = 22500 (4.208 sec)
INFO:tensorflow:global_step/sec: 23.6534
INFO:tensorflow:loss = 0.16496179, step = 22600 (4.226 sec)
INFO:tensorflow:global_step/sec: 23.8341
INFO:tensorflow:loss = 0.12632467, step = 22700 (4.197 sec)
INFO:tensorflow:global_step/sec: 23.7239
INFO:tensorflow:loss = 0.26324624, step = 22800 (4.215 sec)
INFO:tensorflow:global_step/sec: 23.949
INFO:tensorflow:loss = 0.08278661, step = 22900 (4.176 sec)
INFO:tensorflow:global_step/sec: 23.7274
INFO:tensorflow:loss = 0.09411651, step = 23000 (4.215 sec)
INFO:tensorflow:global_step/sec: 23.3045
INFO:tensorflow:loss = 0.202391, step = 23100 (4.293 sec)
INFO:tensorflow:global_step/sec: 23.8704
INFO:tensorflow:loss = 0.24200088, step = 23200 (4.186 sec)
INFO:tensorflow:global_step/sec: 23.8293
INFO:tensorflow:loss = 0.0925432, step = 23300 (4.197 sec)
INFO:tensorflow:global_step/sec: 23.

INFO:tensorflow:loss = 0.08212199, step = 30300 (4.198 sec)
INFO:tensorflow:global_step/sec: 23.5071
INFO:tensorflow:loss = 0.14851205, step = 30400 (4.255 sec)
INFO:tensorflow:global_step/sec: 23.8233
INFO:tensorflow:loss = 0.08902353, step = 30500 (4.197 sec)
INFO:tensorflow:global_step/sec: 23.653
INFO:tensorflow:loss = 0.056753535, step = 30600 (4.228 sec)
INFO:tensorflow:global_step/sec: 23.8238
INFO:tensorflow:loss = 0.18321873, step = 30700 (4.198 sec)
INFO:tensorflow:global_step/sec: 23.7978
INFO:tensorflow:loss = 0.042925987, step = 30800 (4.201 sec)
INFO:tensorflow:global_step/sec: 23.7755
INFO:tensorflow:loss = 0.64766896, step = 30900 (4.206 sec)
INFO:tensorflow:global_step/sec: 23.7099
INFO:tensorflow:loss = 0.25007737, step = 31000 (4.218 sec)
INFO:tensorflow:global_step/sec: 23.7274
INFO:tensorflow:loss = 0.1670376, step = 31100 (4.215 sec)
INFO:tensorflow:global_step/sec: 23.8325
INFO:tensorflow:loss = 0.49052012, step = 31200 (4.196 sec)
INFO:tensorflow:global_step/sec

INFO:tensorflow:global_step/sec: 23.7274
INFO:tensorflow:loss = 0.08572017, step = 38500 (4.215 sec)
INFO:tensorflow:global_step/sec: 23.9518
INFO:tensorflow:loss = 0.092532516, step = 38600 (4.176 sec)
INFO:tensorflow:global_step/sec: 24.032
INFO:tensorflow:loss = 0.12639326, step = 38700 (4.160 sec)
INFO:tensorflow:global_step/sec: 23.7939
INFO:tensorflow:loss = 0.022006743, step = 38800 (4.203 sec)
INFO:tensorflow:global_step/sec: 24.0165
INFO:tensorflow:loss = 0.08415064, step = 38900 (4.164 sec)
INFO:tensorflow:global_step/sec: 23.6161
INFO:tensorflow:loss = 0.16120183, step = 39000 (4.234 sec)
INFO:tensorflow:global_step/sec: 24.134
INFO:tensorflow:loss = 0.061203077, step = 39100 (4.144 sec)
INFO:tensorflow:global_step/sec: 23.8321
INFO:tensorflow:loss = 0.029255502, step = 39200 (4.196 sec)
INFO:tensorflow:global_step/sec: 23.9504
INFO:tensorflow:loss = 0.15897758, step = 39300 (4.175 sec)
INFO:tensorflow:global_step/sec: 23.7613
INFO:tensorflow:loss = 0.2596263, step = 39400 (

INFO:tensorflow:loss = 0.01731256, step = 46300 (4.411 sec)
INFO:tensorflow:global_step/sec: 23.2446
INFO:tensorflow:loss = 0.014719367, step = 46400 (4.300 sec)
INFO:tensorflow:global_step/sec: 22.6729
INFO:tensorflow:loss = 0.0752182, step = 46500 (4.411 sec)
INFO:tensorflow:global_step/sec: 23.2422
INFO:tensorflow:loss = 0.0063965805, step = 46600 (4.304 sec)
INFO:tensorflow:global_step/sec: 22.7711
INFO:tensorflow:loss = 0.050835006, step = 46700 (4.391 sec)
INFO:tensorflow:global_step/sec: 22.3935
INFO:tensorflow:loss = 0.24703918, step = 46800 (4.466 sec)
INFO:tensorflow:global_step/sec: 23.1792
INFO:tensorflow:loss = 0.012149977, step = 46900 (4.314 sec)
INFO:tensorflow:global_step/sec: 22.7918
INFO:tensorflow:loss = 0.49342516, step = 47000 (4.388 sec)
INFO:tensorflow:global_step/sec: 22.9673
INFO:tensorflow:loss = 0.14671806, step = 47100 (4.354 sec)
INFO:tensorflow:global_step/sec: 23.1213
INFO:tensorflow:loss = 0.028802093, step = 47200 (4.325 sec)
INFO:tensorflow:global_ste

INFO:tensorflow:loss = 0.24340756, step = 54400 (4.311 sec)
INFO:tensorflow:global_step/sec: 23.1936
INFO:tensorflow:loss = 0.0018904775, step = 54500 (4.312 sec)
INFO:tensorflow:global_step/sec: 22.5755
INFO:tensorflow:loss = 0.13883293, step = 54600 (4.430 sec)
INFO:tensorflow:global_step/sec: 22.9275
INFO:tensorflow:loss = 0.047551863, step = 54700 (4.362 sec)
INFO:tensorflow:global_step/sec: 23.3764
INFO:tensorflow:loss = 0.03493376, step = 54800 (4.280 sec)
INFO:tensorflow:global_step/sec: 23.4074
INFO:tensorflow:loss = 0.16948824, step = 54900 (4.270 sec)
INFO:tensorflow:global_step/sec: 23.0918
INFO:tensorflow:loss = 0.0481989, step = 55000 (4.331 sec)
INFO:tensorflow:global_step/sec: 23.3207
INFO:tensorflow:loss = 0.5514208, step = 55100 (4.288 sec)
INFO:tensorflow:global_step/sec: 23.3547
INFO:tensorflow:loss = 0.05704186, step = 55200 (4.282 sec)
INFO:tensorflow:global_step/sec: 22.2309
INFO:tensorflow:loss = 0.21471198, step = 55300 (4.498 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:global_step/sec: 23.4244
INFO:tensorflow:loss = 0.030759243, step = 62300 (4.269 sec)
INFO:tensorflow:global_step/sec: 22.8653
INFO:tensorflow:loss = 0.009245032, step = 62400 (4.373 sec)
INFO:tensorflow:global_step/sec: 23.6042
INFO:tensorflow:loss = 0.285545, step = 62500 (4.238 sec)
INFO:tensorflow:global_step/sec: 23.2702
INFO:tensorflow:loss = 0.008655027, step = 62600 (4.296 sec)
INFO:tensorflow:global_step/sec: 22.8951
INFO:tensorflow:loss = 0.04414434, step = 62700 (4.368 sec)
INFO:tensorflow:global_step/sec: 22.9884
INFO:tensorflow:loss = 0.44256127, step = 62800 (4.350 sec)
INFO:tensorflow:global_step/sec: 23.0984
INFO:tensorflow:loss = 0.043143403, step = 62900 (4.329 sec)
INFO:tensorflow:global_step/sec: 23.0968
INFO:tensorflow:loss = 0.015672868, step = 63000 (4.330 sec)
INFO:tensorflow:global_step/sec: 23.4789
INFO:tensorflow:loss = 0.031264067, step = 63100 (4.259 sec)
INFO:tensorflow:global_step/sec: 22.902
INFO:tensorflow:loss = 0.095179155, step = 6320

INFO:tensorflow:global_step/sec: 21.9153
INFO:tensorflow:loss = 0.08628969, step = 70400 (4.563 sec)
INFO:tensorflow:global_step/sec: 22.9719
INFO:tensorflow:loss = 0.082790576, step = 70500 (4.353 sec)
INFO:tensorflow:global_step/sec: 23.2786
INFO:tensorflow:loss = 0.0018721009, step = 70600 (4.296 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 70701...
INFO:tensorflow:Saving checkpoints for 70701 into data_out/checkpoint/cnn_model\model.ckpt.
Instructions for updating:
Use standard file APIs to delete files with this prefix.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 70701...
INFO:tensorflow:global_step/sec: 21.9343
INFO:tensorflow:loss = 0.0010360716, step = 70700 (4.559 sec)
INFO:tensorflow:global_step/sec: 23.0676
INFO:tensorflow:loss = 0.021069732, step = 70800 (4.335 sec)
INFO:tensorflow:global_step/sec: 21.9031
INFO:tensorflow:loss = 0.002062063, step = 70900 (4.566 sec)
INFO:tensorflow:global_step/sec: 23.1905
INFO:tensorfl

INFO:tensorflow:global_step/sec: 22.3262
INFO:tensorflow:loss = 0.0020609216, step = 77900 (4.479 sec)
INFO:tensorflow:global_step/sec: 22.5222
INFO:tensorflow:loss = 0.00013023007, step = 78000 (4.440 sec)
INFO:tensorflow:global_step/sec: 21.6919
INFO:tensorflow:loss = 0.003585413, step = 78100 (4.610 sec)
INFO:tensorflow:global_step/sec: 23.3752
INFO:tensorflow:loss = 0.060257103, step = 78200 (4.278 sec)
INFO:tensorflow:global_step/sec: 22.4591
INFO:tensorflow:loss = 0.13317955, step = 78300 (4.453 sec)
INFO:tensorflow:global_step/sec: 22.8646
INFO:tensorflow:loss = 0.0015449559, step = 78400 (4.374 sec)
INFO:tensorflow:global_step/sec: 22.5274
INFO:tensorflow:loss = 0.16264269, step = 78500 (4.439 sec)
INFO:tensorflow:global_step/sec: 22.6911
INFO:tensorflow:loss = 0.02773121, step = 78600 (4.407 sec)
INFO:tensorflow:global_step/sec: 22.5035
INFO:tensorflow:loss = 0.010527313, step = 78700 (4.444 sec)
INFO:tensorflow:global_step/sec: 22.4971
INFO:tensorflow:loss = 0.032635126, step

In [12]:
# Evaluate on the held-out split; `valid` receives the dict returned by evaluate().
valid = est.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-12-11T17:15:17Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from data_out/checkpoint/cnn_model\model.ckpt-84380
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.44451s
INFO:tensorflow:Finished evaluation at 2020-12-11-17:15:18
INFO:tensorflow:Saving dict for global step 84380: acc = 0.79366666, global_step = 84380, loss = 1.0436769
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 84380: data_out/checkpoint/cnn_model\model.ckpt-84380


In [13]:
# File names of the preprocessed test split, resolved against DATA_IN_PATH.
INPUT_TEST_DATA = 'nsmc_test_input.npy'
LABEL_TEST_DATA = 'nsmc_test_label.npy'

# Open each file in a `with` block so the handle is closed once the array
# has been read, instead of being left open.
with open(DATA_IN_PATH + INPUT_TEST_DATA, 'rb') as test_input_file:
    test_input_data = np.load(test_input_file)
with open(DATA_IN_PATH + LABEL_TEST_DATA, 'rb') as test_label_file:
    test_label_data = np.load(test_label_file)

In [14]:
def test_input_fn():
    """Build the test-set input pipeline and return its next-element tensors.

    The test arrays are read once, in order, in batches of ``BATCH_SIZE`` and
    mapped through ``mapping_fn``.

    Returns:
        The result of ``iterator.get_next()``: a ``(features, labels)`` pair
        whose ``features`` is a dict with the single key ``'x'``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((test_input_data, test_label_data))
    # Shared constant instead of a repeated literal batch size.
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.map(mapping_fn)
    # Use the module-level helper rather than the deprecated
    # Dataset.make_one_shot_iterator() method (which logs a deprecation notice).
    iterator = tf.data.make_one_shot_iterator(dataset)

    return iterator.get_next()

In [15]:
# Evaluate on the test split. Despite its name, `predict` receives the dict
# returned by evaluate(), not per-example predictions.
predict = est.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-12-11T17:22:53Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from data_out/checkpoint/cnn_model\model.ckpt-84380
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 1.15554s
INFO:tensorflow:Finished evaluation at 2020-12-11-17:22:54
INFO:tensorflow:Saving dict for global step 84380: acc = 0.79318, global_step = 84380, loss = 1.0628787
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 84380: data_out/checkpoint/cnn_model\model.ckpt-84380
