In [1]:
"""
1. Data generator
    a. Load vocab
    b. Loads image features
    c. provide data for training
2. Builds image caption model.
3. Trains the model
"""

import os
import sys
import tensorflow as tf
from tensorflow import gfile
from tensorflow import logging
import pprint
import pickle
import numpy as np
import random
import math

input_description_file = './flickr30k/results_20130124.token'
input_img_feature_dir = './download_inception_v3_features/'
input_vocab_file = './flickr30k/vocab.txt'
output_dir = './flickr30k/local_run'

if not gfile.Exists(output_dir):
    gfile.MakeDirs(output_dir)

def get_default_params():
    return tf.contrib.training.HParams(
        num_vocab_word_threshold = 3,
        num_embedding_nodes = 32,
        num_timesteps = 10,
        num_lstm_nodes = [64, 64],
        num_lstm_layers = 2,
        num_fc_nodes = 32,
        batch_size = 80,
        cell_type = "lstm",
        clip_lstm_grads = 1.0,
        learning_rate = 0.001,
        keep_prob = 0.8,
        log_frequent = 100,
        save_frequent = 1000,
    )

hps = get_default_params()

W0819 17:00:06.412602 140572283238208 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
class Vocab(object):
    def __init__(self, filename, word_num_threshold):
        self._id_to_word = {}
        self._word_to_id = {}
        self._unk = -1
        self._eos = -1
        self._word_num_threshold= word_num_threshold
        self._read_dict(filename)
        
    def _read_dict(self, filename):
        with gfile.GFile(filename, 'r') as f:
            lines = f.readlines()
        for line in lines:
            word, occurrence = line.strip('\r\n').split('\t')
            occurrence = int(occurrence)
            if occurrence < self._word_num_threshold:
                continue
            idx = len(self._id_to_word)
            if word == '<UNK>':
                self._unk = idx
            elif word == '.':
                self._eos = idx
            if word in self._word_to_id or idx in self._id_to_word:
                raise Exception("duplicate words in vocab.")
            self._word_to_id[word] = idx
            self._id_to_word[idx] = word
            
    @property
    def unk(self):
        return self._unk
    
    @property
    def eos(self):
        return self._eos
    
    def word_to_id(self, word):
        return self._word_to_id.get(word, self.unk)
    
    def id_to_word(self, word_id):
        return self._id_to_word.get(word_id, '<UNK>')
    
    def size(self):
        return len(self._id_to_word)
    
    def encode(self, sentence):
        return [self.word_to_id(word) for word in sentence.split(' ')]
    
    def decode(self, sentence_id):
        words = [self.id_to_word(word_id) for word_id in sentence_id]
        return ' '.join(words)
    
vocab = Vocab(input_vocab_file, hps.num_vocab_word_threshold)
vocab_size = vocab.size()
print("vocab_size: %d" % vocab_size)

pprint.pprint(vocab.encode("I have a dream."))
pprint.pprint(vocab.decode([5, 10, 9, 20]))

vocab_size: 10875
[1494, 389, 1, 0]
'the of man white'


In [3]:
def parse_token_file(token_file):
    """Parses images description file."""
    img_name_to_tokens = {}
    with gfile.GFile(token_file, 'r') as f:
        lines = f.readlines()
        
    for line in lines:
        img_id, description = line.strip('\r\n').split('\t')
        img_name, _ = img_id.split('#')
        img_name_to_tokens.setdefault(img_name, [])
        img_name_to_tokens[img_name].append(description)
    return img_name_to_tokens

def convert_token_to_id(img_name_to_tokens, vocab):
    """Converts tokens of each description of imgs to id."""
    img_name_to_tokens_id = {}
    for img_name in img_name_to_tokens:
        img_name_to_tokens_id.setdefault(img_name, [])
        for description in img_name_to_tokens[img_name]:
            token_ids = vocab.encode(description)
            img_name_to_tokens_id[img_name].append(token_ids)
    return img_name_to_tokens_id

img_name_to_tokens = parse_token_file(input_description_file)
img_name_to_tokens_id = convert_token_to_id(img_name_to_tokens, vocab)

print("num of all imgs: %d" % len(img_name_to_tokens))
pprint.pprint(img_name_to_tokens['2778832101.jpg'])
print("num of all imgs: %d" % len(img_name_to_tokens_id))
pprint.pprint(img_name_to_tokens_id['2778832101.jpg'])

num of all imgs: 31783
['A man in jeans is reclining on a green metal bench along a busy sidewalk and '
 'crowded street .',
 'A white male with a blue sweater and gray pants laying on a sidewalk bench .',
 'A man in a blue shirt and gray pants is sleeping on a sidewalk bench .',
 'A person is sleeping on a bench , next to cars .',
 'A man sleeping on a bench in a city area .']
num of all imgs: 31783
[[3, 9, 4, 132, 8, 3532, 6, 1, 48, 337, 146, 139, 1, 244, 93, 7, 380, 36, 2],
 [3, 20, 179, 11, 1, 26, 284, 7, 120, 128, 297, 6, 1, 93, 146, 2],
 [3, 9, 4, 1, 26, 21, 7, 120, 128, 8, 340, 6, 1, 93, 146, 2],
 [3, 63, 8, 340, 6, 1, 146, 12, 70, 15, 518, 2],
 [3, 9, 340, 6, 1, 146, 4, 1, 112, 171, 2]]


In [4]:
class ImageCaptionData(object):
    """Provides data for image caption model."""
    def __init__(self, 
                 img_name_to_tokens_id, 
                 img_feature_dir, 
                 num_timesteps, 
                 vocab,
                 deterministic = False):
        self._vocab = vocab
        self._img_name_to_tokens_id = img_name_to_tokens_id
        self._num_timesteps = num_timesteps
        self._deterministic = deterministic
        self._indicator = 0
        
        self._img_feature_filenames = []
        self._img_feature_data = []
        
        self._all_img_feature_filepaths = []
        for filename in gfile.ListDirectory(img_feature_dir):
            self._all_img_feature_filepaths.append(
                os.path.join(img_feature_dir, filename))
        pprint.pprint(self._all_img_feature_filepaths)
        self._load_img_feature_pickle()
        
        if not self._deterministic:
            self._random_shuffle()
            
    def _load_img_feature_pickle(self):
        """Loads img feature data from pickle."""
        for filepath in self._all_img_feature_filepaths:
            print("loading %s" % filepath)
            with gfile.GFile(filepath, 'rb') as f:
                filenames, features = pickle.load(f)
                self._img_feature_filenames += filenames
                self._img_feature_data.append(features)
        # [#(1000, 1, 1, 2048), #(1000, 1, 1, 2048)] -> #(2000, 1, 1, 2048)
        self._img_feature_data = np.vstack(self._img_feature_data)
        origin_shape = self._img_feature_data.shape
        self._img_feature_data = np.reshape(
            self._img_feature_data,
            (origin_shape[0], origin_shape[3]))
        self._img_feature_filenames = np.asarray(self._img_feature_filenames)
        print(self._img_feature_data.shape)
        print(self._img_feature_filenames.shape)
        
    def size(self):
        return len(self._img_feature_filenames)
        
    def img_feature_size(self):
        return self._img_feature_data.shape[1]
        
    def _random_shuffle(self):
        """Shuffle data randomly."""
        p = np.random.permutation(self.size())
        self._img_feature_filenames = self._img_feature_filenames[p]
        self._img_feature_data = self._img_feature_data[p]
    
    def _img_desc(self, batch_filenames):
        """Gets descriptions for filenames in batch."""
        batch_sentence_ids = []
        batch_weights = []
        for filename in batch_filenames:
            token_ids_set = self._img_name_to_tokens_id[filename]
            chosen_token_ids = random.choice(token_ids_set)
            chosen_token_ids_length = len(chosen_token_ids)
            
            weight = [1 for i in range(chosen_token_ids_length)]
            if chosen_token_ids_length >= self._num_timesteps:
                chosen_token_ids = chosen_token_ids[0: self._num_timesteps]
                weight = weight[0: self._num_timesteps]
            else:
                remaining_length = self._num_timesteps - chosen_token_ids_length
                chosen_token_ids += [self._vocab.eos for i in range(remaining_length)]
                weight += [0 for i in range(remaining_length)]
            batch_sentence_ids.append(chosen_token_ids)
            batch_weights.append(weight)
        batch_sentence_ids = np.asarray(batch_sentence_ids)
        batch_weights = np.asarray(batch_weights)
        return batch_sentence_ids, batch_weights
            
    def next_batch(self, batch_size):
        """Returns next batch data."""
        end_indicator = self._indicator + batch_size
        if end_indicator > self.size():
            if not self._deterministic:
                self._random_shuffle()
            self._indicator = 0
            end_indicator = self._indicator + batch_size
        assert end_indicator < self.size()
        
        batch_filenames = self._img_feature_filenames[self._indicator: end_indicator]
        batch_img_features = self._img_feature_data[self._indicator: end_indicator]
        # sentence_ids: [100, 101, 102, 10, 3, 0, 0, 0] -> [1, 1, 1, 1, 1, 0, 0, 0]
        batch_sentence_ids, batch_weights = self._img_desc(batch_filenames)
        self._indicator = end_indicator
        return batch_img_features, batch_sentence_ids, batch_weights, batch_filenames

caption_data = ImageCaptionData(img_name_to_tokens_id, 
                                input_img_feature_dir,
                                hps.num_timesteps,
                                vocab)
img_feature_dim = caption_data.img_feature_size()
caption_data_size = caption_data.size()
print("img_feature_dim: %d" % img_feature_dim)
print("caption_data_size: %d" % caption_data_size)

batch_img_features, batch_sentences_ids, batch_weight, batch_img_names = caption_data.next_batch(5)
pprint.pprint(batch_img_features)
pprint.pprint(batch_sentences_ids)
pprint.pprint(batch_weight)
pprint.pprint(batch_img_names)

['./download_inception_v3_features/image_feature-30.pickle',
 './download_inception_v3_features/image_feature-28.pickle',
 './download_inception_v3_features/image_feature-19.pickle',
 './download_inception_v3_features/image_feature-0.pickle',
 './download_inception_v3_features/image_feature-16.pickle',
 './download_inception_v3_features/image_feature-17.pickle',
 './download_inception_v3_features/image_feature-31.pickle',
 './download_inception_v3_features/image_feature-5.pickle',
 './download_inception_v3_features/image_feature-22.pickle',
 './download_inception_v3_features/image_feature-27.pickle',
 './download_inception_v3_features/image_feature-2.pickle',
 './download_inception_v3_features/image_feature-1.pickle',
 './download_inception_v3_features/image_feature-24.pickle',
 './download_inception_v3_features/image_feature-7.pickle',
 './download_inception_v3_features/image_feature-15.pickle',
 './download_inception_v3_features/image_feature-26.pickle',
 './download_inception_v3_fea

In [5]:
def create_rnn_cell(hidden_dim, cell_type):
    """Return specific cell according to cell_type."""
    if cell_type == 'lstm':
        return tf.contrib.rnn.BasicLSTMCell(hidden_dim, state_is_tuple=True)
    elif cell_type == 'gru':
        return tf.contrib.rnn.GRUCell(hidden_dim)
    else:
        raise Exception("%s type has not been supported." % cell_type)

def dropout(cell, keep_prob):
    """Wrap cell with dropout."""
    return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob = keep_prob)

def get_train_model(hps, vocab_size, img_feature_dim):
    num_timesteps = hps.num_timesteps
    batch_size = hps.batch_size
    
    img_feature = tf.placeholder(tf.float32, (batch_size, img_feature_dim))
    
    sentence = tf.placeholder(tf.int32, (batch_size, num_timesteps))
    
    mask = tf.placeholder(tf.int32, (batch_size, num_timesteps))
    
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    global_step = tf.Variable(tf.zeros([], tf.int32), name="global_step", trainable=False)
    
    # prediction process:
    # sentence: [a, b, c, d, e]
    # input: [img, a, b ,c ,d]
    # img_feature: [0.4, 0.3, 10, 2]
    # predict #1: img_feature -> embedding_img -> lstm -> (a)
    # predict #2: a -> embedding_word -> lstm -> (b)
    # predict #3: b2 -> embedding_word -> lstm -> (c)
    # ...
    
    # Sets up embedding layer.
    embedding_initializer = tf.random_uniform_initializer(-1.0, 1.0)
    with tf.variable_scope('embeddings', initializer=embedding_initializer):
        embeddings = tf.get_variable(
            'embeddings',
            [vocab_size, hps.num_embedding_nodes],
            tf.float32)
        # embed_token_ids: [batch_size, num_timesstep - 1, num_embedding_nodes]
        embed_token_ids = tf.nn.embedding_lookup(
            embeddings,
            sentence[:, 0: num_timesteps - 1])
    
    img_feature_embed_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    
    with tf.variable_scope('img_feature_embed', initializer=img_feature_embed_init):
        # img_feature: [batch_size, img_feature_dim]
        # embed_img: [batch_size, num_embedding_nodes]
        embed_img = tf.layers.dense(img_feature, hps.num_embedding_nodes)
        
        # embed_img: [batch_size, 1, num_embedding_nodes]
        embed_img = tf.expand_dims(embed_img, 1)
        # embed_input: [batch_size, num_timesteps, num_embedding_nodes]
        embed_inputs = tf.concat([embed_img, embed_token_ids], axis=1)
        
    # Sets up rnn network
    scale = 1.0 / math.sqrt(hps.num_embedding_nodes + hps.num_lstm_nodes[-1]) / 4.0
    rnn_init = tf.random_uniform_initializer(-scale, scale)
    with tf.variable_scope('lstm_nn', initializer=rnn_init):
        cells = []
        for i in range(hps.num_lstm_layers):
            cell = create_rnn_cell(hps.num_lstm_nodes[i], hps.cell_type)
            cell = dropout(cell, keep_prob)
            cells.append(cell)
        cell = tf.contrib.rnn.MultiRNNCell(cells)
        
        init_state = cell.zero_state(hps.batch_size, tf.float32)
        # rnn_outputs: [batch_size, num_timestep, hps.num_lstm_node[-1]]
        rnn_outputs, _ = tf.nn.dynamic_rnn(cell, embed_inputs, initial_state=init_state)
    # Sets up fully-connected layer.
    fc_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    with tf.variable_scope('fc', initializer=fc_init):
        rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hps.num_lstm_nodes[-1]])
        fc1 = tf.layers.dense(rnn_outputs_2d, hps.num_fc_nodes, name='fc1')
        fc1_dropout = tf.contrib.layers.dropout(fc1, keep_prob)
        fc1_relu = tf.nn.relu(fc1_dropout)
        logits = tf.layers.dense(fc1_relu, vocab_size, name="logits")
    # Calculates loss
    with tf.variable_scope('loss'):
        sentence_flatten = tf.reshape(sentence, [-1])
        mask_flatten = tf.reshape(mask, [-1])
        mask_sum = tf.reduce_sum(mask_flatten)
        
        softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=sentence_flatten)
        weighted_softmax_loss = tf.multiply(softmax_loss, tf.cast(mask_flatten, tf.float32))
        loss = tf.reduce_sum(weighted_softmax_loss) / tf.cast(mask_sum, tf.float32)
        prediction = tf.argmax(logits, 1, output_type=tf.int32)
        correct_prediction = tf.equal(prediction, sentence_flatten)
        weighted_correct_prediction = tf.multiply(tf.cast(correct_prediction, tf.float32), tf.cast(mask_flatten, tf.float32))
        accuracy = tf.reduce_sum(weighted_correct_prediction) / tf.cast(mask_sum, tf.float32)
        tf.summary.scalar('loss', loss)
        
    # Defines train op.
    with tf.variable_scope('train_op'):
        tvars = tf.trainable_variables()
        for var in tvars:
            print("variable names: %s" % var.name)
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), hps.clip_lstm_grads)
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
        
    return ((img_feature, sentence, mask, keep_prob), (loss, accuracy, train_op), global_step)

placeholders, matrics, global_step = get_train_model(hps, vocab_size, img_feature_dim)
img_feature, sentence, mask, keep_prob = placeholders
loss, accuracy, train_op = matrics

summary_op = tf.summary.merge_all()
init_op = tf.global_variables_initializer()

saver = tf.train.Saver(max_to_keep=10)

W0819 17:00:08.677831 140572283238208 deprecation.py:323] From /root/anaconda3/envs/deep_learning/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py:507: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
W0819 17:00:08.678607 140572283238208 deprecation.py:323] From <ipython-input-5-a32b20da60aa>:53: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dense instead.
W0819 17:00:08.860558 140572283238208 deprecation.py:323] From <ipython-input-5-a32b20da60aa>:4: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that i

variable names: embeddings/embeddings:0
variable names: img_feature_embed/dense/kernel:0
variable names: img_feature_embed/dense/bias:0
variable names: lstm_nn/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0
variable names: lstm_nn/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0
variable names: lstm_nn/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0
variable names: lstm_nn/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0
variable names: fc/fc1/kernel:0
variable names: fc/fc1/bias:0
variable names: fc/logits/kernel:0
variable names: fc/logits/bias:0


W0819 17:00:09.790738 140572283238208 deprecation.py:323] From /root/anaconda3/envs/deep_learning/lib/python3.7/site-packages/tensorflow/python/ops/clip_ops.py:286: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [15]:
training_steps = 100000

with tf.Session() as sess:
    sess.run(init_op)
    writer = tf.summary.FileWriter(output_dir, sess.graph)
    for i in range(training_steps):
        (batch_img_features,
         batch_sentence_ids,
         batch_weights, _) = caption_data.next_batch(hps.batch_size)
        input_vals = (batch_img_features, batch_sentence_ids, batch_weights, hps.keep_prob)
        feed_dict = dict(zip(placeholders, input_vals))
        fetches = (global_step, loss, accuracy, train_op)
        should_log = (i + 1) % hps.log_frequent == 0
        should_save = (i + 1) % hps.save_frequent == 0
        if should_log:
            fetches += tuple([summary_op])
        
        outputs = sess.run(fetches, feed_dict = feed_dict)
        global_step_val, loss_val, accuracy_val = outputs[0:3]
        if should_log:
            summary_str = outputs[-1]
            writer.add_summary(summary_str, global_step_val)
            print("Step: %5d, loss: %3.3f, accu:%3.3f" % (global_step_val, loss_val, accuracy_val))
        
        if should_save:
            model_save_file = os.path.join(output_dir, "image_caption")
            print("Step: %5d, model saved" % global_step_val)
            saver.save(sess, model_save_file, global_step=global_step_val)

Step:   100, loss: 5.971, accu:0.142
Step:   200, loss: 5.251, accu:0.173
Step:   300, loss: 5.238, accu:0.196
Step:   400, loss: 5.154, accu:0.198
Step:   500, loss: 5.030, accu:0.229
Step:   600, loss: 4.717, accu:0.229
Step:   700, loss: 4.638, accu:0.270
Step:   800, loss: 4.547, accu:0.282
Step:   900, loss: 4.667, accu:0.276
Step:  1000, loss: 4.675, accu:0.246
Step:  1000, model saved
Step:  1100, loss: 4.558, accu:0.273
Step:  1200, loss: 4.037, accu:0.322
Step:  1300, loss: 4.272, accu:0.317
Step:  1400, loss: 4.565, accu:0.276
Step:  1500, loss: 4.163, accu:0.302
Step:  1600, loss: 4.180, accu:0.300
Step:  1700, loss: 4.168, accu:0.304
Step:  1800, loss: 4.291, accu:0.289
Step:  1900, loss: 3.837, accu:0.311
Step:  2000, loss: 4.166, accu:0.280
Step:  2000, model saved
Step:  2100, loss: 4.357, accu:0.256
Step:  2200, loss: 4.074, accu:0.268
Step:  2300, loss: 4.345, accu:0.283
Step:  2400, loss: 4.237, accu:0.287
Step:  2500, loss: 4.052, accu:0.305
Step:  2600, loss: 4.018,

W0819 17:41:02.263978 140572283238208 deprecation.py:323] From /root/anaconda3/envs/deep_learning/lib/python3.7/site-packages/tensorflow/python/training/saver.py:960: remove_checkpoint (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to delete files with this prefix.


Step: 11000, loss: 3.743, accu:0.325
Step: 11000, model saved
Step: 11100, loss: 3.751, accu:0.321
Step: 11200, loss: 3.499, accu:0.350
Step: 11300, loss: 3.675, accu:0.327
Step: 11400, loss: 3.437, accu:0.368
Step: 11500, loss: 3.807, accu:0.306
Step: 11600, loss: 3.678, accu:0.305
Step: 11700, loss: 3.703, accu:0.311
Step: 11800, loss: 3.703, accu:0.317
Step: 11900, loss: 3.486, accu:0.352
Step: 12000, loss: 3.626, accu:0.325
Step: 12000, model saved
Step: 12100, loss: 3.444, accu:0.357
Step: 12200, loss: 3.575, accu:0.334
Step: 12300, loss: 3.594, accu:0.346
Step: 12400, loss: 3.420, accu:0.339
Step: 12500, loss: 3.658, accu:0.346
Step: 12600, loss: 3.568, accu:0.361
Step: 12700, loss: 3.762, accu:0.338
Step: 12800, loss: 3.575, accu:0.334
Step: 12900, loss: 3.360, accu:0.354
Step: 13000, loss: 3.482, accu:0.355
Step: 13000, model saved
Step: 13100, loss: 3.585, accu:0.363
Step: 13200, loss: 3.511, accu:0.369
Step: 13300, loss: 3.494, accu:0.339
Step: 13400, loss: 3.694, accu:0.310


Step: 31900, loss: 3.206, accu:0.389
Step: 32000, loss: 3.347, accu:0.361
Step: 32000, model saved
Step: 32100, loss: 3.167, accu:0.389
Step: 32200, loss: 3.427, accu:0.347
Step: 32300, loss: 3.297, accu:0.363
Step: 32400, loss: 3.359, accu:0.380
Step: 32500, loss: 3.479, accu:0.326
Step: 32600, loss: 3.272, accu:0.359
Step: 32700, loss: 3.309, accu:0.373
Step: 32800, loss: 3.216, accu:0.377
Step: 32900, loss: 3.424, accu:0.350
Step: 33000, loss: 3.367, accu:0.361
Step: 33000, model saved
Step: 33100, loss: 3.464, accu:0.360
Step: 33200, loss: 3.300, accu:0.357
Step: 33300, loss: 3.637, accu:0.329
Step: 33400, loss: 3.394, accu:0.344
Step: 33500, loss: 3.303, accu:0.365
Step: 33600, loss: 3.275, accu:0.354
Step: 33700, loss: 3.563, accu:0.304
Step: 33800, loss: 3.459, accu:0.364
Step: 33900, loss: 3.423, accu:0.354
Step: 34000, loss: 3.232, accu:0.381
Step: 34000, model saved
Step: 34100, loss: 3.384, accu:0.360
Step: 34200, loss: 3.416, accu:0.344
Step: 34300, loss: 3.293, accu:0.347


Step: 52700, loss: 3.277, accu:0.369
Step: 52800, loss: 3.428, accu:0.361
Step: 52900, loss: 3.290, accu:0.363
Step: 53000, loss: 3.253, accu:0.361
Step: 53000, model saved
Step: 53100, loss: 3.448, accu:0.344
Step: 53200, loss: 3.369, accu:0.356
Step: 53300, loss: 3.257, accu:0.352
Step: 53400, loss: 3.236, accu:0.381
Step: 53500, loss: 3.178, accu:0.393
Step: 53600, loss: 3.424, accu:0.333
Step: 53700, loss: 3.405, accu:0.356
Step: 53800, loss: 3.225, accu:0.354
Step: 53900, loss: 3.275, accu:0.363
Step: 54000, loss: 3.511, accu:0.344
Step: 54000, model saved
Step: 54100, loss: 3.220, accu:0.364
Step: 54200, loss: 3.325, accu:0.366
Step: 54300, loss: 3.289, accu:0.373
Step: 54400, loss: 3.516, accu:0.359
Step: 54500, loss: 3.325, accu:0.364
Step: 54600, loss: 3.289, accu:0.365
Step: 54700, loss: 3.429, accu:0.333
Step: 54800, loss: 3.391, accu:0.356
Step: 54900, loss: 3.405, accu:0.350
Step: 55000, loss: 3.574, accu:0.347
Step: 55000, model saved
Step: 55100, loss: 3.220, accu:0.363


Step: 73500, loss: 3.155, accu:0.349
Step: 73600, loss: 3.031, accu:0.393
Step: 73700, loss: 3.217, accu:0.368
Step: 73800, loss: 3.191, accu:0.371
Step: 73900, loss: 3.160, accu:0.368
Step: 74000, loss: 3.377, accu:0.338
Step: 74000, model saved
Step: 74100, loss: 3.155, accu:0.368
Step: 74200, loss: 3.012, accu:0.406
Step: 74300, loss: 3.164, accu:0.379
Step: 74400, loss: 3.151, accu:0.387
Step: 74500, loss: 3.131, accu:0.374
Step: 74600, loss: 3.231, accu:0.370
Step: 74700, loss: 3.403, accu:0.343
Step: 74800, loss: 3.032, accu:0.365
Step: 74900, loss: 3.250, accu:0.380
Step: 75000, loss: 3.306, accu:0.336
Step: 75000, model saved
Step: 75100, loss: 3.222, accu:0.390
Step: 75200, loss: 3.357, accu:0.354
Step: 75300, loss: 3.338, accu:0.360
Step: 75400, loss: 3.176, accu:0.354
Step: 75500, loss: 3.439, accu:0.329
Step: 75600, loss: 3.111, accu:0.357
Step: 75700, loss: 3.084, accu:0.380
Step: 75800, loss: 3.261, accu:0.363
Step: 75900, loss: 3.220, accu:0.384
Step: 76000, loss: 3.274,

Step: 94300, loss: 3.398, accu:0.336
Step: 94400, loss: 3.326, accu:0.352
Step: 94500, loss: 3.360, accu:0.334
Step: 94600, loss: 3.105, accu:0.385
Step: 94700, loss: 3.115, accu:0.382
Step: 94800, loss: 3.311, accu:0.364
Step: 94900, loss: 3.176, accu:0.370
Step: 95000, loss: 3.220, accu:0.377
Step: 95000, model saved
Step: 95100, loss: 3.339, accu:0.346
Step: 95200, loss: 3.257, accu:0.387
Step: 95300, loss: 3.145, accu:0.400
Step: 95400, loss: 3.109, accu:0.370
Step: 95500, loss: 3.278, accu:0.346
Step: 95600, loss: 3.336, accu:0.355
Step: 95700, loss: 3.130, accu:0.356
Step: 95800, loss: 3.166, accu:0.391
Step: 95900, loss: 3.369, accu:0.343
Step: 96000, loss: 3.048, accu:0.381
Step: 96000, model saved
Step: 96100, loss: 3.146, accu:0.371
Step: 96200, loss: 3.196, accu:0.382
Step: 96300, loss: 3.313, accu:0.347
Step: 96400, loss: 3.275, accu:0.387
Step: 96500, loss: 3.091, accu:0.359
Step: 96600, loss: 3.151, accu:0.365
Step: 96700, loss: 3.331, accu:0.359
Step: 96800, loss: 3.265,