In [2]:
import tensorflow as tf
import tensorflow.contrib.seq2seq as seq2seq
from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple, GRUCell, MultiRNNCell
from tensorflow.contrib.rnn.python.ops.rnn_cell import _linear

from lib import data_utils, model_utils
from configs import model_config

import tensorflow as tf
import time
import math
import os
import sys

import numpy

In [3]:
#def __init__(self, config, use_lstm=True, forward_only=False,  attention=False ,sess=None):

config = model_config.Config()
use_lstm=True 
forward_only=False
attention=True

#config에서 모델의 설정값을 가져옵니다.
vocab_size = config.vocab_size
enc_hidden_size = config.enc_hidden_size
enc_num_layers = config.enc_num_layers
dec_hidden_size = config.dec_hidden_size
dec_num_layers = config.dec_num_layers
batch_size = config.batch_size
attention = attention


#학습 과정에서 가변 learning_rate 를 적용하기 위해서 tf.Variable 타입으로 선언합니다
learning_rate = tf.Variable(float(config.learning_rate), trainable=False)
#아래 op는 learning step 에서 두번이상 loss 가 증가헀을경우 learning_rate_decay_factor 를 곱해서
#기본값인 learning_rate_decay_factor = 0.99의 경우 본 op가 한번 실행될때마다 learning_rate 가 1% 감소시키는 효과를 가저옵니다
learning_rate_decay_op = learning_rate.assign(
    learning_rate * config.learning_rate_decay_factor)
#global_step 역시 학습 과정에서 계속 증가하여야 하는 값이므로 tf.Variable 형태로 선언하고 0으로 초기화합니다
global_step = tf.Variable(0, trainable=False)

max_gradient_norm = config.max_gradient_norm
buckets = config.buckets

#RNN의 내부 구조를 LSTM과 GRU중 선택으로 분기합니다.
if use_lstm:
    single_cell1 = LSTMCell(enc_hidden_size)
    single_cell2 = LSTMCell(dec_hidden_size)
else:
    single_cell1 = GRUCell(enc_hidden_size)
    single_cell2 = GRUCell(dec_hidden_size)

#multi layers 구조의 RNN 처리를 위한 부분 입니다. tensor 1.0 ~ 1.1 구간에서 해당 구현에 대한 이슈가 존재하여 본 예제에서는
#single layers의 구현을 가정하고 진행하겠습니다.
#seq2seq 모델의 경우 2 layers 부터 학습이 매우 어려워지는 경향을 보이며 4 layers 이상은 학습이 거의 되지 않는다 라는 경험적인 의견이 많습니다.
enc_cell = MultiRNNCell([single_cell1 for _ in range(enc_num_layers)])
dec_cell = MultiRNNCell([single_cell2 for _ in range(dec_num_layers)])

#인코딩 셀과 디코딩 셀을 self로 클래스 변수로 잡아 클래스내 다른 함수에서 이를 사용할것 입니다.
encoder_cell = enc_cell
decoder_cell = dec_cell

#실제 모델의 그래프를 생성합니다.
#_make_graph(forward_only)

#tensor sever를 정의하여 모델의 학습 과정에서 파라메터과 모델구조를 저장할 수 있도록 합니다.
#saver = tf.train.Saver(tf.global_variables())

In [4]:
#def _init_data(self):
# encoder_inputs의 placeholder로 들어오는 2차원 matrix 예시 입니다.
#[
# [36, 6, 36, 6, 14, 5, 13, 35, 739, 41, 24, 103, EOS_ID],
# [3, 5, 13, 956, 3, 227, EOS_ID, GO_ID, 142, 331, 4, 17, 8, 112, 6, 155, 3, EOS_ID] , ...
#]

encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name="encoder_inputs")
encoder_inputs_length = tf.placeholder(shape=(None,), dtype=tf.int32, name="encoder_inputs_length")

# decoder_inputs의 placeholder로 들어오는 2차원 matrix 예시 입니다.
#[
# [GO_ID, 5, 15, 33, 12, 2021, 3,2274,EOS_ID],
# [GO_ID, 142, 331, 4, 17, 8, 112, 6, 155, 3, EOS_ID] , ...
#]

decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name="decoder_inputs")
decoder_inputs_length = tf.placeholder(shape=(None,), dtype=tf.int32, name="decoder_inputs_length")

#np.array 타입의 슬라이싱을 이용하여 decoder_inputs의 GO 심볼이 위치하는 부분을 제거합니다
decoder_targets = decoder_inputs[1:, :]

#input 과 output 데이터를 버킷에 사이즈에 맞춰서 잘라냅니다
#기본값은 	model.config = buckets = [(8, 15)]로 이 경우에는 input의 경우 순서열의 최대 길이가 8 output의 경우 최대길이가 15로 잘리게 됩니다
temp_encoder_inputs = encoder_inputs[:buckets[-1][0], :]
encoder_inputs2 = temp_encoder_inputs
temp_decoder_inputs = decoder_inputs[:buckets[-1][1], :]
decoder_inputs2 = temp_decoder_inputs

# cross entropy 계산을 위해서 decoder_input 패딩된 데이터와 실제 데이터를 골라내기 위한 mask 입니다.
# getbatch 에서 받은 데이터를 주입할 placeholder
target_weights = tf.placeholder(shape=(None, None), dtype=tf.float32, name="target_weights")


## data_init

seq2seq 모델에서 입력값과 출력값(예측값)을 담당하는 encoder와 decoder의 데이터 파이프라인을 정의합니다.
placeholder 라는 구조는 lazy 하게 모델을 구성하고 차후에 데이터가 입력될 구조를 정의 합니다.

![](http://i.imgur.com/Cc22Moi.png)




In [5]:
with tf.variable_scope("embedding") as scope:
    #vocab_size * hidden_size 만큼의 크기를 가진 embedding_matrix를 생성 합니다.    
    enc_embedding_matrix = tf.get_variable(
        name="enc_embedding_matrix",
        shape=[vocab_size, enc_hidden_size],
        initializer=tf.contrib.layers.xavier_initializer(),
        dtype=tf.float32)

    dec_embedding_matrix = tf.get_variable(
        name="dec_embedding_matrix",
        shape=[vocab_size, dec_hidden_size],
        initializer=tf.contrib.layers.xavier_initializer(),
        dtype=tf.float32)

    # 연속된 단어의 index값으로 표현된 입력값을 각 인덱스의 one-hot으로 표현하고 이어서
    # embedding_vector화 하는 과정을 embedding_lookup을 통해서 쉽게 처리할 수 있습니다.

    encoder_inputs_embedded = tf.nn.embedding_lookup(
        enc_embedding_matrix, encoder_inputs2)

    decoder_inputs_embedded = tf.nn.embedding_lookup(
        dec_embedding_matrix, decoder_inputs2)


## init_embeddings
one-hot으로 표현된 단어 index를 hidden size * hidden_size 사이즈 만큼의 embedding vector로 생성 합니다.

![](http://i.imgur.com/ZOj61li.png)

In [6]:
with tf.variable_scope("encoder") as scope:
    # encoder_cell을 가지고 dynamic_rnn Layer를 생성하고
    # embedding_lookup 통해서 만든 embedding_matrix를 연결합니다.
    (encoder_outputs, encoder_state) = tf.nn.dynamic_rnn(cell=encoder_cell,
                                                                inputs=encoder_inputs_embedded,
                                                                sequence_length=encoder_inputs_length,
                                                                time_major=True, dtype=tf.float32)

## init_encoder

encoder_cell을 가지고 dynamic_rnn을 통해서 Encoder 구조를 정의 합니다.

![](http://i.imgur.com/AhmowtE.png)

In [7]:
#def _init_decoder(self, forward_only):
with tf.variable_scope("decoder") as scope:
    def output_fn(outputs):
        return tf.contrib.layers.linear(outputs, vocab_size, scope=scope)

    # attention_states: size [batch_size, max_time, num_units]
    attention_states = tf.transpose(encoder_outputs, [1, 0, 2])

    #encoder_outputs 을 가지고 attention network에 필요한 값을 생성합니다.
    (attention_keys, attention_values, attention_score_fn, attention_construct_fn) = (
        seq2seq.prepare_attention(
            attention_states=attention_states,
            attention_option="bahdanau",
            num_units=dec_hidden_size))

    #prediction 전용 모델인경우
    if forward_only:
        #decoder 함수로 inference를 사용하고
        #아래의 train 과정보다 더 많은 인자를 입력 받는데
        #전처리 과정에서 진행한 embedding 과정의 역순을 일부 자동으로 처리하기 위함입니다.
        decoder_fn = seq2seq.attention_decoder_fn_inference(
            output_fn=output_fn,
            encoder_state=encoder_state,
            attention_keys=attention_keys,
            attention_values=attention_values,
            attention_score_fn=attention_score_fn,
            attention_construct_fn=attention_construct_fn,
            embeddings=dec_embedding_matrix,
            start_of_sequence_id=model_config.GO_ID,
            end_of_sequence_id=model_config.EOS_ID,
            maximum_length=buckets[-1][1],
            num_decoder_symbols=vocab_size,
        )
        # rnn_decoder Layer를 생성합니다
        # encoder를 지나서 계산된 c는 decoder_fn의 인자를 통해 decoder에 연결되고
        # decoder_outputs에서 logit 형태로 예측값을 출력합니다.
        (decoder_outputs, decoder_state, decoder_context_state) = (
            seq2seq.dynamic_rnn_decoder(
                cell=decoder_cell,
                decoder_fn=decoder_fn,
                time_major=True,
                scope=scope,
            ))
    #train 모델인경우
    else:
        #decoder 함수로 train을 사용하고
        #이전 레이어의 출력값인 encoder_state와 함께 attention 구현에 필요한 값을 입력합니다.
        decoder_fn = seq2seq.attention_decoder_fn_train(
            encoder_state=encoder_state,
            attention_keys=attention_keys,
            attention_values=attention_values,
            attention_score_fn=attention_score_fn,
            attention_construct_fn=attention_construct_fn,
            name='attention_decoder'
        )
        # rnn_decoder Layer를 생성합니다
        # encoder를 지나서 계산된 encoder_state는 decoder_fn을 인자를 통해 decoder에 연결되고
        # loos값을 계산하기 위해서 decoder_outputs을 출력합니다.
        (decoder_outputs, decoder_state, decoder_context_state) = (
            seq2seq.dynamic_rnn_decoder(
                cell=decoder_cell,
                decoder_fn=decoder_fn,
                inputs=decoder_inputs_embedded,
                sequence_length=decoder_inputs_length,
                time_major=True,
                scope=scope,
            ))

    if forward_only:
        decoder_logits = decoder_outputs
    else:
        #
        decoder_logits = output_fn(decoder_outputs)

    #vocab 사이즈 만큼의 각각 단어의 확률값으로 표현된 리스트의 순서열을 얻고
    #argmax 연산을 통해서 최대값을 찾게 됩니다.
    decoder_prediction = tf.argmax(decoder_logits, axis=-1, name='decoder_prediction')

    #loss 계산을 위한 logit과 targets을 출력
    logits = tf.transpose(decoder_logits, [1, 0, 2])
    targets = tf.transpose(decoder_targets, [1, 0])

    if not forward_only:
        #train 모델인 경우 loss 값을 정의
        seq_loss = seq2seq.sequence_loss(logits=logits, targets=targets,weights=target_weights)


## init_decoder
decoder_cell을 가지고 dynamic_rnn Layer를 생성하고 encoder 에서 나온 encoder_state를 연결합니다.  
attention의 경우 encoder 단계에서의 각 단계의 state와 output에서 중간 단계의 값을 다시 decoder 구조에 연결합니다.  
![](http://i.imgur.com/QRdjS8z.png)  

In [8]:
#params 초기화
params = tf.trainable_variables()

gradient_norms = []
updates_list = []

#Optimizer를 통하여 학습을 진행
opt = tf.train.AdamOptimizer(learning_rate)
#위에서 진행한 모든 네트워크 구조를 지나온 결과값인 loss 에서 gradients 계산하고
gradients = tf.gradients(seq_loss, params)

#clipped_gradients를 구하고
clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
#계산이나 결과에서 사용되지는 않지만 텐서보드등 에서 표현하기 위해서
gradient_norms.append(norm)
#opt.apply_gradients 를 통하여 실제 params의 업데이트를 하고
updates_list.append(opt.apply_gradients(zip(clipped_gradients, params), global_step=global_step))


## Attention Decoder

최종모델 그래프에서 optimizer(adam)에 모든 그래프 간선이 연결되는것을 확인 할 수 있습니다.

![](http://imgur.com/XKLBATg.png)

In [9]:
def step(in_session, in_encoder_inputs, in_encoder_inputs_length, in_decoder_inputs, in_decoder_inputs_length, in_target_weights):
    #placeholder 로 정의한 데이터 파이프 라인에
    #get_batch 등으로 모델 외부에서 주입할 데이터를 정의합니다.
    input_feed = {
        encoder_inputs: in_encoder_inputs,
        encoder_inputs_length: in_encoder_inputs_length,
        decoder_inputs: in_decoder_inputs,
        decoder_inputs_length: in_decoder_inputs_length,
        target_weights: in_target_weights
    }
    #학습 모델이 아니라면 출력에 필요한 OP 만 묶어서 계산하고
    if forward_only:
        output_feed = [decoder_logits, decoder_prediction, encoder_state, decoder_state]
        logits, prediction, encoder_embedding, decoder_embedding = in_session.run(output_feed, input_feed)
        return None, None, logits, prediction, encoder_embedding, decoder_embedding
    #학슴 모델이라면 updates 계산을 통해 학습을 진행 합니다.
    else:
        output_feed = [updates_list, gradient_norms, seq_loss, encoder_state, decoder_state]
        updates, gradient, loss, encoder_embedding, decoder_embedding = in_session.run(output_feed, input_feed)
        return gradient, loss, None, None, encoder_embedding, decoder_embedding



In [None]:
import tensorflow as tf
import time
import math
import os
import sys

from lib import data_utils, model_utils
from configs import model_config


#새로운 tensorflow 세션을 생성합니다 이후 sess 라는 이름으로 호출합니다
with tf.Session() as sess:

    print("Created model with fresh parameters.")
    sess.run(tf.global_variables_initializer())

    #vacab파일의 경로를 정의합니다. 여러 모델의 테스트를 위해서 사전 크기를 파일명뒤에 명시적으로 표시합니다
    #예를들어 8000 사이즈의 vovab 파일은 vocab8000.in 입니다.
    vocab_path = os.path.join(config.data_dir, 'vocab%d.in' % config.vocab_size)

    #학습 데이터의 경로를 정의합니다. 여러 모델의 테스트를 위해서 사전 크기를 파일명뒤에 명시적으로 표시합니다
    #예를들어 8000 사이즈의 사전으로 전처리된 학습 데이터는 chat_ids8000.in 입니다.
    train_data_path = os.path.join(config.data_dir, 'chat_ids%d.in' % config.vocab_size)

    #사전 파일을 로드합니다
    #vovab.get(word) 형태로 해당 워드에 맞는 사전 index 값을 반환합니다
    #vocab_rev(index) 향태로 해당 index에 맞는 워드를 반환 합니다
    vocab, vocab_rev = data_utils.load_vocabulary(vocab_path)

    #학습 데이터를 로드합니다.
    #[[[]]] 학습데이터는 3차원 리스트의 형태로 이루어지며
    # [
    #   [
    #       [2,34,523,5,3,2,5,6,32],
    #       [2,1,52,1,2,2,5,6,3],
    #	],
    #	[
    #       [2,34,523,5,3,2,5,6,32],
    #       [2,1,52,1,2,2,5,6,3]
    #	]
    # [
    # 커스터마이징이 필요한 경우가 아니면 기존의 구현을 활용하는것을 추천합니다.

    train_set = data_utils.read_data_chat(train_data_path, config)
    # print(train_set[0])


    #초기 변수를 세팅합니다.
    step_time, loss_val = 0.0, 0.0
    current_step = 0
    perplexity = 10000.0
    previous_losses = []

    
    while current_step < config.max_epoch and not forward_only:
        #스텝당 계산 시간을 검출하기 위해서 현재 시간을 기록하고
        start_time = time.time()
        #다중 버킷 모델 NMT 관련 모델에 관련된 옵션으로 여기서는 0으로 처리 합니다
        bucket_id = 0

        #데이터 유틸리티 함수를 이용하여 이번 스탭에서 모델 계산에 필요한 모든 배치 데이터를 가져 옵니다.
        step_encoder_inputs, step_encoder_inputs_length, step_decoder_inputs, step_decoder_inputs_length, step_target_weights = (data_utils.get_batch(train_set[bucket_id], config))

        #seq2seq 모델에 모든 데이터와 loss 값을 기반으로 경사도를 계산하고
        #옵티마이저가 loss를 줄이는 방향으로 step마다 모델의 파라메터를 업데이트 합니다.
        _, step_loss, _, _, enc_embedding, dec_embedding = step(sess, step_encoder_inputs, step_encoder_inputs_length, step_decoder_inputs, step_decoder_inputs_length, step_target_weights)

        #모델의 학습 상황을 확인하기 위해서 스탭당 시간을 계산하고
        step_time += (time.time() - start_time) / 100
        #loss의 변화를 합산 합니다.
        loss_val += step_loss / 100
        current_step += 1

        #100 step 마다 아래의 처리를 합니다
        #if current_step % 2 == 0:
        if current_step % 2 == 0:
            # loss *= config.max_state_length 		# Temporary purpose only
            # loss 를 가지고 perplexity 값을 계산합니다.
            perplexity = math.exp(loss_val) if loss_val < 300 else float('inf')
            print("global step %d learning rate %.4f step-time %.2f perplexity %.2f loss %.2f" %
                        (global_step.eval(), learning_rate.eval(), step_time, perplexity, loss_val))
            #스탭에서 두번 이상 연속으로 loss 값이 줄어들지 못한경우
            if len(previous_losses) > 2 and loss_val > max(previous_losses[-2:]):
                #learning_rate를 감소시켜 모델의 학습을 용이하게 합니다
                sess.run(learning_rate_decay_op)
            #100 step 당 로스를 시계열로 기록해 둡니다.
            previous_losses.append(loss_val)

            #100 step 마다 모델을 저장합니다.
            #checkoint_path = os.path.join(config.model_dir, "model.ckpt")
            #model.saver.save(sess, checkoint_path, global_step=model.global_step)
            step_time, loss_val = 0.0, 0.0
            sys.stdout.flush()


Created model with fresh parameters.
[[]]
3 24 119 27 974 9 3 3 17 4016 7885 47 420 88 3853 3 953 3 61 42 10 3 4 196 4
 70 6 8 152 24 5 79 324 45 3 6 56 16 5 13 123 45 7 4

  reading data line 1000
  reading data line 2000
  reading data line 3000
  reading data line 4000
  reading data line 5000
  reading data line 6000
  reading data line 7000
  reading data line 8000
  reading data line 9000
  reading data line 10000
  reading data line 11000
  reading data line 12000
  reading data line 13000
  reading data line 14000
  reading data line 15000
  reading data line 16000
  reading data line 17000
  reading data line 18000
  reading data line 19000
  reading data line 20000
  reading data line 21000
  reading data line 22000
  reading data line 23000
  reading data line 24000
  reading data line 25000
  reading data line 26000
  reading data line 27000
  reading data line 28000
  reading data line 29000
  reading data line 30000
  reading data line 31000
  reading data line 32000
  re

global step 60 learning rate 0.0009 step-time 0.00 perplexity 1.12 loss 0.11
global step 62 learning rate 0.0009 step-time 0.00 perplexity 1.11 loss 0.10
global step 64 learning rate 0.0009 step-time 0.00 perplexity 1.12 loss 0.11
global step 66 learning rate 0.0009 step-time 0.00 perplexity 1.12 loss 0.11
global step 68 learning rate 0.0009 step-time 0.00 perplexity 1.12 loss 0.12
global step 70 learning rate 0.0009 step-time 0.00 perplexity 1.12 loss 0.11
global step 72 learning rate 0.0009 step-time 0.00 perplexity 1.13 loss 0.12
global step 74 learning rate 0.0009 step-time 0.00 perplexity 1.10 loss 0.10
global step 76 learning rate 0.0009 step-time 0.00 perplexity 1.11 loss 0.10
global step 78 learning rate 0.0009 step-time 0.00 perplexity 1.13 loss 0.12
global step 80 learning rate 0.0009 step-time 0.00 perplexity 1.11 loss 0.11
global step 82 learning rate 0.0009 step-time 0.00 perplexity 1.12 loss 0.11
global step 84 learning rate 0.0009 step-time 0.00 perplexity 1.11 loss 0.10

global step 272 learning rate 0.0007 step-time 0.00 perplexity 1.10 loss 0.10
global step 274 learning rate 0.0007 step-time 0.00 perplexity 1.10 loss 0.10
global step 276 learning rate 0.0007 step-time 0.00 perplexity 1.10 loss 0.10
global step 278 learning rate 0.0007 step-time 0.00 perplexity 1.10 loss 0.10
global step 280 learning rate 0.0007 step-time 0.00 perplexity 1.09 loss 0.09
global step 282 learning rate 0.0007 step-time 0.00 perplexity 1.11 loss 0.11
global step 284 learning rate 0.0006 step-time 0.00 perplexity 1.11 loss 0.11
global step 286 learning rate 0.0006 step-time 0.00 perplexity 1.10 loss 0.10
global step 288 learning rate 0.0006 step-time 0.00 perplexity 1.12 loss 0.11
global step 290 learning rate 0.0006 step-time 0.00 perplexity 1.12 loss 0.11
global step 292 learning rate 0.0006 step-time 0.00 perplexity 1.11 loss 0.11
global step 294 learning rate 0.0006 step-time 0.00 perplexity 1.11 loss 0.10
global step 296 learning rate 0.0006 step-time 0.00 perplexity 1

global step 484 learning rate 0.0005 step-time 0.00 perplexity 1.10 loss 0.10
global step 486 learning rate 0.0005 step-time 0.00 perplexity 1.10 loss 0.10
global step 488 learning rate 0.0005 step-time 0.00 perplexity 1.12 loss 0.11
global step 490 learning rate 0.0005 step-time 0.00 perplexity 1.10 loss 0.09
global step 492 learning rate 0.0005 step-time 0.00 perplexity 1.12 loss 0.11
global step 494 learning rate 0.0005 step-time 0.00 perplexity 1.10 loss 0.10
global step 496 learning rate 0.0005 step-time 0.00 perplexity 1.10 loss 0.09
global step 498 learning rate 0.0005 step-time 0.00 perplexity 1.11 loss 0.10
global step 500 learning rate 0.0005 step-time 0.00 perplexity 1.11 loss 0.10
global step 502 learning rate 0.0005 step-time 0.00 perplexity 1.12 loss 0.11
global step 504 learning rate 0.0005 step-time 0.00 perplexity 1.10 loss 0.10
global step 506 learning rate 0.0005 step-time 0.00 perplexity 1.10 loss 0.10
global step 508 learning rate 0.0005 step-time 0.00 perplexity 1

global step 696 learning rate 0.0003 step-time 0.00 perplexity 1.10 loss 0.10
global step 698 learning rate 0.0003 step-time 0.00 perplexity 1.11 loss 0.11
global step 700 learning rate 0.0003 step-time 0.00 perplexity 1.10 loss 0.09
global step 702 learning rate 0.0003 step-time 0.00 perplexity 1.09 loss 0.09
global step 704 learning rate 0.0003 step-time 0.00 perplexity 1.10 loss 0.09
global step 706 learning rate 0.0003 step-time 0.00 perplexity 1.11 loss 0.11
global step 708 learning rate 0.0003 step-time 0.00 perplexity 1.10 loss 0.09
global step 710 learning rate 0.0003 step-time 0.00 perplexity 1.10 loss 0.10
global step 712 learning rate 0.0003 step-time 0.00 perplexity 1.11 loss 0.10
global step 714 learning rate 0.0003 step-time 0.00 perplexity 1.12 loss 0.12
global step 716 learning rate 0.0003 step-time 0.00 perplexity 1.10 loss 0.10
global step 718 learning rate 0.0003 step-time 0.00 perplexity 1.10 loss 0.10
global step 720 learning rate 0.0003 step-time 0.00 perplexity 1

global step 908 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.10
global step 910 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.10
global step 912 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.10
global step 914 learning rate 0.0002 step-time 0.00 perplexity 1.09 loss 0.08
global step 916 learning rate 0.0002 step-time 0.00 perplexity 1.12 loss 0.11
global step 918 learning rate 0.0002 step-time 0.00 perplexity 1.09 loss 0.08
global step 920 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.09
global step 922 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.10
global step 924 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.10
global step 926 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.11
global step 928 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.10
global step 930 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.10
global step 932 learning rate 0.0002 step-time 0.00 perplexity 1

global step 1118 learning rate 0.0002 step-time 0.00 perplexity 1.09 loss 0.09
global step 1120 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.10
global step 1122 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.10
global step 1124 learning rate 0.0002 step-time 0.00 perplexity 1.08 loss 0.08
global step 1126 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.09
global step 1128 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.11
global step 1130 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.10
global step 1132 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.10
global step 1134 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.11
global step 1136 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.09
global step 1138 learning rate 0.0002 step-time 0.00 perplexity 1.10 loss 0.09
global step 1140 learning rate 0.0002 step-time 0.00 perplexity 1.11 loss 0.10
global step 1142 learning rate 0.0002 step-time 0.00

global step 1326 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.10
global step 1328 learning rate 0.0001 step-time 0.00 perplexity 1.09 loss 0.09
global step 1330 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.10
global step 1332 learning rate 0.0001 step-time 0.00 perplexity 1.12 loss 0.11
global step 1334 learning rate 0.0001 step-time 0.00 perplexity 1.09 loss 0.09
global step 1336 learning rate 0.0001 step-time 0.00 perplexity 1.09 loss 0.09
global step 1338 learning rate 0.0001 step-time 0.00 perplexity 1.09 loss 0.09
global step 1340 learning rate 0.0001 step-time 0.00 perplexity 1.13 loss 0.12
global step 1342 learning rate 0.0001 step-time 0.00 perplexity 1.09 loss 0.08
global step 1344 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.10
global step 1346 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.11
global step 1348 learning rate 0.0001 step-time 0.00 perplexity 1.12 loss 0.11
global step 1350 learning rate 0.0001 step-time 0.00

global step 1534 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.09
global step 1536 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.09
global step 1538 learning rate 0.0001 step-time 0.00 perplexity 1.09 loss 0.09
global step 1540 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.09
global step 1542 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.11
global step 1544 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.11
global step 1546 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.10
global step 1548 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.10
global step 1550 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.10
global step 1552 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.10
global step 1554 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.10
global step 1556 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.09
global step 1558 learning rate 0.0001 step-time 0.00

global step 1742 learning rate 0.0001 step-time 0.00 perplexity 1.12 loss 0.11
global step 1744 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.10
global step 1746 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.11
global step 1748 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.10
global step 1750 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.09
global step 1752 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.11
global step 1754 learning rate 0.0001 step-time 0.00 perplexity 1.12 loss 0.11
global step 1756 learning rate 0.0001 step-time 0.00 perplexity 1.09 loss 0.09
global step 1758 learning rate 0.0001 step-time 0.00 perplexity 1.12 loss 0.11
global step 1760 learning rate 0.0001 step-time 0.00 perplexity 1.12 loss 0.11
global step 1762 learning rate 0.0001 step-time 0.00 perplexity 1.10 loss 0.09
global step 1764 learning rate 0.0001 step-time 0.00 perplexity 1.11 loss 0.10
global step 1766 learning rate 0.0001 step-time 0.00

global step 1950 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 1952 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 1954 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 1956 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 1958 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 1960 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 1962 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 1964 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.13
global step 1966 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 1968 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 1970 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 1972 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 1974 learning rate 0.0000 step-time 0.00

global step 2158 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2160 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 2162 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 2164 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2166 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2168 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2170 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 2172 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 2174 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2176 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2178 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 2180 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2182 learning rate 0.0000 step-time 0.00

global step 2366 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2368 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2370 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2372 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2374 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2376 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2378 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2380 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2382 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 2384 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2386 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2388 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 2390 learning rate 0.0000 step-time 0.00

global step 2574 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2576 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2578 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 2580 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2582 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2584 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2586 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2588 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2590 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 2592 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2594 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2596 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2598 learning rate 0.0000 step-time 0.00

global step 2782 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 2784 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 2786 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2788 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2790 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2792 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 2794 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2796 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2798 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2800 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2802 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 2804 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2806 learning rate 0.0000 step-time 0.00

global step 2990 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 2992 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2994 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 2996 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 2998 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 3000 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 3002 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3004 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3006 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3008 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 3010 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3012 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 3014 learning rate 0.0000 step-time 0.00

global step 3198 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3200 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 3202 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 3204 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3206 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3208 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3210 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3212 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 3214 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3216 learning rate 0.0000 step-time 0.00 perplexity 1.07 loss 0.07
global step 3218 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3220 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3222 learning rate 0.0000 step-time 0.00

global step 3406 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 3408 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 3410 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3412 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 3414 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 3416 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3418 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3420 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3422 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3424 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 3426 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3428 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3430 learning rate 0.0000 step-time 0.00

global step 3614 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3616 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3618 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3620 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3622 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3624 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3626 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3628 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3630 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 3632 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3634 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3636 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3638 learning rate 0.0000 step-time 0.00

global step 3822 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 3824 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 3826 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 3828 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 3830 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3832 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3834 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 3836 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 3838 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 3840 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 3842 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 3844 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 3846 learning rate 0.0000 step-time 0.00

global step 4030 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 4032 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4034 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 4036 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 4038 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4040 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4042 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 4044 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 4046 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 4048 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4050 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4052 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4054 learning rate 0.0000 step-time 0.00

global step 4238 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4240 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 4242 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4244 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4246 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4248 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4250 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4252 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 4254 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 4256 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 4258 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4260 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 4262 learning rate 0.0000 step-time 0.00

global step 4446 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 4448 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4450 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4452 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4454 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4456 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4458 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 4460 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4462 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4464 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4466 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4468 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4470 learning rate 0.0000 step-time 0.00

global step 4654 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4656 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 4658 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4660 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4662 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4664 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 4666 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4668 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 4670 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4672 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4674 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4676 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4678 learning rate 0.0000 step-time 0.00

global step 4862 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4864 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4866 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 4868 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 4870 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4872 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4874 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4876 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 4878 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 4880 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4882 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 4884 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 4886 learning rate 0.0000 step-time 0.00

global step 5070 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 5072 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5074 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5076 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5078 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5080 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5082 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5084 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 5086 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5088 learning rate 0.0000 step-time 0.00 perplexity 1.07 loss 0.07
global step 5090 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5092 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5094 learning rate 0.0000 step-time 0.00

global step 5278 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 5280 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 5282 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 5284 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5286 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5288 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 5290 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5292 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 5294 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5296 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5298 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5300 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5302 learning rate 0.0000 step-time 0.00

global step 5486 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5488 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5490 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 5492 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 5494 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 5496 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5498 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5500 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 5502 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5504 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 5506 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 5508 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 5510 learning rate 0.0000 step-time 0.00

global step 5694 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5696 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 5698 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 5700 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5702 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5704 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5706 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 5708 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5710 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5712 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 5714 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 5716 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 5718 learning rate 0.0000 step-time 0.00

global step 5902 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5904 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5906 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5908 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5910 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 5912 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 5914 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 5916 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5918 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 5920 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 5922 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 5924 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 5926 learning rate 0.0000 step-time 0.00

global step 6110 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6112 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6114 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6116 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6118 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 6120 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6122 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6124 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6126 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 6128 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6130 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6132 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 6134 learning rate 0.0000 step-time 0.00

global step 6318 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6320 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6322 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6324 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 6326 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 6328 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6330 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6332 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6334 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6336 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6338 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6340 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6342 learning rate 0.0000 step-time 0.00

global step 6526 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6528 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6530 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6532 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6534 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6536 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6538 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6540 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6542 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6544 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6546 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 6548 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6550 learning rate 0.0000 step-time 0.00

global step 6734 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6736 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6738 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6740 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 6742 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6744 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6746 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6748 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6750 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6752 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6754 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6756 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6758 learning rate 0.0000 step-time 0.00

global step 6942 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6944 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 6946 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 6948 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6950 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 6952 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 6954 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 6956 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 6958 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 6960 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6962 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 6964 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 6966 learning rate 0.0000 step-time 0.00

global step 7150 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7152 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 7154 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7156 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 7158 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 7160 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 7162 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 7164 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 7166 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7168 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7170 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7172 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 7174 learning rate 0.0000 step-time 0.00

global step 7358 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 7360 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7362 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7364 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 7366 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 7368 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7370 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 7372 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7374 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 7376 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 7378 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7380 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 7382 learning rate 0.0000 step-time 0.00

global step 7566 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 7568 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7570 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7572 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 7574 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7576 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 7578 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 7580 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 7582 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 7584 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7586 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 7588 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7590 learning rate 0.0000 step-time 0.00

global step 7772 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 7774 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7776 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 7778 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7780 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7782 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 7784 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 7786 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 7788 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 7790 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 7792 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 7794 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 7796 learning rate 0.0000 step-time 0.00

global step 7980 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 7982 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 7984 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 7986 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 7988 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7990 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7992 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 7994 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7996 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 7998 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 8000 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8002 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 8004 learning rate 0.0000 step-time 0.00

global step 8188 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8190 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8192 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 8194 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8196 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 8198 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8200 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 8202 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8204 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8206 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8208 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8210 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 8212 learning rate 0.0000 step-time 0.00

global step 8396 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 8398 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8400 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8402 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 8404 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8406 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 8408 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 8410 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8412 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8414 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8416 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8418 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8420 learning rate 0.0000 step-time 0.00

global step 8604 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8606 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 8608 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 8610 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8612 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8614 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 8616 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 8618 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 8620 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8622 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 8624 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 8626 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8628 learning rate 0.0000 step-time 0.00

global step 8812 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8814 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 8816 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8818 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8820 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 8822 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8824 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 8826 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8828 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8830 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 8832 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 8834 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 8836 learning rate 0.0000 step-time 0.00

global step 9020 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9022 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9024 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9026 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9028 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9030 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9032 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9034 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 9036 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9038 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9040 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9042 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9044 learning rate 0.0000 step-time 0.00

global step 9228 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9230 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 9232 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 9234 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 9236 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 9238 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9240 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 9242 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9244 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9246 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 9248 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 9250 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 9252 learning rate 0.0000 step-time 0.00

global step 9436 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 9438 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 9440 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 9442 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9444 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9446 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9448 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9450 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9452 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9454 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9456 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 9458 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9460 learning rate 0.0000 step-time 0.00

global step 9644 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9646 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9648 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 9650 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9652 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9654 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9656 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 9658 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9660 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 9662 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9664 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9666 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9668 learning rate 0.0000 step-time 0.00

global step 9852 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9854 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9856 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 9858 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9860 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9862 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 9864 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 9866 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9868 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 9870 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 9872 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9874 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 9876 learning rate 0.0000 step-time 0.00

global step 10060 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10062 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10064 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 10066 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10068 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10070 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 10072 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 10074 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10076 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 10078 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 10080 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10082 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10084 learning rate 0.0000 s

global step 10266 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10268 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10270 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10272 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10274 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10276 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10278 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10280 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 10282 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10284 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 10286 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10288 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10290 learning rate 0.0000 s

global step 10472 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 10474 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 10476 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10478 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 10480 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10482 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10484 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 10486 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10488 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 10490 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10492 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 10494 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10496 learning rate 0.0000 s

global step 10678 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10680 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 10682 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 10684 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10686 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10688 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 10690 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 10692 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 10694 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 10696 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 10698 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 10700 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 10702 learning rate 0.0000 s

global step 10884 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 10886 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 10888 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 10890 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10892 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 10894 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10896 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 10898 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10900 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10902 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 10904 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 10906 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 10908 learning rate 0.0000 s

global step 11090 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 11092 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 11094 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11096 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 11098 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 11100 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11102 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11104 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 11106 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 11108 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11110 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11112 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 11114 learning rate 0.0000 s

global step 11296 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 11298 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11300 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11302 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11304 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11306 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11308 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11310 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11312 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11314 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11316 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11318 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 11320 learning rate 0.0000 s

global step 11502 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11504 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11506 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11508 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 11510 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11512 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11514 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11516 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 11518 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11520 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 11522 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 11524 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 11526 learning rate 0.0000 s

global step 11708 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11710 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11712 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11714 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 11716 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11718 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 11720 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 11722 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11724 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 11726 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 11728 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11730 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11732 learning rate 0.0000 s

global step 11914 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11916 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 11918 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11920 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11922 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11924 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 11926 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11928 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 11930 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11932 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 11934 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11936 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 11938 learning rate 0.0000 s

global step 12120 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12122 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12124 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12126 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12128 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 12130 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12132 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12134 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12136 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12138 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12140 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12142 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 12144 learning rate 0.0000 s

global step 12326 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12328 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 12330 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12332 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 12334 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12336 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12338 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12340 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 12342 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 12344 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12346 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12348 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 12350 learning rate 0.0000 s

global step 12532 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12534 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 12536 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12538 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12540 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 12542 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12544 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12546 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 12548 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 12550 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12552 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12554 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12556 learning rate 0.0000 s

global step 12738 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12740 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12742 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12744 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12746 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12748 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 12750 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12752 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 12754 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12756 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 12758 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12760 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 12762 learning rate 0.0000 s

global step 12944 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 12946 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12948 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 12950 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 12952 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12954 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 12956 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12958 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 12960 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 12962 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 12964 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12966 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 12968 learning rate 0.0000 s

global step 13150 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 13152 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13154 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 13156 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13158 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13160 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13162 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13164 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13166 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13168 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13170 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13172 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13174 learning rate 0.0000 s

global step 13356 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 13358 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 13360 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13362 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 13364 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13366 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13368 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13370 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13372 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13374 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13376 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13378 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 13380 learning rate 0.0000 s

global step 13562 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13564 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13566 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13568 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13570 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 13572 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13574 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 13576 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 13578 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 13580 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 13582 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13584 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13586 learning rate 0.0000 s

global step 13768 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 13770 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13772 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13774 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13776 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13778 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13780 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13782 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13784 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13786 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 13788 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 13790 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13792 learning rate 0.0000 s

global step 13974 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 13976 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13978 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13980 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13982 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 13984 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 13986 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13988 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13990 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 13992 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13994 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 13996 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 13998 learning rate 0.0000 s

global step 14180 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 14182 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 14184 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14186 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14188 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 14190 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 14192 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 14194 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14196 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 14198 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14200 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 14202 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14204 learning rate 0.0000 s

global step 14386 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 14388 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14390 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14392 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 14394 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 14396 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 14398 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 14400 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14402 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 14404 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 14406 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14408 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14410 learning rate 0.0000 s

global step 14592 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 14594 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 14596 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 14598 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 14600 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 14602 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 14604 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14606 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14608 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 14610 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 14612 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 14614 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 14616 learning rate 0.0000 s

global step 14798 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 14800 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14802 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 14804 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14806 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 14808 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 14810 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 14812 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 14814 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14816 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 14818 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 14820 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 14822 learning rate 0.0000 s

global step 15004 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15006 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15008 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 15010 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 15012 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 15014 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15016 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15018 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 15020 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15022 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15024 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15026 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 15028 learning rate 0.0000 s

global step 15210 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15212 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15214 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15216 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15218 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15220 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15222 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 15224 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 15226 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 15228 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15230 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15232 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15234 learning rate 0.0000 s

global step 15416 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15418 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15420 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15422 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15424 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15426 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 15428 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15430 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 15432 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15434 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 15436 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15438 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 15440 learning rate 0.0000 s

global step 15622 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15624 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15626 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 15628 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 15630 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 15632 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 15634 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15636 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15638 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15640 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15642 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15644 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15646 learning rate 0.0000 s

global step 15828 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15830 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 15832 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15834 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15836 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 15838 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 15840 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 15842 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 15844 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15846 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 15848 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 15850 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 15852 learning rate 0.0000 s

global step 16034 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16036 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16038 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16040 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 16042 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16044 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16046 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 16048 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 16050 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 16052 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 16054 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16056 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16058 learning rate 0.0000 s

global step 16240 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 16242 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 16244 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16246 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16248 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16250 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 16252 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 16254 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16256 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 16258 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16260 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16262 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 16264 learning rate 0.0000 s

global step 16446 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 16448 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16450 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16452 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 16454 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 16456 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16458 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 16460 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16462 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16464 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16466 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16468 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16470 learning rate 0.0000 s

global step 16652 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16654 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 16656 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16658 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 16660 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16662 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 16664 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 16666 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16668 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16670 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16672 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16674 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16676 learning rate 0.0000 s

global step 16858 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16860 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16862 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 16864 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 16866 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 16868 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16870 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 16872 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16874 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 16876 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 16878 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 16880 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 16882 learning rate 0.0000 s

global step 17064 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17066 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17068 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17070 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17072 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17074 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 17076 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17078 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17080 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 17082 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17084 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17086 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 17088 learning rate 0.0000 s

global step 17270 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 17272 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17274 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17276 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17278 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17280 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17282 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17284 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17286 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 17288 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17290 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 17292 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17294 learning rate 0.0000 s

global step 17476 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17478 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17480 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 17482 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17484 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 17486 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17488 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17490 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17492 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17494 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 17496 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17498 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 17500 learning rate 0.0000 s

global step 17682 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17684 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17686 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17688 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17690 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 17692 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17694 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17696 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 17698 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 17700 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 17702 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 17704 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17706 learning rate 0.0000 s

global step 17888 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17890 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 17892 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17894 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17896 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 17898 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.07
global step 17900 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17902 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 17904 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17906 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 17908 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 17910 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 17912 learning rate 0.0000 s

global step 18094 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18096 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 18098 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18100 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 18102 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 18104 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 18106 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 18108 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18110 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 18112 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18114 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 18116 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18118 learning rate 0.0000 s

global step 18300 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18302 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18304 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18306 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18308 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18310 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 18312 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18314 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18316 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 18318 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 18320 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18322 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18324 learning rate 0.0000 s

global step 18506 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 18508 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18510 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 18512 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 18514 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 18516 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 18518 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 18520 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18522 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 18524 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18526 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 18528 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 18530 learning rate 0.0000 s

global step 18712 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18714 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18716 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 18718 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 18720 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18722 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 18724 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 18726 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18728 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18730 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 18732 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 18734 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18736 learning rate 0.0000 s

global step 18918 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18920 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 18922 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18924 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18926 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18928 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18930 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 18932 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 18934 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 18936 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18938 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 18940 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 18942 learning rate 0.0000 s

global step 19124 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 19126 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19128 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19130 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19132 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19134 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19136 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19138 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19140 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 19142 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 19144 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 19146 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19148 learning rate 0.0000 s

global step 19330 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19332 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 19334 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 19336 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 19338 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 19340 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 19342 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19344 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19346 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 19348 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 19350 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19352 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 19354 learning rate 0.0000 s

global step 19536 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19538 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19540 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19542 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19544 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 19546 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19548 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19550 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19552 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19554 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19556 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19558 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19560 learning rate 0.0000 s

global step 19742 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19744 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19746 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19748 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 19750 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 19752 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19754 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19756 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19758 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 19760 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19762 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19764 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19766 learning rate 0.0000 s

global step 19948 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 19950 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19952 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19954 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19956 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 19958 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19960 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19962 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19964 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 19966 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 19968 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19970 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 19972 learning rate 0.0000 s

global step 20154 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20156 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 20158 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20160 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20162 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 20164 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20166 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20168 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 20170 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 20172 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 20174 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20176 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20178 learning rate 0.0000 s

global step 20360 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20362 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 20364 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20366 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 20368 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20370 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20372 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20374 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20376 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 20378 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20380 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 20382 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 20384 learning rate 0.0000 s

global step 20566 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 20568 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 20570 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 20572 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 20574 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20576 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 20578 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20580 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20582 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20584 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20586 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 20588 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20590 learning rate 0.0000 s

global step 20772 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20774 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20776 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20778 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20780 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 20782 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 20784 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20786 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20788 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20790 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 20792 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 20794 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20796 learning rate 0.0000 s

global step 20978 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 20980 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20982 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 20984 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20986 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20988 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 20990 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 20992 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 20994 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 20996 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 20998 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 21000 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21002 learning rate 0.0000 s

global step 21184 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 21186 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21188 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 21190 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 21192 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 21194 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 21196 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21198 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 21200 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 21202 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 21204 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21206 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 21208 learning rate 0.0000 s

global step 21390 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21392 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 21394 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 21396 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 21398 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 21400 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 21402 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 21404 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 21406 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 21408 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 21410 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 21412 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 21414 learning rate 0.0000 s

global step 21596 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 21598 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 21600 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 21602 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 21604 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21606 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 21608 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21610 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 21612 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 21614 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21616 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 21618 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 21620 learning rate 0.0000 s

global step 21802 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 21804 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 21806 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 21808 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 21810 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 21812 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 21814 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21816 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 21818 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21820 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 21822 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 21824 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 21826 learning rate 0.0000 s

global step 22008 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22010 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 22012 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22014 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22016 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 22018 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22020 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22022 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22024 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 22026 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 22028 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22030 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 22032 learning rate 0.0000 s

global step 22214 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 22216 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22218 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22220 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22222 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22224 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22226 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22228 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 22230 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 22232 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 22234 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 22236 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 22238 learning rate 0.0000 s

global step 22420 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22422 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22424 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 22426 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 22428 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22430 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22432 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22434 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22436 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22438 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22440 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22442 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 22444 learning rate 0.0000 s

global step 22626 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22628 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22630 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22632 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 22634 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22636 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22638 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22640 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 22642 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 22644 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22646 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22648 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 22650 learning rate 0.0000 s

global step 22832 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 22834 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22836 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 22838 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 22840 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 22842 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 22844 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 22846 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 22848 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 22850 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 22852 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22854 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 22856 learning rate 0.0000 s

global step 23038 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 23040 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 23042 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23044 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23046 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 23048 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 23050 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23052 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23054 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 23056 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 23058 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 23060 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23062 learning rate 0.0000 s

global step 23244 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23246 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23248 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23250 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23252 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23254 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23256 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 23258 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 23260 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23262 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23264 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23266 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 23268 learning rate 0.0000 s

global step 23450 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 23452 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 23454 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23456 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23458 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23460 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 23462 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 23464 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23466 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23468 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 23470 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23472 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23474 learning rate 0.0000 s

global step 23656 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 23658 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23660 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23662 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 23664 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 23666 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23668 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23670 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 23672 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23674 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 23676 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 23678 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 23680 learning rate 0.0000 s

global step 23862 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 23864 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 23866 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 23868 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23870 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 23872 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 23874 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 23876 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 23878 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 23880 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23882 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 23884 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 23886 learning rate 0.0000 s

global step 24068 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 24070 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24072 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 24074 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24076 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24078 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24080 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24082 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24084 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24086 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24088 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24090 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24092 learning rate 0.0000 s

global step 24274 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24276 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24278 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24280 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24282 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24284 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24286 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24288 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24290 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24292 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24294 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24296 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24298 learning rate 0.0000 s

global step 24480 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24482 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24484 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24486 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24488 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24490 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 24492 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24494 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 24496 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24498 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24500 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 24502 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24504 learning rate 0.0000 s

global step 24686 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24688 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24690 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24692 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 24694 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24696 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24698 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24700 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24702 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24704 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24706 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24708 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 24710 learning rate 0.0000 s

global step 24892 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 24894 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24896 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24898 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24900 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24902 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 24904 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 24906 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 24908 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24910 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24912 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 24914 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 24916 learning rate 0.0000 s

global step 25098 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 25100 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25102 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25104 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25106 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.07
global step 25108 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 25110 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25112 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25114 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25116 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 25118 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 25120 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25122 learning rate 0.0000 s

global step 25304 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25306 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 25308 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25310 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 25312 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25314 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25316 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25318 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25320 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 25322 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25324 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25326 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 25328 learning rate 0.0000 s

global step 25510 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 25512 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25514 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 25516 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 25518 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25520 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25522 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25524 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25526 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 25528 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25530 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25532 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25534 learning rate 0.0000 s

global step 25716 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25718 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25720 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25722 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 25724 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25726 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25728 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 25730 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 25732 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 25734 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25736 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25738 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 25740 learning rate 0.0000 s

global step 25922 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 25924 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 25926 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25928 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25930 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 25932 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25934 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 25936 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 25938 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 25940 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 25942 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 25944 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 25946 learning rate 0.0000 s

global step 26128 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26130 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26132 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 26134 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26136 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26138 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26140 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 26142 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26144 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26146 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 26148 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 26150 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 26152 learning rate 0.0000 s

global step 26334 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26336 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 26338 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26340 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 26342 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 26344 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26346 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 26348 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 26350 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26352 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 26354 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26356 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 26358 learning rate 0.0000 s

global step 26540 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26542 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 26544 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26546 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 26548 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 26550 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 26552 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26554 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 26556 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 26558 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26560 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 26562 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26564 learning rate 0.0000 s

global step 26746 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26748 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26750 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 26752 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26754 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 26756 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 26758 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26760 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26762 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 26764 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 26766 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26768 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 26770 learning rate 0.0000 s

global step 26952 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26954 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26956 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 26958 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 26960 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26962 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26964 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 26966 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26968 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26970 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 26972 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 26974 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 26976 learning rate 0.0000 s

global step 27158 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 27160 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 27162 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27164 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 27166 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27168 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27170 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27172 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 27174 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 27176 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27178 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 27180 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 27182 learning rate 0.0000 s

global step 27364 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27366 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27368 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27370 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 27372 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 27374 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 27376 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27378 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27380 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 27382 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27384 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 27386 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27388 learning rate 0.0000 s

global step 27570 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27572 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27574 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27576 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 27578 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27580 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 27582 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27584 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 27586 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27588 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27590 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 27592 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27594 learning rate 0.0000 s

global step 27776 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27778 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 27780 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 27782 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27784 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 27786 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 27788 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 27790 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 27792 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 27794 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 27796 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27798 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 27800 learning rate 0.0000 s

global step 27982 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 27984 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 27986 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 27988 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 27990 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27992 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27994 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 27996 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 27998 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28000 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28002 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28004 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 28006 learning rate 0.0000 s

global step 28188 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 28190 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28192 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28194 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28196 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28198 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28200 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 28202 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28204 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28206 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28208 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 28210 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 28212 learning rate 0.0000 s

global step 28394 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28396 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28398 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28400 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28402 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 28404 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28406 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28408 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28410 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28412 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28414 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28416 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28418 learning rate 0.0000 s

global step 28600 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 28602 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28604 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28606 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28608 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28610 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28612 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28614 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28616 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28618 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28620 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28622 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 28624 learning rate 0.0000 s

global step 28806 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28808 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28810 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28812 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28814 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 28816 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28818 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 28820 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28822 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28824 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 28826 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 28828 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 28830 learning rate 0.0000 s

global step 29012 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 29014 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 29016 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 29018 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29020 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 29022 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29024 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29026 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29028 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29030 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29032 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29034 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 29036 learning rate 0.0000 s

global step 29218 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29220 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29222 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29224 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29226 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29228 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29230 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29232 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29234 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29236 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 29238 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29240 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29242 learning rate 0.0000 s

global step 29424 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 29426 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29428 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 29430 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29432 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29434 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29436 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29438 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29440 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29442 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 29444 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29446 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29448 learning rate 0.0000 s

global step 29630 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29632 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29634 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29636 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29638 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29640 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29642 learning rate 0.0000 step-time 0.00 perplexity 1.07 loss 0.07
global step 29644 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29646 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 29648 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29650 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 29652 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 29654 learning rate 0.0000 s

global step 29836 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29838 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29840 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29842 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 29844 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 29846 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 29848 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 29850 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 29852 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 29854 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29856 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 29858 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 29860 learning rate 0.0000 s

global step 30042 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30044 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30046 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30048 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30050 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30052 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30054 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30056 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30058 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 30060 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 30062 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 30064 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30066 learning rate 0.0000 s

global step 30248 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30250 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30252 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.07
global step 30254 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30256 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30258 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 30260 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 30262 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 30264 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30266 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 30268 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 30270 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30272 learning rate 0.0000 s

global step 30454 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30456 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 30458 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 30460 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30462 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30464 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 30466 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30468 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30470 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 30472 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30474 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 30476 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30478 learning rate 0.0000 s

global step 30660 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30662 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30664 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30666 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30668 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30670 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30672 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30674 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 30676 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 30678 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 30680 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30682 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30684 learning rate 0.0000 s

global step 30866 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 30868 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 30870 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 30872 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 30874 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 30876 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30878 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30880 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 30882 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30884 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 30886 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 30888 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 30890 learning rate 0.0000 s

global step 31072 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31074 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31076 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31078 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 31080 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31082 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 31084 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 31086 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 31088 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31090 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31092 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31094 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31096 learning rate 0.0000 s

global step 31278 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31280 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31282 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31284 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31286 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31288 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31290 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31292 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 31294 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31296 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 31298 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31300 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31302 learning rate 0.0000 s

global step 31484 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31486 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31488 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31490 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31492 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 31494 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 31496 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 31498 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 31500 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31502 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 31504 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31506 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31508 learning rate 0.0000 s

global step 31690 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31692 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 31694 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 31696 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 31698 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31700 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 31702 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31704 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31706 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31708 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 31710 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31712 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31714 learning rate 0.0000 s

global step 31896 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31898 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 31900 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 31902 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 31904 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31906 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31908 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31910 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 31912 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 31914 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 31916 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 31918 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 31920 learning rate 0.0000 s

global step 32102 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 32104 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32106 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32108 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32110 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32112 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 32114 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32116 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32118 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32120 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32122 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32124 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 32126 learning rate 0.0000 s

global step 32308 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32310 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32312 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32314 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32316 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32318 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32320 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 32322 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32324 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32326 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 32328 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 32330 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 32332 learning rate 0.0000 s

global step 32514 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 32516 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32518 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32520 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 32522 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.13
global step 32524 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32526 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 32528 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32530 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 32532 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32534 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 32536 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32538 learning rate 0.0000 s

global step 32720 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 32722 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 32724 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32726 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 32728 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 32730 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 32732 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32734 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 32736 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32738 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32740 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32742 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 32744 learning rate 0.0000 s

global step 32926 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32928 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32930 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32932 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32934 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 32936 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 32938 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32940 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32942 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 32944 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32946 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 32948 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 32950 learning rate 0.0000 s

global step 33132 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 33134 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33136 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33138 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33140 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 33142 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33144 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33146 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33148 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33150 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33152 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 33154 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 33156 learning rate 0.0000 s

global step 33338 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33340 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 33342 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 33344 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 33346 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33348 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 33350 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33352 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33354 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33356 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33358 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 33360 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33362 learning rate 0.0000 s

global step 33544 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33546 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33548 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 33550 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33552 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 33554 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33556 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33558 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33560 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 33562 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33564 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33566 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 33568 learning rate 0.0000 s

global step 33750 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 33752 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33754 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 33756 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 33758 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 33760 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33762 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33764 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 33766 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 33768 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 33770 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 33772 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 33774 learning rate 0.0000 s

global step 33956 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33958 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33960 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 33962 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 33964 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 33966 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33968 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 33970 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 33972 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 33974 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 33976 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 33978 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 33980 learning rate 0.0000 s

global step 34162 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34164 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34166 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34168 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34170 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34172 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34174 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 34176 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 34178 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34180 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 34182 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34184 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34186 learning rate 0.0000 s

global step 34368 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34370 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 34372 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 34374 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34376 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34378 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 34380 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34382 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34384 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34386 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34388 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34390 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34392 learning rate 0.0000 s

global step 34574 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34576 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34578 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34580 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34582 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 34584 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34586 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 34588 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 34590 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34592 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 34594 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34596 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 34598 learning rate 0.0000 s

global step 34780 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 34782 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34784 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 34786 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34788 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34790 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 34792 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 34794 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 34796 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 34798 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34800 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 34802 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 34804 learning rate 0.0000 s

global step 34986 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 34988 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34990 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34992 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 34994 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34996 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 34998 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35000 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 35002 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35004 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35006 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 35008 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35010 learning rate 0.0000 s

global step 35192 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35194 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35196 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35198 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35200 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 35202 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35204 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35206 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35208 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 35210 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 35212 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35214 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35216 learning rate 0.0000 s

global step 35398 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 35400 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35402 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35404 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35406 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35408 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35410 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35412 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35414 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35416 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35418 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 35420 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35422 learning rate 0.0000 s

global step 35604 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35606 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 35608 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35610 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35612 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35614 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35616 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35618 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 35620 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 35622 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35624 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 35626 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35628 learning rate 0.0000 s

global step 35810 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35812 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 35814 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 35816 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35818 learning rate 0.0000 step-time 0.00 perplexity 1.07 loss 0.07
global step 35820 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35822 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 35824 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 35826 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 35828 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 35830 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 35832 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 35834 learning rate 0.0000 s

global step 36016 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 36018 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36020 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36022 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 36024 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 36026 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36028 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36030 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 36032 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 36034 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36036 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36038 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 36040 learning rate 0.0000 s

global step 36222 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36224 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 36226 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36228 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36230 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 36232 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36234 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36236 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36238 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36240 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36242 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 36244 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36246 learning rate 0.0000 s

global step 36428 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36430 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36432 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 36434 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36436 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36438 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 36440 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36442 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 36444 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 36446 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 36448 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36450 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36452 learning rate 0.0000 s

global step 36634 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36636 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36638 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36640 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36642 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36644 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36646 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36648 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 36650 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36652 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36654 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36656 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36658 learning rate 0.0000 s

global step 36840 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36842 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 36844 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36846 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36848 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36850 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 36852 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36854 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 36856 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 36858 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36860 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 36862 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 36864 learning rate 0.0000 s

global step 37046 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37048 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37050 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 37052 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37054 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 37056 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37058 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 37060 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37062 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37064 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 37066 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37068 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37070 learning rate 0.0000 s

global step 37252 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37254 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37256 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 37258 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37260 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 37262 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37264 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37266 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37268 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37270 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 37272 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 37274 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 37276 learning rate 0.0000 s

global step 37458 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37460 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 37462 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37464 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37466 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37468 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 37470 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37472 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37474 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37476 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37478 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37480 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37482 learning rate 0.0000 s

global step 37664 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 37666 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37668 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37670 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 37672 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37674 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37676 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37678 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 37680 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 37682 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 37684 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37686 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 37688 learning rate 0.0000 s

global step 37870 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 37872 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 37874 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37876 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37878 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37880 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 37882 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 37884 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 37886 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 37888 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 37890 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 37892 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 37894 learning rate 0.0000 s

global step 38076 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 38078 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 38080 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38082 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38084 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38086 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 38088 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38090 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38092 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 38094 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38096 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38098 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38100 learning rate 0.0000 s

global step 38282 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38284 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38286 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 38288 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38290 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 38292 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38294 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 38296 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38298 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38300 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38302 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38304 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38306 learning rate 0.0000 s

global step 38488 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38490 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38492 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38494 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38496 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 38498 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 38500 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38502 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38504 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 38506 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 38508 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38510 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38512 learning rate 0.0000 s

global step 38694 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38696 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38698 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 38700 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38702 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 38704 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38706 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38708 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 38710 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38712 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38714 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38716 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38718 learning rate 0.0000 s

global step 38900 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38902 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 38904 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 38906 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 38908 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 38910 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38912 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38914 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38916 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 38918 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 38920 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 38922 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 38924 learning rate 0.0000 s

global step 39106 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39108 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39110 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 39112 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39114 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 39116 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39118 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39120 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39122 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 39124 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 39126 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39128 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39130 learning rate 0.0000 s

global step 39312 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39314 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 39316 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39318 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39320 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39322 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39324 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 39326 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 39328 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 39330 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 39332 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39334 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39336 learning rate 0.0000 s

global step 39518 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 39520 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 39522 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39524 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 39526 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39528 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39530 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 39532 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 39534 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 39536 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 39538 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39540 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39542 learning rate 0.0000 s

global step 39724 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39726 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39728 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 39730 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39732 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39734 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 39736 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39738 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39740 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 39742 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39744 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 39746 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 39748 learning rate 0.0000 s

global step 39930 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39932 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39934 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 39936 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 39938 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39940 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39942 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 39944 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39946 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 39948 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 39950 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 39952 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 39954 learning rate 0.0000 s

global step 40136 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40138 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40140 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40142 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 40144 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40146 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40148 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 40150 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40152 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40154 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40156 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.07
global step 40158 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40160 learning rate 0.0000 s

global step 40342 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40344 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40346 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40348 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40350 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40352 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40354 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40356 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40358 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40360 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 40362 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40364 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 40366 learning rate 0.0000 s

global step 40548 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40550 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40552 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40554 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 40556 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 40558 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40560 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40562 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40564 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 40566 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 40568 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40570 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40572 learning rate 0.0000 s

global step 40754 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40756 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40758 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40760 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40762 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40764 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40766 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40768 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 40770 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40772 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40774 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40776 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40778 learning rate 0.0000 s

global step 40960 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40962 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40964 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40966 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 40968 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 40970 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40972 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 40974 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 40976 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 40978 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 40980 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 40982 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 40984 learning rate 0.0000 s

global step 41166 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41168 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 41170 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 41172 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41174 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41176 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41178 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41180 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 41182 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 41184 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 41186 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 41188 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41190 learning rate 0.0000 s

global step 41372 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 41374 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41376 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41378 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41380 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41382 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41384 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41386 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41388 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 41390 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 41392 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41394 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41396 learning rate 0.0000 s

global step 41578 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 41580 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 41582 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41584 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 41586 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41588 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41590 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 41592 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 41594 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41596 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41598 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 41600 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41602 learning rate 0.0000 s

global step 41784 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 41786 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 41788 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41790 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41792 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 41794 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 41796 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41798 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 41800 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41802 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41804 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41806 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 41808 learning rate 0.0000 s

global step 41990 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 41992 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 41994 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 41996 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 41998 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 42000 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 42002 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42004 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 42006 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42008 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 42010 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42012 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 42014 learning rate 0.0000 s

global step 42196 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 42198 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 42200 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 42202 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42204 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 42206 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42208 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42210 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42212 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42214 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 42216 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42218 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 42220 learning rate 0.0000 s

global step 42402 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42404 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 42406 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 42408 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42410 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42412 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42414 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42416 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42418 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 42420 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 42422 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42424 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42426 learning rate 0.0000 s

global step 42608 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 42610 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 42612 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 42614 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42616 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42618 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42620 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 42622 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 42624 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 42626 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 42628 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 42630 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42632 learning rate 0.0000 s

global step 42814 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42816 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 42818 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 42820 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42822 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42824 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42826 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42828 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 42830 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 42832 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 42834 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 42836 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 42838 learning rate 0.0000 s

global step 43020 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43022 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43024 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 43026 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43028 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43030 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.07
global step 43032 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43034 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 43036 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43038 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 43040 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43042 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 43044 learning rate 0.0000 s

global step 43226 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43228 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 43230 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43232 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43234 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43236 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43238 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43240 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43242 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43244 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 43246 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43248 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 43250 learning rate 0.0000 s

global step 43432 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43434 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43436 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43438 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43440 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43442 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43444 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 43446 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 43448 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43450 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43452 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43454 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43456 learning rate 0.0000 s

global step 43638 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43640 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43642 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43644 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 43646 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43648 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43650 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43652 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43654 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43656 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43658 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43660 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43662 learning rate 0.0000 s

global step 43844 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 43846 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43848 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43850 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43852 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43854 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43856 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 43858 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 43860 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43862 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 43864 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 43866 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 43868 learning rate 0.0000 s

global step 44050 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44052 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 44054 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44056 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44058 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44060 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44062 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44064 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44066 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 44068 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 44070 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44072 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 44074 learning rate 0.0000 s

global step 44256 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44258 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44260 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44262 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 44264 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44266 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44268 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44270 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44272 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 44274 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44276 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44278 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 44280 learning rate 0.0000 s

global step 44462 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44464 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44466 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44468 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44470 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44472 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44474 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44476 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44478 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 44480 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 44482 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44484 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44486 learning rate 0.0000 s

global step 44668 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 44670 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44672 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44674 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 44676 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44678 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 44680 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44682 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44684 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44686 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44688 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 44690 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 44692 learning rate 0.0000 s

global step 44874 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44876 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44878 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 44880 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44882 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 44884 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44886 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 44888 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 44890 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 44892 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 44894 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 44896 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 44898 learning rate 0.0000 s

global step 45080 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 45082 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 45084 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 45086 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45088 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45090 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 45092 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 45094 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45096 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45098 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 45100 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45102 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 45104 learning rate 0.0000 s

global step 45286 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 45288 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 45290 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45292 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45294 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45296 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45298 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45300 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 45302 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45304 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45306 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 45308 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45310 learning rate 0.0000 s

global step 45492 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45494 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45496 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45498 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45500 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 45502 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45504 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45506 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45508 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45510 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45512 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 45514 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 45516 learning rate 0.0000 s

global step 45698 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 45700 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 45702 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45704 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 45706 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 45708 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45710 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 45712 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45714 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45716 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45718 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45720 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45722 learning rate 0.0000 s

global step 45904 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45906 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 45908 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45910 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45912 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45914 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45916 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45918 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45920 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 45922 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 45924 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 45926 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 45928 learning rate 0.0000 s

global step 46110 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46112 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46114 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46116 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 46118 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 46120 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46122 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46124 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46126 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 46128 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46130 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46132 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46134 learning rate 0.0000 s

global step 46316 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46318 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46320 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46322 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46324 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46326 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46328 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 46330 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46332 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46334 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46336 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 46338 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 46340 learning rate 0.0000 s

global step 46522 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 46524 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46526 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46528 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46530 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 46532 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46534 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46536 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46538 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46540 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46542 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46544 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46546 learning rate 0.0000 s

global step 46728 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46730 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46732 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46734 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 46736 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46738 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 46740 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46742 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46744 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46746 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 46748 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46750 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 46752 learning rate 0.0000 s

global step 46934 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 46936 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 46938 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 46940 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46942 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 46944 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46946 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 46948 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 46950 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46952 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 46954 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 46956 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 46958 learning rate 0.0000 s

global step 47140 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 47142 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 47144 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47146 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 47148 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47150 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47152 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 47154 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47156 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47158 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 47160 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 47162 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47164 learning rate 0.0000 s

global step 47346 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47348 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47350 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 47352 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47354 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47356 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 47358 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 47360 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47362 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 47364 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47366 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47368 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47370 learning rate 0.0000 s

global step 47552 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47554 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47556 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47558 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47560 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47562 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47564 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 47566 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47568 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47570 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47572 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 47574 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47576 learning rate 0.0000 s

global step 47758 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 47760 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 47762 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47764 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 47766 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 47768 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 47770 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 47772 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47774 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47776 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47778 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47780 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47782 learning rate 0.0000 s

global step 47964 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47966 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 47968 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 47970 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47972 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 47974 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47976 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 47978 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47980 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 47982 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47984 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 47986 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 47988 learning rate 0.0000 s

global step 48170 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48172 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 48174 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48176 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48178 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48180 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48182 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48184 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 48186 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 48188 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 48190 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48192 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 48194 learning rate 0.0000 s

global step 48376 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 48378 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 48380 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 48382 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 48384 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 48386 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48388 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48390 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48392 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 48394 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48396 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48398 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 48400 learning rate 0.0000 s

global step 48582 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48584 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 48586 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48588 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48590 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 48592 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48594 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48596 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48598 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48600 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48602 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48604 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48606 learning rate 0.0000 s

global step 48788 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48790 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48792 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 48794 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 48796 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 48798 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48800 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 48802 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48804 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48806 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 48808 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48810 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 48812 learning rate 0.0000 s

global step 48994 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 48996 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 48998 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49000 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49002 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49004 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 49006 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49008 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 49010 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 49012 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 49014 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 49016 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49018 learning rate 0.0000 s

global step 49200 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49202 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 49204 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 49206 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49208 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49210 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 49212 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49214 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 49216 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49218 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49220 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 49222 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 49224 learning rate 0.0000 s

global step 49406 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49408 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 49410 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49412 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49414 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49416 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 49418 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 49420 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49422 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49424 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49426 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49428 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49430 learning rate 0.0000 s

global step 49612 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49614 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49616 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49618 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 49620 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49622 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49624 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 49626 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49628 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49630 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49632 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49634 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 49636 learning rate 0.0000 s

global step 49818 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 49820 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49822 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 49824 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49826 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49828 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49830 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49832 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 49834 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49836 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 49838 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 49840 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 49842 learning rate 0.0000 s

global step 50024 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 50026 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50028 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50030 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50032 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50034 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 50036 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50038 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50040 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50042 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50044 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50046 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50048 learning rate 0.0000 s

global step 50230 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 50232 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50234 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50236 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50238 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50240 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50242 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50244 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50246 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50248 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 50250 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 50252 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 50254 learning rate 0.0000 s

global step 50436 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50438 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50440 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50442 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50444 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 50446 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50448 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 50450 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50452 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 50454 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 50456 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 50458 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50460 learning rate 0.0000 s

global step 50642 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 50644 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50646 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50648 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50650 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 50652 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50654 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50656 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50658 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50660 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 50662 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 50664 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 50666 learning rate 0.0000 s

global step 50848 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50850 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50852 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50854 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50856 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50858 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 50860 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 50862 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 50864 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50866 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 50868 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 50870 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 50872 learning rate 0.0000 s

global step 51052 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51054 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51056 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 51058 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 51060 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 51062 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51064 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 51066 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 51068 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 51070 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51072 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51074 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51076 learning rate 0.0000 s

global step 51258 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51260 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 51262 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 51264 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51266 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51268 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 51270 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51272 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 51274 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51276 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51278 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51280 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 51282 learning rate 0.0000 s

global step 51464 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 51466 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 51468 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51470 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 51472 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51474 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51476 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 51478 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 51480 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 51482 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51484 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 51486 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 51488 learning rate 0.0000 s

global step 51670 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 51672 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 51674 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51676 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51678 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 51680 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51682 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 51684 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 51686 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51688 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51690 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.12
global step 51692 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51694 learning rate 0.0000 s

global step 51876 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 51878 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 51880 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51882 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 51884 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 51886 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 51888 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 51890 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 51892 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 51894 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 51896 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 51898 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 51900 learning rate 0.0000 s

global step 52082 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 52084 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52086 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52088 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52090 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 52092 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 52094 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52096 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 52098 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 52100 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52102 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 52104 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 52106 learning rate 0.0000 s

global step 52288 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 52290 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52292 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52294 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 52296 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52298 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 52300 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 52302 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 52304 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52306 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52308 learning rate 0.0000 step-time 0.00 perplexity 1.08 loss 0.08
global step 52310 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52312 learning rate 0.0000 s

global step 52494 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.11
global step 52496 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 52498 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52500 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 52502 learning rate 0.0000 step-time 0.00 perplexity 1.13 loss 0.12
global step 52504 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 52506 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52508 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 52510 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52512 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52514 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52516 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.08
global step 52518 learning rate 0.0000 s

global step 52700 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52702 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52704 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52706 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52708 learning rate 0.0000 step-time 0.00 perplexity 1.12 loss 0.11
global step 52710 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 52712 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.10
global step 52714 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52716 learning rate 0.0000 step-time 0.00 perplexity 1.09 loss 0.09
global step 52718 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52720 learning rate 0.0000 step-time 0.00 perplexity 1.10 loss 0.09
global step 52722 learning rate 0.0000 step-time 0.00 perplexity 1.11 loss 0.10
global step 52724 learning rate 0.0000 s