In [1]:
"""Main script for training.

Requirements: Python 2, TensorFlow r1.2 or r1.4.
"""
import os
import sys
import time
import math
import argparse
import pickle

import numpy as np
import tensorflow as tf
# from tensorflow.contrib.framework import nest

from data_loader import DataLoader
from model import Model

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


In [2]:
# Session configuration used by train(): log the device each op is placed on
# and let TensorFlow grow its GPU memory allocation as needed.
config = tf.ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True

In [4]:
def train(args):
  """Train the model, printing progress and saving checkpoints periodically.

  The hyperparameter namespace is pickled to ``<model_dir>/config.pkl`` before
  training starts. If ``args.load_model`` is set, the checkpoint is restored
  and the starting epoch is parsed from the number after the last ``-`` in the
  checkpoint path (checkpoints are saved below with ``global_step=ep+1``).

  Args:
    args: namespace of hyperparameters. Fields read directly here:
      batch_size, data_scale, bptt_length, model_dir, load_model, num_epochs,
      learning_rate, decay_rate, save_every. The whole namespace is also
      handed to ``Model``.
  """
  # Notebook defaults: batch_size=10, bptt_length=120.
  data_loader = DataLoader(args.batch_size, args.data_scale, args.bptt_length)
  data_loader.reset_batch_pointer()

  # An empty model_dir means "current directory"; os.makedirs('') would raise.
  if args.model_dir != '' and not os.path.exists(args.model_dir):
    os.makedirs(args.model_dir)

  with open(os.path.join(args.model_dir, 'config.pkl'), 'wb') as f:
    pickle.dump(args, f)
  print("hyperparam. saved.")

  model = Model(args)

  # training
  with tf.Session(config=config) as sess:

    tf.global_variables_initializer().run()

    saver = tf.train.Saver()
    if args.load_model is not None:
      saver.restore(sess, args.load_model)
      # Checkpoint paths end in "-<epoch>"; resume from that epoch.
      _, ep_start = args.load_model.rsplit("-", 1)
      ep_start = int(ep_start)
      model_steps = int(ep_start * data_loader.num_batches)
    else:
      ep_start = 0
      model_steps = 0
    # Must be initialised on BOTH paths: the progress check below reads it
    # before it is ever reassigned (previously unset when resuming).
    last_model_steps = model_steps

    last_time = time.time()

    for ep in range(ep_start, args.num_epochs):
      ep_loss = []
      epoch_lr = args.learning_rate * (args.decay_rate ** ep)
      sess.run(tf.assign(model.lr, epoch_lr))

      for i in range(int(data_loader.num_sequences / args.batch_size)):
        # Running count of sequences consumed so far (for logging only).
        idx = ep * data_loader.num_sequences + i * args.batch_size
        # x is presumably [batch_size, time_steps, 6] -- TODO confirm.
        # The fifth returned value (sequence lengths) is not used here.
        x, y, w, c, _ = data_loader.next_batch()

        loss_list, model_steps = model.train(
          sess=sess,
          sequence=x,
          targets=y,
          weights=w,
          conditions=c,
          subseq_length=args.bptt_length,
          step_count=model_steps
          )

        ep_loss += loss_list

        # Report roughly every 100 model steps.
        if model_steps - last_model_steps >= 100:
          new_time = time.time()
          print(
            "Sequence %d/%d (epoch %d), batch %d, train_loss = %.3f, time/batch = %.3f"
            % (
                idx,
                args.num_epochs * data_loader.num_sequences,
                ep,
                model_steps,
                np.mean(loss_list),
                (new_time - last_time) / (model_steps - last_model_steps)
              )
            )
          sys.stdout.flush()
          last_model_steps = model_steps
          last_time = new_time
      print("Epoch %d completed, average train loss %.6f, learning rate %.4f" % (ep, np.mean(ep_loss), epoch_lr))
      sys.stdout.flush()
      # Recreate the checkpoint directory if it disappeared; skip for the
      # empty path, matching the guard used before training started.
      if args.model_dir != '' and not os.path.isdir(args.model_dir):
        os.makedirs(args.model_dir)
      if (ep+1) % args.save_every == 0:
        checkpoint_path = os.path.join(args.model_dir, 'model.ckpt')
        saver.save(sess, save_path=checkpoint_path, global_step = (ep+1))
        print("model saved.")




## 1. Train method 1

In [3]:
def main():
  """Assemble the hyperparameter namespace and start training.

  Options are parsed from a fixed argument list rather than from the process
  command line (presumably because this runs inside a notebook -- confirm),
  after which ``num_epochs`` and ``save_every`` are overridden in code before
  the namespace is passed to ``train``.
  """
  # (flag, type, default, help), listed in the order they are registered.
  option_specs = (
    ('--dim_rec', int, 128, 'size of RNN hidden state'),
    ('--num_layers', int, 2, 'number of layers in the RNN. '),
    ('--batch_size', int, 10, 'minibatch size'),
    ('--num_epochs', int, 200, 'number of epochs'),
    ('--save_every', int, 10, 'save frequency by epoches'),
    ('--model_dir', str, 'checkpoints', 'directory to save model to'),
    ('--summary_dir', str, 'summary', 'directory to save tensorboard info'),
    ('--max_grad_norm', float, 1., 'clip gradients at this value'),
    ('--learning_rate', float, 0.001, 'learning rate'),
    ('--decay_rate', float, 1.0, 'decay rate for the optimizer'),
    ('--num_mixture', int, 2, 'number of gaussian mixtures'),
    ('--data_scale', float, 1000, 'factor to scale raw data down by'),
    ('--load_model', str, None, 'Reload a model checkpoint and restore training.'),
    ('--bptt_length', int, 120, 'How many steps should the gradients pass back.'),
    ('--loss_form', str, 'mse', 'mse / gmm'),
    ('--constraint_factor', float, 0., 'the weight for constraint term in the cost function.'),
  )

  parser = argparse.ArgumentParser()
  for flag, value_type, default_value, help_text in option_specs:
    parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

  args = parser.parse_args(['--num_epochs', '200'])

  # In-code overrides applied on top of the parsed values.
  for field, value in (('num_epochs', 100), ('save_every', 5)):
    setattr(args, field, value)

  train(args)


In [5]:
# Entry point: start training only when run as the top-level script/cell.
# (Optional) wrap the call in `with tf.device('/device:GPU:2'):` to pin a device.
if __name__ == "__main__":
  main()

Maximal length of the training data is 169
Training data case distribution: [5348, 5108, 4904, 1208, 1260, 1224, 916, 1180, 1216, 1044, 876, 456, 1264, 992, 1080, 932, 1008, 420, 1160, 1036]
Validation data case distribution: [70, 67, 65, 15, 17, 16, 12, 15, 16, 14, 12, 6, 17, 13, 14, 13, 14, 5, 15, 13]
Shuffling training data...
Shuffling training data...
hyperparam. saved.
Number of trainable variables 6
[<tf.Variable 'rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0' shape=(134, 512) dtype=float32_ref>, <tf.Variable 'rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0' shape=(256, 512) dtype=float32_ref>, <tf.Variable 'rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'out_W:0' shape=(128, 3) dtype=float32_ref>, <tf.Variable 'out_b:0' shape=(3,) dtype=float32_ref>]
Sequence 980/3263200 (epoch 0), batch 100, train_loss = 6.122, time/batch = 0

Sequence 83444/3263200 (epoch 2), batch 8503, train_loss = 2.764, time/batch = 0.100
Sequence 84404/3263200 (epoch 2), batch 8603, train_loss = 1.936, time/batch = 0.100
Sequence 85384/3263200 (epoch 2), batch 8703, train_loss = 1.923, time/batch = 0.102
Sequence 86374/3263200 (epoch 2), batch 8803, train_loss = 2.569, time/batch = 0.099
Sequence 87354/3263200 (epoch 2), batch 8903, train_loss = 2.102, time/batch = 0.099
Sequence 88344/3263200 (epoch 2), batch 9003, train_loss = 1.870, time/batch = 0.099
Sequence 89324/3263200 (epoch 2), batch 9103, train_loss = 3.633, time/batch = 0.100
Sequence 90274/3263200 (epoch 2), batch 9203, train_loss = 2.269, time/batch = 0.103
Sequence 91264/3263200 (epoch 2), batch 9303, train_loss = 1.620, time/batch = 0.100
Sequence 92264/3263200 (epoch 2), batch 9403, train_loss = 3.496, time/batch = 0.098
Sequence 93264/3263200 (epoch 2), batch 9503, train_loss = 2.104, time/batch = 0.099
Sequence 94224/3263200 (epoch 2), batch 9603, train_loss = 1.845,

Sequence 173650/3263200 (epoch 5), batch 17704, train_loss = 1.514, time/batch = 0.100
Sequence 174620/3263200 (epoch 5), batch 17804, train_loss = 2.613, time/batch = 0.102
Sequence 175610/3263200 (epoch 5), batch 17904, train_loss = 1.627, time/batch = 0.101
Sequence 176610/3263200 (epoch 5), batch 18004, train_loss = 1.280, time/batch = 0.100
Sequence 177580/3263200 (epoch 5), batch 18104, train_loss = 1.817, time/batch = 0.100
Sequence 178560/3263200 (epoch 5), batch 18204, train_loss = 2.135, time/batch = 0.098
Sequence 179540/3263200 (epoch 5), batch 18304, train_loss = 2.746, time/batch = 0.099
Sequence 180530/3263200 (epoch 5), batch 18404, train_loss = 2.050, time/batch = 0.101
Sequence 181520/3263200 (epoch 5), batch 18504, train_loss = 4.436, time/batch = 0.100
Sequence 182520/3263200 (epoch 5), batch 18604, train_loss = 1.284, time/batch = 0.101
Sequence 183470/3263200 (epoch 5), batch 18704, train_loss = 1.550, time/batch = 0.101
Sequence 184460/3263200 (epoch 5), batch 18

Sequence 262986/3263200 (epoch 8), batch 26808, train_loss = 1.144, time/batch = 0.099
Sequence 263966/3263200 (epoch 8), batch 26908, train_loss = 1.685, time/batch = 0.100
Sequence 264926/3263200 (epoch 8), batch 27008, train_loss = 1.323, time/batch = 0.101
Sequence 265906/3263200 (epoch 8), batch 27108, train_loss = 1.935, time/batch = 0.101
Sequence 266896/3263200 (epoch 8), batch 27208, train_loss = 1.053, time/batch = 0.100
Sequence 267876/3263200 (epoch 8), batch 27308, train_loss = 1.466, time/batch = 0.100
Sequence 268866/3263200 (epoch 8), batch 27408, train_loss = 0.934, time/batch = 0.102
Sequence 269826/3263200 (epoch 8), batch 27508, train_loss = 1.837, time/batch = 0.102
Sequence 270826/3263200 (epoch 8), batch 27608, train_loss = 3.521, time/batch = 0.100
Sequence 271816/3263200 (epoch 8), batch 27708, train_loss = 1.414, time/batch = 0.100
Sequence 272796/3263200 (epoch 8), batch 27808, train_loss = 1.958, time/batch = 0.099
Sequence 273786/3263200 (epoch 8), batch 27

Sequence 353230/3263200 (epoch 10), batch 36009, train_loss = 1.838, time/batch = 0.101
Sequence 354220/3263200 (epoch 10), batch 36109, train_loss = 0.953, time/batch = 0.098
Sequence 355180/3263200 (epoch 10), batch 36209, train_loss = 1.720, time/batch = 0.099
Sequence 356160/3263200 (epoch 10), batch 36309, train_loss = 1.334, time/batch = 0.099
Sequence 357140/3263200 (epoch 10), batch 36409, train_loss = 1.390, time/batch = 0.099
Sequence 358140/3263200 (epoch 10), batch 36509, train_loss = 1.820, time/batch = 0.099
Epoch 10 completed, average train loss 1.583449, learning rate 0.0010
Shuffling training data...
Sequence 359132/3263200 (epoch 11), batch 36609, train_loss = 1.982, time/batch = 0.097
Sequence 360122/3263200 (epoch 11), batch 36709, train_loss = 1.377, time/batch = 0.097
Sequence 361082/3263200 (epoch 11), batch 36809, train_loss = 1.333, time/batch = 0.101
Sequence 362072/3263200 (epoch 11), batch 36909, train_loss = 1.897, time/batch = 0.099
Sequence 363062/3263200

Sequence 441566/3263200 (epoch 13), batch 45009, train_loss = 0.831, time/batch = 0.096
Sequence 442566/3263200 (epoch 13), batch 45109, train_loss = 2.185, time/batch = 0.095
Sequence 443546/3263200 (epoch 13), batch 45209, train_loss = 1.407, time/batch = 0.097
Sequence 444536/3263200 (epoch 13), batch 45309, train_loss = 0.738, time/batch = 0.096
Sequence 445516/3263200 (epoch 13), batch 45409, train_loss = 1.603, time/batch = 0.095
Sequence 446496/3263200 (epoch 13), batch 45509, train_loss = 1.654, time/batch = 0.098
Sequence 447456/3263200 (epoch 13), batch 45609, train_loss = 0.831, time/batch = 0.099
Sequence 448426/3263200 (epoch 13), batch 45709, train_loss = 1.782, time/batch = 0.100
Sequence 449406/3263200 (epoch 13), batch 45809, train_loss = 1.648, time/batch = 0.098
Sequence 450406/3263200 (epoch 13), batch 45909, train_loss = 1.018, time/batch = 0.100
Sequence 451396/3263200 (epoch 13), batch 46009, train_loss = 1.679, time/batch = 0.100
Sequence 452366/3263200 (epoch 1

Sequence 529802/3263200 (epoch 16), batch 54011, train_loss = 1.089, time/batch = 0.096
Sequence 530772/3263200 (epoch 16), batch 54111, train_loss = 1.802, time/batch = 0.098
Sequence 531762/3263200 (epoch 16), batch 54211, train_loss = 1.693, time/batch = 0.098
Sequence 532752/3263200 (epoch 16), batch 54311, train_loss = 0.908, time/batch = 0.097
Sequence 533742/3263200 (epoch 16), batch 54411, train_loss = 1.606, time/batch = 0.097
Sequence 534722/3263200 (epoch 16), batch 54511, train_loss = 1.105, time/batch = 0.099
Sequence 535712/3263200 (epoch 16), batch 54611, train_loss = 1.484, time/batch = 0.095
Sequence 536702/3263200 (epoch 16), batch 54711, train_loss = 2.294, time/batch = 0.096
Sequence 537702/3263200 (epoch 16), batch 54811, train_loss = 1.624, time/batch = 0.095
Sequence 538702/3263200 (epoch 16), batch 54911, train_loss = 1.376, time/batch = 0.098
Sequence 539702/3263200 (epoch 16), batch 55011, train_loss = 1.345, time/batch = 0.099
Sequence 540672/3263200 (epoch 1

Sequence 619136/3263200 (epoch 18), batch 63112, train_loss = 1.165, time/batch = 0.097
Epoch 18 completed, average train loss 1.452224, learning rate 0.0010
Shuffling training data...
Sequence 620138/3263200 (epoch 19), batch 63212, train_loss = 1.640, time/batch = 0.096
Sequence 621128/3263200 (epoch 19), batch 63312, train_loss = 1.082, time/batch = 0.097
Sequence 622118/3263200 (epoch 19), batch 63413, train_loss = 1.204, time/batch = 0.098
Sequence 623108/3263200 (epoch 19), batch 63514, train_loss = 0.900, time/batch = 0.100
Sequence 624078/3263200 (epoch 19), batch 63614, train_loss = 1.185, time/batch = 0.097
Sequence 625048/3263200 (epoch 19), batch 63714, train_loss = 1.258, time/batch = 0.100
Sequence 626048/3263200 (epoch 19), batch 63814, train_loss = 1.440, time/batch = 0.097
Sequence 627038/3263200 (epoch 19), batch 63914, train_loss = 0.816, time/batch = 0.096
Sequence 628008/3263200 (epoch 19), batch 64015, train_loss = 0.728, time/batch = 0.099
Sequence 628998/3263200

Sequence 707452/3263200 (epoch 21), batch 72116, train_loss = 1.637, time/batch = 0.100
Sequence 708442/3263200 (epoch 21), batch 72216, train_loss = 1.661, time/batch = 0.100
Sequence 709422/3263200 (epoch 21), batch 72316, train_loss = 1.197, time/batch = 0.102
Sequence 710402/3263200 (epoch 21), batch 72416, train_loss = 1.941, time/batch = 0.104
Sequence 711382/3263200 (epoch 21), batch 72516, train_loss = 1.636, time/batch = 0.101
Sequence 712362/3263200 (epoch 21), batch 72616, train_loss = 1.535, time/batch = 0.098
Sequence 713352/3263200 (epoch 21), batch 72716, train_loss = 1.634, time/batch = 0.098
Sequence 714332/3263200 (epoch 21), batch 72816, train_loss = 1.300, time/batch = 0.100
Sequence 715322/3263200 (epoch 21), batch 72916, train_loss = 1.157, time/batch = 0.100
Sequence 716322/3263200 (epoch 21), batch 73016, train_loss = 1.973, time/batch = 0.102
Sequence 717302/3263200 (epoch 21), batch 73116, train_loss = 2.173, time/batch = 0.098
Epoch 21 completed, average trai

Sequence 795758/3263200 (epoch 24), batch 81118, train_loss = 1.942, time/batch = 0.098
Sequence 796728/3263200 (epoch 24), batch 81218, train_loss = 1.532, time/batch = 0.098
Sequence 797708/3263200 (epoch 24), batch 81318, train_loss = 1.604, time/batch = 0.097
Sequence 798668/3263200 (epoch 24), batch 81418, train_loss = 1.671, time/batch = 0.098
Sequence 799668/3263200 (epoch 24), batch 81518, train_loss = 1.712, time/batch = 0.099
Sequence 800658/3263200 (epoch 24), batch 81618, train_loss = 1.623, time/batch = 0.100
Sequence 801618/3263200 (epoch 24), batch 81718, train_loss = 1.118, time/batch = 0.098
Sequence 802608/3263200 (epoch 24), batch 81818, train_loss = 0.963, time/batch = 0.098
Sequence 803588/3263200 (epoch 24), batch 81918, train_loss = 1.144, time/batch = 0.098
Sequence 804588/3263200 (epoch 24), batch 82018, train_loss = 1.479, time/batch = 0.095
Sequence 805588/3263200 (epoch 24), batch 82118, train_loss = 1.265, time/batch = 0.093
Sequence 806568/3263200 (epoch 2

Sequence 884064/3263200 (epoch 27), batch 90119, train_loss = 2.146, time/batch = 0.098
Sequence 885044/3263200 (epoch 27), batch 90219, train_loss = 1.887, time/batch = 0.097
Sequence 886034/3263200 (epoch 27), batch 90319, train_loss = 1.937, time/batch = 0.098
Sequence 886984/3263200 (epoch 27), batch 90419, train_loss = 1.067, time/batch = 0.098
Sequence 887984/3263200 (epoch 27), batch 90519, train_loss = 1.501, time/batch = 0.100
Sequence 888954/3263200 (epoch 27), batch 90619, train_loss = 1.165, time/batch = 0.099
Sequence 889924/3263200 (epoch 27), batch 90719, train_loss = 0.818, time/batch = 0.099
Sequence 890904/3263200 (epoch 27), batch 90819, train_loss = 1.607, time/batch = 0.099
Sequence 891884/3263200 (epoch 27), batch 90919, train_loss = 1.156, time/batch = 0.100
Sequence 892864/3263200 (epoch 27), batch 91019, train_loss = 1.033, time/batch = 0.099
Sequence 893844/3263200 (epoch 27), batch 91119, train_loss = 1.465, time/batch = 0.096
Sequence 894834/3263200 (epoch 2

Sequence 973398/3263200 (epoch 29), batch 99222, train_loss = 1.438, time/batch = 0.101
Sequence 974368/3263200 (epoch 29), batch 99322, train_loss = 1.200, time/batch = 0.117
Sequence 975348/3263200 (epoch 29), batch 99422, train_loss = 1.260, time/batch = 0.118
Sequence 976328/3263200 (epoch 29), batch 99522, train_loss = 1.227, time/batch = 0.118
Sequence 977318/3263200 (epoch 29), batch 99622, train_loss = 1.193, time/batch = 0.115
Sequence 978298/3263200 (epoch 29), batch 99722, train_loss = 1.184, time/batch = 0.118
Epoch 29 completed, average train loss 1.355694, learning rate 0.0010
model saved.
Shuffling training data...
Sequence 979270/3263200 (epoch 30), batch 99823, train_loss = 0.744, time/batch = 0.118
Sequence 980250/3263200 (epoch 30), batch 99923, train_loss = 1.566, time/batch = 0.118
Sequence 981250/3263200 (epoch 30), batch 100023, train_loss = 1.044, time/batch = 0.119
Sequence 982240/3263200 (epoch 30), batch 100123, train_loss = 1.604, time/batch = 0.114
Sequence

Sequence 1060664/3263200 (epoch 32), batch 108124, train_loss = 0.838, time/batch = 0.101
Sequence 1061634/3263200 (epoch 32), batch 108224, train_loss = 1.180, time/batch = 0.101
Sequence 1062624/3263200 (epoch 32), batch 108324, train_loss = 1.164, time/batch = 0.101
Sequence 1063624/3263200 (epoch 32), batch 108424, train_loss = 0.946, time/batch = 0.100
Sequence 1064594/3263200 (epoch 32), batch 108524, train_loss = 1.170, time/batch = 0.100
Sequence 1065584/3263200 (epoch 32), batch 108624, train_loss = 1.766, time/batch = 0.099
Sequence 1066584/3263200 (epoch 32), batch 108724, train_loss = 1.117, time/batch = 0.098
Sequence 1067574/3263200 (epoch 32), batch 108824, train_loss = 1.450, time/batch = 0.101
Sequence 1068574/3263200 (epoch 32), batch 108925, train_loss = 0.635, time/batch = 0.101
Sequence 1069574/3263200 (epoch 32), batch 109025, train_loss = 0.974, time/batch = 0.098
Sequence 1070514/3263200 (epoch 32), batch 109125, train_loss = 1.307, time/batch = 0.100
Sequence 1

Sequence 1147040/3263200 (epoch 35), batch 116926, train_loss = 0.625, time/batch = 0.098
Sequence 1148020/3263200 (epoch 35), batch 117026, train_loss = 1.220, time/batch = 0.100
Sequence 1148990/3263200 (epoch 35), batch 117126, train_loss = 1.105, time/batch = 0.098
Sequence 1149960/3263200 (epoch 35), batch 117226, train_loss = 1.309, time/batch = 0.099
Sequence 1150930/3263200 (epoch 35), batch 117326, train_loss = 1.442, time/batch = 0.099
Sequence 1151910/3263200 (epoch 35), batch 117426, train_loss = 1.221, time/batch = 0.099
Sequence 1152890/3263200 (epoch 35), batch 117526, train_loss = 1.732, time/batch = 0.100
Sequence 1153870/3263200 (epoch 35), batch 117626, train_loss = 1.690, time/batch = 0.098
Sequence 1154850/3263200 (epoch 35), batch 117726, train_loss = 1.296, time/batch = 0.099
Sequence 1155840/3263200 (epoch 35), batch 117826, train_loss = 1.407, time/batch = 0.100
Sequence 1156830/3263200 (epoch 35), batch 117926, train_loss = 1.846, time/batch = 0.100
Sequence 1

Sequence 1234354/3263200 (epoch 37), batch 125827, train_loss = 1.350, time/batch = 0.096
Sequence 1235344/3263200 (epoch 37), batch 125927, train_loss = 1.606, time/batch = 0.096
Sequence 1236324/3263200 (epoch 37), batch 126027, train_loss = 1.109, time/batch = 0.097
Sequence 1237314/3263200 (epoch 37), batch 126127, train_loss = 1.411, time/batch = 0.098
Sequence 1238294/3263200 (epoch 37), batch 126227, train_loss = 1.774, time/batch = 0.097
Sequence 1239274/3263200 (epoch 37), batch 126327, train_loss = 1.410, time/batch = 0.098
Epoch 37 completed, average train loss 1.306156, learning rate 0.0010
Shuffling training data...
Sequence 1240276/3263200 (epoch 38), batch 126427, train_loss = 1.385, time/batch = 0.098
Sequence 1241236/3263200 (epoch 38), batch 126527, train_loss = 0.979, time/batch = 0.099
Sequence 1242206/3263200 (epoch 38), batch 126627, train_loss = 1.073, time/batch = 0.103
Sequence 1243176/3263200 (epoch 38), batch 126727, train_loss = 1.331, time/batch = 0.099
Seq

Sequence 1320650/3263200 (epoch 40), batch 134627, train_loss = 0.871, time/batch = 0.100
Sequence 1321640/3263200 (epoch 40), batch 134727, train_loss = 1.310, time/batch = 0.100
Sequence 1322640/3263200 (epoch 40), batch 134827, train_loss = 1.169, time/batch = 0.101
Sequence 1323630/3263200 (epoch 40), batch 134927, train_loss = 0.812, time/batch = 0.100
Sequence 1324620/3263200 (epoch 40), batch 135027, train_loss = 1.439, time/batch = 0.102
Sequence 1325600/3263200 (epoch 40), batch 135127, train_loss = 1.695, time/batch = 0.099
Sequence 1326570/3263200 (epoch 40), batch 135227, train_loss = 1.137, time/batch = 0.099
Sequence 1327560/3263200 (epoch 40), batch 135327, train_loss = 0.949, time/batch = 0.099
Sequence 1328550/3263200 (epoch 40), batch 135427, train_loss = 1.653, time/batch = 0.098
Sequence 1329530/3263200 (epoch 40), batch 135527, train_loss = 1.328, time/batch = 0.099
Sequence 1330520/3263200 (epoch 40), batch 135627, train_loss = 0.922, time/batch = 0.099
Sequence 1

Sequence 1407006/3263200 (epoch 43), batch 143427, train_loss = 1.560, time/batch = 0.098
Sequence 1408006/3263200 (epoch 43), batch 143527, train_loss = 1.066, time/batch = 0.099
Sequence 1408976/3263200 (epoch 43), batch 143627, train_loss = 1.203, time/batch = 0.101
Sequence 1409956/3263200 (epoch 43), batch 143727, train_loss = 1.000, time/batch = 0.102
Sequence 1410916/3263200 (epoch 43), batch 143827, train_loss = 1.428, time/batch = 0.100
Sequence 1411896/3263200 (epoch 43), batch 143927, train_loss = 1.653, time/batch = 0.098
Sequence 1412896/3263200 (epoch 43), batch 144027, train_loss = 1.073, time/batch = 0.098
Sequence 1413866/3263200 (epoch 43), batch 144127, train_loss = 1.082, time/batch = 0.099
Sequence 1414846/3263200 (epoch 43), batch 144227, train_loss = 0.887, time/batch = 0.098
Sequence 1415826/3263200 (epoch 43), batch 144327, train_loss = 1.381, time/batch = 0.100
Sequence 1416816/3263200 (epoch 43), batch 144427, train_loss = 1.157, time/batch = 0.100
Sequence 1

Sequence 1494350/3263200 (epoch 45), batch 152329, train_loss = 1.209, time/batch = 0.099
Sequence 1495340/3263200 (epoch 45), batch 152429, train_loss = 1.108, time/batch = 0.099
Sequence 1496340/3263200 (epoch 45), batch 152529, train_loss = 1.022, time/batch = 0.101
Sequence 1497320/3263200 (epoch 45), batch 152629, train_loss = 1.302, time/batch = 0.099
Sequence 1498310/3263200 (epoch 45), batch 152729, train_loss = 1.245, time/batch = 0.100
Sequence 1499270/3263200 (epoch 45), batch 152829, train_loss = 0.915, time/batch = 0.098
Sequence 1500250/3263200 (epoch 45), batch 152929, train_loss = 1.263, time/batch = 0.097
Epoch 45 completed, average train loss 1.267943, learning rate 0.0010
Shuffling training data...
Sequence 1501212/3263200 (epoch 46), batch 153029, train_loss = 1.283, time/batch = 0.099
Sequence 1502192/3263200 (epoch 46), batch 153129, train_loss = 1.233, time/batch = 0.097
Sequence 1503182/3263200 (epoch 46), batch 153229, train_loss = 1.184, time/batch = 0.098
Seq

Sequence 1580676/3263200 (epoch 48), batch 161129, train_loss = 0.996, time/batch = 0.115
Sequence 1581666/3263200 (epoch 48), batch 161229, train_loss = 1.627, time/batch = 0.114
Sequence 1582646/3263200 (epoch 48), batch 161329, train_loss = 1.115, time/batch = 0.112
Sequence 1583646/3263200 (epoch 48), batch 161429, train_loss = 1.555, time/batch = 0.115
Sequence 1584626/3263200 (epoch 48), batch 161529, train_loss = 1.298, time/batch = 0.117
Sequence 1585616/3263200 (epoch 48), batch 161629, train_loss = 1.146, time/batch = 0.114
Sequence 1586606/3263200 (epoch 48), batch 161729, train_loss = 1.199, time/batch = 0.117
Sequence 1587586/3263200 (epoch 48), batch 161829, train_loss = 1.591, time/batch = 0.117
Sequence 1588556/3263200 (epoch 48), batch 161929, train_loss = 1.029, time/batch = 0.117
Sequence 1589546/3263200 (epoch 48), batch 162029, train_loss = 1.227, time/batch = 0.113
Sequence 1590516/3263200 (epoch 48), batch 162129, train_loss = 1.223, time/batch = 0.113
Sequence 1

Sequence 1667032/3263200 (epoch 51), batch 169931, train_loss = 1.468, time/batch = 0.115
Sequence 1668022/3263200 (epoch 51), batch 170031, train_loss = 1.247, time/batch = 0.114
Sequence 1669002/3263200 (epoch 51), batch 170131, train_loss = 1.026, time/batch = 0.113
Sequence 1669972/3263200 (epoch 51), batch 170231, train_loss = 0.632, time/batch = 0.114
Sequence 1670932/3263200 (epoch 51), batch 170331, train_loss = 1.194, time/batch = 0.113
Sequence 1671912/3263200 (epoch 51), batch 170431, train_loss = 1.146, time/batch = 0.114
Sequence 1672892/3263200 (epoch 51), batch 170531, train_loss = 1.325, time/batch = 0.116
Sequence 1673882/3263200 (epoch 51), batch 170631, train_loss = 1.311, time/batch = 0.115
Sequence 1674872/3263200 (epoch 51), batch 170731, train_loss = 1.260, time/batch = 0.113
Sequence 1675842/3263200 (epoch 51), batch 170831, train_loss = 1.292, time/batch = 0.118
Sequence 1676822/3263200 (epoch 51), batch 170931, train_loss = 1.174, time/batch = 0.112
Sequence 1

Sequence 1754286/3263200 (epoch 53), batch 178831, train_loss = 1.169, time/batch = 0.117
Sequence 1755286/3263200 (epoch 53), batch 178931, train_loss = 1.477, time/batch = 0.114
Sequence 1756266/3263200 (epoch 53), batch 179031, train_loss = 1.054, time/batch = 0.114
Sequence 1757266/3263200 (epoch 53), batch 179131, train_loss = 1.269, time/batch = 0.112
Sequence 1758246/3263200 (epoch 53), batch 179231, train_loss = 1.154, time/batch = 0.113
Sequence 1759216/3263200 (epoch 53), batch 179331, train_loss = 1.640, time/batch = 0.118
Sequence 1760196/3263200 (epoch 53), batch 179431, train_loss = 0.923, time/batch = 0.116
Sequence 1761176/3263200 (epoch 53), batch 179531, train_loss = 1.135, time/batch = 0.114
Epoch 53 completed, average train loss 1.239161, learning rate 0.0010
Sequence 1762158/3263200 (epoch 54), batch 179631, train_loss = 0.835, time/batch = 0.112
Shuffling training data...
Sequence 1763138/3263200 (epoch 54), batch 179731, train_loss = 1.240, time/batch = 0.116
Seq

Sequence 1840682/3263200 (epoch 56), batch 187633, train_loss = 0.883, time/batch = 0.113
Sequence 1841662/3263200 (epoch 56), batch 187733, train_loss = 1.281, time/batch = 0.113
Sequence 1842662/3263200 (epoch 56), batch 187833, train_loss = 0.834, time/batch = 0.117
Sequence 1843652/3263200 (epoch 56), batch 187933, train_loss = 0.935, time/batch = 0.113
Sequence 1844642/3263200 (epoch 56), batch 188033, train_loss = 1.368, time/batch = 0.117
Sequence 1845602/3263200 (epoch 56), batch 188133, train_loss = 1.049, time/batch = 0.114
Sequence 1846592/3263200 (epoch 56), batch 188233, train_loss = 1.008, time/batch = 0.116
Sequence 1847582/3263200 (epoch 56), batch 188333, train_loss = 0.964, time/batch = 0.116
Sequence 1848552/3263200 (epoch 56), batch 188433, train_loss = 1.202, time/batch = 0.114
Sequence 1849522/3263200 (epoch 56), batch 188533, train_loss = 1.150, time/batch = 0.114
Sequence 1850512/3263200 (epoch 56), batch 188633, train_loss = 0.861, time/batch = 0.116
Sequence 1

Sequence 1926978/3263200 (epoch 59), batch 196434, train_loss = 1.412, time/batch = 0.120
Sequence 1927958/3263200 (epoch 59), batch 196534, train_loss = 1.250, time/batch = 0.117
Sequence 1928908/3263200 (epoch 59), batch 196634, train_loss = 2.248, time/batch = 0.121
Sequence 1929898/3263200 (epoch 59), batch 196734, train_loss = 1.113, time/batch = 0.120
Sequence 1930888/3263200 (epoch 59), batch 196834, train_loss = 0.857, time/batch = 0.119
Sequence 1931868/3263200 (epoch 59), batch 196934, train_loss = 1.258, time/batch = 0.114
Sequence 1932858/3263200 (epoch 59), batch 197034, train_loss = 1.166, time/batch = 0.117
Sequence 1933848/3263200 (epoch 59), batch 197134, train_loss = 1.693, time/batch = 0.120
Sequence 1934848/3263200 (epoch 59), batch 197234, train_loss = 0.794, time/batch = 0.117
Sequence 1935838/3263200 (epoch 59), batch 197334, train_loss = 1.167, time/batch = 0.118
Sequence 1936818/3263200 (epoch 59), batch 197434, train_loss = 1.435, time/batch = 0.118
Sequence 1

Sequence 2014242/3263200 (epoch 61), batch 205335, train_loss = 1.491, time/batch = 0.117
Sequence 2015222/3263200 (epoch 61), batch 205435, train_loss = 1.200, time/batch = 0.112
Sequence 2016202/3263200 (epoch 61), batch 205535, train_loss = 1.033, time/batch = 0.116
Sequence 2017192/3263200 (epoch 61), batch 205635, train_loss = 1.713, time/batch = 0.118
Sequence 2018182/3263200 (epoch 61), batch 205735, train_loss = 1.091, time/batch = 0.119
Sequence 2019162/3263200 (epoch 61), batch 205835, train_loss = 1.490, time/batch = 0.120
Sequence 2020152/3263200 (epoch 61), batch 205935, train_loss = 1.449, time/batch = 0.119
Sequence 2021152/3263200 (epoch 61), batch 206035, train_loss = 1.371, time/batch = 0.116
Sequence 2022142/3263200 (epoch 61), batch 206135, train_loss = 1.202, time/batch = 0.116
Sequence 2023112/3263200 (epoch 61), batch 206235, train_loss = 1.274, time/batch = 0.121
Epoch 61 completed, average train loss 1.214059, learning rate 0.0010
Shuffling training data...
Seq

Sequence 2100668/3263200 (epoch 64), batch 214137, train_loss = 1.027, time/batch = 0.117
Sequence 2101628/3263200 (epoch 64), batch 214237, train_loss = 1.305, time/batch = 0.117
Sequence 2102598/3263200 (epoch 64), batch 214337, train_loss = 0.897, time/batch = 0.118
Sequence 2103568/3263200 (epoch 64), batch 214437, train_loss = 0.987, time/batch = 0.120
Sequence 2104538/3263200 (epoch 64), batch 214537, train_loss = 1.111, time/batch = 0.116
Sequence 2105528/3263200 (epoch 64), batch 214637, train_loss = 1.646, time/batch = 0.119
Sequence 2106508/3263200 (epoch 64), batch 214737, train_loss = 0.979, time/batch = 0.119
Sequence 2107498/3263200 (epoch 64), batch 214837, train_loss = 1.294, time/batch = 0.121
Sequence 2108468/3263200 (epoch 64), batch 214937, train_loss = 0.965, time/batch = 0.118
Sequence 2109448/3263200 (epoch 64), batch 215037, train_loss = 0.869, time/batch = 0.120
Sequence 2110438/3263200 (epoch 64), batch 215137, train_loss = 1.231, time/batch = 0.120
Sequence 2

Shuffling training data...
Sequence 2186984/3263200 (epoch 67), batch 222938, train_loss = 1.003, time/batch = 0.119
Sequence 2187954/3263200 (epoch 67), batch 223038, train_loss = 1.105, time/batch = 0.117
Sequence 2188944/3263200 (epoch 67), batch 223138, train_loss = 1.183, time/batch = 0.120
Sequence 2189924/3263200 (epoch 67), batch 223238, train_loss = 0.939, time/batch = 0.122
Sequence 2190904/3263200 (epoch 67), batch 223338, train_loss = 1.465, time/batch = 0.119
Sequence 2191864/3263200 (epoch 67), batch 223438, train_loss = 0.782, time/batch = 0.118
Sequence 2192834/3263200 (epoch 67), batch 223538, train_loss = 0.818, time/batch = 0.120
Sequence 2193814/3263200 (epoch 67), batch 223638, train_loss = 0.999, time/batch = 0.121
Sequence 2194804/3263200 (epoch 67), batch 223738, train_loss = 0.980, time/batch = 0.118
Sequence 2195804/3263200 (epoch 67), batch 223838, train_loss = 1.501, time/batch = 0.120
Sequence 2196774/3263200 (epoch 67), batch 223938, train_loss = 1.386, ti

Sequence 2274298/3263200 (epoch 69), batch 231842, train_loss = 0.895, time/batch = 0.118
Sequence 2275278/3263200 (epoch 69), batch 231942, train_loss = 0.997, time/batch = 0.121
Sequence 2276258/3263200 (epoch 69), batch 232042, train_loss = 1.029, time/batch = 0.119
Sequence 2277238/3263200 (epoch 69), batch 232143, train_loss = 1.316, time/batch = 0.119
Sequence 2278228/3263200 (epoch 69), batch 232243, train_loss = 0.985, time/batch = 0.117
Sequence 2279208/3263200 (epoch 69), batch 232343, train_loss = 1.538, time/batch = 0.120
Sequence 2280178/3263200 (epoch 69), batch 232443, train_loss = 0.756, time/batch = 0.117
Sequence 2281158/3263200 (epoch 69), batch 232543, train_loss = 0.694, time/batch = 0.118
Sequence 2282158/3263200 (epoch 69), batch 232643, train_loss = 1.924, time/batch = 0.119
Sequence 2283148/3263200 (epoch 69), batch 232743, train_loss = 1.814, time/batch = 0.118
Sequence 2284128/3263200 (epoch 69), batch 232843, train_loss = 1.210, time/batch = 0.115
Epoch 69 c

Sequence 2360644/3263200 (epoch 72), batch 240645, train_loss = 1.029, time/batch = 0.118
Sequence 2361624/3263200 (epoch 72), batch 240745, train_loss = 1.049, time/batch = 0.118
Sequence 2362574/3263200 (epoch 72), batch 240845, train_loss = 1.247, time/batch = 0.117
Sequence 2363564/3263200 (epoch 72), batch 240945, train_loss = 1.313, time/batch = 0.117
Sequence 2364534/3263200 (epoch 72), batch 241045, train_loss = 1.153, time/batch = 0.118
Sequence 2365514/3263200 (epoch 72), batch 241145, train_loss = 1.440, time/batch = 0.120
Sequence 2366494/3263200 (epoch 72), batch 241245, train_loss = 1.035, time/batch = 0.120
Sequence 2367484/3263200 (epoch 72), batch 241345, train_loss = 1.774, time/batch = 0.119
Sequence 2368454/3263200 (epoch 72), batch 241445, train_loss = 1.356, time/batch = 0.119
Sequence 2369444/3263200 (epoch 72), batch 241545, train_loss = 1.350, time/batch = 0.119
Sequence 2370434/3263200 (epoch 72), batch 241645, train_loss = 1.132, time/batch = 0.118
Sequence 2

Epoch 74 completed, average train loss 1.180459, learning rate 0.0010
model saved.
Shuffling training data...
Sequence 2447990/3263200 (epoch 75), batch 249547, train_loss = 1.503, time/batch = 0.121
Sequence 2448950/3263200 (epoch 75), batch 249647, train_loss = 0.682, time/batch = 0.120
Sequence 2449930/3263200 (epoch 75), batch 249747, train_loss = 1.371, time/batch = 0.117
Sequence 2450910/3263200 (epoch 75), batch 249847, train_loss = 1.182, time/batch = 0.117
Sequence 2451880/3263200 (epoch 75), batch 249947, train_loss = 0.820, time/batch = 0.120
Sequence 2452870/3263200 (epoch 75), batch 250047, train_loss = 1.189, time/batch = 0.119
Sequence 2453840/3263200 (epoch 75), batch 250147, train_loss = 1.317, time/batch = 0.118
Sequence 2454800/3263200 (epoch 75), batch 250247, train_loss = 1.404, time/batch = 0.118
Sequence 2455750/3263200 (epoch 75), batch 250347, train_loss = 0.821, time/batch = 0.118
Sequence 2456720/3263200 (epoch 75), batch 250447, train_loss = 1.230, time/batc

Sequence 2534304/3263200 (epoch 77), batch 258348, train_loss = 1.044, time/batch = 0.118
Sequence 2535294/3263200 (epoch 77), batch 258448, train_loss = 1.066, time/batch = 0.118
Sequence 2536264/3263200 (epoch 77), batch 258548, train_loss = 1.570, time/batch = 0.118
Sequence 2537244/3263200 (epoch 77), batch 258648, train_loss = 0.988, time/batch = 0.119
Sequence 2538204/3263200 (epoch 77), batch 258748, train_loss = 1.654, time/batch = 0.120
Sequence 2539184/3263200 (epoch 77), batch 258848, train_loss = 0.931, time/batch = 0.119
Sequence 2540154/3263200 (epoch 77), batch 258948, train_loss = 0.767, time/batch = 0.113
Sequence 2541144/3263200 (epoch 77), batch 259048, train_loss = 0.838, time/batch = 0.111
Sequence 2542134/3263200 (epoch 77), batch 259148, train_loss = 0.776, time/batch = 0.117
Sequence 2543114/3263200 (epoch 77), batch 259248, train_loss = 0.826, time/batch = 0.118
Sequence 2544104/3263200 (epoch 77), batch 259348, train_loss = 1.029, time/batch = 0.119
Sequence 2

Sequence 2620630/3263200 (epoch 80), batch 267149, train_loss = 1.039, time/batch = 0.118
Sequence 2621610/3263200 (epoch 80), batch 267249, train_loss = 1.364, time/batch = 0.121
Sequence 2622580/3263200 (epoch 80), batch 267349, train_loss = 0.682, time/batch = 0.118
Sequence 2623550/3263200 (epoch 80), batch 267449, train_loss = 0.917, time/batch = 0.111
Sequence 2624540/3263200 (epoch 80), batch 267549, train_loss = 1.728, time/batch = 0.111
Sequence 2625530/3263200 (epoch 80), batch 267649, train_loss = 1.240, time/batch = 0.117
Sequence 2626500/3263200 (epoch 80), batch 267749, train_loss = 1.120, time/batch = 0.117
Sequence 2627500/3263200 (epoch 80), batch 267849, train_loss = 1.436, time/batch = 0.119
Sequence 2628490/3263200 (epoch 80), batch 267949, train_loss = 1.001, time/batch = 0.119
Sequence 2629470/3263200 (epoch 80), batch 268049, train_loss = 0.898, time/batch = 0.119
Sequence 2630430/3263200 (epoch 80), batch 268149, train_loss = 1.174, time/batch = 0.120
Sequence 2

Sequence 2707954/3263200 (epoch 82), batch 276049, train_loss = 0.886, time/batch = 0.122
Epoch 82 completed, average train loss 1.164223, learning rate 0.0010
Shuffling training data...
Sequence 2708936/3263200 (epoch 83), batch 276149, train_loss = 1.145, time/batch = 0.117
Sequence 2709926/3263200 (epoch 83), batch 276249, train_loss = 1.189, time/batch = 0.119
Sequence 2710916/3263200 (epoch 83), batch 276349, train_loss = 0.874, time/batch = 0.118
Sequence 2711886/3263200 (epoch 83), batch 276449, train_loss = 0.964, time/batch = 0.120
Sequence 2712856/3263200 (epoch 83), batch 276549, train_loss = 1.266, time/batch = 0.120
Sequence 2713836/3263200 (epoch 83), batch 276649, train_loss = 1.102, time/batch = 0.118
Sequence 2714816/3263200 (epoch 83), batch 276749, train_loss = 0.971, time/batch = 0.120
Sequence 2715796/3263200 (epoch 83), batch 276849, train_loss = 1.128, time/batch = 0.119
Sequence 2716786/3263200 (epoch 83), batch 276949, train_loss = 0.853, time/batch = 0.118
Seq

Sequence 2794300/3263200 (epoch 85), batch 284852, train_loss = 1.257, time/batch = 0.118
Sequence 2795290/3263200 (epoch 85), batch 284952, train_loss = 1.003, time/batch = 0.120
Sequence 2796280/3263200 (epoch 85), batch 285052, train_loss = 1.066, time/batch = 0.118
Sequence 2797260/3263200 (epoch 85), batch 285152, train_loss = 0.971, time/batch = 0.122
Sequence 2798250/3263200 (epoch 85), batch 285252, train_loss = 1.134, time/batch = 0.120
Sequence 2799220/3263200 (epoch 85), batch 285352, train_loss = 1.207, time/batch = 0.120
Sequence 2800180/3263200 (epoch 85), batch 285453, train_loss = 1.079, time/batch = 0.117
Sequence 2801170/3263200 (epoch 85), batch 285553, train_loss = 1.183, time/batch = 0.113
Sequence 2802150/3263200 (epoch 85), batch 285653, train_loss = 0.744, time/batch = 0.110
Sequence 2803120/3263200 (epoch 85), batch 285753, train_loss = 1.229, time/batch = 0.116
Sequence 2804120/3263200 (epoch 85), batch 285853, train_loss = 1.418, time/batch = 0.120
Sequence 2

Sequence 2880666/3263200 (epoch 88), batch 293654, train_loss = 0.823, time/batch = 0.117
Sequence 2881656/3263200 (epoch 88), batch 293754, train_loss = 0.774, time/batch = 0.119
Sequence 2882646/3263200 (epoch 88), batch 293854, train_loss = 1.170, time/batch = 0.120
Sequence 2883646/3263200 (epoch 88), batch 293954, train_loss = 1.365, time/batch = 0.119
Sequence 2884636/3263200 (epoch 88), batch 294054, train_loss = 1.106, time/batch = 0.118
Sequence 2885626/3263200 (epoch 88), batch 294154, train_loss = 1.044, time/batch = 0.122
Sequence 2886616/3263200 (epoch 88), batch 294254, train_loss = 2.150, time/batch = 0.120
Sequence 2887606/3263200 (epoch 88), batch 294354, train_loss = 0.897, time/batch = 0.119
Sequence 2888596/3263200 (epoch 88), batch 294454, train_loss = 1.293, time/batch = 0.117
Sequence 2889576/3263200 (epoch 88), batch 294554, train_loss = 1.001, time/batch = 0.120
Sequence 2890536/3263200 (epoch 88), batch 294654, train_loss = 1.433, time/batch = 0.120
Sequence 2

Sequence 2967930/3263200 (epoch 90), batch 302555, train_loss = 1.195, time/batch = 0.117
Sequence 2968930/3263200 (epoch 90), batch 302655, train_loss = 0.902, time/batch = 0.118
Epoch 90 completed, average train loss 1.150477, learning rate 0.0010
Shuffling training data...
Sequence 2969912/3263200 (epoch 91), batch 302755, train_loss = 0.852, time/batch = 0.111
Sequence 2970882/3263200 (epoch 91), batch 302855, train_loss = 0.951, time/batch = 0.119
Sequence 2971852/3263200 (epoch 91), batch 302955, train_loss = 1.016, time/batch = 0.122
Sequence 2972852/3263200 (epoch 91), batch 303055, train_loss = 1.175, time/batch = 0.118
Sequence 2973802/3263200 (epoch 91), batch 303155, train_loss = 1.258, time/batch = 0.120
Sequence 2974782/3263200 (epoch 91), batch 303255, train_loss = 1.230, time/batch = 0.116
Sequence 2975772/3263200 (epoch 91), batch 303355, train_loss = 1.263, time/batch = 0.120
Sequence 2976772/3263200 (epoch 91), batch 303455, train_loss = 1.645, time/batch = 0.114
Seq

Sequence 3054316/3263200 (epoch 93), batch 311359, train_loss = 1.206, time/batch = 0.116
Sequence 3055296/3263200 (epoch 93), batch 311459, train_loss = 1.766, time/batch = 0.122
Sequence 3056276/3263200 (epoch 93), batch 311559, train_loss = 1.175, time/batch = 0.121
Sequence 3057256/3263200 (epoch 93), batch 311659, train_loss = 1.312, time/batch = 0.122
Sequence 3058236/3263200 (epoch 93), batch 311759, train_loss = 1.000, time/batch = 0.119
Sequence 3059206/3263200 (epoch 93), batch 311859, train_loss = 0.948, time/batch = 0.119
Sequence 3060196/3263200 (epoch 93), batch 311959, train_loss = 1.104, time/batch = 0.124
Sequence 3061176/3263200 (epoch 93), batch 312059, train_loss = 1.632, time/batch = 0.121
Sequence 3062166/3263200 (epoch 93), batch 312159, train_loss = 0.802, time/batch = 0.120
Sequence 3063156/3263200 (epoch 93), batch 312259, train_loss = 1.175, time/batch = 0.119
Sequence 3064126/3263200 (epoch 93), batch 312359, train_loss = 1.153, time/batch = 0.119
Sequence 3

Sequence 3140642/3263200 (epoch 96), batch 320159, train_loss = 1.102, time/batch = 0.119
Sequence 3141612/3263200 (epoch 96), batch 320259, train_loss = 1.155, time/batch = 0.122
Sequence 3142582/3263200 (epoch 96), batch 320359, train_loss = 1.372, time/batch = 0.121
Sequence 3143572/3263200 (epoch 96), batch 320459, train_loss = 1.303, time/batch = 0.119
Sequence 3144522/3263200 (epoch 96), batch 320559, train_loss = 0.987, time/batch = 0.122
Sequence 3145492/3263200 (epoch 96), batch 320659, train_loss = 1.150, time/batch = 0.121
Sequence 3146482/3263200 (epoch 96), batch 320759, train_loss = 1.345, time/batch = 0.120
Sequence 3147472/3263200 (epoch 96), batch 320859, train_loss = 1.228, time/batch = 0.122
Sequence 3148462/3263200 (epoch 96), batch 320959, train_loss = 1.672, time/batch = 0.122
Sequence 3149432/3263200 (epoch 96), batch 321059, train_loss = 0.917, time/batch = 0.121
Sequence 3150412/3263200 (epoch 96), batch 321159, train_loss = 1.150, time/batch = 0.120
Sequence 3

Sequence 3227966/3263200 (epoch 98), batch 329060, train_loss = 1.119, time/batch = 0.121
Sequence 3228936/3263200 (epoch 98), batch 329160, train_loss = 1.022, time/batch = 0.121
Sequence 3229916/3263200 (epoch 98), batch 329260, train_loss = 1.000, time/batch = 0.116
Epoch 98 completed, average train loss 1.137398, learning rate 0.0010
Shuffling training data...
Sequence 3230908/3263200 (epoch 99), batch 329360, train_loss = 0.836, time/batch = 0.116
Sequence 3231898/3263200 (epoch 99), batch 329460, train_loss = 1.159, time/batch = 0.117
Sequence 3232878/3263200 (epoch 99), batch 329560, train_loss = 1.052, time/batch = 0.118
Sequence 3233818/3263200 (epoch 99), batch 329660, train_loss = 1.320, time/batch = 0.116
Sequence 3234788/3263200 (epoch 99), batch 329760, train_loss = 0.895, time/batch = 0.121
Sequence 3235778/3263200 (epoch 99), batch 329860, train_loss = 0.943, time/batch = 0.120
Sequence 3236778/3263200 (epoch 99), batch 329960, train_loss = 1.120, time/batch = 0.122
Seq

## 2. Train method 2

In [7]:
# Hyper-parameter definitions for training. They are declared as command-line
# options and then parsed from an explicit argument list (see the last line),
# so the result does not depend on the notebook kernel's own sys.argv.
parser = argparse.ArgumentParser()

# --- network size ---
parser.add_argument('--dim_rec', type=int, default=128,
                     help='size of RNN hidden state')
parser.add_argument('--num_layers', type=int, default=2,
                     help='number of layers in the RNN. ')

# --- training schedule and checkpointing ---
parser.add_argument('--batch_size', type=int, default=10,
                     help='minibatch size')
parser.add_argument('--num_epochs', type=int, default=200,
                     help='number of epochs')
parser.add_argument('--save_every', type=int, default=10,
                     help='save frequency by epoches')
parser.add_argument('--model_dir', type=str, default='checkpoints',
                     help='directory to save model to')
parser.add_argument('--summary_dir', type=str, default='summary',
                     help='directory to save tensorboard info')

# --- optimisation ---
parser.add_argument('--max_grad_norm', type=float, default=1.,
                     help='clip gradients at this value')
parser.add_argument('--learning_rate', type=float, default=0.001,
                     help='learning rate')
parser.add_argument('--decay_rate', type=float, default=1.0,
                     help='decay rate for the optimizer')

# --- model output / data ---
parser.add_argument('--num_mixture', type=int, default=2,
                     help='number of gaussian mixtures')
# argparse only applies `type` to string defaults; a non-string default is
# used as-is. The default is therefore written as a float literal so that
# args.data_scale has the same type whether or not the flag is supplied.
parser.add_argument('--data_scale', type=float, default=1000.0,
                     help='factor to scale raw data down by')
parser.add_argument('--load_model', type=str, default=None,
                     help='Reload a model checkpoint and restore training.' )
parser.add_argument('--bptt_length', type=int, default=120,
                     help='How many steps should the gradients pass back.' )
parser.add_argument('--loss_form', type=str, default='mse',
                     help='mse / gmm' )
parser.add_argument('--constraint_factor', type=float, default=0.,
                     help='the weight for constraint term in the cost function.' )

# Parse a fixed argument list instead of sys.argv. The only explicit override
# is --num_epochs, which here repeats its default value of 200.
args = parser.parse_args(['--num_epochs','200'])

In [5]:
import sys

# Display the interpreter's module search path, i.e. the list of locations
# Python consults when resolving imports in this kernel.
module_search_path = sys.path
print(module_search_path)

['', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python36.zip', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/lib-dynload', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/site-packages', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/site-packages/Sphinx-1.5.6-py3.6.egg', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/site-packages/svgwrite-1.1.6-py3.6.egg', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/site-packages/IPython/extensions', '/home/mye/.ipython']


In [11]:
# Smoke-check of the data pipeline: build a loader from the parsed
# hyper-parameters, fetch a single batch and report the array shapes.
data_loader = DataLoader(
    args.batch_size,    # 10 with the parser defaults
    args.data_scale,
    args.bptt_length,   # 120 with the parser defaults
)
data_loader.reset_batch_pointer()

# next_batch() yields five items; `lens` is unpacked but not inspected here.
x, y, w, c, lens = data_loader.next_batch()

# One shape per line, in the order x, y, w, c.
for batch_array in (x, y, w, c):
    print(batch_array.shape)

print(args.bptt_length)

Maximal length of the training data is 169
Training data case distribution: [5348, 5108, 4904, 1208, 1260, 1224, 916, 1180, 1216, 1044, 876, 456, 1264, 992, 1080, 932, 1008, 420, 1160, 1036]
Validation data case distribution: [70, 67, 65, 15, 17, 16, 12, 15, 16, 14, 12, 6, 17, 13, 14, 13, 14, 5, 15, 13]
Shuffling training data...
Shuffling training data...
(10, 120, 6)
(10, 120, 3)
(10, 120, 1)
(10, 12)
120
