In [1]:
""" The main script for training

requirement: python 2, tensorflow r1.4 or r1.2 
"""
import os
import sys
import time
import math
import argparse
import pickle

import numpy as np
import tensorflow as tf
# from tensorflow.contrib.framework import nest

from data_loader import DataLoader
from model import Model

In [2]:
# Session options passed to tf.Session in train(): allocate GPU memory on
# demand (allow_growth) and log the device each op is placed on.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(allow_growth=True),
    log_device_placement=True,
)

In [3]:
def main(argv=None):
  """Parse the training hyper-parameters and start training.

  Args:
    argv: optional list of command-line style strings, e.g.
      ``['--num_epochs', '4', '--save_every', '2']``. When None (the
      default) the notebook's own settings are used: 100 epochs with a
      checkpoint every 5 epochs. ``sys.argv`` is never consulted; an explicit
      list is always handed to the parser.

  Returns:
    None. The parsed namespace is passed straight to train().
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--dim_rec', type=int, default=128,
                     help='size of RNN hidden state')
  parser.add_argument('--num_layers', type=int, default=2,
                     help='number of layers in the RNN. ')
  parser.add_argument('--batch_size', type=int, default=10,
                     help='minibatch size')
  parser.add_argument('--num_epochs', type=int, default=200,
                     help='number of epochs')
  parser.add_argument('--save_every', type=int, default=10,
                     help='save frequency by epoches')
  parser.add_argument('--model_dir', type=str, default='checkpoints',
                     help='directory to save model to')
  parser.add_argument('--summary_dir', type=str, default='summary',
                     help='directory to save tensorboard info')
  parser.add_argument('--max_grad_norm', type=float, default=1.,
                     help='clip gradients at this value')
  parser.add_argument('--learning_rate', type=float, default=0.001,
                     help='learning rate')
  parser.add_argument('--decay_rate', type=float, default=1.0,
                     help='decay rate for the optimizer')
  parser.add_argument('--num_mixture', type=int, default=2,
                     help='number of gaussian mixtures')
  parser.add_argument('--data_scale', type=float, default=1000,
                     help='factor to scale raw data down by')
  parser.add_argument('--load_model', type=str, default=None,
                     help='Reload a model checkpoint and restore training.' )
  parser.add_argument('--bptt_length', type=int, default=120,
                     help='How many steps should the gradients pass back.' )
  parser.add_argument('--loss_form', type=str, default='mse',
                     help='mse / gmm' )
  parser.add_argument('--constraint_factor', type=float, default=0.,
                     help='the weight for constraint term in the cost function.' )

  if argv is None:
    # Settings this notebook was run with. Previously the parser was given
    # '--num_epochs 200' and the namespace was then overwritten by hand with
    # num_epochs=100 / save_every=5; expressing them as arguments yields the
    # same namespace without the contradictory intermediate value.
    argv = ['--num_epochs', '100', '--save_every', '5']

  args = parser.parse_args(list(argv))

  train(args)


In [4]:
def train(args):
  """Run the training loop described by ``args`` and save checkpoints.

  Pickles the hyper-parameters to ``<model_dir>/config.pkl``, builds the
  model, optionally restores a checkpoint, then trains for epochs
  ``ep_start .. args.num_epochs - 1``. Progress is printed once at least 100
  model steps have accumulated since the last report, and a checkpoint is
  written every ``args.save_every`` epochs.

  Args:
    args: argparse.Namespace as built by main(). This function reads
      batch_size, data_scale, bptt_length, model_dir, load_model, num_epochs,
      learning_rate, decay_rate and save_every; the whole namespace is also
      handed to Model.

  Raises:
    ValueError: if ``args.load_model`` is given but does not end in
      ``-<integer>`` (the epoch suffix that the checkpoints saved below
      carry via ``global_step``).
  """
  data_loader = DataLoader(args.batch_size, args.data_scale, args.bptt_length)
  data_loader.reset_batch_pointer()

  # An empty model_dir means "current directory"; os.makedirs('') would raise.
  if args.model_dir != '' and not os.path.exists(args.model_dir):
    os.makedirs(args.model_dir)

  with open(os.path.join(args.model_dir, 'config.pkl'), 'wb') as f:
    pickle.dump(args, f)
  print("hyperparam. saved.")

  model = Model(args)

  # training
  with tf.Session(config=config) as sess:

    tf.global_variables_initializer().run()

    saver = tf.train.Saver()
    if args.load_model is not None:
      saver.restore(sess, args.load_model)
      # Checkpoints are saved with global_step=(ep+1), so the trailing
      # "-<n>" of the path is the epoch to resume from.
      _, ep_start = args.load_model.rsplit("-", 1)
      ep_start = int(ep_start)
      # NOTE(review): estimates the step counter as one step per batch;
      # confirm this matches how Model.train advances step_count.
      model_steps = int(ep_start * data_loader.num_batches)
    else:
      ep_start = 0
      model_steps = 0
    # Must be defined on BOTH branches: it used to be set only on the
    # fresh-start path, so resuming from a checkpoint crashed at the first
    # progress check below.
    last_model_steps = model_steps

    last_time = time.time()

    for ep in range(ep_start, args.num_epochs):
      ep_loss = []
      # Exponentially decayed learning rate for this epoch.
      lr = args.learning_rate * (args.decay_rate ** ep)
      # NOTE(review): tf.assign creates a new graph op on every call; with
      # one call per epoch this is small, but it could be built once up front.
      sess.run(tf.assign(model.lr, lr))

      for i in range(int(data_loader.num_sequences / args.batch_size)):
        # Running count of sequences consumed so far (used for logging only).
        idx = ep * data_loader.num_sequences + i * args.batch_size
        x, y, w, c, lens = data_loader.next_batch()

        loss_list, model_steps = model.train(
          sess=sess,
          sequence=x,
          targets=y,
          weights=w,
          conditions=c,
          subseq_length=args.bptt_length,
          step_count=model_steps
          )

        ep_loss += loss_list

        # Report once at least 100 model steps have passed since last time.
        if model_steps - last_model_steps >= 100:
          new_time = time.time()
          print(
            "Sequence %d/%d (epoch %d), batch %d, train_loss = %.3f, time/batch = %.3f"
            % (
                idx,
                args.num_epochs * data_loader.num_sequences,
                ep,
                model_steps,
                np.mean(loss_list),
                (new_time - last_time) / (model_steps - last_model_steps)
              )
            )
          sys.stdout.flush()
          last_model_steps = model_steps
          last_time = new_time

      print("Epoch %d completed, average train loss %.6f, learning rate %.4f" % (ep, np.mean(ep_loss), lr))
      sys.stdout.flush()

      # Re-create the directory if it vanished during training. The empty
      # string is skipped for the same reason as above (os.makedirs('')
      # raises), which previously aborted training after the first epoch.
      if args.model_dir != '' and not os.path.isdir(args.model_dir):
        os.makedirs(args.model_dir)
      if (ep + 1) % args.save_every == 0:
        checkpoint_path = os.path.join(args.model_dir, 'model.ckpt')
        saver.save(sess, save_path=checkpoint_path, global_step=(ep + 1))
        print("model saved.")




In [5]:
# Entry point: run the full training job when this cell/script is executed.
# For a quick smoke test, lower num_epochs / save_every inside main()
# (e.g. 4 and 2) before it calls train().

if __name__ == "__main__":
  # Optionally wrap the call in `with tf.device('/device:GPU:2'):` to pin the
  # graph to one GPU (left disabled here).
    main()

Maximal length of the training data is 169
Training data case distribution: [5348, 5108, 4904, 1208, 1260, 1224, 916, 1180, 1216, 1044, 876, 456, 1264, 992, 1080, 932, 1008, 420, 1160, 1036]
Validation data case distribution: [70, 67, 65, 15, 17, 16, 12, 15, 16, 14, 12, 6, 17, 13, 14, 13, 14, 5, 15, 13]
Shuffling training data...
Shuffling training data...
hyperparam. saved.
Number of trainable variables 6
[<tf.Variable 'rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0' shape=(134, 512) dtype=float32_ref>, <tf.Variable 'rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0' shape=(256, 512) dtype=float32_ref>, <tf.Variable 'rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'out_W:0' shape=(128, 3) dtype=float32_ref>, <tf.Variable 'out_b:0' shape=(3,) dtype=float32_ref>]
Sequence 970/3263200 (epoch 0), batch 100, train_loss = 4.186, time/batch = 0

Sequence 83384/3263200 (epoch 2), batch 8502, train_loss = 2.331, time/batch = 0.094
Sequence 84354/3263200 (epoch 2), batch 8602, train_loss = 1.765, time/batch = 0.093
Sequence 85304/3263200 (epoch 2), batch 8702, train_loss = 2.527, time/batch = 0.095
Sequence 86294/3263200 (epoch 2), batch 8802, train_loss = 2.301, time/batch = 0.096
Sequence 87284/3263200 (epoch 2), batch 8902, train_loss = 1.647, time/batch = 0.095
Sequence 88264/3263200 (epoch 2), batch 9002, train_loss = 2.033, time/batch = 0.093
Sequence 89254/3263200 (epoch 2), batch 9102, train_loss = 1.749, time/batch = 0.094
Sequence 90254/3263200 (epoch 2), batch 9202, train_loss = 1.141, time/batch = 0.095
Sequence 91224/3263200 (epoch 2), batch 9302, train_loss = 1.364, time/batch = 0.094
Sequence 92194/3263200 (epoch 2), batch 9402, train_loss = 1.182, time/batch = 0.095
Sequence 93194/3263200 (epoch 2), batch 9502, train_loss = 2.777, time/batch = 0.095
Sequence 94174/3263200 (epoch 2), batch 9602, train_loss = 1.545,

Sequence 173660/3263200 (epoch 5), batch 17704, train_loss = 1.255, time/batch = 0.095
Sequence 174630/3263200 (epoch 5), batch 17804, train_loss = 1.523, time/batch = 0.094
Sequence 175620/3263200 (epoch 5), batch 17904, train_loss = 3.230, time/batch = 0.094
Sequence 176610/3263200 (epoch 5), batch 18004, train_loss = 1.991, time/batch = 0.095
Sequence 177580/3263200 (epoch 5), batch 18104, train_loss = 0.933, time/batch = 0.094
Sequence 178570/3263200 (epoch 5), batch 18204, train_loss = 1.281, time/batch = 0.094
Sequence 179540/3263200 (epoch 5), batch 18304, train_loss = 0.804, time/batch = 0.094
Sequence 180510/3263200 (epoch 5), batch 18404, train_loss = 1.353, time/batch = 0.093
Sequence 181490/3263200 (epoch 5), batch 18504, train_loss = 1.549, time/batch = 0.094
Sequence 182450/3263200 (epoch 5), batch 18604, train_loss = 1.190, time/batch = 0.094
Sequence 183440/3263200 (epoch 5), batch 18704, train_loss = 2.450, time/batch = 0.095
Sequence 184410/3263200 (epoch 5), batch 18

Sequence 262906/3263200 (epoch 8), batch 26804, train_loss = 2.461, time/batch = 0.094
Sequence 263846/3263200 (epoch 8), batch 26904, train_loss = 1.945, time/batch = 0.094
Sequence 264826/3263200 (epoch 8), batch 27004, train_loss = 1.693, time/batch = 0.095
Sequence 265806/3263200 (epoch 8), batch 27104, train_loss = 2.122, time/batch = 0.096
Sequence 266776/3263200 (epoch 8), batch 27204, train_loss = 1.324, time/batch = 0.095
Sequence 267746/3263200 (epoch 8), batch 27304, train_loss = 1.973, time/batch = 0.095
Sequence 268726/3263200 (epoch 8), batch 27404, train_loss = 1.596, time/batch = 0.096
Sequence 269726/3263200 (epoch 8), batch 27504, train_loss = 3.005, time/batch = 0.095
Sequence 270706/3263200 (epoch 8), batch 27604, train_loss = 1.334, time/batch = 0.095
Sequence 271706/3263200 (epoch 8), batch 27704, train_loss = 3.468, time/batch = 0.096
Sequence 272686/3263200 (epoch 8), batch 27804, train_loss = 2.609, time/batch = 0.093
Sequence 273666/3263200 (epoch 8), batch 27

Sequence 353230/3263200 (epoch 10), batch 36004, train_loss = 1.687, time/batch = 0.094
Sequence 354200/3263200 (epoch 10), batch 36104, train_loss = 1.675, time/batch = 0.094
Sequence 355180/3263200 (epoch 10), batch 36204, train_loss = 1.826, time/batch = 0.089
Sequence 356140/3263200 (epoch 10), batch 36304, train_loss = 1.350, time/batch = 0.094
Sequence 357120/3263200 (epoch 10), batch 36404, train_loss = 1.363, time/batch = 0.095
Sequence 358090/3263200 (epoch 10), batch 36504, train_loss = 2.262, time/batch = 0.094
Epoch 10 completed, average train loss 1.572067, learning rate 0.0010
Shuffling training data...
Sequence 359062/3263200 (epoch 11), batch 36604, train_loss = 1.757, time/batch = 0.095
Sequence 360032/3263200 (epoch 11), batch 36704, train_loss = 1.901, time/batch = 0.094
Sequence 360992/3263200 (epoch 11), batch 36804, train_loss = 1.203, time/batch = 0.095
Sequence 361972/3263200 (epoch 11), batch 36904, train_loss = 1.321, time/batch = 0.094
Sequence 362952/3263200

Sequence 441446/3263200 (epoch 13), batch 45007, train_loss = 1.330, time/batch = 0.092
Sequence 442436/3263200 (epoch 13), batch 45107, train_loss = 0.999, time/batch = 0.093
Sequence 443396/3263200 (epoch 13), batch 45207, train_loss = 1.313, time/batch = 0.095
Sequence 444396/3263200 (epoch 13), batch 45307, train_loss = 0.941, time/batch = 0.095
Sequence 445376/3263200 (epoch 13), batch 45407, train_loss = 1.420, time/batch = 0.094
Sequence 446356/3263200 (epoch 13), batch 45507, train_loss = 1.116, time/batch = 0.095
Sequence 447346/3263200 (epoch 13), batch 45607, train_loss = 2.006, time/batch = 0.093
Sequence 448336/3263200 (epoch 13), batch 45707, train_loss = 1.735, time/batch = 0.094
Sequence 449316/3263200 (epoch 13), batch 45807, train_loss = 1.846, time/batch = 0.094
Sequence 450306/3263200 (epoch 13), batch 45907, train_loss = 2.303, time/batch = 0.096
Sequence 451286/3263200 (epoch 13), batch 46007, train_loss = 1.136, time/batch = 0.094
Sequence 452276/3263200 (epoch 1

Sequence 529772/3263200 (epoch 16), batch 54009, train_loss = 1.468, time/batch = 0.093
Sequence 530752/3263200 (epoch 16), batch 54109, train_loss = 1.046, time/batch = 0.094
Sequence 531722/3263200 (epoch 16), batch 54209, train_loss = 1.164, time/batch = 0.095
Sequence 532722/3263200 (epoch 16), batch 54309, train_loss = 0.699, time/batch = 0.094
Sequence 533702/3263200 (epoch 16), batch 54409, train_loss = 1.507, time/batch = 0.094
Sequence 534702/3263200 (epoch 16), batch 54509, train_loss = 1.674, time/batch = 0.093
Sequence 535692/3263200 (epoch 16), batch 54609, train_loss = 1.271, time/batch = 0.093
Sequence 536682/3263200 (epoch 16), batch 54709, train_loss = 1.549, time/batch = 0.095
Sequence 537662/3263200 (epoch 16), batch 54809, train_loss = 2.663, time/batch = 0.093
Sequence 538642/3263200 (epoch 16), batch 54909, train_loss = 1.316, time/batch = 0.095
Sequence 539622/3263200 (epoch 16), batch 55009, train_loss = 1.663, time/batch = 0.094
Sequence 540592/3263200 (epoch 1

Sequence 619056/3263200 (epoch 18), batch 63110, train_loss = 2.273, time/batch = 0.091
Epoch 18 completed, average train loss 1.448525, learning rate 0.0010
Shuffling training data...
Sequence 620048/3263200 (epoch 19), batch 63210, train_loss = 1.112, time/batch = 0.094
Sequence 620988/3263200 (epoch 19), batch 63310, train_loss = 1.464, time/batch = 0.093
Sequence 621968/3263200 (epoch 19), batch 63411, train_loss = 0.565, time/batch = 0.093
Sequence 622948/3263200 (epoch 19), batch 63511, train_loss = 0.789, time/batch = 0.092
Sequence 623938/3263200 (epoch 19), batch 63611, train_loss = 1.848, time/batch = 0.093
Sequence 624908/3263200 (epoch 19), batch 63711, train_loss = 1.875, time/batch = 0.093
Sequence 625908/3263200 (epoch 19), batch 63811, train_loss = 1.041, time/batch = 0.091
Sequence 626888/3263200 (epoch 19), batch 63911, train_loss = 1.886, time/batch = 0.092
Sequence 627868/3263200 (epoch 19), batch 64011, train_loss = 1.167, time/batch = 0.092
Sequence 628838/3263200

Sequence 707382/3263200 (epoch 21), batch 72113, train_loss = 1.160, time/batch = 0.093
Sequence 708372/3263200 (epoch 21), batch 72213, train_loss = 1.928, time/batch = 0.093
Sequence 709342/3263200 (epoch 21), batch 72313, train_loss = 1.837, time/batch = 0.091
Sequence 710322/3263200 (epoch 21), batch 72413, train_loss = 1.608, time/batch = 0.094
Sequence 711302/3263200 (epoch 21), batch 72513, train_loss = 1.444, time/batch = 0.093
Sequence 712282/3263200 (epoch 21), batch 72613, train_loss = 1.125, time/batch = 0.092
Sequence 713262/3263200 (epoch 21), batch 72713, train_loss = 2.246, time/batch = 0.092
Sequence 714252/3263200 (epoch 21), batch 72813, train_loss = 3.152, time/batch = 0.092
Sequence 715232/3263200 (epoch 21), batch 72913, train_loss = 1.245, time/batch = 0.093
Sequence 716232/3263200 (epoch 21), batch 73013, train_loss = 1.061, time/batch = 0.094
Sequence 717192/3263200 (epoch 21), batch 73113, train_loss = 1.678, time/batch = 0.093
Epoch 21 completed, average trai

Sequence 795668/3263200 (epoch 24), batch 81115, train_loss = 0.876, time/batch = 0.092
Sequence 796658/3263200 (epoch 24), batch 81215, train_loss = 1.386, time/batch = 0.091
Sequence 797658/3263200 (epoch 24), batch 81315, train_loss = 1.259, time/batch = 0.093
Sequence 798648/3263200 (epoch 24), batch 81415, train_loss = 1.135, time/batch = 0.092
Sequence 799638/3263200 (epoch 24), batch 81515, train_loss = 1.391, time/batch = 0.093
Sequence 800618/3263200 (epoch 24), batch 81615, train_loss = 1.063, time/batch = 0.092
Sequence 801608/3263200 (epoch 24), batch 81715, train_loss = 1.470, time/batch = 0.092
Sequence 802588/3263200 (epoch 24), batch 81815, train_loss = 1.782, time/batch = 0.092
Sequence 803578/3263200 (epoch 24), batch 81915, train_loss = 1.777, time/batch = 0.092
Sequence 804548/3263200 (epoch 24), batch 82015, train_loss = 1.720, time/batch = 0.092
Sequence 805528/3263200 (epoch 24), batch 82115, train_loss = 1.717, time/batch = 0.091
Sequence 806508/3263200 (epoch 2

Sequence 883994/3263200 (epoch 27), batch 90117, train_loss = 1.281, time/batch = 0.101
Sequence 884974/3263200 (epoch 27), batch 90217, train_loss = 1.006, time/batch = 0.102
Sequence 885964/3263200 (epoch 27), batch 90317, train_loss = 1.470, time/batch = 0.102
Sequence 886934/3263200 (epoch 27), batch 90417, train_loss = 1.229, time/batch = 0.104
Sequence 887874/3263200 (epoch 27), batch 90517, train_loss = 1.323, time/batch = 0.104
Sequence 888864/3263200 (epoch 27), batch 90617, train_loss = 0.865, time/batch = 0.102
Sequence 889844/3263200 (epoch 27), batch 90717, train_loss = 1.419, time/batch = 0.100
Sequence 890824/3263200 (epoch 27), batch 90817, train_loss = 1.102, time/batch = 0.095
Sequence 891764/3263200 (epoch 27), batch 90917, train_loss = 1.293, time/batch = 0.094
Sequence 892754/3263200 (epoch 27), batch 91017, train_loss = 1.377, time/batch = 0.095
Sequence 893704/3263200 (epoch 27), batch 91117, train_loss = 1.289, time/batch = 0.102
Sequence 894704/3263200 (epoch 2

Sequence 973248/3263200 (epoch 29), batch 99220, train_loss = 0.943, time/batch = 0.093
Sequence 974218/3263200 (epoch 29), batch 99320, train_loss = 1.374, time/batch = 0.101
Sequence 975208/3263200 (epoch 29), batch 99420, train_loss = 1.399, time/batch = 0.098
Sequence 976188/3263200 (epoch 29), batch 99520, train_loss = 1.436, time/batch = 0.093
Sequence 977168/3263200 (epoch 29), batch 99620, train_loss = 1.354, time/batch = 0.095
Sequence 978168/3263200 (epoch 29), batch 99720, train_loss = 1.005, time/batch = 0.095
Epoch 29 completed, average train loss 1.355342, learning rate 0.0010
model saved.
Shuffling training data...
Sequence 979160/3263200 (epoch 30), batch 99820, train_loss = 1.615, time/batch = 0.096
Sequence 980140/3263200 (epoch 30), batch 99920, train_loss = 1.065, time/batch = 0.094
Sequence 981120/3263200 (epoch 30), batch 100020, train_loss = 0.810, time/batch = 0.095
Sequence 982080/3263200 (epoch 30), batch 100120, train_loss = 1.636, time/batch = 0.093
Sequence

Sequence 1060614/3263200 (epoch 32), batch 108121, train_loss = 1.316, time/batch = 0.093
Sequence 1061614/3263200 (epoch 32), batch 108221, train_loss = 0.923, time/batch = 0.093
Sequence 1062604/3263200 (epoch 32), batch 108321, train_loss = 1.491, time/batch = 0.093
Sequence 1063584/3263200 (epoch 32), batch 108421, train_loss = 1.235, time/batch = 0.092
Sequence 1064564/3263200 (epoch 32), batch 108521, train_loss = 1.207, time/batch = 0.092
Sequence 1065554/3263200 (epoch 32), batch 108621, train_loss = 1.114, time/batch = 0.093
Sequence 1066514/3263200 (epoch 32), batch 108721, train_loss = 1.018, time/batch = 0.092
Sequence 1067504/3263200 (epoch 32), batch 108821, train_loss = 1.626, time/batch = 0.093
Sequence 1068484/3263200 (epoch 32), batch 108921, train_loss = 1.484, time/batch = 0.093
Sequence 1069474/3263200 (epoch 32), batch 109021, train_loss = 0.863, time/batch = 0.093
Sequence 1070424/3263200 (epoch 32), batch 109121, train_loss = 1.375, time/batch = 0.093
Sequence 1

Sequence 1146880/3263200 (epoch 35), batch 116921, train_loss = 0.936, time/batch = 0.092
Sequence 1147870/3263200 (epoch 35), batch 117021, train_loss = 1.367, time/batch = 0.093
Sequence 1148840/3263200 (epoch 35), batch 117121, train_loss = 1.904, time/batch = 0.093
Sequence 1149840/3263200 (epoch 35), batch 117221, train_loss = 1.548, time/batch = 0.093
Sequence 1150840/3263200 (epoch 35), batch 117321, train_loss = 1.037, time/batch = 0.091
Sequence 1151810/3263200 (epoch 35), batch 117421, train_loss = 1.352, time/batch = 0.092
Sequence 1152790/3263200 (epoch 35), batch 117521, train_loss = 1.630, time/batch = 0.092
Sequence 1153790/3263200 (epoch 35), batch 117621, train_loss = 1.798, time/batch = 0.095
Sequence 1154750/3263200 (epoch 35), batch 117721, train_loss = 1.025, time/batch = 0.092
Sequence 1155730/3263200 (epoch 35), batch 117821, train_loss = 1.294, time/batch = 0.092
Sequence 1156730/3263200 (epoch 35), batch 117921, train_loss = 0.901, time/batch = 0.093
Sequence 1

Sequence 1234224/3263200 (epoch 37), batch 125823, train_loss = 1.043, time/batch = 0.093
Sequence 1235214/3263200 (epoch 37), batch 125923, train_loss = 1.354, time/batch = 0.092
Sequence 1236174/3263200 (epoch 37), batch 126023, train_loss = 1.662, time/batch = 0.093
Sequence 1237154/3263200 (epoch 37), batch 126123, train_loss = 0.996, time/batch = 0.092
Sequence 1238154/3263200 (epoch 37), batch 126223, train_loss = 1.141, time/batch = 0.093
Sequence 1239144/3263200 (epoch 37), batch 126323, train_loss = 1.317, time/batch = 0.094
Epoch 37 completed, average train loss 1.307795, learning rate 0.0010
Shuffling training data...
Sequence 1240116/3263200 (epoch 38), batch 126423, train_loss = 1.774, time/batch = 0.093
Sequence 1241066/3263200 (epoch 38), batch 126523, train_loss = 1.199, time/batch = 0.092
Sequence 1242046/3263200 (epoch 38), batch 126623, train_loss = 1.130, time/batch = 0.093
Sequence 1243026/3263200 (epoch 38), batch 126723, train_loss = 0.882, time/batch = 0.091
Seq

Sequence 1320570/3263200 (epoch 40), batch 134623, train_loss = 0.778, time/batch = 0.103
Sequence 1321550/3263200 (epoch 40), batch 134723, train_loss = 0.956, time/batch = 0.101
Sequence 1322520/3263200 (epoch 40), batch 134823, train_loss = 1.370, time/batch = 0.105
Sequence 1323510/3263200 (epoch 40), batch 134923, train_loss = 1.026, time/batch = 0.101
Sequence 1324490/3263200 (epoch 40), batch 135023, train_loss = 0.715, time/batch = 0.092
Sequence 1325490/3263200 (epoch 40), batch 135123, train_loss = 1.623, time/batch = 0.092
Sequence 1326480/3263200 (epoch 40), batch 135223, train_loss = 1.752, time/batch = 0.093
Sequence 1327460/3263200 (epoch 40), batch 135323, train_loss = 1.198, time/batch = 0.092
Sequence 1328430/3263200 (epoch 40), batch 135423, train_loss = 1.411, time/batch = 0.093
Sequence 1329420/3263200 (epoch 40), batch 135523, train_loss = 1.242, time/batch = 0.092
Sequence 1330400/3263200 (epoch 40), batch 135623, train_loss = 1.502, time/batch = 0.092
Sequence 1

Sequence 1406876/3263200 (epoch 43), batch 143423, train_loss = 1.547, time/batch = 0.092
Sequence 1407826/3263200 (epoch 43), batch 143523, train_loss = 0.950, time/batch = 0.094
Sequence 1408816/3263200 (epoch 43), batch 143623, train_loss = 1.246, time/batch = 0.093
Sequence 1409786/3263200 (epoch 43), batch 143723, train_loss = 1.435, time/batch = 0.092
Sequence 1410726/3263200 (epoch 43), batch 143823, train_loss = 1.418, time/batch = 0.093
Sequence 1411706/3263200 (epoch 43), batch 143924, train_loss = 1.342, time/batch = 0.092
Sequence 1412686/3263200 (epoch 43), batch 144024, train_loss = 0.926, time/batch = 0.092
Sequence 1413666/3263200 (epoch 43), batch 144124, train_loss = 1.476, time/batch = 0.092
Sequence 1414636/3263200 (epoch 43), batch 144224, train_loss = 1.985, time/batch = 0.092
Sequence 1415636/3263200 (epoch 43), batch 144324, train_loss = 0.909, time/batch = 0.092
Sequence 1416626/3263200 (epoch 43), batch 144424, train_loss = 1.109, time/batch = 0.092
Sequence 1

Sequence 1494200/3263200 (epoch 45), batch 152326, train_loss = 1.057, time/batch = 0.092
Sequence 1495190/3263200 (epoch 45), batch 152426, train_loss = 1.380, time/batch = 0.092
Sequence 1496170/3263200 (epoch 45), batch 152526, train_loss = 1.637, time/batch = 0.092
Sequence 1497130/3263200 (epoch 45), batch 152626, train_loss = 1.519, time/batch = 0.093
Sequence 1498110/3263200 (epoch 45), batch 152726, train_loss = 0.645, time/batch = 0.092
Sequence 1499100/3263200 (epoch 45), batch 152826, train_loss = 1.555, time/batch = 0.093
Sequence 1500090/3263200 (epoch 45), batch 152926, train_loss = 0.996, time/batch = 0.093
Epoch 45 completed, average train loss 1.273815, learning rate 0.0010
Sequence 1501072/3263200 (epoch 46), batch 153026, train_loss = 0.692, time/batch = 0.092
Shuffling training data...
Sequence 1502062/3263200 (epoch 46), batch 153126, train_loss = 1.961, time/batch = 0.093
Sequence 1503062/3263200 (epoch 46), batch 153226, train_loss = 1.073, time/batch = 0.092
Seq

Sequence 1580536/3263200 (epoch 48), batch 161128, train_loss = 0.897, time/batch = 0.092
Sequence 1581516/3263200 (epoch 48), batch 161228, train_loss = 0.987, time/batch = 0.092
Sequence 1582506/3263200 (epoch 48), batch 161328, train_loss = 1.167, time/batch = 0.092
Sequence 1583496/3263200 (epoch 48), batch 161428, train_loss = 1.039, time/batch = 0.091
Sequence 1584486/3263200 (epoch 48), batch 161528, train_loss = 1.235, time/batch = 0.092
Sequence 1585456/3263200 (epoch 48), batch 161628, train_loss = 0.836, time/batch = 0.092
Sequence 1586416/3263200 (epoch 48), batch 161728, train_loss = 1.240, time/batch = 0.092
Sequence 1587396/3263200 (epoch 48), batch 161828, train_loss = 1.155, time/batch = 0.093
Sequence 1588376/3263200 (epoch 48), batch 161928, train_loss = 1.400, time/batch = 0.093
Sequence 1589346/3263200 (epoch 48), batch 162028, train_loss = 1.513, time/batch = 0.093
Sequence 1590346/3263200 (epoch 48), batch 162128, train_loss = 1.473, time/batch = 0.093
Sequence 1

Sequence 1666882/3263200 (epoch 51), batch 169931, train_loss = 1.424, time/batch = 0.093
Sequence 1667862/3263200 (epoch 51), batch 170031, train_loss = 1.042, time/batch = 0.092
Sequence 1668862/3263200 (epoch 51), batch 170131, train_loss = 2.384, time/batch = 0.093
Sequence 1669852/3263200 (epoch 51), batch 170231, train_loss = 0.814, time/batch = 0.093
Sequence 1670832/3263200 (epoch 51), batch 170331, train_loss = 1.184, time/batch = 0.091
Sequence 1671782/3263200 (epoch 51), batch 170431, train_loss = 1.482, time/batch = 0.092
Sequence 1672762/3263200 (epoch 51), batch 170531, train_loss = 1.264, time/batch = 0.095
Sequence 1673712/3263200 (epoch 51), batch 170631, train_loss = 1.662, time/batch = 0.095
Sequence 1674692/3263200 (epoch 51), batch 170731, train_loss = 0.914, time/batch = 0.093
Sequence 1675672/3263200 (epoch 51), batch 170831, train_loss = 1.027, time/batch = 0.093
Sequence 1676662/3263200 (epoch 51), batch 170931, train_loss = 1.518, time/batch = 0.092
Sequence 1

Sequence 1754196/3263200 (epoch 53), batch 178832, train_loss = 0.881, time/batch = 0.092
Sequence 1755176/3263200 (epoch 53), batch 178932, train_loss = 0.889, time/batch = 0.094
Sequence 1756156/3263200 (epoch 53), batch 179032, train_loss = 2.206, time/batch = 0.093
Sequence 1757146/3263200 (epoch 53), batch 179132, train_loss = 1.387, time/batch = 0.092
Sequence 1758146/3263200 (epoch 53), batch 179232, train_loss = 1.237, time/batch = 0.092
Sequence 1759116/3263200 (epoch 53), batch 179332, train_loss = 0.859, time/batch = 0.092
Sequence 1760076/3263200 (epoch 53), batch 179432, train_loss = 1.649, time/batch = 0.092
Sequence 1761066/3263200 (epoch 53), batch 179532, train_loss = 0.872, time/batch = 0.092
Sequence 1762046/3263200 (epoch 53), batch 179632, train_loss = 1.375, time/batch = 0.093
Epoch 53 completed, average train loss 1.243329, learning rate 0.0010
Shuffling training data...
Sequence 1763038/3263200 (epoch 54), batch 179732, train_loss = 1.116, time/batch = 0.094
Seq

Sequence 1840562/3263200 (epoch 56), batch 187633, train_loss = 1.382, time/batch = 0.092
Sequence 1841532/3263200 (epoch 56), batch 187733, train_loss = 0.840, time/batch = 0.092
Sequence 1842502/3263200 (epoch 56), batch 187833, train_loss = 1.013, time/batch = 0.092
Sequence 1843482/3263200 (epoch 56), batch 187933, train_loss = 1.094, time/batch = 0.092
Sequence 1844452/3263200 (epoch 56), batch 188033, train_loss = 0.792, time/batch = 0.092
Sequence 1845442/3263200 (epoch 56), batch 188133, train_loss = 1.215, time/batch = 0.093
Sequence 1846432/3263200 (epoch 56), batch 188233, train_loss = 1.056, time/batch = 0.093
Sequence 1847402/3263200 (epoch 56), batch 188333, train_loss = 1.292, time/batch = 0.094
Sequence 1848382/3263200 (epoch 56), batch 188433, train_loss = 1.201, time/batch = 0.093
Sequence 1849362/3263200 (epoch 56), batch 188533, train_loss = 1.306, time/batch = 0.094
Sequence 1850332/3263200 (epoch 56), batch 188633, train_loss = 1.325, time/batch = 0.092
Sequence 1

Sequence 1926858/3263200 (epoch 59), batch 196434, train_loss = 1.239, time/batch = 0.093
Sequence 1927828/3263200 (epoch 59), batch 196534, train_loss = 0.823, time/batch = 0.093
Sequence 1928808/3263200 (epoch 59), batch 196634, train_loss = 1.251, time/batch = 0.092
Sequence 1929798/3263200 (epoch 59), batch 196734, train_loss = 0.989, time/batch = 0.094
Sequence 1930728/3263200 (epoch 59), batch 196834, train_loss = 1.242, time/batch = 0.093
Sequence 1931708/3263200 (epoch 59), batch 196934, train_loss = 1.127, time/batch = 0.093
Sequence 1932708/3263200 (epoch 59), batch 197034, train_loss = 1.806, time/batch = 0.092
Sequence 1933708/3263200 (epoch 59), batch 197134, train_loss = 1.102, time/batch = 0.094
Sequence 1934688/3263200 (epoch 59), batch 197234, train_loss = 1.514, time/batch = 0.093
Sequence 1935668/3263200 (epoch 59), batch 197334, train_loss = 1.667, time/batch = 0.092
Sequence 1936648/3263200 (epoch 59), batch 197434, train_loss = 1.591, time/batch = 0.092
Sequence 1

Sequence 2014162/3263200 (epoch 61), batch 205336, train_loss = 1.801, time/batch = 0.093
Sequence 2015132/3263200 (epoch 61), batch 205436, train_loss = 0.809, time/batch = 0.092
Sequence 2016112/3263200 (epoch 61), batch 205536, train_loss = 0.935, time/batch = 0.093
Sequence 2017112/3263200 (epoch 61), batch 205636, train_loss = 1.176, time/batch = 0.093
Sequence 2018102/3263200 (epoch 61), batch 205736, train_loss = 0.951, time/batch = 0.093
Sequence 2019082/3263200 (epoch 61), batch 205836, train_loss = 1.166, time/batch = 0.092
Sequence 2020062/3263200 (epoch 61), batch 205936, train_loss = 1.072, time/batch = 0.094
Sequence 2021062/3263200 (epoch 61), batch 206036, train_loss = 1.394, time/batch = 0.092
Sequence 2022042/3263200 (epoch 61), batch 206136, train_loss = 1.462, time/batch = 0.093
Sequence 2023012/3263200 (epoch 61), batch 206236, train_loss = 1.339, time/batch = 0.092
Epoch 61 completed, average train loss 1.215879, learning rate 0.0010
Shuffling training data...
Seq

Sequence 2100518/3263200 (epoch 64), batch 214136, train_loss = 1.110, time/batch = 0.093
Sequence 2101498/3263200 (epoch 64), batch 214236, train_loss = 1.120, time/batch = 0.093
Sequence 2102458/3263200 (epoch 64), batch 214336, train_loss = 1.708, time/batch = 0.093
Sequence 2103448/3263200 (epoch 64), batch 214436, train_loss = 0.906, time/batch = 0.092
Sequence 2104438/3263200 (epoch 64), batch 214536, train_loss = 1.368, time/batch = 0.092
Sequence 2105418/3263200 (epoch 64), batch 214636, train_loss = 1.195, time/batch = 0.100
Sequence 2106388/3263200 (epoch 64), batch 214736, train_loss = 1.149, time/batch = 0.103
Sequence 2107368/3263200 (epoch 64), batch 214836, train_loss = 1.525, time/batch = 0.100
Sequence 2108328/3263200 (epoch 64), batch 214936, train_loss = 1.585, time/batch = 0.102
Sequence 2109288/3263200 (epoch 64), batch 215036, train_loss = 0.745, time/batch = 0.103
Sequence 2110288/3263200 (epoch 64), batch 215136, train_loss = 1.250, time/batch = 0.094
Sequence 2

Shuffling training data...
Sequence 2186844/3263200 (epoch 67), batch 222937, train_loss = 1.033, time/batch = 0.094
Sequence 2187834/3263200 (epoch 67), batch 223037, train_loss = 1.276, time/batch = 0.093
Sequence 2188814/3263200 (epoch 67), batch 223137, train_loss = 0.696, time/batch = 0.093
Sequence 2189814/3263200 (epoch 67), batch 223237, train_loss = 0.749, time/batch = 0.093
Sequence 2190794/3263200 (epoch 67), batch 223337, train_loss = 1.037, time/batch = 0.094
Sequence 2191774/3263200 (epoch 67), batch 223437, train_loss = 1.000, time/batch = 0.089
Sequence 2192754/3263200 (epoch 67), batch 223537, train_loss = 0.832, time/batch = 0.088
Sequence 2193754/3263200 (epoch 67), batch 223637, train_loss = 1.618, time/batch = 0.093
Sequence 2194694/3263200 (epoch 67), batch 223737, train_loss = 1.026, time/batch = 0.093
Sequence 2195634/3263200 (epoch 67), batch 223837, train_loss = 1.243, time/batch = 0.092
Sequence 2196604/3263200 (epoch 67), batch 223937, train_loss = 1.017, ti

Sequence 2274138/3263200 (epoch 69), batch 231838, train_loss = 1.094, time/batch = 0.092
Sequence 2275098/3263200 (epoch 69), batch 231938, train_loss = 1.327, time/batch = 0.093
Sequence 2276068/3263200 (epoch 69), batch 232038, train_loss = 0.908, time/batch = 0.092
Sequence 2277058/3263200 (epoch 69), batch 232138, train_loss = 1.067, time/batch = 0.091
Sequence 2278038/3263200 (epoch 69), batch 232238, train_loss = 0.840, time/batch = 0.103
Sequence 2278998/3263200 (epoch 69), batch 232338, train_loss = 0.855, time/batch = 0.102
Sequence 2279998/3263200 (epoch 69), batch 232438, train_loss = 1.833, time/batch = 0.096
Sequence 2280988/3263200 (epoch 69), batch 232538, train_loss = 1.646, time/batch = 0.093
Sequence 2281978/3263200 (epoch 69), batch 232638, train_loss = 0.950, time/batch = 0.094
Sequence 2282958/3263200 (epoch 69), batch 232738, train_loss = 1.463, time/batch = 0.092
Sequence 2283958/3263200 (epoch 69), batch 232838, train_loss = 0.904, time/batch = 0.093
Epoch 69 c

Sequence 2360424/3263200 (epoch 72), batch 240639, train_loss = 2.058, time/batch = 0.093
Sequence 2361394/3263200 (epoch 72), batch 240739, train_loss = 0.975, time/batch = 0.094
Sequence 2362384/3263200 (epoch 72), batch 240839, train_loss = 0.911, time/batch = 0.092
Sequence 2363374/3263200 (epoch 72), batch 240940, train_loss = 0.876, time/batch = 0.095
Sequence 2364344/3263200 (epoch 72), batch 241040, train_loss = 0.988, time/batch = 0.093
Sequence 2365334/3263200 (epoch 72), batch 241140, train_loss = 0.933, time/batch = 0.092
Sequence 2366314/3263200 (epoch 72), batch 241240, train_loss = 1.388, time/batch = 0.093
Sequence 2367274/3263200 (epoch 72), batch 241340, train_loss = 0.790, time/batch = 0.092
Sequence 2368264/3263200 (epoch 72), batch 241440, train_loss = 1.227, time/batch = 0.093
Sequence 2369264/3263200 (epoch 72), batch 241540, train_loss = 1.203, time/batch = 0.092
Sequence 2370244/3263200 (epoch 72), batch 241640, train_loss = 0.765, time/batch = 0.093
Sequence 2

Epoch 74 completed, average train loss 1.183562, learning rate 0.0010
model saved.
Shuffling training data...
Sequence 2447790/3263200 (epoch 75), batch 249540, train_loss = 1.428, time/batch = 0.094
Sequence 2448750/3263200 (epoch 75), batch 249640, train_loss = 1.073, time/batch = 0.092
Sequence 2449740/3263200 (epoch 75), batch 249740, train_loss = 1.087, time/batch = 0.092
Sequence 2450740/3263200 (epoch 75), batch 249840, train_loss = 0.894, time/batch = 0.092
Sequence 2451720/3263200 (epoch 75), batch 249940, train_loss = 1.195, time/batch = 0.093
Sequence 2452710/3263200 (epoch 75), batch 250040, train_loss = 1.755, time/batch = 0.092
Sequence 2453700/3263200 (epoch 75), batch 250140, train_loss = 1.388, time/batch = 0.094
Sequence 2454680/3263200 (epoch 75), batch 250240, train_loss = 1.569, time/batch = 0.093
Sequence 2455650/3263200 (epoch 75), batch 250340, train_loss = 1.161, time/batch = 0.092
Sequence 2456640/3263200 (epoch 75), batch 250440, train_loss = 1.668, time/batc

Sequence 2534154/3263200 (epoch 77), batch 258341, train_loss = 1.094, time/batch = 0.094
Sequence 2535124/3263200 (epoch 77), batch 258441, train_loss = 1.127, time/batch = 0.093
Sequence 2536124/3263200 (epoch 77), batch 258541, train_loss = 1.615, time/batch = 0.092
Sequence 2537064/3263200 (epoch 77), batch 258641, train_loss = 1.128, time/batch = 0.093
Sequence 2538054/3263200 (epoch 77), batch 258741, train_loss = 0.873, time/batch = 0.092
Sequence 2539024/3263200 (epoch 77), batch 258841, train_loss = 0.986, time/batch = 0.092
Sequence 2540014/3263200 (epoch 77), batch 258941, train_loss = 1.294, time/batch = 0.092
Sequence 2540974/3263200 (epoch 77), batch 259041, train_loss = 1.679, time/batch = 0.093
Sequence 2541964/3263200 (epoch 77), batch 259141, train_loss = 1.374, time/batch = 0.092
Sequence 2542934/3263200 (epoch 77), batch 259241, train_loss = 1.265, time/batch = 0.091
Sequence 2543904/3263200 (epoch 77), batch 259341, train_loss = 1.131, time/batch = 0.092
Sequence 2

Sequence 2620430/3263200 (epoch 80), batch 267143, train_loss = 1.346, time/batch = 0.093
Sequence 2621400/3263200 (epoch 80), batch 267243, train_loss = 0.890, time/batch = 0.094
Sequence 2622390/3263200 (epoch 80), batch 267343, train_loss = 1.356, time/batch = 0.093
Sequence 2623370/3263200 (epoch 80), batch 267443, train_loss = 1.085, time/batch = 0.092
Sequence 2624360/3263200 (epoch 80), batch 267543, train_loss = 1.869, time/batch = 0.093
Sequence 2625350/3263200 (epoch 80), batch 267643, train_loss = 1.189, time/batch = 0.092
Sequence 2626310/3263200 (epoch 80), batch 267743, train_loss = 0.533, time/batch = 0.091
Sequence 2627310/3263200 (epoch 80), batch 267843, train_loss = 1.044, time/batch = 0.092
Sequence 2628300/3263200 (epoch 80), batch 267943, train_loss = 1.068, time/batch = 0.094
Sequence 2629300/3263200 (epoch 80), batch 268043, train_loss = 0.787, time/batch = 0.097
Sequence 2630290/3263200 (epoch 80), batch 268143, train_loss = 1.865, time/batch = 0.096
Sequence 2

Sequence 2707754/3263200 (epoch 82), batch 276043, train_loss = 0.850, time/batch = 0.092
Epoch 82 completed, average train loss 1.167154, learning rate 0.0010
Shuffling training data...
Sequence 2708746/3263200 (epoch 83), batch 276143, train_loss = 1.068, time/batch = 0.094
Sequence 2709736/3263200 (epoch 83), batch 276243, train_loss = 1.170, time/batch = 0.093
Sequence 2710726/3263200 (epoch 83), batch 276343, train_loss = 1.303, time/batch = 0.093
Sequence 2711706/3263200 (epoch 83), batch 276443, train_loss = 0.912, time/batch = 0.092
Sequence 2712686/3263200 (epoch 83), batch 276543, train_loss = 1.368, time/batch = 0.093
Sequence 2713686/3263200 (epoch 83), batch 276643, train_loss = 1.138, time/batch = 0.093
Sequence 2714676/3263200 (epoch 83), batch 276743, train_loss = 1.213, time/batch = 0.093
Sequence 2715656/3263200 (epoch 83), batch 276843, train_loss = 0.968, time/batch = 0.094
Sequence 2716656/3263200 (epoch 83), batch 276943, train_loss = 1.466, time/batch = 0.091
Seq

Sequence 2794100/3263200 (epoch 85), batch 284845, train_loss = 1.024, time/batch = 0.092
Sequence 2795090/3263200 (epoch 85), batch 284945, train_loss = 1.532, time/batch = 0.093
Sequence 2796060/3263200 (epoch 85), batch 285045, train_loss = 0.518, time/batch = 0.092
Sequence 2797040/3263200 (epoch 85), batch 285145, train_loss = 1.142, time/batch = 0.092
Sequence 2798020/3263200 (epoch 85), batch 285245, train_loss = 0.736, time/batch = 0.092
Sequence 2799020/3263200 (epoch 85), batch 285345, train_loss = 1.376, time/batch = 0.093
Sequence 2799990/3263200 (epoch 85), batch 285445, train_loss = 0.985, time/batch = 0.093
Sequence 2800990/3263200 (epoch 85), batch 285545, train_loss = 0.957, time/batch = 0.092
Sequence 2801940/3263200 (epoch 85), batch 285645, train_loss = 1.040, time/batch = 0.093
Sequence 2802940/3263200 (epoch 85), batch 285745, train_loss = 1.289, time/batch = 0.093
Sequence 2803940/3263200 (epoch 85), batch 285845, train_loss = 1.138, time/batch = 0.093
Sequence 2

Sequence 2880526/3263200 (epoch 88), batch 293646, train_loss = 1.084, time/batch = 0.092
Sequence 2881516/3263200 (epoch 88), batch 293746, train_loss = 1.207, time/batch = 0.093
Sequence 2882496/3263200 (epoch 88), batch 293846, train_loss = 0.972, time/batch = 0.092
Sequence 2883486/3263200 (epoch 88), batch 293946, train_loss = 1.230, time/batch = 0.095
Sequence 2884476/3263200 (epoch 88), batch 294046, train_loss = 1.348, time/batch = 0.092
Sequence 2885456/3263200 (epoch 88), batch 294146, train_loss = 0.985, time/batch = 0.092
Sequence 2886456/3263200 (epoch 88), batch 294246, train_loss = 1.193, time/batch = 0.093
Sequence 2887446/3263200 (epoch 88), batch 294346, train_loss = 0.873, time/batch = 0.094
Sequence 2888406/3263200 (epoch 88), batch 294446, train_loss = 1.312, time/batch = 0.093
Sequence 2889386/3263200 (epoch 88), batch 294546, train_loss = 1.709, time/batch = 0.093
Sequence 2890386/3263200 (epoch 88), batch 294647, train_loss = 0.634, time/batch = 0.092
Sequence 2

Sequence 2967790/3263200 (epoch 90), batch 302548, train_loss = 0.972, time/batch = 0.092
Sequence 2968750/3263200 (epoch 90), batch 302648, train_loss = 0.953, time/batch = 0.094
Epoch 90 completed, average train loss 1.151222, learning rate 0.0010
Shuffling training data...
Sequence 2969742/3263200 (epoch 91), batch 302748, train_loss = 0.901, time/batch = 0.091
Sequence 2970742/3263200 (epoch 91), batch 302848, train_loss = 0.964, time/batch = 0.092
Sequence 2971732/3263200 (epoch 91), batch 302948, train_loss = 0.546, time/batch = 0.092
Sequence 2972712/3263200 (epoch 91), batch 303048, train_loss = 0.915, time/batch = 0.093
Sequence 2973692/3263200 (epoch 91), batch 303148, train_loss = 1.316, time/batch = 0.092
Sequence 2974682/3263200 (epoch 91), batch 303248, train_loss = 0.904, time/batch = 0.093
Sequence 2975632/3263200 (epoch 91), batch 303348, train_loss = 1.056, time/batch = 0.091
Sequence 2976622/3263200 (epoch 91), batch 303448, train_loss = 1.367, time/batch = 0.094
Seq

Sequence 3054166/3263200 (epoch 93), batch 311350, train_loss = 1.566, time/batch = 0.094
Sequence 3055136/3263200 (epoch 93), batch 311450, train_loss = 0.973, time/batch = 0.093
Sequence 3056116/3263200 (epoch 93), batch 311550, train_loss = 1.133, time/batch = 0.093
Sequence 3057096/3263200 (epoch 93), batch 311650, train_loss = 0.959, time/batch = 0.094
Sequence 3058066/3263200 (epoch 93), batch 311750, train_loss = 0.800, time/batch = 0.092
Sequence 3059056/3263200 (epoch 93), batch 311850, train_loss = 2.476, time/batch = 0.093
Sequence 3060036/3263200 (epoch 93), batch 311950, train_loss = 1.191, time/batch = 0.092
Sequence 3061016/3263200 (epoch 93), batch 312050, train_loss = 1.280, time/batch = 0.093
Sequence 3061986/3263200 (epoch 93), batch 312150, train_loss = 1.142, time/batch = 0.092
Sequence 3062956/3263200 (epoch 93), batch 312250, train_loss = 1.399, time/batch = 0.092
Sequence 3063926/3263200 (epoch 93), batch 312350, train_loss = 0.997, time/batch = 0.092
Sequence 3

Sequence 3140452/3263200 (epoch 96), batch 320151, train_loss = 1.488, time/batch = 0.091
Sequence 3141432/3263200 (epoch 96), batch 320251, train_loss = 1.251, time/batch = 0.091
Sequence 3142412/3263200 (epoch 96), batch 320351, train_loss = 1.232, time/batch = 0.092
Sequence 3143382/3263200 (epoch 96), batch 320451, train_loss = 1.118, time/batch = 0.093
Sequence 3144352/3263200 (epoch 96), batch 320551, train_loss = 1.445, time/batch = 0.091
Sequence 3145332/3263200 (epoch 96), batch 320651, train_loss = 0.992, time/batch = 0.092
Sequence 3146322/3263200 (epoch 96), batch 320751, train_loss = 1.177, time/batch = 0.093
Sequence 3147312/3263200 (epoch 96), batch 320851, train_loss = 1.482, time/batch = 0.093
Sequence 3148302/3263200 (epoch 96), batch 320951, train_loss = 1.233, time/batch = 0.092
Sequence 3149302/3263200 (epoch 96), batch 321051, train_loss = 0.860, time/batch = 0.092
Sequence 3150282/3263200 (epoch 96), batch 321151, train_loss = 0.965, time/batch = 0.093
Sequence 3

Sequence 3227726/3263200 (epoch 98), batch 329053, train_loss = 0.987, time/batch = 0.092
Sequence 3228716/3263200 (epoch 98), batch 329153, train_loss = 0.897, time/batch = 0.093
Sequence 3229706/3263200 (epoch 98), batch 329253, train_loss = 1.545, time/batch = 0.093
Epoch 98 completed, average train loss 1.140088, learning rate 0.0010
Sequence 3230708/3263200 (epoch 99), batch 329353, train_loss = 1.225, time/batch = 0.093
Shuffling training data...
Sequence 3231688/3263200 (epoch 99), batch 329453, train_loss = 0.893, time/batch = 0.093
Sequence 3232668/3263200 (epoch 99), batch 329553, train_loss = 1.452, time/batch = 0.092
Sequence 3233638/3263200 (epoch 99), batch 329653, train_loss = 1.260, time/batch = 0.092
Sequence 3234618/3263200 (epoch 99), batch 329753, train_loss = 1.327, time/batch = 0.093
Sequence 3235618/3263200 (epoch 99), batch 329853, train_loss = 0.889, time/batch = 0.094
Sequence 3236578/3263200 (epoch 99), batch 329953, train_loss = 1.272, time/batch = 0.093
Seq

In [None]:
#------------------------#
#  print training loss   #
#------------------------#

## 2. Alternative training method

In [7]:
# Hyper-parameters for the alternative training run, described as
# (flag, type, default, help) rows and registered on the parser in one pass.
_ARGUMENT_SPECS = [
    ('--dim_rec', int, 128, 'size of RNN hidden state'),
    ('--num_layers', int, 2, 'number of layers in the RNN. '),
    ('--batch_size', int, 10, 'minibatch size'),
    ('--num_epochs', int, 200, 'number of epochs'),
    ('--save_every', int, 10, 'save frequency by epoches'),
    ('--model_dir', str, 'checkpoints', 'directory to save model to'),
    ('--summary_dir', str, 'summary', 'directory to save tensorboard info'),
    ('--max_grad_norm', float, 1., 'clip gradients at this value'),
    ('--learning_rate', float, 0.001, 'learning rate'),
    ('--decay_rate', float, 1.0, 'decay rate for the optimizer'),
    ('--num_mixture', int, 2, 'number of gaussian mixtures'),
    ('--data_scale', float, 1000, 'factor to scale raw data down by'),
    ('--load_model', str, None, 'Reload a model checkpoint and restore training.'),
    ('--bptt_length', int, 120, 'How many steps should the gradients pass back.'),
    ('--loss_form', str, 'mse', 'mse / gmm'),
    ('--constraint_factor', float, 0., 'the weight for constraint term in the cost function.'),
]


def _build_parser():
    """Create the argument parser with every option listed in _ARGUMENT_SPECS."""
    built = argparse.ArgumentParser()
    for option_flag, option_type, option_default, option_help in _ARGUMENT_SPECS:
        built.add_argument(option_flag, type=option_type,
                           default=option_default, help=option_help)
    return built


parser = _build_parser()

# Parse an explicit argument list instead of sys.argv (the notebook kernel's
# own command line must not be interpreted as training options).
args = parser.parse_args(['--num_epochs', '200'])

# Overrides applied after parsing: a short run that saves after every epoch.
args.num_epochs = 4
args.save_every = 1


In [5]:
# Diagnostic cell: print the module search path of the running kernel
# (handy for seeing which interpreter/environment imports resolve against).
import sys
#reload(sys)  # Python 2-only builtin; left disabled
print(sys.path)

['', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python36.zip', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/lib-dynload', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/site-packages', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/site-packages/Sphinx-1.5.6-py3.6.egg', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/site-packages/svgwrite-1.1.6-py3.6.egg', '/home/mye/tools/anacondas3/envs/tensorflow/lib/python3.6/site-packages/IPython/extensions', '/home/mye/.ipython']


In [8]:
# Build the loader from the parsed arguments (with the defaults:
# batch_size=10, bptt_length=120), rewind it, then pull
# num_sequences / batch_size batches and report the shape of each input.
data_loader = DataLoader(args.batch_size, args.data_scale, args.bptt_length)
data_loader.reset_batch_pointer()
num_batches = int(data_loader.num_sequences / args.batch_size)
for i in range(num_batches):
    x, y, w, c, lens = data_loader.next_batch()
    print('i:',i,' x.shape:', x.shape)

Maximal length of the training data is 169
Training data case distribution: [5348, 5108, 4904, 1208, 1260, 1224, 916, 1180, 1216, 1044, 876, 456, 1264, 992, 1080, 932, 1008, 420, 1160, 1036]
Validation data case distribution: [70, 67, 65, 15, 17, 16, 12, 15, 16, 14, 12, 6, 17, 13, 14, 13, 14, 5, 15, 13]
Shuffling training data...
Shuffling training data...
('i:', 0, ' x.shape:', (10, 120, 6))
('i:', 1, ' x.shape:', (10, 120, 6))
('i:', 2, ' x.shape:', (10, 120, 6))
('i:', 3, ' x.shape:', (10, 120, 6))
('i:', 4, ' x.shape:', (10, 120, 6))
('i:', 5, ' x.shape:', (10, 120, 6))
('i:', 6, ' x.shape:', (10, 120, 6))
('i:', 7, ' x.shape:', (10, 120, 6))
('i:', 8, ' x.shape:', (10, 120, 6))
('i:', 9, ' x.shape:', (10, 120, 6))
('i:', 10, ' x.shape:', (10, 120, 6))
('i:', 11, ' x.shape:', (10, 120, 6))
('i:', 12, ' x.shape:', (10, 120, 6))
('i:', 13, ' x.shape:', (10, 120, 6))
('i:', 14, ' x.shape:', (10, 120, 6))
('i:', 15, ' x.shape:', (10, 120, 6))
('i:', 16, ' x.shape:', (10, 120, 6))
('i:'

('i:', 406, ' x.shape:', (10, 120, 6))
('i:', 407, ' x.shape:', (10, 120, 6))
('i:', 408, ' x.shape:', (10, 120, 6))
('i:', 409, ' x.shape:', (10, 120, 6))
('i:', 410, ' x.shape:', (10, 120, 6))
('i:', 411, ' x.shape:', (10, 120, 6))
('i:', 412, ' x.shape:', (10, 120, 6))
('i:', 413, ' x.shape:', (10, 120, 6))
('i:', 414, ' x.shape:', (10, 120, 6))
('i:', 415, ' x.shape:', (10, 120, 6))
('i:', 416, ' x.shape:', (10, 120, 6))
('i:', 417, ' x.shape:', (10, 240, 6))
('i:', 418, ' x.shape:', (10, 120, 6))
('i:', 419, ' x.shape:', (10, 120, 6))
('i:', 420, ' x.shape:', (10, 120, 6))
('i:', 421, ' x.shape:', (10, 120, 6))
('i:', 422, ' x.shape:', (10, 120, 6))
('i:', 423, ' x.shape:', (10, 120, 6))
('i:', 424, ' x.shape:', (10, 120, 6))
('i:', 425, ' x.shape:', (10, 120, 6))
('i:', 426, ' x.shape:', (10, 120, 6))
('i:', 427, ' x.shape:', (10, 120, 6))
('i:', 428, ' x.shape:', (10, 120, 6))
('i:', 429, ' x.shape:', (10, 120, 6))
('i:', 430, ' x.shape:', (10, 120, 6))
('i:', 431, ' x.shape:', 

('i:', 979, ' x.shape:', (10, 120, 6))
('i:', 980, ' x.shape:', (10, 120, 6))
('i:', 981, ' x.shape:', (10, 120, 6))
('i:', 982, ' x.shape:', (10, 120, 6))
('i:', 983, ' x.shape:', (10, 120, 6))
('i:', 984, ' x.shape:', (10, 120, 6))
('i:', 985, ' x.shape:', (10, 120, 6))
('i:', 986, ' x.shape:', (10, 120, 6))
('i:', 987, ' x.shape:', (10, 120, 6))
('i:', 988, ' x.shape:', (10, 120, 6))
('i:', 989, ' x.shape:', (10, 120, 6))
('i:', 990, ' x.shape:', (10, 120, 6))
('i:', 991, ' x.shape:', (10, 120, 6))
('i:', 992, ' x.shape:', (10, 120, 6))
('i:', 993, ' x.shape:', (10, 120, 6))
('i:', 994, ' x.shape:', (10, 120, 6))
('i:', 995, ' x.shape:', (10, 120, 6))
('i:', 996, ' x.shape:', (10, 120, 6))
('i:', 997, ' x.shape:', (10, 120, 6))
('i:', 998, ' x.shape:', (10, 120, 6))
('i:', 999, ' x.shape:', (10, 120, 6))
('i:', 1000, ' x.shape:', (10, 120, 6))
('i:', 1001, ' x.shape:', (10, 120, 6))
('i:', 1002, ' x.shape:', (10, 120, 6))
('i:', 1003, ' x.shape:', (10, 120, 6))
('i:', 1004, ' x.shap

('i:', 1587, ' x.shape:', (10, 120, 6))
('i:', 1588, ' x.shape:', (10, 120, 6))
('i:', 1589, ' x.shape:', (10, 120, 6))
('i:', 1590, ' x.shape:', (10, 120, 6))
('i:', 1591, ' x.shape:', (10, 120, 6))
('i:', 1592, ' x.shape:', (10, 120, 6))
('i:', 1593, ' x.shape:', (10, 120, 6))
('i:', 1594, ' x.shape:', (10, 120, 6))
('i:', 1595, ' x.shape:', (10, 120, 6))
('i:', 1596, ' x.shape:', (10, 120, 6))
('i:', 1597, ' x.shape:', (10, 120, 6))
('i:', 1598, ' x.shape:', (10, 120, 6))
('i:', 1599, ' x.shape:', (10, 120, 6))
('i:', 1600, ' x.shape:', (10, 120, 6))
('i:', 1601, ' x.shape:', (10, 120, 6))
('i:', 1602, ' x.shape:', (10, 120, 6))
('i:', 1603, ' x.shape:', (10, 120, 6))
('i:', 1604, ' x.shape:', (10, 120, 6))
('i:', 1605, ' x.shape:', (10, 120, 6))
('i:', 1606, ' x.shape:', (10, 120, 6))
('i:', 1607, ' x.shape:', (10, 120, 6))
('i:', 1608, ' x.shape:', (10, 120, 6))
('i:', 1609, ' x.shape:', (10, 120, 6))
('i:', 1610, ' x.shape:', (10, 120, 6))
('i:', 1611, ' x.shape:', (10, 120, 6))


('i:', 2173, ' x.shape:', (10, 240, 6))
('i:', 2174, ' x.shape:', (10, 120, 6))
('i:', 2175, ' x.shape:', (10, 120, 6))
('i:', 2176, ' x.shape:', (10, 120, 6))
('i:', 2177, ' x.shape:', (10, 120, 6))
('i:', 2178, ' x.shape:', (10, 120, 6))
('i:', 2179, ' x.shape:', (10, 120, 6))
('i:', 2180, ' x.shape:', (10, 120, 6))
('i:', 2181, ' x.shape:', (10, 120, 6))
('i:', 2182, ' x.shape:', (10, 120, 6))
('i:', 2183, ' x.shape:', (10, 120, 6))
('i:', 2184, ' x.shape:', (10, 120, 6))
('i:', 2185, ' x.shape:', (10, 120, 6))
('i:', 2186, ' x.shape:', (10, 120, 6))
('i:', 2187, ' x.shape:', (10, 120, 6))
('i:', 2188, ' x.shape:', (10, 120, 6))
('i:', 2189, ' x.shape:', (10, 120, 6))
('i:', 2190, ' x.shape:', (10, 120, 6))
('i:', 2191, ' x.shape:', (10, 120, 6))
('i:', 2192, ' x.shape:', (10, 120, 6))
('i:', 2193, ' x.shape:', (10, 120, 6))
('i:', 2194, ' x.shape:', (10, 120, 6))
('i:', 2195, ' x.shape:', (10, 120, 6))
('i:', 2196, ' x.shape:', (10, 120, 6))
('i:', 2197, ' x.shape:', (10, 120, 6))


('i:', 2743, ' x.shape:', (10, 120, 6))
('i:', 2744, ' x.shape:', (10, 120, 6))
('i:', 2745, ' x.shape:', (10, 120, 6))
('i:', 2746, ' x.shape:', (10, 120, 6))
('i:', 2747, ' x.shape:', (10, 120, 6))
('i:', 2748, ' x.shape:', (10, 120, 6))
('i:', 2749, ' x.shape:', (10, 120, 6))
('i:', 2750, ' x.shape:', (10, 120, 6))
('i:', 2751, ' x.shape:', (10, 120, 6))
('i:', 2752, ' x.shape:', (10, 120, 6))
('i:', 2753, ' x.shape:', (10, 120, 6))
('i:', 2754, ' x.shape:', (10, 120, 6))
('i:', 2755, ' x.shape:', (10, 240, 6))
('i:', 2756, ' x.shape:', (10, 240, 6))
('i:', 2757, ' x.shape:', (10, 120, 6))
('i:', 2758, ' x.shape:', (10, 120, 6))
('i:', 2759, ' x.shape:', (10, 120, 6))
('i:', 2760, ' x.shape:', (10, 120, 6))
('i:', 2761, ' x.shape:', (10, 120, 6))
('i:', 2762, ' x.shape:', (10, 120, 6))
('i:', 2763, ' x.shape:', (10, 120, 6))
('i:', 2764, ' x.shape:', (10, 120, 6))
('i:', 2765, ' x.shape:', (10, 120, 6))
('i:', 2766, ' x.shape:', (10, 120, 6))
('i:', 2767, ' x.shape:', (10, 120, 6))


In [9]:
# Show the loader's sequence count (the value the batch loop above divides by batch_size).
print(data_loader.num_sequences )

32632


In [7]:
# Draw three samples of zero-mean Gaussian noise whose standard deviation
# is `noise_level`, and show them.
noise_level = 0.001
noise_sample = np.random.normal(loc=0, scale=noise_level, size=3)
print(noise_sample)

[ 0.00105173  0.00144482 -0.00125339]


In [50]:
import random
def sample_cluster(pi):
    """Pick an index at random, weighted by the entries of ``pi``.

    A uniform number in [0, 1) is drawn and the weights are summed from the
    left; the first index whose running total reaches the draw is returned.
    The draw, every visited index and the final running total are printed
    so the selection can be inspected.

    Args:
      pi: sequence of weights (presumably mixture probabilities summing
        to 1 -- this is not checked).

    Returns:
      The selected integer index into ``pi``.

    Raises:
      ValueError: if the running total never reaches the draw (for example
        when ``pi`` is empty).
    """
    threshold = random.random()
    print(threshold)
    running_total = 0.
    for index, weight in enumerate(pi):
        running_total += weight
        print('iterations:',index)
        if running_total >= threshold:
            print('accumulate:',running_total)
            return index
    raise ValueError("Cannot sample a cluster!")

In [69]:
# Try the sampler on a two-entry weight list; the cell result is the chosen index.
sample_cluster([0.3, 0.7])

0.176895848302
('iterations:', 0)
('accumulate:', 0.3)


0

In [71]:
# Scratch check of indexing into a nested list: show the first element and
# the last entry of the inner list.
a = ['a', [1, 2]]
first_item, inner_list = a
print(first_item)
print(inner_list[-1])

a
2


In [None]:
#m1:[batch*time, num_mixture]
mu = (m1, m2, m3)