# TensorFlow 实战之英文阅读理解

## 数据预处理

数据：《Children's Book Test》http://www.thespermwhale.com/jaseweston/babi/CBTest.tgz

In [1]:
import re
import random
import ast
import itertools
import pickle
import numpy as np


In [2]:
# Paths to the raw CBTest "NE" training and validation text files.
# These are absolute paths on the author's machine; adjust them locally.
train_pre = '/home/blueberry/data/Q&A/CBTest/data/cbtest_NE_train.txt'
valid_pre = '/home/blueberry/data/Q&A/CBTest/data/cbtest_NE_valid_2000ex.txt'

In [3]:
def preprocess_data(data_file, out_file):
    """Convert a raw CBTest file into one shuffled sample per output line.

    Every non-blank input line starts with a line number followed by a space.
    A line containing tabs is a question line of the form
    ``question<TAB>answer<TAB><TAB>candidates``; any other line is a story
    sentence. A blank line starts a new story.

    Each question line produces one sample
    ``[content_tokens, question_tokens, answer]`` where ``content_tokens`` is
    the concatenation of the tokenized sentences read since the last blank
    line (``sample[0]``), ``question_tokens`` is ``sample[1]`` and the answer
    word is ``sample[2]``.

    Args:
        data_file: Path of the raw CBTest text file to read.
        out_file: Path of the file to write. Each line is ``str(sample)``, so
            it can be read back with ``ast.literal_eval``.

    Raises:
        ValueError: If a non-blank line has no space after the line number or
            a question line does not have exactly four tab-separated fields.
    """
    # Splitting on a *captured* run of non-word characters keeps punctuation
    # as tokens. Raw string: '\W' in a plain literal is an invalid escape.
    splitter = re.compile(r'(\W+)')

    def tokenize(text):
        return [tok.strip() for tok in splitter.split(text) if tok.strip()]

    samples = []
    with open(data_file) as f:
        story = []
        for line in f:
            line = line.strip()
            if not line:
                story = []
                continue

            # Drop the leading line number.
            _, line = line.split(' ', 1)
            if not line:
                continue

            if '\t' in line:
                q, a, _, _candidates = line.split('\t')
                # Flatten now so the sample is a snapshot of the story so far;
                # sentences appended later must not leak into this sample.
                content = [tok for sentence in story for tok in sentence]
                samples.append([content, tokenize(q), a])
            else:
                story.append(tokenize(line))

    random.shuffle(samples)
    if samples:
        print(len(samples), '\n', samples[0])
    else:
        # Nothing to preview; still write an (empty) output file below.
        print(0)

    with open(out_file, "w") as f:
        f.writelines(str(sample) + '\n' for sample in samples)

In [4]:
# Tokenize both raw files; the results are written to the working directory.
preprocess_data(train_pre, 'train.data')
preprocess_data(valid_pre, 'valid.data')

108719 
 [['Is', 'Jack', 'hurt', "? ''", '``', 'Broken', 'his', 'head', ", ''", 'croaked', 'Joe', ',', 'stepping', 'aside', ',', 'that', 'she', 'might', 'behold', 'the', 'fallen', 'hero', 'vainly', 'trying', 'to', 'look', 'calm', 'and', 'cheerful', 'with', 'red', 'drops', 'running', 'down', 'his', 'cheek', 'and', 'a', 'lump', 'on', 'his', 'forehead', '.', 'Jill', 'shut', 'her', 'eyes', 'and', 'waved', 'the', 'girls', 'away', ',', 'saying', ',', 'faintly', ', -- ``', 'Never', 'mind', 'me', '.', 'Go', 'and', 'see', 'to', 'him', ". ''", '``', 'Do', 'n', "'", 't', '!', 'I', "'", 'm', 'all', 'right', ", ''", 'and', 'Jack', 'tried', 'to', 'get', 'up', 'in', 'order', 'to', 'prove', 'that', 'headers', 'off', 'a', 'bank', 'were', 'mere', 'trifles', 'to', 'him', ';', 'but', 'at', 'the', 'first', 'movement', 'of', 'the', 'left', 'leg', 'he', 'uttered', 'a', 'sharp', 'cry', 'of', 'pain', ',', 'and', 'would', 'have', 'fallen', 'if', 'Gus', 'had', 'not', 'caught', 'and', 'gently', 'laid', 'him', 'do

In [5]:
# Load preprocessed samples back from disk (used to build the vocabulary)
def read_data(data_file):
    """Return a list with every line of ``data_file`` parsed as a Python literal."""
    with open(data_file) as handle:
        return [ast.literal_eval(row.strip()) for row in handle]

In [6]:
stories = read_data('train.data') + read_data('valid.data')

# Longest passage and longest question, in tokens, over train + valid.
content_length = max(len(s) for s, _, _ in stories)
question_length = max(len(q) for _, q, _ in stories)
print(content_length, question_length)

# Vocabulary: every distinct token seen in any passage, question or answer.
vocab = sorted(set(itertools.chain.from_iterable(
    story + q + [answer] for story, q, answer in stories)))
# Word indices start at 1, so index 0 is never assigned to a word.
vocab_size = len(vocab) + 1
print(vocab_size)
word2idx = {w: i for i, w in enumerate(vocab, 1)}
# Use a context manager so the pickle is flushed and closed deterministically.
with open('vocab.data', "wb") as vocab_file:
    pickle.dump((word2idx, content_length, question_length, vocab_size), vocab_file)

1406 237
52465


In [7]:
# Bare expression: shows the word -> index mapping as the notebook cell output.
word2idx

{'!': 1,
 "! '": 2,
 "! ' '": 3,
 "! ' ''": 4,
 "! ' '' '": 5,
 "! ' -": 6,
 "! ''": 7,
 "! '' '": 8,
 "! '' ' '": 9,
 "! '' ''": 10,
 "! '' -": 11,
 '! -': 12,
 '!!': 13,
 '!! -': 14,
 '!!!': 15,
 "!!! ''": 16,
 '#': 17,
 '$': 18,
 '&': 19,
 "'": 20,
 "' !": 21,
 "' ! '": 22,
 "' ! ''": 23,
 "' '": 24,
 "' ''": 25,
 "' '' ,": 26,
 "' '' --": 27,
 "' ,": 28,
 "' , '": 29,
 "' , ''": 30,
 "' , --": 31,
 "' , -- '' `": 32,
 "' , `": 33,
 "' -": 34,
 "' --": 35,
 "' -- ' `": 36,
 "' -- `": 37,
 "' .": 38,
 "' . '": 39,
 "' . ''": 40,
 "' :": 41,
 "' : `": 42,
 "' ;": 43,
 "' ; ''": 44,
 "' ?": 45,
 "' ? '": 46,
 "' ? ''": 47,
 "' `": 48,
 "' ``": 49,
 "''": 50,
 "'' !": 51,
 "'' ! '": 52,
 "'' ! ''": 53,
 "'' '": 54,
 "'' ' --": 55,
 "'' ' ``": 56,
 "'' ,": 57,
 "'' , '": 58,
 "'' , ''": 59,
 "'' , ``": 60,
 "'' -": 61,
 "'' --": 62,
 "'' -- ' `": 63,
 "'' -- -": 64,
 "'' -- ``": 65,
 "'' .": 66,
 "'' . '": 67,
 "'' . ' '": 68,
 "'' . ''": 69,
 "'' . -": 70,
 "'' ...": 71,
 "'' :": 72,
 "

In [8]:
# Sequence padding helper, adapted from Keras
def pad_sequences(sequences, maxlen=None, dtype='int32',
                  padding='post', truncating='post', value=0.):
    """Pad or truncate every sequence to ``maxlen`` and stack them in an array.

    Args:
        sequences: List of sequences (lists of numbers, or of nested lists
            that all share the same per-step shape).
        maxlen: Target length; when ``None`` the longest sequence is used.
        dtype: dtype of the returned array.
        padding: ``'post'`` places the data first and fills after it,
            ``'pre'`` fills before the data.
        truncating: ``'post'`` keeps the first ``maxlen`` steps of a long
            sequence, ``'pre'`` keeps the last ``maxlen`` steps.
        value: Fill value for the padded positions.

    Returns:
        Array of shape ``(len(sequences), maxlen) + sample_shape``.

    Raises:
        ValueError: For an unknown ``padding`` / ``truncating`` mode, or when
            a sequence's per-step shape differs from that of the first
            non-empty sequence.
    """
    if maxlen is None:
        maxlen = np.max([len(seq) for seq in sequences])

    # Per-step shape comes from the first non-empty sequence; the others are
    # checked against it while filling.
    sample_shape = next(
        (np.asarray(seq).shape[1:] for seq in sequences if len(seq) > 0),
        tuple())

    padded = (np.ones((len(sequences), maxlen) + sample_shape) * value).astype(dtype)
    for row, seq in enumerate(sequences):
        if len(seq) == 0:
            # Empty sequence: the row stays entirely at the fill value.
            continue

        if truncating == 'post':
            kept = seq[:maxlen]
        elif truncating == 'pre':
            kept = seq[-maxlen:]
        else:
            raise ValueError('Truncating type "%s" not understood' % truncating)

        kept = np.asarray(kept, dtype=dtype)
        if kept.shape[1:] != sample_shape:
            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
                             (kept.shape[1:], row, sample_shape))

        if padding == 'pre':
            padded[row, -len(kept):] = kept
        elif padding == 'post':
            padded[row, :len(kept)] = kept
        else:
            raise ValueError('Padding type "%s" not understood' % padding)
    return padded

In [9]:
# Convert tokenized samples into padded index vectors
def to_vector(data_file, output_file):
    """Map the words of every sample to vocabulary indices and pad them.

    The word index and the padding lengths are loaded from ``vocab.data`` in
    the working directory. Passages are padded/truncated to ``content_length``
    and questions to ``question_length`` with ``pad_sequences``.

    Args:
        data_file: File with one ``[content_tokens, question_tokens, answer]``
            literal per line.
        output_file: File to write; each line is the ``str()`` of
            ``[content_ids, question_ids, [answer_id]]``.

    Raises:
        KeyError: If a word is not present in the loaded ``word2idx``.
    """
    # Context manager so the pickle file handle is closed promptly.
    with open('vocab.data', "rb") as vocab_file:
        word2idx, content_length, question_length, _ = pickle.load(vocab_file)

    X = []
    Q = []
    A = []
    with open(data_file) as f_i:
        for line in f_i:
            sample = ast.literal_eval(line.strip())
            X.append([word2idx[w] for w in sample[0]])
            Q.append([word2idx[w] for w in sample[1]])
            # The answer is kept as a one-element list of its index.
            A.append([word2idx[sample[2]]])

    X = pad_sequences(X, content_length)
    Q = pad_sequences(Q, question_length)

    with open(output_file, "w") as f_o:
        for x_row, q_row, a_row in zip(X, Q, A):
            f_o.write(str([x_row.tolist(), q_row.tolist(), a_row]))
            f_o.write('\n')

In [10]:
# Vectorize both tokenized files; requires vocab.data to exist already.
to_vector('train.data', 'train.vec')
to_vector('valid.data', 'valid.vec')

## 训练

In [3]:
import tensorflow as tf
import pickle
import numpy as np
import ast
from collections import defaultdict

In [4]:
train_data = 'train.vec'
valid_data = 'valid.vec'

# Load the vocabulary and padding lengths; the context manager closes the
# pickle file as soon as it has been read.
with open('vocab.data', "rb") as vocab_file:
    word2idx, content_length, question_length, vocab_size = pickle.load(vocab_file)
print(content_length, question_length, vocab_size)

batch_size = 64

# Deliberately left open: get_next_batch() reads from this handle across
# calls and rewinds it when the end of the file is reached.
train_file = open(train_data)

1406 237 52465


In [5]:
def get_next_batch():
    """Return the next ``batch_size`` training samples as ``(X, Q, A)`` lists.

    Lines are consumed sequentially from the module-level ``train_file``. If
    the end of the file is reached before a full batch has been collected, the
    partial batch is discarded, the file is rewound and reading restarts from
    the first line.

    Returns:
        Tuple ``(X, Q, A)`` of lists: element 0 and element 1 of each parsed
        line, and the first item of element 2 (the answer id).

    Raises:
        ValueError: If the whole file holds fewer than ``batch_size`` lines,
            instead of rewinding and retrying without end.
    """
    # Two passes suffice: if a batch read from the very start of the file is
    # still short, the file simply does not contain enough samples.
    for _ in range(2):
        X, Q, A = [], [], []
        while len(X) < batch_size:
            line = next(train_file, None)
            if line is None:
                break
            sample = ast.literal_eval(line.strip())
            X.append(sample[0])
            Q.append(sample[1])
            A.append(sample[2][0])

        if len(X) == batch_size:
            return X, Q, A
        train_file.seek(0)

    raise ValueError(
        'training file has fewer than batch_size (%d) samples' % batch_size)

In [6]:
def get_test_batch():
    """Read the whole validation file and return it as ``(X, Q, A)`` lists.

    ``X`` and ``Q`` hold element 0 and element 1 of every parsed line; ``A``
    holds the first item of element 2 (the answer id).
    """
    with open(valid_data) as handle:
        records = [ast.literal_eval(raw.strip()) for raw in handle]

    X = [rec[0] for rec in records]
    Q = [rec[1] for rec in records]
    A = [rec[2][0] for rec in records]
    return X, Q, A

In [7]:
## The three input placeholders (shapes are fixed to batch_size rows)
X = tf.placeholder(tf.int32, [batch_size, content_length])   # English passage (word ids)
Q = tf.placeholder(tf.int32, [batch_size, question_length])  # question (word ids)
A = tf.placeholder(tf.int32, [batch_size])                   # answer (one word id per sample)
 
# Dropout keep probability; fed as 0.7 for training and 1. for evaluation
keep_prob = tf.placeholder(tf.float32)

In [8]:
def glimpse(weights, bias, encodings, inputs):
	"""Attend over ``encodings`` using a linear projection of ``inputs``.

	Dropout (keep probability from the module-level ``keep_prob``) is applied
	to both ``weights`` and ``inputs``. The query is
	``transpose(weights @ transpose(inputs) + bias)``; it is scored against
	``encodings`` with a matmul, and the scores go through a softmax.

	Assumes ``encodings`` is (batch, time, dim) and ``inputs`` is
	(batch, in_dim) with ``weights`` of shape (dim, in_dim) -- TODO confirm.

	Returns:
		A pair ``(attention, glimpse)``: the softmax weights, and the sum
		over axis 1 of ``encodings`` weighted by them.
	"""
	weights = tf.nn.dropout(weights, keep_prob)
	inputs = tf.nn.dropout(inputs, keep_prob)
	attention = tf.transpose(tf.matmul(weights, tf.transpose(inputs)) + bias)## transpose back so rows line up with rows of inputs
	# Score the encodings against the query (query gets a trailing axis of size 1).
	attention = tf.matmul(encodings, tf.expand_dims(attention, -1))
	attention = tf.nn.softmax(tf.squeeze(attention, -1))
	return attention, tf.reduce_sum(tf.expand_dims(attention, -1) * encodings, 1)
 


In [9]:
def neural_attention(embedding_dim=384, encoding_dim=128):
	"""Build the attention graph over the passage ``X`` and question ``Q``.

	Both inputs are embedded with one shared matrix, encoded by bidirectional
	GRUs, and then attended to alternately for 8 steps by an inference GRU
	whose state drives the question and passage glimpses.

	Args:
		embedding_dim: Width of the word embedding vectors.
		encoding_dim: Number of units of each encoder GRU cell.

	Returns:
		The passage attention from the last of the 8 steps, multiplied by a
		mask that is 0 wherever ``X`` is 0 and 1 elsewhere. The masked values
		are not renormalized here.
	"""
	# One embedding matrix, looked up for both the passage and the question.
	embeddings = tf.Variable(tf.random_normal([vocab_size, embedding_dim], stddev=0.22), dtype=tf.float32)
	# NOTE(review): the returned penalty tensor is discarded and the loss in
	# train_neural_attention() has no regularization term -- confirm whether
	# this L2 penalty is meant to affect training.
	tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(1e-4), [embeddings])
 
	with tf.variable_scope('encode'):
		with tf.variable_scope('X'):
			# Number of non-zero ids per row (sign(abs(.)) is 1 for any non-zero id).
			X_lens = tf.reduce_sum(tf.sign(tf.abs(X)), 1)
			embedded_X = tf.nn.embedding_lookup(embeddings, X)
			encoded_X = tf.nn.dropout(embedded_X, keep_prob)
			gru_cell = tf.nn.rnn_cell.GRUCell(encoding_dim)
			# NOTE(review): the same cell object is passed as both the forward
			# and the backward cell -- confirm this is intended for the TF
			# version in use.
			outputs, output_states = tf.nn.bidirectional_dynamic_rnn(gru_cell, gru_cell, encoded_X, sequence_length=X_lens, dtype=tf.float32, swap_memory=True)
			# Join the two directions' outputs along axis 2.
			encoded_X = tf.concat(outputs,2)
		with tf.variable_scope('Q'):
			# Same encoding steps as for X, applied to the question.
			Q_lens = tf.reduce_sum(tf.sign(tf.abs(Q)), 1)
			embedded_Q = tf.nn.embedding_lookup(embeddings, Q)
			encoded_Q = tf.nn.dropout(embedded_Q, keep_prob)
			gru_cell = tf.nn.rnn_cell.GRUCell(encoding_dim)
			outputs, output_states = tf.nn.bidirectional_dynamic_rnn(gru_cell, gru_cell, encoded_Q, sequence_length=Q_lens, dtype=tf.float32, swap_memory=True)
			encoded_Q = tf.concat(outputs,2)
 
	# Projections used by glimpse(): W_q/b_q take the inference state
	# (4*encoding_dim), W_d/b_d take state + question glimpse (6*encoding_dim).
	W_q = tf.Variable(tf.random_normal([2*encoding_dim, 4*encoding_dim], stddev=0.22), dtype=tf.float32)
	b_q = tf.Variable(tf.random_normal([2*encoding_dim, 1], stddev=0.22), dtype=tf.float32)
	W_d = tf.Variable(tf.random_normal([2*encoding_dim, 6*encoding_dim], stddev=0.22), dtype=tf.float32)
	b_d = tf.Variable(tf.random_normal([2*encoding_dim, 1], stddev=0.22), dtype=tf.float32)
	# Gate weights applied to the 10*encoding_dim concatenation built below.
	g_q = tf.Variable(tf.random_normal([10*encoding_dim, 2*encoding_dim], stddev=0.22), dtype=tf.float32)
	g_d = tf.Variable(tf.random_normal([10*encoding_dim, 2*encoding_dim], stddev=0.22), dtype=tf.float32)
 
	with tf.variable_scope('attend') as scope:
		infer_gru = tf.nn.rnn_cell.GRUCell(4*encoding_dim)
		infer_state = infer_gru.zero_state(batch_size, tf.float32)
		for iter_step in range(8):
			# After the first step, reuse the variables created in this scope.
			if iter_step > 0:
				scope.reuse_variables()
 
			# Question glimpse from the current state, then a passage glimpse
			# from the state together with that question glimpse.
			_, q_glimpse = glimpse(W_q, b_q, encoded_Q, infer_state)
			d_attention, d_glimpse = glimpse(W_d, b_d, encoded_X, tf.concat([infer_state, q_glimpse], 1))
 
			gate_concat = tf.concat([infer_state, q_glimpse, d_glimpse, q_glimpse * d_glimpse], 1)
 
			# Sigmoid gates (with dropout) scaling each glimpse before the GRU update.
			r_d = tf.sigmoid(tf.matmul(gate_concat, g_d))
			r_d = tf.nn.dropout(r_d, keep_prob)
			r_q = tf.sigmoid(tf.matmul(gate_concat, g_q))
			r_q = tf.nn.dropout(r_q, keep_prob)
 
			combined_gated_glimpse = tf.concat([r_q * q_glimpse, r_d * d_glimpse], 1)
			_, infer_state = infer_gru(combined_gated_glimpse, infer_state)
 
	# Zero the attention on positions whose id in X is 0.
	return tf.to_float(tf.sign(tf.abs(X))) * d_attention

In [10]:

def train_neural_attention():
	"""Build the model, train it for 20000 steps and report accuracy periodically.

	The loss is the negative mean log of the attention mass that falls on the
	passage positions whose word id equals the answer id. Gradients are
	clipped per tensor to norm 5 and applied with Adam. If a checkpoint is
	found in the current directory it is restored before training. Every 1000
	steps the model is saved and accuracy is printed for the first
	``batch_size`` validation samples.
	"""
	X_attentions = neural_attention()
	# Mask of positions where X == A, times attention, summed per sample;
	# the small constant keeps log() away from zero.
	loss = -tf.reduce_mean(tf.log(tf.reduce_sum(tf.to_float(tf.equal(tf.expand_dims(A, -1), X)) * X_attentions, 1) + tf.constant(0.00001)))
 
	optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
	grads_and_vars = optimizer.compute_gradients(loss)
	# NOTE(review): tf.clip_by_norm would be handed None for any variable
	# without a gradient -- confirm every variable contributes to the loss.
	capped_grads_and_vars = [(tf.clip_by_norm(g, 5), v) for g,v in grads_and_vars]
	train_op = optimizer.apply_gradients(capped_grads_and_vars)
 
	saver = tf.train.Saver()
	with tf.Session() as sess:
		sess.run(tf.global_variables_initializer())
 
		# writer = tf.summary.FileWriter()
		# Resume from the previous training run, if a checkpoint exists
		ckpt = tf.train.get_checkpoint_state('.')
		if ckpt != None:
			print(ckpt.model_checkpoint_path)
			saver.restore(sess, ckpt.model_checkpoint_path)
		else:
			print("没找到模型")
 
		for step in range(20000):
			train_x, train_q, train_a = get_next_batch()
			loss_, _ = sess.run([loss, train_op], feed_dict={X:train_x, Q:train_q, A:train_a, keep_prob:0.7})
			# step/200 is the progress through the 20000 steps, in percent.
			print('step/20000 --------------->',step/200,'% ','loss -------->',loss_)
 
			# Save the model and compute accuracy
			if step % 1000 == 0:
				path = saver.save(sess, 'machine_reading.model', global_step=step)
				print('model------------------------------------>',path)
 
				# The whole validation file is read, but only the first
				# batch_size samples are evaluated (placeholders have a fixed batch size).
				test_x, test_q, test_a = get_test_batch()
				test_x, test_q, test_a = np.array(test_x[:batch_size]), np.array(test_q[:batch_size]), np.array(test_a[:batch_size])
				attentions = sess.run(X_attentions, feed_dict={X:test_x, Q:test_q, keep_prob:1.})
				correct_count = 0
				for x in range(test_x.shape[0]):
					# Sum the attention over every occurrence of each word id;
					# the prediction is the id with the largest total.
					probs = defaultdict(int)
					for idx, word in enumerate(test_x[x,:]):
						probs[word] += attentions[x, idx]
					guess = max(probs, key=probs.get)
					if guess == test_a[x]:
						correct_count += 1
				print('acc -------------------------------------->',correct_count / test_x.shape[0])

In [None]:
# Build the graph and start training (prints loss every step, accuracy every 1000 steps).
train_neural_attention()

./machine_reading.model-1000
INFO:tensorflow:Restoring parameters from ./machine_reading.model-1000
step/20000 ---------------> 0.0 %  loss --------> 1.72968
model------------------------------------> machine_reading.model-0
acc --------------------------------------> 0.53125
step/20000 ---------------> 0.005 %  loss --------> 1.76237
step/20000 ---------------> 0.01 %  loss --------> 1.70313
step/20000 ---------------> 0.015 %  loss --------> 1.5382
step/20000 ---------------> 0.02 %  loss --------> 1.45105
step/20000 ---------------> 0.025 %  loss --------> 1.64077
step/20000 ---------------> 0.03 %  loss --------> 1.27137
step/20000 ---------------> 0.035 %  loss --------> 1.24154
step/20000 ---------------> 0.04 %  loss --------> 1.72224
step/20000 ---------------> 0.045 %  loss --------> 1.44046
step/20000 ---------------> 0.05 %  loss --------> 1.42223
step/20000 ---------------> 0.055 %  loss --------> 1.90536
step/20000 ---------------> 0.06 %  loss --------> 1.98717
step/20000

step/20000 ---------------> 0.675 %  loss --------> 1.52102
step/20000 ---------------> 0.68 %  loss --------> 1.76172
step/20000 ---------------> 0.685 %  loss --------> 1.67434
step/20000 ---------------> 0.69 %  loss --------> 1.50907
step/20000 ---------------> 0.695 %  loss --------> 1.68866
step/20000 ---------------> 0.7 %  loss --------> 1.42995
step/20000 ---------------> 0.705 %  loss --------> 1.45511
step/20000 ---------------> 0.71 %  loss --------> 1.92083
step/20000 ---------------> 0.715 %  loss --------> 1.58054
step/20000 ---------------> 0.72 %  loss --------> 1.29093
step/20000 ---------------> 0.725 %  loss --------> 1.57574
step/20000 ---------------> 0.73 %  loss --------> 1.13569
step/20000 ---------------> 0.735 %  loss --------> 1.25166
step/20000 ---------------> 0.74 %  loss --------> 1.46779
step/20000 ---------------> 0.745 %  loss --------> 1.18554
step/20000 ---------------> 0.75 %  loss --------> 1.43727
step/20000 ---------------> 0.755 %  loss -------

step/20000 ---------------> 1.37 %  loss --------> 1.67212
step/20000 ---------------> 1.375 %  loss --------> 1.47239
step/20000 ---------------> 1.38 %  loss --------> 1.13021
step/20000 ---------------> 1.385 %  loss --------> 1.00024
step/20000 ---------------> 1.39 %  loss --------> 1.27197
step/20000 ---------------> 1.395 %  loss --------> 1.68263
step/20000 ---------------> 1.4 %  loss --------> 1.31955
step/20000 ---------------> 1.405 %  loss --------> 1.41735
step/20000 ---------------> 1.41 %  loss --------> 1.21381
step/20000 ---------------> 1.415 %  loss --------> 1.56131
step/20000 ---------------> 1.42 %  loss --------> 1.29741
step/20000 ---------------> 1.425 %  loss --------> 1.61654
step/20000 ---------------> 1.43 %  loss --------> 1.73302
step/20000 ---------------> 1.435 %  loss --------> 1.29677
step/20000 ---------------> 1.44 %  loss --------> 1.08714
step/20000 ---------------> 1.445 %  loss --------> 1.60285
step/20000 ---------------> 1.45 %  loss --------

step/20000 ---------------> 2.06 %  loss --------> 1.56459
step/20000 ---------------> 2.065 %  loss --------> 1.50394
step/20000 ---------------> 2.07 %  loss --------> 1.43918
step/20000 ---------------> 2.075 %  loss --------> 1.30675
step/20000 ---------------> 2.08 %  loss --------> 1.48197
step/20000 ---------------> 2.085 %  loss --------> 1.54478
step/20000 ---------------> 2.09 %  loss --------> 1.30126
step/20000 ---------------> 2.095 %  loss --------> 1.38263
step/20000 ---------------> 2.1 %  loss --------> 1.8698
step/20000 ---------------> 2.105 %  loss --------> 1.19453
step/20000 ---------------> 2.11 %  loss --------> 1.70254
step/20000 ---------------> 2.115 %  loss --------> 1.01888
step/20000 ---------------> 2.12 %  loss --------> 1.02891
step/20000 ---------------> 2.125 %  loss --------> 1.22237
step/20000 ---------------> 2.13 %  loss --------> 1.24909
step/20000 ---------------> 2.135 %  loss --------> 1.66195
step/20000 ---------------> 2.14 %  loss -------->

step/20000 ---------------> 2.75 %  loss --------> 1.36408
step/20000 ---------------> 2.755 %  loss --------> 1.2414
step/20000 ---------------> 2.76 %  loss --------> 1.58155
step/20000 ---------------> 2.765 %  loss --------> 1.03223
step/20000 ---------------> 2.77 %  loss --------> 1.38435
step/20000 ---------------> 2.775 %  loss --------> 1.2714
step/20000 ---------------> 2.78 %  loss --------> 1.22085
step/20000 ---------------> 2.785 %  loss --------> 1.05053
step/20000 ---------------> 2.79 %  loss --------> 1.43266
step/20000 ---------------> 2.795 %  loss --------> 1.89264
step/20000 ---------------> 2.8 %  loss --------> 1.57473
step/20000 ---------------> 2.805 %  loss --------> 1.0759
step/20000 ---------------> 2.81 %  loss --------> 1.12675
step/20000 ---------------> 2.815 %  loss --------> 1.38541
step/20000 ---------------> 2.82 %  loss --------> 0.773092
step/20000 ---------------> 2.825 %  loss --------> 1.34607
step/20000 ---------------> 2.83 %  loss --------> 

step/20000 ---------------> 3.44 %  loss --------> 1.25314
step/20000 ---------------> 3.445 %  loss --------> 1.24212
step/20000 ---------------> 3.45 %  loss --------> 0.957003
step/20000 ---------------> 3.455 %  loss --------> 1.68404
step/20000 ---------------> 3.46 %  loss --------> 1.32904
step/20000 ---------------> 3.465 %  loss --------> 1.1391
step/20000 ---------------> 3.47 %  loss --------> 1.22292
step/20000 ---------------> 3.475 %  loss --------> 1.34474
step/20000 ---------------> 3.48 %  loss --------> 1.67424
step/20000 ---------------> 3.485 %  loss --------> 1.04261
step/20000 ---------------> 3.49 %  loss --------> 1.02702
step/20000 ---------------> 3.495 %  loss --------> 1.49868
step/20000 ---------------> 3.5 %  loss --------> 1.63915
step/20000 ---------------> 3.505 %  loss --------> 0.903978
step/20000 ---------------> 3.51 %  loss --------> 1.20665
step/20000 ---------------> 3.515 %  loss --------> 1.19801
step/20000 ---------------> 3.52 %  loss -------

step/20000 ---------------> 4.13 %  loss --------> 1.41557
step/20000 ---------------> 4.135 %  loss --------> 0.854989
step/20000 ---------------> 4.14 %  loss --------> 1.67087
step/20000 ---------------> 4.145 %  loss --------> 1.06585
step/20000 ---------------> 4.15 %  loss --------> 1.07444
step/20000 ---------------> 4.155 %  loss --------> 1.21386
step/20000 ---------------> 4.16 %  loss --------> 1.0395
step/20000 ---------------> 4.165 %  loss --------> 1.16032
step/20000 ---------------> 4.17 %  loss --------> 1.24524
step/20000 ---------------> 4.175 %  loss --------> 1.31186
step/20000 ---------------> 4.18 %  loss --------> 1.4576
step/20000 ---------------> 4.185 %  loss --------> 1.51303
step/20000 ---------------> 4.19 %  loss --------> 1.53839
step/20000 ---------------> 4.195 %  loss --------> 1.37854
step/20000 ---------------> 4.2 %  loss --------> 1.07691
step/20000 ---------------> 4.205 %  loss --------> 1.40574
step/20000 ---------------> 4.21 %  loss -------->

step/20000 ---------------> 4.82 %  loss --------> 0.833072
step/20000 ---------------> 4.825 %  loss --------> 1.30459
step/20000 ---------------> 4.83 %  loss --------> 1.04844
step/20000 ---------------> 4.835 %  loss --------> 1.47277
step/20000 ---------------> 4.84 %  loss --------> 1.24982
step/20000 ---------------> 4.845 %  loss --------> 1.00313
step/20000 ---------------> 4.85 %  loss --------> 1.16602
step/20000 ---------------> 4.855 %  loss --------> 1.21621
step/20000 ---------------> 4.86 %  loss --------> 1.00411
step/20000 ---------------> 4.865 %  loss --------> 1.63554
step/20000 ---------------> 4.87 %  loss --------> 1.10616
step/20000 ---------------> 4.875 %  loss --------> 1.20548
step/20000 ---------------> 4.88 %  loss --------> 0.948597
step/20000 ---------------> 4.885 %  loss --------> 0.783456
step/20000 ---------------> 4.89 %  loss --------> 1.36274
step/20000 ---------------> 4.895 %  loss --------> 0.746026
step/20000 ---------------> 4.9 %  loss ----

step/20000 ---------------> 5.5 %  loss --------> 1.53447
step/20000 ---------------> 5.505 %  loss --------> 1.30919
step/20000 ---------------> 5.51 %  loss --------> 1.61356
step/20000 ---------------> 5.515 %  loss --------> 1.59908
step/20000 ---------------> 5.52 %  loss --------> 1.56555
step/20000 ---------------> 5.525 %  loss --------> 1.07513
step/20000 ---------------> 5.53 %  loss --------> 1.7567
step/20000 ---------------> 5.535 %  loss --------> 1.81102
step/20000 ---------------> 5.54 %  loss --------> 1.55537
step/20000 ---------------> 5.545 %  loss --------> 1.82304
step/20000 ---------------> 5.55 %  loss --------> 1.34185
step/20000 ---------------> 5.555 %  loss --------> 1.26026
step/20000 ---------------> 5.56 %  loss --------> 1.67499
step/20000 ---------------> 5.565 %  loss --------> 1.00176
step/20000 ---------------> 5.57 %  loss --------> 0.793505
step/20000 ---------------> 5.575 %  loss --------> 1.23665
step/20000 ---------------> 5.58 %  loss --------

step/20000 ---------------> 6.19 %  loss --------> 1.18035
step/20000 ---------------> 6.195 %  loss --------> 1.70187
step/20000 ---------------> 6.2 %  loss --------> 1.25744
step/20000 ---------------> 6.205 %  loss --------> 1.6044
step/20000 ---------------> 6.21 %  loss --------> 1.19595
step/20000 ---------------> 6.215 %  loss --------> 1.15496
step/20000 ---------------> 6.22 %  loss --------> 1.85854
step/20000 ---------------> 6.225 %  loss --------> 1.13522
step/20000 ---------------> 6.23 %  loss --------> 0.929467
step/20000 ---------------> 6.235 %  loss --------> 1.13047
step/20000 ---------------> 6.24 %  loss --------> 1.14357
step/20000 ---------------> 6.245 %  loss --------> 1.38091
step/20000 ---------------> 6.25 %  loss --------> 1.34173
step/20000 ---------------> 6.255 %  loss --------> 1.38343
step/20000 ---------------> 6.26 %  loss --------> 1.40355
step/20000 ---------------> 6.265 %  loss --------> 1.59813
step/20000 ---------------> 6.27 %  loss --------

step/20000 ---------------> 6.885 %  loss --------> 1.29908
step/20000 ---------------> 6.89 %  loss --------> 0.849008
step/20000 ---------------> 6.895 %  loss --------> 1.08626
step/20000 ---------------> 6.9 %  loss --------> 1.16033
step/20000 ---------------> 6.905 %  loss --------> 1.21634
step/20000 ---------------> 6.91 %  loss --------> 1.6282
step/20000 ---------------> 6.915 %  loss --------> 1.5672
step/20000 ---------------> 6.92 %  loss --------> 1.28936
step/20000 ---------------> 6.925 %  loss --------> 1.14049
step/20000 ---------------> 6.93 %  loss --------> 1.24224
step/20000 ---------------> 6.935 %  loss --------> 1.4663
step/20000 ---------------> 6.94 %  loss --------> 1.50354
step/20000 ---------------> 6.945 %  loss --------> 1.52647
step/20000 ---------------> 6.95 %  loss --------> 1.34337
step/20000 ---------------> 6.955 %  loss --------> 0.876062
step/20000 ---------------> 6.96 %  loss --------> 1.50721
step/20000 ---------------> 6.965 %  loss --------

step/20000 ---------------> 7.58 %  loss --------> 1.36634
step/20000 ---------------> 7.585 %  loss --------> 1.4214
step/20000 ---------------> 7.59 %  loss --------> 1.13092
step/20000 ---------------> 7.595 %  loss --------> 1.10108
step/20000 ---------------> 7.6 %  loss --------> 1.8236
step/20000 ---------------> 7.605 %  loss --------> 1.10133
step/20000 ---------------> 7.61 %  loss --------> 1.8077
step/20000 ---------------> 7.615 %  loss --------> 1.28789
step/20000 ---------------> 7.62 %  loss --------> 1.18418
step/20000 ---------------> 7.625 %  loss --------> 1.04127
step/20000 ---------------> 7.63 %  loss --------> 1.40442
step/20000 ---------------> 7.635 %  loss --------> 1.79516
step/20000 ---------------> 7.64 %  loss --------> 1.72208
step/20000 ---------------> 7.645 %  loss --------> 1.89135
step/20000 ---------------> 7.65 %  loss --------> 1.41051
step/20000 ---------------> 7.655 %  loss --------> 1.39047
step/20000 ---------------> 7.66 %  loss --------> 1

step/20000 ---------------> 8.27 %  loss --------> 1.28288
step/20000 ---------------> 8.275 %  loss --------> 1.13537
step/20000 ---------------> 8.28 %  loss --------> 2.02637
step/20000 ---------------> 8.285 %  loss --------> 1.21586
step/20000 ---------------> 8.29 %  loss --------> 1.35503
step/20000 ---------------> 8.295 %  loss --------> 1.42522
step/20000 ---------------> 8.3 %  loss --------> 1.25613
step/20000 ---------------> 8.305 %  loss --------> 1.64434
step/20000 ---------------> 8.31 %  loss --------> 1.63442
step/20000 ---------------> 8.315 %  loss --------> 1.25581
step/20000 ---------------> 8.32 %  loss --------> 0.983108
step/20000 ---------------> 8.325 %  loss --------> 2.09511
step/20000 ---------------> 8.33 %  loss --------> 1.3329
step/20000 ---------------> 8.335 %  loss --------> 1.6492
step/20000 ---------------> 8.34 %  loss --------> 1.49664
step/20000 ---------------> 8.345 %  loss --------> 1.27629
step/20000 ---------------> 8.35 %  loss -------->

step/20000 ---------------> 8.96 %  loss --------> 1.45967
step/20000 ---------------> 8.965 %  loss --------> 1.33987
step/20000 ---------------> 8.97 %  loss --------> 0.985385
step/20000 ---------------> 8.975 %  loss --------> 0.878063
step/20000 ---------------> 8.98 %  loss --------> 1.4981
step/20000 ---------------> 8.985 %  loss --------> 1.26188
step/20000 ---------------> 8.99 %  loss --------> 1.24976
step/20000 ---------------> 8.995 %  loss --------> 1.13053
step/20000 ---------------> 9.0 %  loss --------> 0.980601
step/20000 ---------------> 9.005 %  loss --------> 1.30821
step/20000 ---------------> 9.01 %  loss --------> 1.31156
step/20000 ---------------> 9.015 %  loss --------> 1.33277
step/20000 ---------------> 9.02 %  loss --------> 1.58099
step/20000 ---------------> 9.025 %  loss --------> 1.37075
step/20000 ---------------> 9.03 %  loss --------> 0.920466
step/20000 ---------------> 9.035 %  loss --------> 1.31327
step/20000 ---------------> 9.04 %  loss -----

step/20000 ---------------> 9.65 %  loss --------> 1.04341
step/20000 ---------------> 9.655 %  loss --------> 1.29999
step/20000 ---------------> 9.66 %  loss --------> 0.994057
step/20000 ---------------> 9.665 %  loss --------> 0.93193
step/20000 ---------------> 9.67 %  loss --------> 1.45195
step/20000 ---------------> 9.675 %  loss --------> 0.967174
step/20000 ---------------> 9.68 %  loss --------> 1.38896
step/20000 ---------------> 9.685 %  loss --------> 1.23696
step/20000 ---------------> 9.69 %  loss --------> 1.01426
step/20000 ---------------> 9.695 %  loss --------> 1.34667
step/20000 ---------------> 9.7 %  loss --------> 1.18577
step/20000 ---------------> 9.705 %  loss --------> 0.769829
step/20000 ---------------> 9.71 %  loss --------> 1.4853
step/20000 ---------------> 9.715 %  loss --------> 1.22017
step/20000 ---------------> 9.72 %  loss --------> 1.19529
step/20000 ---------------> 9.725 %  loss --------> 1.08129
step/20000 ---------------> 9.73 %  loss ------

step/20000 ---------------> 10.325 %  loss --------> 1.21227
step/20000 ---------------> 10.33 %  loss --------> 1.6088
step/20000 ---------------> 10.335 %  loss --------> 1.00279
step/20000 ---------------> 10.34 %  loss --------> 1.55926
step/20000 ---------------> 10.345 %  loss --------> 1.37274
step/20000 ---------------> 10.35 %  loss --------> 1.2233
step/20000 ---------------> 10.355 %  loss --------> 0.868197
step/20000 ---------------> 10.36 %  loss --------> 0.684415
step/20000 ---------------> 10.365 %  loss --------> 1.2064
step/20000 ---------------> 10.37 %  loss --------> 0.991352
step/20000 ---------------> 10.375 %  loss --------> 0.950244
step/20000 ---------------> 10.38 %  loss --------> 1.21025
step/20000 ---------------> 10.385 %  loss --------> 1.42862
step/20000 ---------------> 10.39 %  loss --------> 0.882968
step/20000 ---------------> 10.395 %  loss --------> 1.22763
step/20000 ---------------> 10.4 %  loss --------> 1.204
step/20000 ---------------> 10.40

step/20000 ---------------> 11.005 %  loss --------> 0.9934
step/20000 ---------------> 11.01 %  loss --------> 0.722939
step/20000 ---------------> 11.015 %  loss --------> 1.17292
step/20000 ---------------> 11.02 %  loss --------> 0.98989
step/20000 ---------------> 11.025 %  loss --------> 1.32981
step/20000 ---------------> 11.03 %  loss --------> 1.41828
step/20000 ---------------> 11.035 %  loss --------> 0.759907
step/20000 ---------------> 11.04 %  loss --------> 0.912906
step/20000 ---------------> 11.045 %  loss --------> 1.04783
step/20000 ---------------> 11.05 %  loss --------> 0.920403
step/20000 ---------------> 11.055 %  loss --------> 1.38908
step/20000 ---------------> 11.06 %  loss --------> 1.25951
step/20000 ---------------> 11.065 %  loss --------> 1.2179
step/20000 ---------------> 11.07 %  loss --------> 1.34659
step/20000 ---------------> 11.075 %  loss --------> 1.50782
step/20000 ---------------> 11.08 %  loss --------> 1.13814
step/20000 ---------------> 11

step/20000 ---------------> 11.68 %  loss --------> 1.20525
step/20000 ---------------> 11.685 %  loss --------> 1.26151
step/20000 ---------------> 11.69 %  loss --------> 0.534061
step/20000 ---------------> 11.695 %  loss --------> 0.938799
step/20000 ---------------> 11.7 %  loss --------> 0.753304
step/20000 ---------------> 11.705 %  loss --------> 1.25423
step/20000 ---------------> 11.71 %  loss --------> 1.01438
step/20000 ---------------> 11.715 %  loss --------> 0.824462
step/20000 ---------------> 11.72 %  loss --------> 1.30941
step/20000 ---------------> 11.725 %  loss --------> 0.951051
step/20000 ---------------> 11.73 %  loss --------> 1.05307
step/20000 ---------------> 11.735 %  loss --------> 0.85055
step/20000 ---------------> 11.74 %  loss --------> 1.04028
step/20000 ---------------> 11.745 %  loss --------> 1.19759
step/20000 ---------------> 11.75 %  loss --------> 1.45744
step/20000 ---------------> 11.755 %  loss --------> 0.873349
step/20000 --------------->

step/20000 ---------------> 12.355 %  loss --------> 1.58362
step/20000 ---------------> 12.36 %  loss --------> 1.10715
step/20000 ---------------> 12.365 %  loss --------> 0.912078
step/20000 ---------------> 12.37 %  loss --------> 0.875678
step/20000 ---------------> 12.375 %  loss --------> 0.792544
step/20000 ---------------> 12.38 %  loss --------> 1.30721
step/20000 ---------------> 12.385 %  loss --------> 1.03989
step/20000 ---------------> 12.39 %  loss --------> 1.48263
step/20000 ---------------> 12.395 %  loss --------> 1.29117
step/20000 ---------------> 12.4 %  loss --------> 0.815064
step/20000 ---------------> 12.405 %  loss --------> 1.17989
step/20000 ---------------> 12.41 %  loss --------> 0.965657
step/20000 ---------------> 12.415 %  loss --------> 0.860354
step/20000 ---------------> 12.42 %  loss --------> 0.823284
step/20000 ---------------> 12.425 %  loss --------> 1.14097
step/20000 ---------------> 12.43 %  loss --------> 1.04467
step/20000 ---------------

step/20000 ---------------> 13.03 %  loss --------> 1.20845
step/20000 ---------------> 13.035 %  loss --------> 1.00611
step/20000 ---------------> 13.04 %  loss --------> 0.922518
step/20000 ---------------> 13.045 %  loss --------> 0.746982
step/20000 ---------------> 13.05 %  loss --------> 0.849111
step/20000 ---------------> 13.055 %  loss --------> 1.26637
step/20000 ---------------> 13.06 %  loss --------> 1.21535
step/20000 ---------------> 13.065 %  loss --------> 0.789876
step/20000 ---------------> 13.07 %  loss --------> 1.06724
step/20000 ---------------> 13.075 %  loss --------> 0.695736
step/20000 ---------------> 13.08 %  loss --------> 1.26494
step/20000 ---------------> 13.085 %  loss --------> 1.20216
step/20000 ---------------> 13.09 %  loss --------> 0.808811
step/20000 ---------------> 13.095 %  loss --------> 1.45674
step/20000 ---------------> 13.1 %  loss --------> 0.96063
step/20000 ---------------> 13.105 %  loss --------> 1.86947
step/20000 --------------->

step/20000 ---------------> 13.71 %  loss --------> 1.18674
step/20000 ---------------> 13.715 %  loss --------> 1.06582
step/20000 ---------------> 13.72 %  loss --------> 1.27162
step/20000 ---------------> 13.725 %  loss --------> 1.24627
step/20000 ---------------> 13.73 %  loss --------> 0.997805
step/20000 ---------------> 13.735 %  loss --------> 0.9292
step/20000 ---------------> 13.74 %  loss --------> 1.17998
step/20000 ---------------> 13.745 %  loss --------> 1.40484
step/20000 ---------------> 13.75 %  loss --------> 1.35211
step/20000 ---------------> 13.755 %  loss --------> 1.18568
step/20000 ---------------> 13.76 %  loss --------> 0.97168
step/20000 ---------------> 13.765 %  loss --------> 1.23628
step/20000 ---------------> 13.77 %  loss --------> 1.50504
step/20000 ---------------> 13.775 %  loss --------> 0.870043
step/20000 ---------------> 13.78 %  loss --------> 1.60175
step/20000 ---------------> 13.785 %  loss --------> 1.27412
step/20000 ---------------> 13.