# Tolstoy
https://habrahabr.ru/post/342738/

In [1]:
import os
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

In [13]:
# Read the whole corpus into memory. The encoding is passed explicitly because the
# text is Cyrillic and open() otherwise falls back to the platform locale encoding.
# NOTE(review): assumes anna.txt is stored as UTF-8 -- confirm against the data file.
with open('anna.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Character-level vocabulary: every distinct character, in sorted order
vocab = sorted(set(text))
# Lookup tables character -> integer id and integer id -> character
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
# The full text as an int32 array of character ids
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

In [14]:
# Preview the first 110 characters of the raw text
text[:110]

'ЧАСТЬ ПЕРВАЯ\n\n\n\nI\n\nВсе счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему.'

In [15]:
# The same 110 characters as integer ids
encoded[:110]

array([ 99,  77,  93,  94, 102,   1,  91,  82,  92,  79,  77, 105,   0,
         0,   0,   0,  30,   0,   0,  79, 123, 111,   1, 123, 129, 106,
       123, 124, 117, 114, 108, 133, 111,   1, 123, 111, 118, 134, 114,
         1, 121, 120, 127, 120, 112, 114,   1, 110, 122, 125, 109,   1,
       119, 106,   1, 110, 122, 125, 109, 106,   7,   1, 116, 106, 112,
       110, 106, 137,   1, 119, 111, 123, 129, 106, 123, 124, 117, 114,
       108, 106, 137,   1, 123, 111, 118, 134, 137,   1, 119, 111, 123,
       129, 106, 123, 124, 117, 114, 108, 106,   1, 121, 120,   8, 123,
       108, 120, 111, 118, 125,   9], dtype=int32)

In [16]:
# Number of distinct characters; passed to CharRNN as num_classes below
len(vocab)

140

In [17]:
def get_batches(arr, n_seqs, n_steps):
    '''Generate (x, y) batches of shape n_seqs x n_steps from the array arr.

       The usable prefix of arr is split into n_seqs contiguous rows, and
       consecutive windows of n_steps columns are yielded. Every target in y
       is the character that follows the corresponding input in arr.

       Arguments
       ---------
       arr: 1-D array (or sequence) of encoded characters to batch
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Sequence length, the number of steps per batch

       Yields
       ------
       x: inputs, shape (n_seqs, n_steps)
       y: targets, same shape and dtype as x

       Raises
       ------
       ValueError: if arr holds fewer than n_seqs * n_steps elements
    '''
    arr = np.asarray(arr)

    # Characters per batch and the number of full batches we can form
    characters_per_batch = n_seqs * n_steps
    n_batches = len(arr) // characters_per_batch
    if n_batches == 0:
        raise ValueError(
            'need at least n_seqs * n_steps = {} elements to form a batch, got {}'
            .format(characters_per_batch, len(arr)))

    # Keep only as many characters as fill a whole number of batches
    usable = n_batches * characters_per_batch
    inputs = arr[:usable]

    # The target of each position is the next character of the stream. Building the
    # targets before the reshape keeps them correct across window and row boundaries
    # (previously the last target of each window wrapped back to the window's start).
    targets = np.roll(inputs, -1)
    if len(arr) > usable:
        # A trimmed-off character exists: it is the true successor of the last input.
        targets[-1] = arr[usable]
    # Otherwise np.roll has already wrapped the final target to the first character.

    # Reshape 1D -> 2D with n_seqs rows
    inputs = inputs.reshape((n_seqs, -1))
    targets = targets.reshape((n_seqs, -1))

    for n in range(0, inputs.shape[1], n_steps):
        yield inputs[:, n:n+n_steps], targets[:, n:n+n_steps]

In [18]:
# Pull one 10 x 50 batch from the generator as a sanity check
batches = get_batches(encoded, 10, 50)
x, y = next(batches)
# Show the top-left 5 x 5 corner of inputs and targets;
# each row of y should be the matching row of x shifted one character ahead
print('x\n', x[:5, :5])
print('\ny\n', y[:5, :5])

x
 [[ 99  77  93  94 102]
 [  1 110 108 114 112]
 [ 79 120 124   1 120]
 [114 119   1 109 120]
 [123 121 122 106 108]]

y
 [[ 77  93  94 102   1]
 [110 108 114 112 111]
 [120 124   1 120 124]
 [119   1 109 120 108]
 [121 122 106 108 111]]


In [19]:
def build_inputs(batch_size, num_steps):
    ''' Create the placeholders for the inputs, the targets and the dropout keep probability.

        Arguments
        ---------
        batch_size: Batch size, the number of sequences per batch
        num_steps: Sequence length, the number of steps per batch

        Returns
        -------
        The tuple (inputs, targets, keep_prob) of placeholders.

    '''
    # Inputs and targets share the same int32 [batch_size, num_steps] layout
    batch_shape = [batch_size, num_steps]
    inputs = tf.placeholder(tf.int32, batch_shape, name='inputs')
    targets = tf.placeholder(tf.int32, batch_shape, name='targets')

    # Dropout keep probability, fed at run time (no static shape declared)
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    return inputs, targets, keep_prob

In [20]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build the stacked LSTM cell and its zero initial state.

        Arguments
        ---------
        lstm_size: Size of the hidden layers in the LSTM cells
        num_layers: Number of LSTM layers
        batch_size: Batch size
        keep_prob: Dropout keep probability (a tf.placeholder in this notebook)

        Returns
        -------
        The tuple (cell, initial_state).

    '''
    # One dropout-wrapped basic LSTM cell per layer
    layers = []
    for _ in range(num_layers):
        base_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        layers.append(tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob))

    # Stack the layers into a single multi-layer cell
    cell = tf.contrib.rnn.MultiRNNCell(layers)

    # All-zero float32 starting state for a batch of batch_size sequences
    initial_state = cell.zero_state(batch_size, tf.float32)

    return cell, initial_state

In [21]:
def build_output(lstm_output, in_size, out_size):
    ''' Build the softmax layer and return its predictions together with the logits.

        Arguments
        ---------
        lstm_output: Output tensor coming from the LSTM layers
        in_size: Size of the incoming tensor (number of LSTM units in the hidden layer)
        out_size: Size of the softmax layer (vocabulary size)

        Returns
        -------
        The tuple (out, logits): softmax probabilities and the raw logits.

    '''

    # Flatten the LSTM output to 2D so that every row holds in_size features
    stacked = tf.concat(lstm_output, axis=1)
    flat = tf.reshape(stacked, [-1, in_size])

    # Weights and bias connecting the LSTM output to the softmax layer
    with tf.variable_scope('softmax'):
        weights = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        bias = tf.Variable(tf.zeros(out_size))

    # Linear projection to one logit per class
    logits = tf.matmul(flat, weights) + bias

    # Softmax over the logits gives the predicted distribution
    return tf.nn.softmax(logits, name='predictions'), logits

In [22]:
def build_loss(logits, targets, lstm_size, num_classes):
    ''' Compute the mean softmax cross-entropy loss between logits and targets.

        Arguments
        ---------
        logits: Logits produced by the output layer
        targets: Target values the predictions are compared against
        lstm_size: Number of units in an LSTM layer (not used by this function)
        num_classes: Number of classes in the targets (vocabulary size)

        Returns
        -------
        The loss averaged over all elements.

    '''
    # One-hot encode the targets and give them the same shape as logits
    one_hot_targets = tf.reshape(tf.one_hot(targets, num_classes), logits.get_shape())

    # Cross-entropy per element, then the mean
    per_step_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_targets)
    return tf.reduce_mean(per_step_loss)

In [23]:
def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build the Adam training op with gradient clipping.

        Arguments
        ---------
        loss: Loss tensor to minimise
        learning_rate: Learning rate passed to the Adam optimizer
        grad_clip: Global norm to which the gradients are clipped

        Returns
        -------
        The op that applies the clipped gradients.

    '''

    # Gradients of the loss with respect to every trainable variable
    trainables = tf.trainable_variables()
    raw_grads = tf.gradients(loss, trainables)

    # Clip by global norm to keep "exploding" gradients under control
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)

    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped_grads, trainables))

In [24]:
class CharRNN:
    ''' Character-level LSTM network assembled from the build_* helpers.

        Constructing an instance resets the default TensorFlow graph and builds
        a new one, so only the most recently created model is usable.

        Arguments
        ---------
        num_classes: Number of distinct characters (vocabulary size)
        batch_size: Number of sequences per batch
        num_steps: Number of steps per sequence
        lstm_size: Number of units in each LSTM layer
        num_layers: Number of stacked LSTM layers
        learning_rate: Learning rate for the optimizer
        grad_clip: Global norm used for gradient clipping
        sampling: If true, the graph is built for batch_size = num_steps = 1

        Attributes
        ----------
        inputs, targets, keep_prob: input placeholders
        initial_state, final_state: LSTM state before and after the batch
        prediction, logits: softmax output and raw logits
        loss, optimizer: loss tensor and training op
    '''

    def __init__(self, num_classes, batch_size=64, num_steps=50,
                       lstm_size=128, num_layers=2, learning_rate=0.001,
                       grad_clip=5, sampling=False):

        # The same network is reused for sampling (text generation),
        # where one character is fed at a time
        if sampling:
            batch_size, num_steps = 1, 1

        tf.reset_default_graph()

        # Input placeholders
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Stacked LSTM cell and its zero state
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        ### Run the data through the RNN layers
        # One-hot encode the input characters
        x_one_hot = tf.one_hot(self.inputs, num_classes)

        # Unroll the RNN over the sequence and collect outputs and final state
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        # Softmax predictions and logits
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)

        # Loss and optimizer (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [25]:
# Hyperparameters used by the training cell below
batch_size = 100        # Batch size (sequences per batch)
num_steps = 100         # Steps per sequence
lstm_size = 512         # Number of LSTM units in a hidden layer
num_layers = 2          # Number of LSTM layers
learning_rate = 0.001   # Learning rate
keep_prob = 0.5         # Dropout keep probability

In [None]:
epochs = 30
# Save a checkpoint every N training steps
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

# Make sure the checkpoint directory exists before the first saver.save call,
# so the cell also works on a fresh checkout (Restart & Run All)
os.makedirs('checkpoints', exist_ok=True)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Uncomment the line below to resume training from a checkpoint
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Every epoch starts from the zero LSTM state
        new_state = sess.run(model.initial_state)
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            # Feed the previous batch's final state back in as the initial state
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    # Final checkpoint once all epochs are done
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

Epoch: 1/30...  Training Step: 1...  Training loss: 4.9429...  0.3129 sec/batch
Epoch: 1/30...  Training Step: 2...  Training loss: 4.8686...  0.3276 sec/batch
Epoch: 1/30...  Training Step: 3...  Training loss: 4.4622...  0.2294 sec/batch
Epoch: 1/30...  Training Step: 4...  Training loss: 5.0477...  0.2836 sec/batch
Epoch: 1/30...  Training Step: 5...  Training loss: 4.1799...  0.2826 sec/batch
Epoch: 1/30...  Training Step: 6...  Training loss: 3.9392...  0.1968 sec/batch
Epoch: 1/30...  Training Step: 7...  Training loss: 3.9166...  0.1785 sec/batch
Epoch: 1/30...  Training Step: 8...  Training loss: 3.7942...  0.2283 sec/batch
Epoch: 1/30...  Training Step: 9...  Training loss: 3.7031...  0.1859 sec/batch
Epoch: 1/30...  Training Step: 10...  Training loss: 3.6427...  0.1794 sec/batch
Epoch: 1/30...  Training Step: 11...  Training loss: 3.6076...  0.2558 sec/batch
Epoch: 1/30...  Training Step: 12...  Training loss: 3.5756...  0.2708 sec/batch
Epoch: 1/30...  Training Step: 13... 

Epoch: 1/30...  Training Step: 103...  Training loss: 3.8288...  0.2437 sec/batch
Epoch: 1/30...  Training Step: 104...  Training loss: 3.7700...  0.1580 sec/batch
Epoch: 1/30...  Training Step: 105...  Training loss: 3.6463...  0.2126 sec/batch
Epoch: 1/30...  Training Step: 106...  Training loss: 3.5880...  0.1955 sec/batch
Epoch: 1/30...  Training Step: 107...  Training loss: 3.4745...  0.1558 sec/batch
Epoch: 1/30...  Training Step: 108...  Training loss: 3.3973...  0.2311 sec/batch
Epoch: 1/30...  Training Step: 109...  Training loss: 3.2636...  0.1685 sec/batch
Epoch: 1/30...  Training Step: 110...  Training loss: 3.2536...  0.2097 sec/batch
Epoch: 1/30...  Training Step: 111...  Training loss: 3.2231...  0.1792 sec/batch
Epoch: 1/30...  Training Step: 112...  Training loss: 3.2347...  0.2429 sec/batch
Epoch: 1/30...  Training Step: 113...  Training loss: 3.2414...  0.2803 sec/batch
Epoch: 1/30...  Training Step: 114...  Training loss: 3.2465...  0.2693 sec/batch
Epoch: 1/30...  

Epoch: 2/30...  Training Step: 203...  Training loss: 2.7434...  0.2690 sec/batch
Epoch: 2/30...  Training Step: 204...  Training loss: 2.7220...  0.1998 sec/batch
Epoch: 2/30...  Training Step: 205...  Training loss: 2.7286...  0.2150 sec/batch
Epoch: 2/30...  Training Step: 206...  Training loss: 2.7461...  0.2697 sec/batch
Epoch: 2/30...  Training Step: 207...  Training loss: 2.7331...  0.2774 sec/batch
Epoch: 2/30...  Training Step: 208...  Training loss: 2.7159...  0.2515 sec/batch
Epoch: 2/30...  Training Step: 209...  Training loss: 2.7216...  0.3241 sec/batch
Epoch: 2/30...  Training Step: 210...  Training loss: 2.6989...  0.2039 sec/batch
Epoch: 2/30...  Training Step: 211...  Training loss: 2.6953...  0.1740 sec/batch
Epoch: 2/30...  Training Step: 212...  Training loss: 2.6995...  0.2502 sec/batch
Epoch: 2/30...  Training Step: 213...  Training loss: 2.6720...  0.1954 sec/batch
Epoch: 2/30...  Training Step: 214...  Training loss: 2.6732...  0.2288 sec/batch
Epoch: 2/30...  

Epoch: 2/30...  Training Step: 303...  Training loss: 2.4387...  0.2277 sec/batch
Epoch: 2/30...  Training Step: 304...  Training loss: 2.4836...  0.2720 sec/batch
Epoch: 2/30...  Training Step: 305...  Training loss: 2.4754...  0.2317 sec/batch
Epoch: 2/30...  Training Step: 306...  Training loss: 2.4751...  0.2181 sec/batch
Epoch: 2/30...  Training Step: 307...  Training loss: 2.4387...  0.1569 sec/batch
Epoch: 2/30...  Training Step: 308...  Training loss: 2.4396...  0.2585 sec/batch
Epoch: 2/30...  Training Step: 309...  Training loss: 2.4463...  0.2909 sec/batch
Epoch: 2/30...  Training Step: 310...  Training loss: 2.4377...  0.1860 sec/batch
Epoch: 2/30...  Training Step: 311...  Training loss: 2.4330...  0.2384 sec/batch
Epoch: 2/30...  Training Step: 312...  Training loss: 2.4429...  0.2065 sec/batch
Epoch: 2/30...  Training Step: 313...  Training loss: 2.4358...  0.1637 sec/batch
Epoch: 2/30...  Training Step: 314...  Training loss: 2.4515...  0.1892 sec/batch
Epoch: 2/30...  

Epoch: 3/30...  Training Step: 403...  Training loss: 2.2942...  0.1646 sec/batch
Epoch: 3/30...  Training Step: 404...  Training loss: 2.2840...  0.2340 sec/batch
Epoch: 3/30...  Training Step: 405...  Training loss: 2.3145...  0.2608 sec/batch
Epoch: 3/30...  Training Step: 406...  Training loss: 2.2991...  0.2679 sec/batch
Epoch: 3/30...  Training Step: 407...  Training loss: 2.3105...  0.1857 sec/batch
Epoch: 3/30...  Training Step: 408...  Training loss: 2.2730...  0.1899 sec/batch
Epoch: 3/30...  Training Step: 409...  Training loss: 2.3078...  0.2445 sec/batch
Epoch: 3/30...  Training Step: 410...  Training loss: 2.2686...  0.2447 sec/batch
Epoch: 3/30...  Training Step: 411...  Training loss: 2.2840...  0.1581 sec/batch
Epoch: 3/30...  Training Step: 412...  Training loss: 2.2755...  0.1713 sec/batch
Epoch: 3/30...  Training Step: 413...  Training loss: 2.2984...  0.1938 sec/batch
Epoch: 3/30...  Training Step: 414...  Training loss: 2.3180...  0.2541 sec/batch
Epoch: 3/30...  

Epoch: 3/30...  Training Step: 503...  Training loss: 2.1761...  0.2585 sec/batch
Epoch: 3/30...  Training Step: 504...  Training loss: 2.1593...  0.2349 sec/batch
Epoch: 3/30...  Training Step: 505...  Training loss: 2.1649...  0.1739 sec/batch
Epoch: 3/30...  Training Step: 506...  Training loss: 2.1488...  0.1903 sec/batch
Epoch: 3/30...  Training Step: 507...  Training loss: 2.1626...  0.2363 sec/batch
Epoch: 3/30...  Training Step: 508...  Training loss: 2.1603...  0.2912 sec/batch
Epoch: 3/30...  Training Step: 509...  Training loss: 2.1852...  0.2657 sec/batch
Epoch: 3/30...  Training Step: 510...  Training loss: 2.1337...  0.2784 sec/batch
Epoch: 4/30...  Training Step: 511...  Training loss: 2.2658...  0.2355 sec/batch
Epoch: 4/30...  Training Step: 512...  Training loss: 2.1531...  0.2786 sec/batch
Epoch: 4/30...  Training Step: 513...  Training loss: 2.1655...  0.1884 sec/batch
Epoch: 4/30...  Training Step: 514...  Training loss: 2.1926...  0.1727 sec/batch
Epoch: 4/30...  

Epoch: 4/30...  Training Step: 603...  Training loss: 2.0972...  0.2297 sec/batch
Epoch: 4/30...  Training Step: 604...  Training loss: 2.1233...  0.2472 sec/batch
Epoch: 4/30...  Training Step: 605...  Training loss: 2.0740...  0.1830 sec/batch
Epoch: 4/30...  Training Step: 606...  Training loss: 2.0579...  0.1722 sec/batch
Epoch: 4/30...  Training Step: 607...  Training loss: 2.0699...  0.2218 sec/batch
Epoch: 4/30...  Training Step: 608...  Training loss: 2.0788...  0.1553 sec/batch
Epoch: 4/30...  Training Step: 609...  Training loss: 2.0810...  0.1989 sec/batch
Epoch: 4/30...  Training Step: 610...  Training loss: 2.1181...  0.2434 sec/batch
Epoch: 4/30...  Training Step: 611...  Training loss: 2.1126...  0.3072 sec/batch
Epoch: 4/30...  Training Step: 612...  Training loss: 2.0646...  0.2683 sec/batch
Epoch: 4/30...  Training Step: 613...  Training loss: 2.0645...  0.2703 sec/batch
Epoch: 4/30...  Training Step: 614...  Training loss: 2.0584...  0.2516 sec/batch
Epoch: 4/30...  

Epoch: 5/30...  Training Step: 703...  Training loss: 1.9769...  0.1838 sec/batch
Epoch: 5/30...  Training Step: 704...  Training loss: 1.9390...  0.1582 sec/batch
Epoch: 5/30...  Training Step: 705...  Training loss: 2.0007...  0.1948 sec/batch
Epoch: 5/30...  Training Step: 706...  Training loss: 1.9868...  0.2859 sec/batch
Epoch: 5/30...  Training Step: 707...  Training loss: 2.0248...  0.2783 sec/batch
Epoch: 5/30...  Training Step: 708...  Training loss: 2.0001...  0.2503 sec/batch
Epoch: 5/30...  Training Step: 709...  Training loss: 2.0001...  0.2172 sec/batch
Epoch: 5/30...  Training Step: 710...  Training loss: 1.9527...  0.2699 sec/batch
Epoch: 5/30...  Training Step: 711...  Training loss: 2.0038...  0.1817 sec/batch
Epoch: 5/30...  Training Step: 712...  Training loss: 1.9890...  0.1626 sec/batch
Epoch: 5/30...  Training Step: 713...  Training loss: 1.9686...  0.2474 sec/batch
Epoch: 5/30...  Training Step: 714...  Training loss: 1.9490...  0.2766 sec/batch
Epoch: 5/30...  

Epoch: 5/30...  Training Step: 803...  Training loss: 1.9364...  0.2510 sec/batch
Epoch: 5/30...  Training Step: 804...  Training loss: 1.9634...  0.2235 sec/batch
Epoch: 5/30...  Training Step: 805...  Training loss: 1.9092...  0.2458 sec/batch
Epoch: 5/30...  Training Step: 806...  Training loss: 1.9225...  0.2868 sec/batch
Epoch: 5/30...  Training Step: 807...  Training loss: 1.9431...  0.2166 sec/batch
Epoch: 5/30...  Training Step: 808...  Training loss: 1.9307...  0.1553 sec/batch
Epoch: 5/30...  Training Step: 809...  Training loss: 1.9446...  0.1850 sec/batch
Epoch: 5/30...  Training Step: 810...  Training loss: 1.9599...  0.1587 sec/batch
Epoch: 5/30...  Training Step: 811...  Training loss: 1.9857...  0.2592 sec/batch
Epoch: 5/30...  Training Step: 812...  Training loss: 1.9138...  0.2367 sec/batch
Epoch: 5/30...  Training Step: 813...  Training loss: 1.9231...  0.2945 sec/batch
Epoch: 5/30...  Training Step: 814...  Training loss: 1.9533...  0.2410 sec/batch
Epoch: 5/30...  

Epoch: 6/30...  Training Step: 903...  Training loss: 1.8460...  0.2285 sec/batch
Epoch: 6/30...  Training Step: 904...  Training loss: 1.8767...  0.3295 sec/batch
Epoch: 6/30...  Training Step: 905...  Training loss: 1.8567...  0.1976 sec/batch
Epoch: 6/30...  Training Step: 906...  Training loss: 1.8901...  0.2687 sec/batch
Epoch: 6/30...  Training Step: 907...  Training loss: 1.8780...  0.2934 sec/batch
Epoch: 6/30...  Training Step: 908...  Training loss: 1.8365...  0.2598 sec/batch
Epoch: 6/30...  Training Step: 909...  Training loss: 1.8759...  0.2275 sec/batch
Epoch: 6/30...  Training Step: 910...  Training loss: 1.8647...  0.2554 sec/batch
Epoch: 6/30...  Training Step: 911...  Training loss: 1.8715...  0.2484 sec/batch
Epoch: 6/30...  Training Step: 912...  Training loss: 1.8606...  0.2955 sec/batch
Epoch: 6/30...  Training Step: 913...  Training loss: 1.8457...  0.2638 sec/batch
Epoch: 6/30...  Training Step: 914...  Training loss: 1.8530...  0.2652 sec/batch
Epoch: 6/30...  

Epoch: 6/30...  Training Step: 1004...  Training loss: 1.7936...  0.1634 sec/batch
Epoch: 6/30...  Training Step: 1005...  Training loss: 1.8112...  0.3037 sec/batch
Epoch: 6/30...  Training Step: 1006...  Training loss: 1.7750...  0.1747 sec/batch
Epoch: 6/30...  Training Step: 1007...  Training loss: 1.8102...  0.1588 sec/batch
Epoch: 6/30...  Training Step: 1008...  Training loss: 1.8396...  0.1782 sec/batch
Epoch: 6/30...  Training Step: 1009...  Training loss: 1.8021...  0.2616 sec/batch
Epoch: 6/30...  Training Step: 1010...  Training loss: 1.8427...  0.2002 sec/batch
Epoch: 6/30...  Training Step: 1011...  Training loss: 1.8153...  0.2303 sec/batch
Epoch: 6/30...  Training Step: 1012...  Training loss: 1.8340...  0.2393 sec/batch
Epoch: 6/30...  Training Step: 1013...  Training loss: 1.8255...  0.2741 sec/batch
Epoch: 6/30...  Training Step: 1014...  Training loss: 1.7919...  0.2983 sec/batch
Epoch: 6/30...  Training Step: 1015...  Training loss: 1.7877...  0.2583 sec/batch
Epoc

Epoch: 7/30...  Training Step: 1103...  Training loss: 1.7879...  0.2512 sec/batch
Epoch: 7/30...  Training Step: 1104...  Training loss: 1.7659...  0.2025 sec/batch
Epoch: 7/30...  Training Step: 1105...  Training loss: 1.7872...  0.2601 sec/batch
Epoch: 7/30...  Training Step: 1106...  Training loss: 1.7565...  0.2553 sec/batch
Epoch: 7/30...  Training Step: 1107...  Training loss: 1.7911...  0.2905 sec/batch
Epoch: 7/30...  Training Step: 1108...  Training loss: 1.7878...  0.2540 sec/batch
Epoch: 7/30...  Training Step: 1109...  Training loss: 1.7903...  0.2511 sec/batch
Epoch: 7/30...  Training Step: 1110...  Training loss: 1.7330...  0.2232 sec/batch
Epoch: 7/30...  Training Step: 1111...  Training loss: 1.7602...  0.2227 sec/batch
Epoch: 7/30...  Training Step: 1112...  Training loss: 1.7500...  0.1785 sec/batch
Epoch: 7/30...  Training Step: 1113...  Training loss: 1.7902...  0.2911 sec/batch
Epoch: 7/30...  Training Step: 1114...  Training loss: 1.8060...  0.2552 sec/batch
Epoc

Epoch: 8/30...  Training Step: 1202...  Training loss: 1.7208...  0.1998 sec/batch
Epoch: 8/30...  Training Step: 1203...  Training loss: 1.7180...  0.1656 sec/batch
Epoch: 8/30...  Training Step: 1204...  Training loss: 1.7084...  0.2093 sec/batch
Epoch: 8/30...  Training Step: 1205...  Training loss: 1.7774...  0.2004 sec/batch
Epoch: 8/30...  Training Step: 1206...  Training loss: 1.7729...  0.3042 sec/batch
Epoch: 8/30...  Training Step: 1207...  Training loss: 1.6980...  0.2991 sec/batch
Epoch: 8/30...  Training Step: 1208...  Training loss: 1.7352...  0.2195 sec/batch
Epoch: 8/30...  Training Step: 1209...  Training loss: 1.7267...  0.1726 sec/batch
Epoch: 8/30...  Training Step: 1210...  Training loss: 1.7193...  0.2203 sec/batch
Epoch: 8/30...  Training Step: 1211...  Training loss: 1.7331...  0.2078 sec/batch
Epoch: 8/30...  Training Step: 1212...  Training loss: 1.6863...  0.1702 sec/batch
Epoch: 8/30...  Training Step: 1213...  Training loss: 1.6971...  0.1579 sec/batch
Epoc

Epoch: 8/30...  Training Step: 1301...  Training loss: 1.6882...  0.2484 sec/batch
Epoch: 8/30...  Training Step: 1302...  Training loss: 1.6665...  0.2628 sec/batch
Epoch: 8/30...  Training Step: 1303...  Training loss: 1.6981...  0.2665 sec/batch
Epoch: 8/30...  Training Step: 1304...  Training loss: 1.7146...  0.2766 sec/batch
Epoch: 8/30...  Training Step: 1305...  Training loss: 1.7181...  0.2289 sec/batch
Epoch: 8/30...  Training Step: 1306...  Training loss: 1.7075...  0.2451 sec/batch
Epoch: 8/30...  Training Step: 1307...  Training loss: 1.6759...  0.2501 sec/batch
Epoch: 8/30...  Training Step: 1308...  Training loss: 1.7156...  0.2157 sec/batch
Epoch: 8/30...  Training Step: 1309...  Training loss: 1.6784...  0.2392 sec/batch
Epoch: 8/30...  Training Step: 1310...  Training loss: 1.7087...  0.2423 sec/batch
Epoch: 8/30...  Training Step: 1311...  Training loss: 1.6708...  0.2057 sec/batch
Epoch: 8/30...  Training Step: 1312...  Training loss: 1.6844...  0.2959 sec/batch
Epoc

Epoch: 9/30...  Training Step: 1400...  Training loss: 1.6848...  0.2042 sec/batch
Epoch: 9/30...  Training Step: 1401...  Training loss: 1.6859...  0.1825 sec/batch
Epoch: 9/30...  Training Step: 1402...  Training loss: 1.6544...  0.2551 sec/batch
Epoch: 9/30...  Training Step: 1403...  Training loss: 1.6489...  0.1808 sec/batch
Epoch: 9/30...  Training Step: 1404...  Training loss: 1.7095...  0.2233 sec/batch
Epoch: 9/30...  Training Step: 1405...  Training loss: 1.6814...  0.2278 sec/batch
Epoch: 9/30...  Training Step: 1406...  Training loss: 1.6533...  0.2506 sec/batch
Epoch: 9/30...  Training Step: 1407...  Training loss: 1.6876...  0.1853 sec/batch
Epoch: 9/30...  Training Step: 1408...  Training loss: 1.6815...  0.1660 sec/batch
Epoch: 9/30...  Training Step: 1409...  Training loss: 1.6776...  0.1964 sec/batch
Epoch: 9/30...  Training Step: 1410...  Training loss: 1.6838...  0.2802 sec/batch
Epoch: 9/30...  Training Step: 1411...  Training loss: 1.7223...  0.2847 sec/batch
Epoc

Epoch: 9/30...  Training Step: 1499...  Training loss: 1.6259...  0.2452 sec/batch
Epoch: 9/30...  Training Step: 1500...  Training loss: 1.6439...  0.2967 sec/batch
Epoch: 9/30...  Training Step: 1501...  Training loss: 1.6405...  0.2061 sec/batch
Epoch: 9/30...  Training Step: 1502...  Training loss: 1.6422...  0.1885 sec/batch
Epoch: 9/30...  Training Step: 1503...  Training loss: 1.6270...  0.2647 sec/batch
Epoch: 9/30...  Training Step: 1504...  Training loss: 1.6420...  0.1843 sec/batch
Epoch: 9/30...  Training Step: 1505...  Training loss: 1.6321...  0.2723 sec/batch
Epoch: 9/30...  Training Step: 1506...  Training loss: 1.5968...  0.2165 sec/batch
Epoch: 9/30...  Training Step: 1507...  Training loss: 1.6623...  0.2759 sec/batch
Epoch: 9/30...  Training Step: 1508...  Training loss: 1.6353...  0.1991 sec/batch
Epoch: 9/30...  Training Step: 1509...  Training loss: 1.6356...  0.2861 sec/batch
Epoch: 9/30...  Training Step: 1510...  Training loss: 1.6433...  0.1967 sec/batch
Epoc

Epoch: 10/30...  Training Step: 1597...  Training loss: 1.6456...  0.2010 sec/batch
Epoch: 10/30...  Training Step: 1598...  Training loss: 1.6100...  0.2768 sec/batch
Epoch: 10/30...  Training Step: 1599...  Training loss: 1.6119...  0.3001 sec/batch
Epoch: 10/30...  Training Step: 1600...  Training loss: 1.6152...  0.2578 sec/batch
Epoch: 10/30...  Training Step: 1601...  Training loss: 1.5864...  0.2546 sec/batch
Epoch: 10/30...  Training Step: 1602...  Training loss: 1.6221...  0.3229 sec/batch
Epoch: 10/30...  Training Step: 1603...  Training loss: 1.6236...  0.2028 sec/batch
Epoch: 10/30...  Training Step: 1604...  Training loss: 1.6400...  0.2530 sec/batch
Epoch: 10/30...  Training Step: 1605...  Training loss: 1.6361...  0.1895 sec/batch
Epoch: 10/30...  Training Step: 1606...  Training loss: 1.5846...  0.2248 sec/batch
Epoch: 10/30...  Training Step: 1607...  Training loss: 1.6155...  0.1675 sec/batch
Epoch: 10/30...  Training Step: 1608...  Training loss: 1.6289...  0.2344 se

Epoch: 10/30...  Training Step: 1695...  Training loss: 1.5649...  0.2583 sec/batch
Epoch: 10/30...  Training Step: 1696...  Training loss: 1.5695...  0.2342 sec/batch
Epoch: 10/30...  Training Step: 1697...  Training loss: 1.5885...  0.2432 sec/batch
Epoch: 10/30...  Training Step: 1698...  Training loss: 1.5895...  0.2428 sec/batch
Epoch: 10/30...  Training Step: 1699...  Training loss: 1.6033...  0.2475 sec/batch
Epoch: 10/30...  Training Step: 1700...  Training loss: 1.5742...  0.2302 sec/batch
Epoch: 11/30...  Training Step: 1701...  Training loss: 1.6647...  0.1942 sec/batch
Epoch: 11/30...  Training Step: 1702...  Training loss: 1.5638...  0.1974 sec/batch
Epoch: 11/30...  Training Step: 1703...  Training loss: 1.5824...  0.1652 sec/batch
Epoch: 11/30...  Training Step: 1704...  Training loss: 1.6133...  0.2819 sec/batch
Epoch: 11/30...  Training Step: 1705...  Training loss: 1.6119...  0.2119 sec/batch
Epoch: 11/30...  Training Step: 1706...  Training loss: 1.6265...  0.1901 se

Epoch: 11/30...  Training Step: 1793...  Training loss: 1.5974...  0.2430 sec/batch
Epoch: 11/30...  Training Step: 1794...  Training loss: 1.6172...  0.2035 sec/batch
Epoch: 11/30...  Training Step: 1795...  Training loss: 1.5715...  0.2758 sec/batch
Epoch: 11/30...  Training Step: 1796...  Training loss: 1.5503...  0.2516 sec/batch
Epoch: 11/30...  Training Step: 1797...  Training loss: 1.5429...  0.2929 sec/batch
Epoch: 11/30...  Training Step: 1798...  Training loss: 1.5628...  0.2576 sec/batch
Epoch: 11/30...  Training Step: 1799...  Training loss: 1.5699...  0.2861 sec/batch
Epoch: 11/30...  Training Step: 1800...  Training loss: 1.6261...  0.2283 sec/batch
Epoch: 11/30...  Training Step: 1801...  Training loss: 1.6161...  0.1795 sec/batch
Epoch: 11/30...  Training Step: 1802...  Training loss: 1.5819...  0.2460 sec/batch
Epoch: 11/30...  Training Step: 1803...  Training loss: 1.5760...  0.1898 sec/batch
Epoch: 11/30...  Training Step: 1804...  Training loss: 1.5823...  0.1628 se

Epoch: 12/30...  Training Step: 1891...  Training loss: 1.5587...  0.2017 sec/batch
Epoch: 12/30...  Training Step: 1892...  Training loss: 1.5150...  0.3013 sec/batch
Epoch: 12/30...  Training Step: 1893...  Training loss: 1.5288...  0.2845 sec/batch
Epoch: 12/30...  Training Step: 1894...  Training loss: 1.5204...  0.2333 sec/batch
Epoch: 12/30...  Training Step: 1895...  Training loss: 1.5714...  0.2406 sec/batch
Epoch: 12/30...  Training Step: 1896...  Training loss: 1.5496...  0.3087 sec/batch
Epoch: 12/30...  Training Step: 1897...  Training loss: 1.5756...  0.2437 sec/batch
Epoch: 12/30...  Training Step: 1898...  Training loss: 1.5794...  0.2721 sec/batch
Epoch: 12/30...  Training Step: 1899...  Training loss: 1.5492...  0.1888 sec/batch
Epoch: 12/30...  Training Step: 1900...  Training loss: 1.5241...  0.1598 sec/batch
Epoch: 12/30...  Training Step: 1901...  Training loss: 1.5539...  0.1661 sec/batch
Epoch: 12/30...  Training Step: 1902...  Training loss: 1.5294...  0.2179 se

Epoch: 12/30...  Training Step: 1989...  Training loss: 1.5240...  0.2606 sec/batch
Epoch: 12/30...  Training Step: 1990...  Training loss: 1.5480...  0.2535 sec/batch
Epoch: 12/30...  Training Step: 1991...  Training loss: 1.5240...  0.2017 sec/batch
Epoch: 12/30...  Training Step: 1992...  Training loss: 1.5358...  0.2351 sec/batch
Epoch: 12/30...  Training Step: 1993...  Training loss: 1.5521...  0.1957 sec/batch
Epoch: 12/30...  Training Step: 1994...  Training loss: 1.5764...  0.1557 sec/batch
Epoch: 12/30...  Training Step: 1995...  Training loss: 1.5186...  0.2588 sec/batch
Epoch: 12/30...  Training Step: 1996...  Training loss: 1.5269...  0.2325 sec/batch
Epoch: 12/30...  Training Step: 1997...  Training loss: 1.5429...  0.2561 sec/batch
Epoch: 12/30...  Training Step: 1998...  Training loss: 1.5528...  0.1751 sec/batch
Epoch: 12/30...  Training Step: 1999...  Training loss: 1.5439...  0.2815 sec/batch
Epoch: 12/30...  Training Step: 2000...  Training loss: 1.5802...  0.2402 se

Epoch: 13/30...  Training Step: 2088...  Training loss: 1.5483...  0.2532 sec/batch
Epoch: 13/30...  Training Step: 2089...  Training loss: 1.5276...  0.1976 sec/batch
Epoch: 13/30...  Training Step: 2090...  Training loss: 1.5351...  0.2258 sec/batch
Epoch: 13/30...  Training Step: 2091...  Training loss: 1.5862...  0.2459 sec/batch
Epoch: 13/30...  Training Step: 2092...  Training loss: 1.5060...  0.2803 sec/batch
Epoch: 13/30...  Training Step: 2093...  Training loss: 1.5060...  0.2652 sec/batch
Epoch: 13/30...  Training Step: 2094...  Training loss: 1.5061...  0.2289 sec/batch
Epoch: 13/30...  Training Step: 2095...  Training loss: 1.5218...  0.2524 sec/batch
Epoch: 13/30...  Training Step: 2096...  Training loss: 1.5409...  0.1886 sec/batch
Epoch: 13/30...  Training Step: 2097...  Training loss: 1.5292...  0.2158 sec/batch
Epoch: 13/30...  Training Step: 2098...  Training loss: 1.5006...  0.2182 sec/batch
Epoch: 13/30...  Training Step: 2099...  Training loss: 1.5477...  0.1775 se

Epoch: 13/30...  Training Step: 2186...  Training loss: 1.4677...  0.2402 sec/batch
Epoch: 13/30...  Training Step: 2187...  Training loss: 1.5278...  0.1871 sec/batch
Epoch: 13/30...  Training Step: 2188...  Training loss: 1.5211...  0.2959 sec/batch
Epoch: 13/30...  Training Step: 2189...  Training loss: 1.5115...  0.2295 sec/batch
Epoch: 13/30...  Training Step: 2190...  Training loss: 1.4970...  0.2079 sec/batch
Epoch: 13/30...  Training Step: 2191...  Training loss: 1.5188...  0.2626 sec/batch
Epoch: 13/30...  Training Step: 2192...  Training loss: 1.5297...  0.2117 sec/batch
Epoch: 13/30...  Training Step: 2193...  Training loss: 1.5093...  0.1543 sec/batch
Epoch: 13/30...  Training Step: 2194...  Training loss: 1.4940...  0.1703 sec/batch
Epoch: 13/30...  Training Step: 2195...  Training loss: 1.4932...  0.1973 sec/batch
Epoch: 13/30...  Training Step: 2196...  Training loss: 1.4777...  0.1765 sec/batch
Epoch: 13/30...  Training Step: 2197...  Training loss: 1.4968...  0.2117 se

Epoch: 14/30...  Training Step: 2284...  Training loss: 1.5348...  0.2692 sec/batch
Epoch: 14/30...  Training Step: 2285...  Training loss: 1.5247...  0.1838 sec/batch
Epoch: 14/30...  Training Step: 2286...  Training loss: 1.4645...  0.1873 sec/batch
Epoch: 14/30...  Training Step: 2287...  Training loss: 1.4803...  0.2339 sec/batch
Epoch: 14/30...  Training Step: 2288...  Training loss: 1.5100...  0.2619 sec/batch
Epoch: 14/30...  Training Step: 2289...  Training loss: 1.5155...  0.1892 sec/batch
Epoch: 14/30...  Training Step: 2290...  Training loss: 1.5274...  0.2430 sec/batch
Epoch: 14/30...  Training Step: 2291...  Training loss: 1.5186...  0.2784 sec/batch
Epoch: 14/30...  Training Step: 2292...  Training loss: 1.5113...  0.1851 sec/batch
Epoch: 14/30...  Training Step: 2293...  Training loss: 1.5120...  0.2205 sec/batch
Epoch: 14/30...  Training Step: 2294...  Training loss: 1.4860...  0.1726 sec/batch
Epoch: 14/30...  Training Step: 2295...  Training loss: 1.5064...  0.1581 se

Epoch: 15/30...  Training Step: 2382...  Training loss: 1.4531...  0.2511 sec/batch
Epoch: 15/30...  Training Step: 2383...  Training loss: 1.4649...  0.2265 sec/batch
Epoch: 15/30...  Training Step: 2384...  Training loss: 1.5084...  0.2237 sec/batch
Epoch: 15/30...  Training Step: 2385...  Training loss: 1.5011...  0.2492 sec/batch
Epoch: 15/30...  Training Step: 2386...  Training loss: 1.5124...  0.2409 sec/batch
Epoch: 15/30...  Training Step: 2387...  Training loss: 1.4827...  0.2577 sec/batch
Epoch: 15/30...  Training Step: 2388...  Training loss: 1.4851...  0.2236 sec/batch
Epoch: 15/30...  Training Step: 2389...  Training loss: 1.4556...  0.2620 sec/batch
Epoch: 15/30...  Training Step: 2390...  Training loss: 1.4586...  0.2688 sec/batch
Epoch: 15/30...  Training Step: 2391...  Training loss: 1.4510...  0.2335 sec/batch
Epoch: 15/30...  Training Step: 2392...  Training loss: 1.4749...  0.1828 sec/batch
Epoch: 15/30...  Training Step: 2393...  Training loss: 1.4664...  0.1630 se

Epoch: 15/30...  Training Step: 2480...  Training loss: 1.5155...  0.1789 sec/batch
Epoch: 15/30...  Training Step: 2481...  Training loss: 1.5203...  0.2294 sec/batch
Epoch: 15/30...  Training Step: 2482...  Training loss: 1.4935...  0.2303 sec/batch
Epoch: 15/30...  Training Step: 2483...  Training loss: 1.4777...  0.2335 sec/batch
Epoch: 15/30...  Training Step: 2484...  Training loss: 1.4736...  0.2309 sec/batch
Epoch: 15/30...  Training Step: 2485...  Training loss: 1.4686...  0.1892 sec/batch
Epoch: 15/30...  Training Step: 2486...  Training loss: 1.4681...  0.1993 sec/batch
Epoch: 15/30...  Training Step: 2487...  Training loss: 1.4768...  0.2674 sec/batch
Epoch: 15/30...  Training Step: 2488...  Training loss: 1.4548...  0.1726 sec/batch
Epoch: 15/30...  Training Step: 2489...  Training loss: 1.4936...  0.1699 sec/batch
Epoch: 15/30...  Training Step: 2490...  Training loss: 1.4564...  0.2465 sec/batch
Epoch: 15/30...  Training Step: 2491...  Training loss: 1.4642...  0.2516 se

Epoch: 16/30...  Training Step: 2578...  Training loss: 1.4805...  0.2208 sec/batch
Epoch: 16/30...  Training Step: 2579...  Training loss: 1.4549...  0.1809 sec/batch
Epoch: 16/30...  Training Step: 2580...  Training loss: 1.4301...  0.2078 sec/batch
Epoch: 16/30...  Training Step: 2581...  Training loss: 1.4569...  0.2071 sec/batch
Epoch: 16/30...  Training Step: 2582...  Training loss: 1.4442...  0.2007 sec/batch
Epoch: 16/30...  Training Step: 2583...  Training loss: 1.4598...  0.2233 sec/batch
Epoch: 16/30...  Training Step: 2584...  Training loss: 1.4386...  0.1767 sec/batch
Epoch: 16/30...  Training Step: 2585...  Training loss: 1.4437...  0.2486 sec/batch
Epoch: 16/30...  Training Step: 2586...  Training loss: 1.4564...  0.2319 sec/batch
Epoch: 16/30...  Training Step: 2587...  Training loss: 1.4955...  0.2415 sec/batch
Epoch: 16/30...  Training Step: 2588...  Training loss: 1.4811...  0.2508 sec/batch
Epoch: 16/30...  Training Step: 2589...  Training loss: 1.4550...  0.2090 se

Epoch: 16/30...  Training Step: 2676...  Training loss: 1.4425...  0.2015 sec/batch
Epoch: 16/30...  Training Step: 2677...  Training loss: 1.4571...  0.2012 sec/batch
Epoch: 16/30...  Training Step: 2678...  Training loss: 1.4602...  0.1883 sec/batch
Epoch: 16/30...  Training Step: 2679...  Training loss: 1.4649...  0.1662 sec/batch
Epoch: 16/30...  Training Step: 2680...  Training loss: 1.4889...  0.2281 sec/batch
Epoch: 16/30...  Training Step: 2681...  Training loss: 1.4970...  0.2684 sec/batch
Epoch: 16/30...  Training Step: 2682...  Training loss: 1.4249...  0.2506 sec/batch
Epoch: 16/30...  Training Step: 2683...  Training loss: 1.4302...  0.2199 sec/batch
Epoch: 16/30...  Training Step: 2684...  Training loss: 1.4837...  0.2130 sec/batch
Epoch: 16/30...  Training Step: 2685...  Training loss: 1.4511...  0.1697 sec/batch
Epoch: 16/30...  Training Step: 2686...  Training loss: 1.4879...  0.1657 sec/batch
Epoch: 16/30...  Training Step: 2687...  Training loss: 1.4304...  0.2276 se

Epoch: 17/30...  Training Step: 2774...  Training loss: 1.4278...  0.2417 sec/batch
Epoch: 17/30...  Training Step: 2775...  Training loss: 1.4514...  0.2593 sec/batch
Epoch: 17/30...  Training Step: 2776...  Training loss: 1.4654...  0.2266 sec/batch
Epoch: 17/30...  Training Step: 2777...  Training loss: 1.4515...  0.2270 sec/batch
Epoch: 17/30...  Training Step: 2778...  Training loss: 1.4179...  0.2022 sec/batch
Epoch: 17/30...  Training Step: 2779...  Training loss: 1.4642...  0.2007 sec/batch
Epoch: 17/30...  Training Step: 2780...  Training loss: 1.4579...  0.2643 sec/batch
Epoch: 17/30...  Training Step: 2781...  Training loss: 1.4464...  0.1935 sec/batch
Epoch: 17/30...  Training Step: 2782...  Training loss: 1.4319...  0.2011 sec/batch
Epoch: 17/30...  Training Step: 2783...  Training loss: 1.4266...  0.2335 sec/batch
Epoch: 17/30...  Training Step: 2784...  Training loss: 1.4428...  0.2175 sec/batch
Epoch: 17/30...  Training Step: 2785...  Training loss: 1.4614...  0.1846 se

Epoch: 17/30...  Training Step: 2873...  Training loss: 1.4346...  0.2119 sec/batch
Epoch: 17/30...  Training Step: 2874...  Training loss: 1.4156...  0.1789 sec/batch
Epoch: 17/30...  Training Step: 2875...  Training loss: 1.4138...  0.2171 sec/batch
Epoch: 17/30...  Training Step: 2876...  Training loss: 1.3976...  0.2849 sec/batch
Epoch: 17/30...  Training Step: 2877...  Training loss: 1.4239...  0.1885 sec/batch
Epoch: 17/30...  Training Step: 2878...  Training loss: 1.4357...  0.2147 sec/batch
Epoch: 17/30...  Training Step: 2879...  Training loss: 1.4149...  0.1570 sec/batch
Epoch: 17/30...  Training Step: 2880...  Training loss: 1.4522...  0.2262 sec/batch
Epoch: 17/30...  Training Step: 2881...  Training loss: 1.4230...  0.2540 sec/batch
Epoch: 17/30...  Training Step: 2882...  Training loss: 1.4554...  0.2389 sec/batch
Epoch: 17/30...  Training Step: 2883...  Training loss: 1.4318...  0.1762 sec/batch
Epoch: 17/30...  Training Step: 2884...  Training loss: 1.4283...  0.1608 se

Epoch: 18/30...  Training Step: 2971...  Training loss: 1.4451...  0.2435 sec/batch
Epoch: 18/30...  Training Step: 2972...  Training loss: 1.4326...  0.1682 sec/batch
Epoch: 18/30...  Training Step: 2973...  Training loss: 1.4416...  0.1586 sec/batch
Epoch: 18/30...  Training Step: 2974...  Training loss: 1.4038...  0.1959 sec/batch
Epoch: 18/30...  Training Step: 2975...  Training loss: 1.4357...  0.2058 sec/batch
Epoch: 18/30...  Training Step: 2976...  Training loss: 1.4139...  0.1718 sec/batch
Epoch: 18/30...  Training Step: 2977...  Training loss: 1.4355...  0.2481 sec/batch
Epoch: 18/30...  Training Step: 2978...  Training loss: 1.4507...  0.2183 sec/batch
Epoch: 18/30...  Training Step: 2979...  Training loss: 1.4484...  0.2244 sec/batch
Epoch: 18/30...  Training Step: 2980...  Training loss: 1.4039...  0.1541 sec/batch
Epoch: 18/30...  Training Step: 2981...  Training loss: 1.4120...  0.2127 sec/batch
Epoch: 18/30...  Training Step: 2982...  Training loss: 1.4064...  0.1799 se

In [27]:
# Inspect the checkpoint index kept in the 'checkpoints' directory. The bare
# expression is the last line of the cell so the notebook displays its value.
tf.train.get_checkpoint_state('checkpoints')

model_checkpoint_path: "checkpoints/i1000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1000_l512.ckpt"

In [28]:
def pick_top_n(preds, vocab_size, top_n=5):
    """Sample one class index from the ``top_n`` highest-scoring predictions.

    Parameters
    ----------
    preds : array-like
        Prediction scores. Singleton axes are squeezed away, and the result
        must hold ``vocab_size`` non-negative entries.
    vocab_size : int
        Number of classes to draw from (forwarded to ``np.random.choice``).
    top_n : int, optional
        How many of the largest entries stay eligible for sampling.

    Returns
    -------
    The sampled class index.

    Notes
    -----
    ``preds`` is never modified; the filtering happens on a private copy.
    """
    # np.squeeze returns a view of its input, so zeroing entries on it would
    # silently overwrite the caller's array. Take a float64 copy instead.
    p = np.array(np.squeeze(preds), dtype=np.float64)
    # Keep only the top_n largest entries; everything else gets probability 0.
    p[np.argsort(p)[:-top_n]] = 0
    # Renormalise the survivors so they form a probability distribution.
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [29]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="Гостиная Анны Павловны начала понемногу наполняться."):
    """Generate text with a restored character-level model.

    The model is first fed ``prime`` one character at a time to warm up its
    state, then repeatedly fed its own sampled output.

    Parameters
    ----------
    checkpoint : str
        Checkpoint path handed to ``saver.restore``.
    n_samples : int
        Number of iterations of the generation loop. One extra character is
        sampled right after the prime, so ``n_samples + 1`` characters are
        appended to ``prime`` in total.
    lstm_size : int
        Forwarded to ``CharRNN`` as ``lstm_size``.
    vocab_size : int
        Number of distinct characters; passed to ``CharRNN`` and to
        ``pick_top_n``.
    prime : str, optional
        Non-empty seed text. Every character must be a key of
        ``vocab_to_int``.

    Returns
    -------
    str
        ``prime`` followed by the generated characters.

    Raises
    ------
    ValueError
        If ``prime`` is empty (there would be no prediction to sample from).
    """
    # Fail fast: without at least one prime character there is no prediction
    # to start sampling from.
    if not prime:
        raise ValueError("prime must contain at least one character")

    samples = list(prime)
    model = CharRNN(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        # Single 1x1 input buffer reused for every step.
        x = np.zeros((1, 1))

        def _step(char_id, state):
            """Feed one character id and return (prediction, next state)."""
            x[0, 0] = char_id
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: state}
            return sess.run([model.prediction, model.final_state],
                            feed_dict=feed)

        # Warm up the recurrent state on the prime text.
        for c in prime:
            preds, new_state = _step(vocab_to_int[c], new_state)

        # First generated character comes from the prediction after the prime.
        c = pick_top_n(preds, vocab_size)
        samples.append(int_to_vocab[c])

        # Feed each sampled character back in to produce the next one.
        for _ in range(n_samples):
            preds, new_state = _step(c, new_state)
            c = pick_top_n(preds, vocab_size)
            samples.append(int_to_vocab[c])

    return ''.join(samples)

In [30]:
# Generate text from the most recently saved checkpoint.
checkpoint = tf.train.latest_checkpoint('checkpoints')
# latest_checkpoint returns None when the directory holds no checkpoint;
# stop here with a clear message instead of failing inside saver.restore.
if checkpoint is None:
    raise FileNotFoundError("No checkpoint found in 'checkpoints'; train the model first.")
samp = sample(checkpoint, 2000, lstm_size, len(vocab))
# print (rather than a bare expression) so newlines in the text render as-is.
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints/i1000_l512.ckpt
Гостиная Анны Павловны начала понемногу наполняться.

Семей придужалось в песелица и своех серделовости с теперь не выславится и высольным не выпоснимаясь, и она была одной совершенно вы сказала ничего она сказал на совериенностью в доменной слишкого слиданным полежалим, от намило и пать и ответа и с как было понимал, он не все помочула и, привав им в постерной. Она вишал настольно престили его продотные волости, к тому которые он странное поледу, что предстание оточно столо вставался на том, что она ничестна не мыгатеть. Она все стала оставал вся себя верово не межно и непрастние свои поднавшие стало. Он получал совели встому, что он не случно непись на подошло его не могла. Она было слешить ни слывали, которое приведить ей, на теперь своима и придалась из к него с своим с нимименния, подомнила она сона понавалась своюго стерным, кордаты, как начто беселие возно все друго и в думой весловие все все весело, ни он при