## Neural Tolstoy Model (a character-prediction LSTM)

In this part you will train a character-prediction LSTM that predicts the next character from the characters that precede it.
The training data is from the book "War and Peace" by Leo Tolstoy. 

In [11]:
from __future__ import print_function
import datetime
import os
import numpy as np
import tensorflow as tf
import tolstoy_reader

def get_default_gpu_session(fraction=0.333):
    """Open a TensorFlow session with soft placement and a capped, growable GPU pool.

    Args:
        fraction: value stored in ``per_process_gpu_memory_fraction``.

    Returns:
        A new ``tf.Session`` built from that configuration.
    """
    gpu_opts = tf.GPUOptions(
        allow_growth=True,
        per_process_gpu_memory_fraction=fraction,
    )
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=gpu_opts,
    )
    return tf.Session(config=session_config)

def run_tolstoy_train(n_hid, n_epochs=10, batch_size=200, seq_size=100):
    """Train the character-prediction LSTM and periodically print sampled text.

    Builds the model, logs the loss summary to a timestamped run directory
    under ``tolstoy/``, prints the loss every 100 iterations and, every 1000
    iterations, greedily generates a 50-character continuation of one input
    sequence.

    Args:
        n_hid: hidden dimension passed to the model as ``n_hidden_dim``.
        n_epochs: number of passes over the batch source.
        batch_size: forwarded to ``tolstoy_reader.batch_tolstoy_generator``.
        seq_size: sequence length; forwarded to the reader and also used as
            the model's ``n_steps_per_batch`` so the two always agree.

    Raises:
        OSError: if the log directory cannot be created and does not exist.
    """
    # generate data
    btg, map_dict, backmap_dict = \
        tolstoy_reader.batch_tolstoy_generator(batch_size=batch_size, seq_size=seq_size)
    shape = dict(n_steps_per_batch=seq_size, n_unique_ids=len(map_dict), n_hidden_dim=n_hid)
    # define LSTM
    model = build_lstm_discrete_prediction_model(shape)

    #logdir = './tensorboard/tolstoy'  # if on Windows
    logdir = 'tolstoy'  # if on Unix
    try:
        os.makedirs(logdir)
    except os.error:
        # An existing directory is expected on re-runs; any other failure
        # (permissions, a plain file in the way, ...) must not be hidden.
        if not os.path.isdir(logdir):
            raise
    time_now = datetime.datetime.now().strftime("%d-%b-%H-%M-%S")
    run_name = 'hidden=%d' % n_hid
    sum_path = os.path.join(logdir, run_name + '_' + time_now)
    print(sum_path)

    # Running count of training steps. Deriving the step from
    # epoch_i * (largest index seen) made the first step of every later epoch
    # reuse the previous epoch's last step value.
    global_step = 0
    with model['graph'].as_default() as g, get_default_gpu_session(0.9) as sess:
        sess.run(tf.global_variables_initializer())
        sum_writer = tf.summary.FileWriter(sum_path, g)
        try:
            for epoch_i in range(n_epochs):
                for iter_i, data_batch in enumerate(btg):
                    # run training step
                    # data_batch is zipped against [X, y], so it is presumably
                    # an (inputs, targets) pair -- confirm in tolstoy_reader.
                    train_feed_dict = dict(zip(model['inputs'], data_batch))
                    to_compute = [model['train_op'], model['summ'], model['loss']]
                    _, summ, loss_val = sess.run(to_compute, train_feed_dict)

                    # for tensorboard
                    sum_writer.add_summary(summ, global_step)
                    sum_writer.flush()
                    global_step += 1

                    # display loss
                    if iter_i % 100 == 0:
                        print(loss_val, end=', ')
                    if iter_i % 1000:
                        continue

                    # test generation
                    pred_length = 50
                    # Row 0 of the inputs of a freshly drawn batch, kept 2-D.
                    # NOTE(review): if btg is a one-shot generator this draw
                    # also consumes a training batch -- confirm in the reader.
                    data_input = next(iter(btg))[0][[0]]
                    original_sample = data_input.copy()
                    pred_seq = []
                    for _ in range(pred_length):
                        pred = sess.run(model['pred'], {model['inputs'][0]: data_input})
                        pred_seq.append(pred[0])
                        # Slide the window left by one and append the prediction.
                        data_input = np.roll(data_input, -1, axis=1)
                        data_input[0, -1] = pred[0]
                    print('[%d] Input text:' % (iter_i))
                    print(''.join([backmap_dict[x] for x in original_sample[0]]))
                    print('[%d] Generated continuation:' % (iter_i))
                    print(''.join([backmap_dict[x] for x in pred_seq]))
                    print(pred_seq)
                    print()
        finally:
            # Release the event file even if training is interrupted.
            sum_writer.close()
                
def build_lstm_discrete_prediction_model(shape):
    """Assemble the graph of a hand-unrolled LSTM that predicts one discrete id.

    Each time step feeds a single scalar per example (column ``t`` of ``X``,
    reshaped to ``[-1, 1]``) into the cell; the final hidden state is projected
    to ``n_unique_ids`` logits.

    Args:
        shape: dict with keys ``n_steps_per_batch``, ``n_unique_ids`` and
            ``n_hidden_dim``.

    Returns:
        dict with keys ``inputs`` (the ``[X, y]`` placeholders), ``loss``,
        ``train_op``, ``summ``, ``graph`` and ``pred``.
    """
    n_steps = shape['n_steps_per_batch']
    n_hidden = shape['n_hidden_dim']
    n_ids = shape['n_unique_ids']

    graph = tf.Graph()
    with graph.as_default():
        X = tf.placeholder(tf.float32, [None, n_steps])
        y = tf.placeholder(tf.int32, [None])

        # LSTM parameters and the unrolled recurrence (scope: weights)
        with tf.variable_scope('weights'):
            gates = ('i', 'f', 'c', 'o')

            # Input-to-hidden rows, one per gate: W_i, W_f, W_c, W_o.
            in_w = {}
            for gate in gates:
                in_w[gate] = tf.get_variable('W_' + gate, [1, n_hidden])

            # Hidden-to-hidden matrices, one per gate: U_i, U_f, U_c, U_o.
            rec_w = {}
            for gate in gates:
                rec_w[gate] = tf.get_variable('U_' + gate, [n_hidden, n_hidden])

            # Cell-to-output-gate matrix and the readout projection.
            peephole_o = tf.get_variable('V_o', [n_hidden, n_hidden])
            readout = tf.get_variable('W_y', [n_hidden, n_ids])

            # Trainable initial states; the leading dimension of 1 broadcasts
            # against the batch dimension in the first step's additions.
            hidden = tf.get_variable('h_0', [1, n_hidden])
            cell = tf.get_variable('c_0', [1, n_hidden])

            def pre_activation(gate, x_col, h_prev):
                # x * W_gate + h * U_gate (the model has no bias terms)
                return tf.matmul(x_col, in_w[gate]) + tf.matmul(h_prev, rec_w[gate])

            for step in range(n_steps):
                x_col = tf.reshape(X[:, step], [-1, 1])
                in_gate = tf.nn.sigmoid(pre_activation('i', x_col, hidden))
                candidate = tf.nn.tanh(pre_activation('c', x_col, hidden))
                forget_gate = tf.nn.sigmoid(pre_activation('f', x_col, hidden))
                cell = in_gate * candidate + forget_gate * cell
                # The output gate also sees the freshly updated cell state.
                out_gate = tf.nn.sigmoid(
                    pre_activation('o', x_col, hidden) + tf.matmul(cell, peephole_o))
                hidden = out_gate * tf.tanh(cell)

        # Final hidden state is the sequence representation.
        output = hidden

        # logits for each discrete output, then mean cross-entropy
        logits = tf.matmul(output, readout)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))

        # pred, train_op
        pred = tf.argmax(logits, axis=1)
        train_op = tf.train.AdamOptimizer().minimize(loss)
        summ = tf.summary.scalar('loss_summ', loss)

    return dict(inputs=[X, y], loss=loss, train_op=train_op, summ=summ,
                graph=graph, pred=pred)

Run the LSTM and see what it says! 

In [12]:
# Train with a hidden dimension of 200; the run directory, losses and
# sampled continuations are printed as training progresses.
hidden = 200
run_tolstoy_train(hidden)

tolstoy/hidden=200_24-Apr-21-03-36
4.43514, [0] Input text:
 weason to ask to be pwomoted to be a German! Now p'waps Wussians will get a look in. As it was, dev
[0] Generated continuation:
F(F(FFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
[74, 39, 74, 39, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76]

2.98381, 2.8111, 2.79532, 2.7886, 2.78853, 2.71783, 2.59134, 2.72734, 2.54428, 2.49091, [1000] Input text:
he real relation of the forces had already made itself felt in Petersburg, was dispatched, Kutuzov h
[1000] Generated continuation:
is the the the the the the the the the the the the
[79, 22, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38, 11, 69, 43, 38]

2.6716, 2.26178, 2.32184, 2.30663, 2.18396, 2.25039, 2.37151, 2.39