In [1]:
import time

import numpy as np
import tensorflow as tf
import rnndatasets.warandpeace as data

In [2]:
# make a super quick model
def _get_recurrent_model(cell_class, input_data, shape, num_outputs, sequence_length, batch_size):
    """Build an embedded, stacked recurrent net with a linear output projection.

    Shared implementation behind ``get_lstm_model`` and ``get_rnn_model``; the
    two differ only in the cell class they stack.

    Variables are created with ``tf.get_variable`` under fixed names
    ('embedding', 'softmax_w', 'softmax_b'), so building a second model that
    shares weights requires the caller to enable variable reuse on the
    enclosing variable scope.

    Args:
        cell_class: callable taking a layer width and returning an RNN cell,
            e.g. ``tf.nn.rnn_cell.BasicLSTMCell``.
        input_data: integer symbol ids; after embedding it is split into
            ``sequence_length`` slices along axis 1, one per time step.
        shape: list of layer widths, one cell per entry. ``shape[0]`` is also
            the embedding width and ``shape[-1]`` the width fed to the output
            projection.
        num_outputs: number of output classes; also the number of rows of the
            embedding table (same outs as ins).
        sequence_length: number of steps the network is unrolled for.
        batch_size: batch size used to build the zero initial state.

    Returns:
        ``(initial_state, logits, state)``: the zero initial state, the
        projected outputs as a matrix with ``num_outputs`` columns, and the
        state returned by ``tf.nn.rnn`` after the last step.
    """
    cells = [cell_class(layer) for layer in shape]
    # no dropout, we are going super simple
    cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.device('/cpu:0'):
        # do the embedding on the cpu always
        # same outs as ins
        embedding = tf.get_variable('embedding', [num_outputs, shape[0]])
        inputs = tf.nn.embedding_lookup(embedding, input_data)

    # tf.nn.rnn takes a list with one tensor per time step, so cut the
    # embedded batch along the time axis and drop the singleton dimension
    inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, sequence_length, inputs)]
    outputs, state = tf.nn.rnn(cell, inputs, initial_state=initial_state)
    outputs = tf.reshape(tf.concat(1, outputs), [-1, shape[-1]])  # turn list of outputs into a big tensor
    # now do the output projection
    softmax_w = tf.get_variable('softmax_w', [shape[-1], num_outputs])
    softmax_b = tf.get_variable('softmax_b', [num_outputs])
    logits = tf.matmul(outputs, softmax_w) + softmax_b
    return initial_state, logits, state


def get_lstm_model(input_data, shape, num_outputs, sequence_length, batch_size):
    """gets an lstm model with input projection and the given number of outputs

    Stacks one ``BasicLSTMCell`` per entry of ``shape``.

    Returns:
        ``(initial_state, logits, final_state)``.
    """
    return _get_recurrent_model(tf.nn.rnn_cell.BasicLSTMCell, input_data, shape,
                                num_outputs, sequence_length, batch_size)


# make a super quick model
def get_rnn_model(input_data, shape, num_outputs, sequence_length, batch_size):
    """gets a vanilla rnn model with input projection and the given number of outputs

    Same graph as ``get_lstm_model`` but stacks ``BasicRNNCell`` layers.

    Returns:
        ``(initial_state, logits, final_state)``.
    """
    return _get_recurrent_model(tf.nn.rnn_cell.BasicRNNCell, input_data, shape,
                                num_outputs, sequence_length, batch_size)

In [3]:
# get loss for a model given targets
def get_loss(logits, targets, batch_size, sequence_length):
    """cross entropy, because text

    Flattens ``targets`` to one label per row of ``logits``, weights every
    position equally (a vector of ``batch_size * sequence_length`` ones),
    sums the per-position losses and divides the total by ``batch_size``.

    Returns:
        A scalar loss tensor.
    """
    flat_targets = tf.reshape(targets, [-1])
    uniform_weights = tf.ones([batch_size * sequence_length])
    per_position = tf.nn.seq2seq.sequence_loss_by_example(
        [logits], [flat_targets], [uniform_weights])
    return tf.reduce_sum(per_position) / batch_size

In [4]:
# get an op to do a step of descent on all trainable variables defined
def train_op(loss, learning_rate, max_grad_norm=5):
    """Return an op applying one Adam update to every trainable variable.

    Gradients of ``loss`` are clipped jointly so that their global norm does
    not exceed ``max_grad_norm`` before the optimiser applies them.
    """
    variables = tf.trainable_variables()
    raw_grads = tf.gradients(loss, variables)
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads, max_grad_norm)
    optimiser = tf.train.AdamOptimizer(learning_rate)
    update = optimiser.apply_gradients(zip(clipped_grads, variables))
    return update

In [5]:
def run_epoch(session, inputs, targets, initial_state, final_state, loss, data_iter, eval_op):
    """Run the model over every batch of ``data_iter``, printing progress.

    ``data_iter`` yields ``(progress, batch)`` pairs. Each batch has its
    first element appended to its end (in place); the inputs are then all
    elements but the last and the targets all elements but the first. The
    state fetched from ``final_state`` is fed back through ``initial_state``
    on the next batch, and ``eval_op`` is run alongside the loss.

    A running perplexity estimate and a characters-per-second figure are
    rewritten on a single output line after every batch. Nothing is returned.

    NOTE(review): the perplexity is normalised by the number of elements per
    batch, not by the number of predicted characters -- confirm against the
    layout produced by the data iterator.
    """
    current_state = initial_state.eval()
    total_cost = 0
    seen = 0
    began = time.time()
    fetches = [loss, final_state, eval_op]
    for progress, batch in data_iter:
        # wrap around so the last input still has a target
        batch.append(batch[0])
        feed = {inputs: batch[0:-1],
                targets: batch[1:],
                initial_state: current_state}
        batch_cost, current_state, _ = session.run(fetches, feed)
        total_cost += batch_cost
        seen += len(feed[inputs])

        perplexity = np.exp(total_cost / seen)
        rate = seen * batch[0].shape[0] / (time.time() - began)
        print('\r({:.3f}) perplexity: {:.3f} ({:.0f} cps)'.format(progress, perplexity, rate),
              end='')
        

In [9]:
# (very slowly) make some samples
# this is really dumb,
# should just rip out tf.nn.rnn and set it up so the input is the last output
# then we could unroll it and get the sequence all at once
import random
def print_sample(session, inputs, initial_state, output, final_state, length, vocab):
    """Sample ``length`` characters from a one-step model, print and return them.

    Generation starts from a uniformly random vocabulary entry. At every step
    the previous character and the previous recurrent state are fed in, the
    next character is drawn from the softmax of ``output``, and the state
    fetched from ``final_state`` is carried over to the following step.

    Args:
        session: session used for every graph evaluation.
        inputs: placeholder fed a ``(1, 1)`` array holding the current
            character id.
        initial_state: state tensor of the one-step model; evaluated once for
            the starting state and then fed with the previous step's state.
        output: unnormalised scores (logits) for the next character.
        final_state: state tensor after one step.
        length: number of characters to generate.
        vocab: mapping from character to integer id.

    Returns:
        The generated text as a single string.
    """
    # evaluate in the session we were given, not whichever one is the default
    state = initial_state.eval(session=session)
    in_data = np.array(vocab[np.random.choice(list(vocab.keys()))]).reshape((1, 1))
    int_to_char = {int(vocab[key]): key for key in vocab}
    print('making a sample')
    sample = []
    for _ in range(length):
        # fetch the new state as well, otherwise the model forgets everything
        # it has generated so far
        logits, state = session.run([output, final_state],
                                    {initial_state: state,
                                     inputs: in_data})
        # softmax in float64 numpy: this adds no new op to the graph on every
        # call and keeps the probabilities normalised tightly enough for
        # np.random.multinomial
        logits = np.asarray(logits, dtype=np.float64).flatten()
        probs = np.exp(logits - logits.max())
        probs /= probs.sum()
        char_idx = int(np.argmax(np.random.multinomial(1, probs)))
        in_data = np.array(char_idx).reshape((1, 1))
        sample.append(int_to_char[char_idx])
    result = ''.join(sample)
    print(result)
    return result

In [None]:
# Build the training model and a weight-sharing one-step sampling model,
# then alternate between an epoch of training and printing a sample.
tf.reset_default_graph()

batch_size = 100
sequence_length = 200
shape = [64,64]
learning_rate = 0.05
num_epochs = 100
sample_length = 250  # characters generated after every epoch

in_var = tf.placeholder(tf.int32, [batch_size, sequence_length])
target_var = tf.placeholder(tf.int32, [batch_size, sequence_length])

sample_in_var = tf.placeholder(tf.int32, [1, 1])

vocab = data.get_vocab('char')

print('getting model...', end='')
with tf.variable_scope('rnn_model') as scope:
    initial_state, outputs, final_state = get_lstm_model(in_var, shape, len(vocab), sequence_length, batch_size)
    # get a one step at a time model to generate some samples
    print('...', end='')
    scope.reuse_variables()
    # keep the sampling model's state under its own name: rebinding
    # `initial_state` here would make run_epoch evaluate and feed the
    # sampling model's state instead of the training model's
    sample_initial_state, output_1, final_state_1 = get_lstm_model(sample_in_var, shape, len(vocab), 1, 1)
print('...', end='')
loss_op = get_loss(outputs, target_var, batch_size, sequence_length)
print('...', end='')
update_weights = train_op(loss_op, learning_rate)
print('\r{:~^30}'.format('got model'))
sess = tf.Session()
print('initialising', end='')
sess.run(tf.initialize_all_variables())
print('\r{:~^30}'.format('initialised'))
# run_epoch calls initial_state.eval(), which needs a default session
with sess.as_default():
    for i in range(num_epochs):
        print('~~~~~~~~~~Epoch {:>3}:~~~~~~~~~~'.format(i))

        data_iter = data.get_char_iter(sequence_length, batch_size, True)
        run_epoch(sess, 
                  in_var, 
                  target_var, 
                  initial_state, 
                  final_state, 
                  loss_op, 
                  data_iter, 
                  update_weights)
        print()
        print_sample(sess,
                     sample_in_var,
                     sample_initial_state,
                     output_1,
                     final_state_1,
                     sample_length,
                     vocab)

~~~~~~~~~~got model~~~~~~~~~~~
~~~~~~~~~initialised~~~~~~~~~~
~~~~~~~~~~Epoch   0:~~~~~~~~~~
(0.992) perplexity: 527.386 (12516 cps)
making a sample
h .a gl empmnrca  eeesapfesiaieeclrw'atndimo t teivte emnieecifeveampskt1l ,tn T,  cs
ne,acmamayeoam"ea ae  l
oslis yfeoedeianouoml   
  rtttaccc cn  os isnysen arhilp  ieehvmo inesitnoe cic rtpoo eiu rBp
ece-tse tvi,f reov ao y hneresx a ottesceb e 
~~~~~~~~~~Epoch   1:~~~~~~~~~~
(0.992) perplexity: 510.347 (12613 cps)
making a sample
 rnrh n o
wist5t
tiniroieeso ifidm rledt ps dii et t pefo
fae"tn itt.nyao ttaoso hceseenhh ecu sooinncso auo s-rashu.mn cm itof c hrstatlr
t ruuhec u poe, eat l w  ens oidtnfneoni tf esomN Paticpaorm ehhtulipetdnirsltrethct,hcaent 
coheeo onoerhoh  f
~~~~~~~~~~Epoch   2:~~~~~~~~~~
(0.992) perplexity: 509.747 (12701 cps)
making a sample
 csohertsit'e s dr osfmarnfn"e  r i!eeioacc Eefimys .b nnh i
h,sstdtuy 
ae
.eea on seat r td
l oos,vtAaeidmsc n) t.ss nvs jr sne t f-nnioi a seshhn ocoeiinel n  h icofsw    ep