In [0]:
import time
from collections import namedtuple
import numpy as np
import tensorflow as tf

In [0]:
# Read the whole training corpus into memory as a single string.
with open('anna.txt') as f:
    text = f.read()

# Character-level vocabulary: every distinct character gets an integer id,
# assigned in sorted order so the mapping is deterministic across runs.
vocab = sorted(set(text))
int_to_vocab = dict(enumerate(vocab))
vocab_to_int = {char: idx for idx, char in int_to_vocab.items()}

# The corpus as a 1-D int32 array of character ids.
encoded = np.array(list(map(vocab_to_int.__getitem__, text)), dtype=np.int32)

In [4]:
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [5]:
encoded[:100]

array([31, 64, 57, 72, 76, 61, 74,  1, 16,  0,  0,  0, 36, 57, 72, 72, 81,
        1, 62, 57, 69, 65, 68, 65, 61, 75,  1, 57, 74, 61,  1, 57, 68, 68,
        1, 57, 68, 65, 67, 61, 26,  1, 61, 78, 61, 74, 81,  1, 77, 70, 64,
       57, 72, 72, 81,  1, 62, 57, 69, 65, 68, 81,  1, 65, 75,  1, 77, 70,
       64, 57, 72, 72, 81,  1, 65, 70,  1, 65, 76, 75,  1, 71, 79, 70,  0,
       79, 57, 81, 13,  0,  0, 33, 78, 61, 74, 81, 76, 64, 65, 70],
      dtype=int32)

In [0]:
def get_batches(arr, batch_size, n_steps):
  """Yield (inputs, targets) mini-batches for next-character prediction.

  The array is truncated to a whole number of batches and reshaped to
  `batch_size` rows, so each row is one contiguous stretch of the sequence.
  Windows of `n_steps` columns are then yielded left to right.

  Targets are the inputs shifted one position forward in the *flat* sequence.
  The target of the last column of a row is therefore the first element of the
  next row, and the target of the very last used element is the element that
  follows it in `arr`. If nothing follows, it wraps around to `arr[0]` rather
  than padding with 0, because 0 is itself a valid id.

  Args:
    arr: 1-D NumPy array of integer ids.
    batch_size: number of rows (sequences) per batch.
    n_steps: number of columns (sequence steps) per batch.

  Yields:
    Tuples `(x, y)` of arrays with shape `(batch_size, n_steps)` and the
    dtype of `arr`. Nothing is yielded when `arr` is too short to fill a
    single batch.
  """
  chars_per_batch = batch_size * n_steps
  num_batches = len(arr) // chars_per_batch
  n_used = num_batches * chars_per_batch
  if n_used == 0:
    # Not enough data for even one full batch.
    return

  inputs = arr[:n_used].reshape((batch_size, -1))

  # Build the one-step-ahead sequence on the flat array so that row boundaries
  # get the real next element instead of a padded zero.
  shifted = np.empty(n_used, dtype=arr.dtype)
  shifted[:-1] = arr[1:n_used]
  shifted[-1] = arr[n_used] if n_used < len(arr) else arr[0]
  targets = shifted.reshape((batch_size, -1))

  for start in range(0, inputs.shape[1], n_steps):
    stop = start + n_steps
    # Copy the targets so callers receive a fresh array, as before.
    yield inputs[:, start:stop], targets[:, start:stop].copy()

In [7]:
# Sanity check: take the first batch (10 sequences x 50 steps) and print the
# first 10 columns of inputs and targets. Each row of y should be the matching
# row of x shifted one character to the left.
batches = get_batches(encoded, 10, 50)
x, y = next(batches)
print('x\n', x[:, :10])
print('y\n', y[:, :10])

x
 [[31 64 57 72 76 61 74  1 16  0]
 [ 1 57 69  1 70 71 76  1 63 71]
 [78 65 70 13  0  0  3 53 61 75]
 [70  1 60 77 74 65 70 63  1 64]
 [ 1 65 76  1 65 75 11  1 75 65]
 [ 1 37 76  1 79 57 75  0 71 70]
 [64 61 70  1 59 71 69 61  1 62]
 [26  1 58 77 76  1 70 71 79  1]
 [76  1 65 75 70  7 76 13  1 48]
 [ 1 75 57 65 60  1 76 71  1 64]]
y
 [[64 57 72 76 61 74  1 16  0  0]
 [57 69  1 70 71 76  1 63 71 65]
 [65 70 13  0  0  3 53 61 75 11]
 [ 1 60 77 74 65 70 63  1 64 65]
 [65 76  1 65 75 11  1 75 65 74]
 [37 76  1 79 57 75  0 71 70 68]
 [61 70  1 59 71 69 61  1 62 71]
 [ 1 58 77 76  1 70 71 79  1 75]
 [ 1 65 75 70  7 76 13  1 48 64]
 [75 57 65 60  1 76 71  1 64 61]]


In [0]:
def build_inputs(batch_size, n_steps):
  """Create the placeholders that are fed on every run of the graph.

  Args:
    batch_size: number of sequences per batch.
    n_steps: number of steps per sequence.

  Returns:
    A tuple `(inputs, targets, keep_prob)`: two int32 placeholders of shape
    `[batch_size, n_steps]` named 'inputs' and 'targets', and a float32
    placeholder named 'keep_prob' with no fixed shape.
  """
  batch_shape = [batch_size, n_steps]

  inputs = tf.placeholder(tf.int32, shape=batch_shape, name='inputs')
  targets = tf.placeholder(tf.int32, shape=batch_shape, name='targets')
  keep_prob = tf.placeholder(tf.float32, name='keep_prob')

  return inputs, targets, keep_prob


In [0]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
  """Build a stack of dropout-wrapped LSTM cells and its zero initial state.

  Args:
    lstm_size: number of units in each LSTM cell.
    num_layers: how many cells to stack.
    batch_size: batch size used to size the initial state.
    keep_prob: keep probability applied to each cell's output.

  Returns:
    A tuple `(cell, initial_state)` where `cell` is the `MultiRNNCell` and
    `initial_state` is its float32 zero state for `batch_size`.
  """
  layers = []
  for _ in range(num_layers):
    # Each layer needs its own cell object; wrap it so dropout is applied
    # to that layer's output.
    base_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    layers.append(
        tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob))

  cell = tf.contrib.rnn.MultiRNNCell(layers)
  return cell, cell.zero_state(batch_size, tf.float32)


In [0]:
def build_output(lstm_output, in_size, out_size):
  """Project RNN outputs to per-class logits and softmax probabilities.

  Args:
    lstm_output: output of the recurrent layers.
    in_size: size of the last dimension of the flattened RNN output.
    out_size: number of output classes.

  Returns:
    A tuple `(out, logits)`: the softmax of the logits (op named
    'predictions') and the raw logits, each with one row per flattened
    input row and `out_size` columns.
  """
  # Flatten to 2-D so that a single matmul covers every row.
  flat_output = tf.reshape(tf.concat(lstm_output, axis=1), [-1, in_size])

  with tf.variable_scope('softmax'):
    weight_init = tf.truncated_normal((in_size, out_size), stddev=0.1)
    softmax_w = tf.Variable(weight_init)
    softmax_b = tf.Variable(tf.zeros(out_size))

  logits = tf.matmul(flat_output, softmax_w) + softmax_b

  return tf.nn.softmax(logits, name='predictions'), logits

In [0]:
def build_loss(logits, targets, lstm_size, num_classes):
  """Mean softmax cross-entropy between the logits and one-hot targets.

  Args:
    logits: unnormalised class scores.
    targets: integer class ids; one-hot encoded and reshaped to match
      `logits`.
    lstm_size: not used by this function.
    num_classes: depth of the one-hot encoding.

  Returns:
    A scalar tensor holding the mean cross-entropy.
  """
  labels = tf.reshape(tf.one_hot(targets, num_classes), logits.shape)
  per_row_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                         labels=labels)
  return tf.reduce_mean(per_row_loss)

In [0]:
# Clip gradients by their global norm to guard against exploding gradients.

def build_optimizer(loss, learning_rate, grad_clip):
  """Create an Adam training op with gradients clipped by global norm.

  Args:
    loss: scalar tensor to minimise.
    learning_rate: learning rate passed to `AdamOptimizer`.
    grad_clip: clipping threshold passed to `tf.clip_by_global_norm`.

  Returns:
    The op that applies the clipped gradients to all trainable variables.
  """
  trainable = tf.trainable_variables()
  raw_grads = tf.gradients(loss, trainable)
  clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)

  adam = tf.train.AdamOptimizer(learning_rate)
  return adam.apply_gradients(zip(clipped_grads, trainable))

In [0]:
class charRNN:
  """Character-level LSTM model assembled as a TensorFlow graph.

  Constructing an instance resets the default graph and then builds the
  placeholders, the stacked LSTM, the output layer, the loss and the
  training op in it.

  Attributes:
    inputs, targets, keep_prob: placeholders returned by `build_inputs`.
    initial_state: zero state of the stacked LSTM.
    final_state: LSTM state after processing the inputs.
    prediction: softmax output over the classes.
    predication: alias of `prediction`, kept for backward compatibility with
      code written against the original (misspelled) attribute name.
    logits: unnormalised class scores.
    loss: scalar training loss.
    optimizer: op that applies one clipped-gradient Adam update.
  """

  def __init__(self, num_classes, batch_size=64, num_steps=50, lstm_size=128,
               num_layers=2, learning_rate=0.001, grad_clip=5, sampling=False):
    """Build the graph.

    Args:
      num_classes: number of distinct classes (one-hot depth and output size).
      batch_size: sequences per batch; overridden to 1 when `sampling`.
      num_steps: steps per sequence; overridden to 1 when `sampling`.
      lstm_size: units per LSTM layer.
      num_layers: number of stacked LSTM layers.
      learning_rate: learning rate for the optimizer.
      grad_clip: global-norm threshold for gradient clipping.
      sampling: when true, build the graph for a single sequence of a
        single step.
    """
    if sampling:
      batch_size, num_steps = 1, 1

    # Start from an empty graph so rebuilding the model does not pile up ops.
    tf.reset_default_graph()

    self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)
    cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

    x_one_hot = tf.one_hot(self.inputs, num_classes)

    outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
    self.final_state = state
    self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
    # Keep the original misspelled name working for existing callers.
    self.predication = self.prediction

    self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
    self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)


In [0]:
# Hyperparameters shared by the model-construction and training cells.

batch_size = 100       # sequences per mini-batch
num_steps = 100        # characters per sequence in a mini-batch
lstm_size = 512        # units in each LSTM layer
num_layers = 2         # number of stacked LSTM layers
learning_rate = 0.001  # learning rate handed to the Adam optimizer
keep_prob = 0.5        # dropout keep probability fed during training

In [27]:
epochs = 20
# Print the training loss every N steps.
print_every = 50
# Save a checkpoint every N steps, so an interrupted run still leaves
# something to resume from.
save_every = 200

model = charRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())

  # Global step counter across all epochs.
  c = 0
  for e in range(epochs):
    # Start every epoch from the zero state, then carry the LSTM state from
    # one batch to the next within the epoch.
    new_state = sess.run(model.initial_state)
    for x, y in get_batches(encoded, batch_size, num_steps):
      c += 1
      start = time.time()
      feed = {model.inputs: x,
              model.targets: y,
              model.keep_prob: keep_prob,
              model.initial_state: new_state}
      batch_loss, new_state, _ = sess.run([model.loss,
                                           model.final_state,
                                           model.optimizer], feed_dict=feed)
      if c % print_every == 0:
        end = time.time()
        print('Epoch: {}/{}...'.format(e+1, epochs),
              'Training Step: {}...'.format(c),
              'Training loss: {:.4f}...'.format(batch_loss),
              '{:.4f} sec/batch'.format(end-start))

      if c % save_every == 0:
        saver.save(sess, 'checkpoints/i{}_l{}.ckpt'.format(c, lstm_size))

  # Final checkpoint once all epochs have finished.
  saver.save(sess, 'checkpoints/i{}_l{}.ckpt'.format(c, lstm_size))

KeyboardInterrupt: ignored

In [0]:
# Train the model, logging the loss and writing checkpoints periodically.
epochs = 20
# Print losses every N iterations
print_every_n = 50

# Save every N iterations
save_every_n = 200

# Building the model resets the default graph, so the Saver must be created
# afterwards to pick up the new variables.
model = charRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    # Global step counter across all epochs.
    counter = 0
    for e in range(epochs):
        # Train network
        # Each epoch starts from the zero state; within the epoch the final
        # LSTM state of one batch is fed as the initial state of the next.
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            # One optimisation step; also fetch the loss and the new state.
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            if (counter % print_every_n == 0):
                end = time.time()
                print('Epoch: {}/{}... '.format(e+1, epochs),
                      'Training Step: {}... '.format(counter),
                      'Training loss: {:.4f}... '.format(batch_loss),
                      '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    # Final checkpoint after the last epoch.
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

Epoch: 1/20...  Training Step: 50...  Training loss: 3.1551...  7.3041 sec/batch
Epoch: 1/20...  Training Step: 100...  Training loss: 3.0798...  7.3562 sec/batch
Epoch: 1/20...  Training Step: 150...  Training loss: 2.7703...  7.5919 sec/batch
Epoch: 2/20...  Training Step: 200...  Training loss: 2.4216...  6.9708 sec/batch
Epoch: 2/20...  Training Step: 250...  Training loss: 2.3422...  7.5976 sec/batch
Epoch: 2/20...  Training Step: 300...  Training loss: 2.2095...  7.1731 sec/batch
Epoch: 2/20...  Training Step: 350...  Training loss: 2.1548...  7.5330 sec/batch
Epoch: 3/20...  Training Step: 400...  Training loss: 2.0366...  7.1146 sec/batch
Epoch: 3/20...  Training Step: 450...  Training loss: 1.9693...  6.9560 sec/batch
Epoch: 3/20...  Training Step: 500...  Training loss: 1.9009...  7.2840 sec/batch
Epoch: 3/20...  Training Step: 550...  Training loss: 1.8764...  7.3871 sec/batch
Epoch: 4/20...  Training Step: 600...  Training loss: 1.7707...  7.8900 sec/batch
Epoch: 4/20...  T