In [216]:
import time
import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn

In [338]:
class CharRNN(object):
  """Character RNN model."""
  
  def __init__(self, is_training, batch_size, num_unrollings, vocab_size, 
               hidden_size, max_grad_norm, embedding_size, num_layers):
    self.batch_size = batch_size
    self.num_unrollings = num_unrollings
    if not is_training:
        self.batch_size = 1
        self.num_unrollings = 1
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    self.input_data = tf.placeholder(tf.int64, [self.batch_size, self.num_unrollings])
    self.targets = tf.placeholder(tf.int64, [self.batch_size, self.num_unrollings])

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_size, input_size=embedding_size, forget_bias=0.0)
    cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
    self.initial_state = cell.zero_state(self.batch_size, tf.float32)
    with tf.device("/cpu:0"):
      embedding = tf.get_variable("embedding", [vocab_size, embedding_size])
      inputs = tf.nn.embedding_lookup(embedding, self.input_data)

    inputs = [tf.squeeze(input_, [1])
              for input_ in tf.split(1, self.num_unrollings, inputs)]
    outputs, state = rnn.rnn(cell, inputs, initial_state=self.initial_state)

    output = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])
    softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    targets = tf.reshape(tf.concat(1, self.targets), [-1])
    self.probs = tf.nn.softmax(logits)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, targets)
    
    self.mean_loss = mean_loss = tf.reduce_sum(loss) / self.batch_size
    self.final_state = state
    
    if is_training:
        global_step = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(10.0, global_step, 5000, 0.1, staircase=True)
        # self.lr = tf.Variable(0.01, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(mean_loss, tvars),
                                          max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

In [220]:
class BatchGenerator(object):
    """Generate and hold batches."""
    def __init__(self, text, batch_size, n_unrollings, vocab_size):
        self._text = text
        self._text_size = len(text)
        self._batch_size = batch_size
        self.vocab_size = vocab_size
        self._n_unrollings = n_unrollings
        segment = self._text_size // batch_size
        self._cursor = [ offset * segment for offset in range(batch_size)]
        self._last_batch = self._next_batch()

    def _next_batch(self):
        """Generate a single batch from the current cursor position in the data."""
        batch = np.zeros(shape=(self._batch_size), dtype=np.float)
        for b in range(self._batch_size):
            batch[b] = char2id(self._text[self._cursor[b]])
              # print(batch)
            self._cursor[b] = (self._cursor[b] + 1) % self._text_size
        return batch

    def next(self):
        """Generate the next array of batches from the data. The array consists of
        the last batch of the previous array, followed by num_unrollings new ones.
        """
        batches = [self._last_batch]
        for step in range(self._n_unrollings):
            batches.append(self._next_batch())
        self._last_batch = batches[-1]
        return batches

In [None]:
def char2id(char):
    try:
        return vocab_index_dict[char]
    except KeyError:
        print('Unexpected char')
        return 0
  
def id2char(index):
    return index_vocab_dict[index]

In [180]:
# Utility functions
def batches2string(batches):
    """Convert a sequence of batches back into their (most likely) string
    representation."""
    s = [''] * batches[0].shape[0]
    for b in batches:
        s = [''.join(x) for x in zip(s, id2char_list(b))]
    return s
    
def id2char_list(lst):
    return [id2char(i) for i in lst]
    
def characters(probabilities):
    """Turn a 1-hot encoding or a probability distribution over the possible
    characters back into its (most likely) character representation."""
    return [id2char(c) for c in np.argmax(probabilities, 1)]

In [374]:
  """Runs the model on the given data."""
def run_epoch(session, m, data_size, batch_generator, is_train, verbose=False, sample_m=None):
    epoch_size = ((data_size // m.batch_size) - 1) // m.num_unrollings
    start_time = time.time()
    mean_losses = 0.0
    iters = 0
    state = m.initial_state.eval()
    if is_train:
        extra_op = m.train_op
    else:
        extra_op = tf.no_op()
    for step in range(epoch_size):
        data = batch_generator.next()
        x = np.array(data[1:]).transpose()
        y = np.array(data[:-1]).transpose()
        
        mean_loss, state, _ = session.run([m.mean_loss, m.final_state, extra_op],
                                          {m.input_data: x, 
                                           m.targets: y,
                                           m.initial_state: state})
        mean_losses += mean_loss
        iters += m.num_unrollings

        perpl = np.exp(mean_losses / iters)
        if verbose and step % (epoch_size // 10) == 10:
            # print(mean_losses)
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, perpl, 
                   iters * m.batch_size / (time.time() - start_time)))
            if sample_m is not None:
                print('\n')
                print('='*80)
                print('Generate a sequence starting from "a":')
                print(sample_seq(session, sample_m, 'a', 100))
                print('='*80)
                print('\n')
    return perpl # np.exp(mean_losses / iters)

In [399]:
with open("tiny_shakespeare.txt", 'r') as f:
    text = f.read()
print(text[100:200])
print(len(text))
text = text[:]

 are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you
1115394


In [400]:
# prepare data
train_size = int(0.8 * len(text))
valid_size = int(0.1 * len(text))
test_size = len(text) - train_size - valid_size
train_text = text[:train_size]
valid_text = text[train_size:train_size + valid_size]
test_text = text[train_size + valid_size:]

print(train_size, train_text[:64])
print(valid_size, valid_text[:64])
print(test_size, test_text[:64])

(892315, 'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAl')
(111539, "you are,\nThat is, a woman; if you be more, you're none;\nIf you b")
(111540, '?\n\nGREMIO:\nGood morrow, neighbour Baptista.\n\nBAPTISTA:\nGood morr')


In [401]:
unique_chars = list(set(text))
vocab_size = len(unique_chars)
print('vocab size: %d' % vocab_size)
vocab_index_dict = {}
index_vocab_dict = {}

for i, char in enumerate(unique_chars):
    vocab_index_dict[char] = i
    index_vocab_dict[i] = char
# first_letter = ord(string.ascii_lowercase[0])

print(char2id('a'), char2id('z'), char2id(' '), char2id('ï'))
print(id2char(1), id2char(26), id2char(0))

vocab size: 65
Unexpected char
(39, 64, 2, 0)
('!', 'O', '\n')


In [402]:
batch_size = 32
n_unrollings = 10
train_batches = BatchGenerator(train_text, batch_size, n_unrollings, vocab_size)
eval_train_batches = BatchGenerator(train_text, 1, 1, vocab_size)
valid_batches = BatchGenerator(valid_text, 1, 1, vocab_size)
test_batches = BatchGenerator(test_text, 1, 1, vocab_size)

In [403]:
batches2string(train_batches.next())

['First Citiz',
 'Able to bea',
 ' direct way',
 'ANUS:\nLet g',
 ';\nYet, Marc',
 'alack, or w',
 'eful as the',
 ' wake till\n',
 'of your yea',
 'of those fe',
 'ach thee ho',
 ',\nAnd flaky',
 'be found fa',
 'set footing',
 'at from thi',
 'rn, good so',
 'E:\nThou vil',
 ' must love ',
 'o Friar Lau',
 'es do beat\n',
 'f I may tru',
 'll not yiel',
 '\nAnd harmfu',
 'CESTER:\n\nCL',
 '\nWhat, will',
 'ak them fai',
 'h-plight: s',
 'o some foul',
 'server, a b',
 'one of this',
 'undiscovere',
 'od sir, adi']

In [404]:
batches2string(valid_batches.next())

['yo']

In [405]:
params = {'batch_size': batch_size, 'num_unrollings': n_unrollings, 'vocab_size': vocab_size, 
          'hidden_size': 64, 'max_grad_norm': 1.25, 'embedding_size': 50, 
          'num_layers': 1}

In [406]:
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
    with tf.variable_scope('char_rnn') as scope:
        train = CharRNN(is_training=True, **params)
        tf.get_variable_scope().reuse_variables()
        valid = CharRNN(is_training=False, **params)
        test = CharRNN(is_training=False, **params)

In [407]:
def sample_seq(session, m, start_char, length, is_argmax=True):
    state = m.initial_state.eval()
    seq = [start_char]
    x = np.array([[char2id(start_char)]])
    for i in range(length):
        probs = session.run(m.probs,
                            {m.input_data: x, 
                             m.initial_state: state})
        if is_argmax:
            sample = np.argmax(probs[0])
        else:
            sample = np.random.choice(m.vocab_size, 1, p=probs[0])[0]
        seq.append(id2char(sample))
        x = np.array([[sample]])
    return ''.join(seq)

In [408]:
n_epochs = 10
# summary_frequency = 100

with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    for i in range(n_epochs):
        print('training')
        run_epoch(sess, train, train_size, train_batches, is_train=True, verbose=True) #, sample_m=valid)
        print('validation')
        run_epoch(sess, valid, valid_size, valid_batches, is_train=False, verbose=True)#, sample_m=valid)
        for i in range(vocab_size):
            print('\n')
            print('='*80)
            print(sample_seq(sess, valid, id2char(i), 100, is_argmax=True))
            print('='*80)
            print('\n')
    print('test')
    run_epoch(sess, test, test_size, test_batches, is_train=False, verbose=True) #, sample_m=valid)

training
0.004 perplexity: 83.226 speed: 12690 wps
0.103 perplexity: 2.096 speed: 15993 wps
0.203 perplexity: 1.648 speed: 15109 wps
0.303 perplexity: 1.519 speed: 14199 wps
0.402 perplexity: 1.457 speed: 13907 wps
0.502 perplexity: 1.420 speed: 13917 wps
0.602 perplexity: 1.400 speed: 13987 wps
0.702 perplexity: 1.363 speed: 14079 wps
0.801 perplexity: 1.343 speed: 14145 wps
0.901 perplexity: 1.333 speed: 14245 wps
validation
0.000 perplexity: 7.544 speed: 313 wps
0.100 perplexity: 1.131 speed: 1625 wps
0.200 perplexity: 1.165 speed: 1629 wps
0.300 perplexity: 1.169 speed: 1630 wps
0.400 perplexity: 1.167 speed: 1630 wps
0.500 perplexity: 1.155 speed: 1629 wps
0.600 perplexity: 1.153 speed: 1629 wps
0.700 perplexity: 1.149 speed: 1630 wps
0.800 perplexity: 1.152 speed: 1630 wps
0.900 perplexity: 1.162 speed: 1630 wps












































































































!nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn

KeyboardInterrupt: 

In [200]:
a = np.array([[1],[2],[3]])

In [201]:
np.array([a, a, a])

array([[[1],
        [2],
        [3]],

       [[1],
        [2],
        [3]],

       [[1],
        [2],
        [3]]])

In [234]:
np.random.random_integers(0,1)

1

In [246]:
np.random.choice(5, 1, p=[0.2, 0.4, 0.4, 0.0, 0.0])

array([2])

In [None]:
np.random.randint()