In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
path = tf.keras.utils.get_file('shakespeare.txt',
                               'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
text = open(path).read()

# The length of text is the number of characters in it
print('Length of the text: {} characters'.format(len(text)))

Length of the text: 1115394 characters


In [3]:
# Take a look at the first 250 characters in text
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [4]:
vocab = sorted(set(text))
print(len(vocab))

65


In [5]:
char2idx = {c: i for i, c in enumerate(vocab)}
print(char2idx)

{'\n': 0, ' ': 1, '!': 2, '$': 3, '&': 4, "'": 5, ',': 6, '-': 7, '.': 8, '3': 9, ':': 10, ';': 11, '?': 12, 'A': 13, 'B': 14, 'C': 15, 'D': 16, 'E': 17, 'F': 18, 'G': 19, 'H': 20, 'I': 21, 'J': 22, 'K': 23, 'L': 24, 'M': 25, 'N': 26, 'O': 27, 'P': 28, 'Q': 29, 'R': 30, 'S': 31, 'T': 32, 'U': 33, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, 'a': 39, 'b': 40, 'c': 41, 'd': 42, 'e': 43, 'f': 44, 'g': 45, 'h': 46, 'i': 47, 'j': 48, 'k': 49, 'l': 50, 'm': 51, 'n': 52, 'o': 53, 'p': 54, 'q': 55, 'r': 56, 's': 57, 't': 58, 'u': 59, 'v': 60, 'w': 61, 'x': 62, 'y': 63, 'z': 64}


In [6]:
idx2char = {i: c for i, c in enumerate(vocab)}
print(idx2char)

{0: '\n', 1: ' ', 2: '!', 3: '$', 4: '&', 5: "'", 6: ',', 7: '-', 8: '.', 9: '3', 10: ':', 11: ';', 12: '?', 13: 'A', 14: 'B', 15: 'C', 16: 'D', 17: 'E', 18: 'F', 19: 'G', 20: 'H', 21: 'I', 22: 'J', 23: 'K', 24: 'L', 25: 'M', 26: 'N', 27: 'O', 28: 'P', 29: 'Q', 30: 'R', 31: 'S', 32: 'T', 33: 'U', 34: 'V', 35: 'W', 36: 'X', 37: 'Y', 38: 'Z', 39: 'a', 40: 'b', 41: 'c', 42: 'd', 43: 'e', 44: 'f', 45: 'g', 46: 'h', 47: 'i', 48: 'j', 49: 'k', 50: 'l', 51: 'm', 52: 'n', 53: 'o', 54: 'p', 55: 'q', 56: 'r', 57: 's', 58: 't', 59: 'u', 60: 'v', 61: 'w', 62: 'x', 63: 'y', 64: 'z'}


In [7]:
text_as_int = np.array([char2idx[c] for c in text]).astype(np.uint8)
print(text_as_int)

[18 47 56 ... 45  8  0]


In [8]:
# Show how the first 13 characters from the text mapped to integers
print('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

'First Citizen' ---- characters mapped to int ---- > [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [9]:
BATCH_SIZE = 64
SEQ_LENGTH = 30
data_len = text_as_int.shape[0]

n_batches = (data_len - 1) // (BATCH_SIZE * SEQ_LENGTH)
rounded_len = n_batches * BATCH_SIZE * SEQ_LENGTH

X_train = np.reshape(text_as_int[:rounded_len], [BATCH_SIZE, n_batches * SEQ_LENGTH])
y_train = np.reshape(text_as_int[1:rounded_len + 1], [BATCH_SIZE, n_batches * SEQ_LENGTH])

In [10]:
print(X_train.shape)
print(X_train)

(64, 17400)
[[18 47 56 ... 57 39 61]
 [ 1 46 47 ... 39 56 52]
 [ 1 46 53 ... 42  6  1]
 ...
 [60 47 50 ... 46 63  1]
 [57 53 60 ... 53 59  1]
 [56 43 54 ... 58  0 52]]


In [11]:
print(y_train.shape)
print(y_train)

(64, 17400)
[[47 56 57 ... 39 61  1]
 [46 47 51 ... 56 52  1]
 [46 53 61 ...  6  1 42]
 ...
 [47 50 43 ... 63  1 57]
 [53 60 43 ... 59  1 56]
 [43 54 53 ...  0 52 53]]


In [12]:
print('X:', X_train[0, :15])
print('y:', y_train[0, :15])

X: [18 47 56 57 58  1 15 47 58 47 64 43 52 10  0]
y: [47 56 57 58  1 15 47 58 47 64 43 52 10  0 14]


In [13]:
alpha_size = len(vocab)
N_LAYERS = 2
INTERNAL_SIZE = 128

# Input and output
X = tf.placeholder(tf.float32, [None, None, alpha_size], name='X')     # [BATCH_SIZE, SEQ_LENGTH, alpha_size]
Y_ = tf.placeholder(tf.float32, [None, None, alpha_size], name='Y_')   # [BATCH_SIZE, SEQ_LENGTH, alpha_size]

# 3 stacked GRU layers
cells = [tf.nn.rnn_cell.GRUCell(INTERNAL_SIZE) for _ in range(N_LAYERS)]
multi_cell = tf.nn.rnn_cell.MultiRNNCell(cells)

zero_state = multi_cell.zero_state(BATCH_SIZE, tf.float32)
rnn_out, H_out = tf.nn.dynamic_rnn(multi_cell, X, initial_state=zero_state)   # [BATCH_SIZE, SEQ_LENGTH, INTERNAL_SIZE]

# Hack: reshape unrolled RNN output to be more 'single' ones so the linear layer can share weights and biases easily
rnn_out_flat = tf.reshape(rnn_out, [-1, INTERNAL_SIZE])   # [BATCH_SIZE * SEQ_LENGTH, INTERNAL_SIZE]

# Linear output layer for logits + Softmax activation
with tf.name_scope('linear'):
    W_linear = tf.Variable(tf.truncated_normal([INTERNAL_SIZE, alpha_size], stddev=0.1), name='W')
    B_linear = tf.Variable(tf.constant(0.1, shape=[alpha_size]), name='B')
    
    Y_logits_flat = tf.matmul(rnn_out_flat, W_linear) + B_linear
    
Y_probs_flat = tf.nn.softmax(Y_logits_flat)
Y_probs = tf.reshape(Y_probs_flat, [BATCH_SIZE, SEQ_LENGTH, alpha_size])

tf.summary.histogram('W_linear', W_linear)
tf.summary.histogram('B_linear', B_linear)

# Loss (for training)
Y_flat_ = tf.reshape(Y_, [-1, alpha_size])

with tf.name_scope('cross_entropy'):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=Y_logits_flat, labels=Y_flat_))
    tf.summary.scalar('cross_entropy', cross_entropy)

with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(0.003).minimize(cross_entropy)

In [14]:
def int2vec(i):
    v = np.zeros(alpha_size)
    v[i] = 1
    return v

def intstr2vecstr(i_str):
    return [int2vec(i) for i in i_str]

def vec2int(v):
    return np.argmax(v)

def vecstr2intstr(v_str):
    return [vec2int(v) for v in v_str]

In [15]:
import time

init = tf.global_variables_initializer()

merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter('summary/rnn/test/')

sess = tf.Session()

sess.run(init)
writer.add_graph(sess.graph)

# Initial zero input state (a tuple)
state_in = sess.run(zero_state)

N_EPOCHS = 3

for epoch in range(N_EPOCHS):
    start = time.time()
    
    losses = []
    
    for batch in range(n_batches):
        X_batch = X_train[:, batch * SEQ_LENGTH:(batch + 1) * SEQ_LENGTH]
        y_batch = y_train[:, batch * SEQ_LENGTH:(batch + 1) * SEQ_LENGTH]
        
        X_batch = np.roll(X_batch, -epoch, axis=0)
        y_batch = np.roll(y_batch, -epoch, axis=0)
        
        X_batch = np.array([intstr2vecstr(seq) for seq in X_batch]).astype(np.float32)
        y_batch = np.array([intstr2vecstr(seq) for seq in y_batch]).astype(np.float32)
                
        feed_dict = {X: X_batch, Y_: y_batch}
        for i, v in enumerate(zero_state):
            feed_dict[v] = state_in[i]
        
        _, state_out = sess.run([train_step, H_out], feed_dict=feed_dict)
        
        s = sess.run(merged_summary, feed_dict=feed_dict)
        writer.add_summary(s, epoch * n_batches + batch)
        
        loss = sess.run(cross_entropy, feed_dict=feed_dict)
        losses.append(loss)
        
        state_in = state_out
    
    end = time.time()
    
    print('Epoch {:02d} -> Loss: {:.4f}, Time: {:.2f}s'.format(epoch + 1, np.array(losses).mean(), end - start))

Epoch 01 -> Loss: 2.0652, Time: 63.142629861831665
Epoch 02 -> Loss: 1.5571, Time: 59.52809381484985
Epoch 03 -> Loss: 1.4493, Time: 60.724679946899414


In [28]:
generated_string = 'L'
c = np.array([[int2vec(char2idx[generated_string])]]).astype(np.float32)

# Initial zero input state (a tuple)
state_in = sess.run(zero_state)

for step in range(1000):
    feed_dict = {X: c}
    for i, v in enumerate(zero_state):
        feed_dict[v] = state_in[i]
    
    c, state_out = sess.run([Y_probs, H_out], feed_dict=feed_dict)
    print(c)
    
    generated = idx2char[vec2int(c[0, 0])]
    generated_string.join[generated]

print(generated_string)

InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [1,65] vs. shape[1] = [64,128]
	 [[Node: rnn/while/rnn/multi_rnn_cell/cell_0/gru_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Identity_3, rnn/while/rnn/multi_rnn_cell/cell_0/gru_cell/concat/axis)]]

Caused by op 'rnn/while/rnn/multi_rnn_cell/cell_0/gru_cell/concat', defined at:
  File "/Users/jankaiser/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/jankaiser/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/Users/jankaiser/anaconda3/lib/python3.6/asyncio/base_events.py", line 427, in run_forever
    self._run_once()
  File "/Users/jankaiser/anaconda3/lib/python3.6/asyncio/base_events.py", line 1440, in _run_once
    handle._run()
  File "/Users/jankaiser/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-759a2f77dfa5>", line 14, in <module>
    rnn_out, H_out = tf.nn.dynamic_rnn(multi_cell, X, initial_state=zero_state)   # [BATCH_SIZE, SEQ_LENGTH, INTERNAL_SIZE]
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 631, in dynamic_rnn
    dtype=dtype)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 828, in _dynamic_rnn_loop
    swap_memory=swap_memory)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3232, in while_loop
    return_same_structure)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2952, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2887, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3201, in <lambda>
    body = lambda i, lv: (i + 1, orig_body(*lv))
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 799, in _time_step
    (output, new_state) = call_cell()
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 785, in <lambda>
    call_cell = lambda: cell(input_t, state)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 222, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 362, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 736, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1315, in call
    cur_inp, new_state = cell(cur_inp, cur_state)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 329, in __call__
    *args, **kwargs)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 362, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 736, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 478, in call
    array_ops.concat([inputs, state], 1), self._gate_kernel)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1114, in concat
    return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1030, in concat_v2
    "ConcatV2", values=values, axis=axis, name=name)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/Users/jankaiser/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [1,65] vs. shape[1] = [64,128]
	 [[Node: rnn/while/rnn/multi_rnn_cell/cell_0/gru_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Identity_3, rnn/while/rnn/multi_rnn_cell/cell_0/gru_cell/concat/axis)]]
