In [1]:
import tensorflow as tf
import tensorflow.contrib.eager as tfe

# Turn on eager execution so TensorFlow ops return concrete values right away;
# later cells rely on this to inspect tensor shapes and contents directly.
tfe.enable_eager_execution()

import numpy as np

  from ._conv import register_converters as _register_converters


In [6]:
# Read the whole text file into memory as one string.
with open('anna.txt', 'r') as f:
    text = f.read()

# Character vocabulary in sorted order, with lookup tables in both directions.
vocab = sorted(set(text))
int_to_vocab = dict(enumerate(vocab))
vocab_to_int = {char: idx for idx, char in int_to_vocab.items()}

# The text re-expressed as an int32 array of character ids.
encoded = np.array([vocab_to_int[char] for char in text], dtype=np.int32)

def get_batches(arr, batch_size, n_steps):
    '''Create a generator that returns batches of size
       batch_size x n_steps from arr.

       Each batch is a pair (x, y) of arrays with shape (batch_size, n_steps)
       and the dtype of arr. y holds, for every position in x, the element
       that follows it in arr. Trailing elements of arr that do not fill a
       complete batch are not used as features.

       Arguments
       ---------
       arr: 1-D array (or sequence) you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch

       Yields
       ------
       x: The features, a (batch_size, n_steps) window of arr
       y: The targets, the same window shifted one element ahead in arr

       Raises
       ------
       ValueError: if batch_size or n_steps is not positive (raised when the
           generator is first advanced)
    '''
    if batch_size <= 0 or n_steps <= 0:
        raise ValueError('batch_size and n_steps must be positive')

    arr = np.asarray(arr)

    # Get the number of characters per batch and number of batches we can make
    chars_per_batch = batch_size * n_steps
    n_batches = len(arr) // chars_per_batch
    if n_batches == 0:
        # Not enough data for even one full batch: yield nothing.
        return

    # Keep only enough characters to make full batches
    n_kept = n_batches * chars_per_batch

    # Reshape into batch_size rows
    inputs = arr[:n_kept].reshape((batch_size, -1))

    # Build the targets from the flat array shifted by one *before* reshaping,
    # so the target at the end of each row is the true next element (the first
    # element of the following row) rather than a zero-padded placeholder.
    # Only when arr has no element after the kept region does the very last
    # target stay 0.
    targets = np.zeros(n_kept, dtype=arr.dtype)
    shifted = arr[1:n_kept + 1]
    targets[:len(shifted)] = shifted
    targets = targets.reshape((batch_size, -1))

    for n in range(0, inputs.shape[1], n_steps):
        # The features and the targets, shifted by one
        yield inputs[:, n:n+n_steps], targets[:, n:n+n_steps]

In [8]:
# --- Batching ---
batch_size = 100    # sequences per batch
num_steps = 100     # sequence steps per batch

# --- Network ---
lstm_size = 512     # size of the hidden layers in the LSTMs
num_layers = 2      # number of LSTM layers
keep_prob = 0.5     # dropout keep probability

# --- Training ---
learning_rate = 0.001
epochs = 20

# Print losses every N iterations
print_every_n = 50

In [9]:
# Lazy generator of (x, y) batches over the encoded text.
batches = get_batches(arr=encoded, batch_size=batch_size, n_steps=num_steps)

In [11]:
# Pull the next (features, targets) pair from the batch generator.
x, y = next(batches)

In [13]:
# Sanity check: both arrays should be (batch_size, num_steps).
x.shape, y.shape

((100, 100), (100, 100))

In [26]:
def build_cell(lstm_size, keep_prob):
    """Build a single LSTM cell with dropout applied to its outputs.

    Arguments
    ---------
    lstm_size: Number of units passed to BasicLSTMCell
    keep_prob: Value passed to DropoutWrapper as output_keep_prob

    Returns
    -------
    The BasicLSTMCell wrapped in a DropoutWrapper.
    """
    return tf.contrib.rnn.DropoutWrapper(
        tf.contrib.rnn.BasicLSTMCell(lstm_size),
        output_keep_prob=keep_prob,
    )
# Stack num_layers dropout-wrapped LSTM cells; a fresh cell is built for each layer.
layer_cells = [build_cell(lstm_size, keep_prob) for _ in range(num_layers)]
cell = tf.contrib.rnn.MultiRNNCell(layer_cells)

# All-zero starting state for a batch of batch_size sequences.
initial_state = cell.zero_state(batch_size, tf.float32)

In [21]:
# One-hot encode the character ids, one slot per vocabulary entry.
x_one_hot = tf.one_hot(indices=x, depth=len(vocab))

In [25]:
# Sanity check: expect (batch_size, num_steps, len(vocab)).
x_one_hot.shape

TensorShape([Dimension(100), Dimension(100), Dimension(83)])

In [28]:
# Run the stacked cell over the one-hot inputs, starting from initial_state.
outputs, state = tf.nn.dynamic_rnn(
    cell=cell,
    inputs=x_one_hot,
    initial_state=initial_state,
)

In [30]:
# Sanity check: expect (batch_size, num_steps, lstm_size).
outputs.shape

TensorShape([Dimension(100), Dimension(100), Dimension(512)])

In [31]:
# Display the state returned by dynamic_rnn: a tuple of LSTMStateTuple(c, h).
# The repr prints the c/h tensor values, so the output is long.
state

(LSTMStateTuple(c=<tf.Tensor: id=7161, shape=(100, 512), dtype=float32, numpy=
array([[-0.01368002,  0.00426607, -0.00336585, ...,  0.01164236,
        -0.00806775,  0.01354871],
       [ 0.00271294,  0.02735158, -0.02770866, ..., -0.01740349,
        -0.03233358,  0.01301287],
       [ 0.02291019,  0.02089741,  0.00133606, ...,  0.02235182,
        -0.01734058, -0.01720683],
       ...,
       [-0.01667213, -0.00839673,  0.02491918, ..., -0.02335074,
         0.00675694,  0.02741068],
       [ 0.02162038,  0.04120306,  0.00279209, ..., -0.00942402,
         0.01755924,  0.02493419],
       [ 0.00657174,  0.01228187, -0.01015169, ..., -0.01198205,
         0.01444579,  0.01733122]], dtype=float32)>, h=<tf.Tensor: id=7164, shape=(100, 512), dtype=float32, numpy=
array([[-0.00675584,  0.00209218, -0.00169965, ...,  0.00569445,
        -0.00413005,  0.00680441],
       [ 0.00137915,  0.01376002, -0.01358947, ..., -0.00874368,
        -0.01606269,  0.00663864],
       [ 0.01153642,  0.0106