In [2]:
import keras
import time
import numpy as np

In [3]:
# load the text
with open('anna.txt', 'r') as f:
    text=f.read()
vocab = sorted(set(text))
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

In [6]:
def get_batches(arr, batch_size, n_steps):
    '''Create a generator that returns batches of size
       batch_size x n_steps from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    # Get the number of characters per batch and number of batches we can make
    chars_per_batch = batch_size * n_steps
    n_batches = len(arr)//chars_per_batch
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * chars_per_batch]
    
    # Reshape into batch_size rows
    arr = arr.reshape((batch_size, -1))
    
    for n in range(0, arr.shape[1], n_steps):
        # The features
        x = arr[:, n:n+n_steps]
        # The targets, shifted by one
        y_temp = arr[:, n+1:n+n_steps+1]
        
        # For the very last batch, y will be one character short at the end of 
        # the sequences which breaks things. To get around this, I'll make an 
        # array of the appropriate size first, of all zeros, then add the targets.
        # This will introduce a small artifact in the last batch, but it won't matter.
        y = np.zeros(x.shape, dtype=x.dtype)
        y[:,:y_temp.shape[1]] = y_temp
        
        yield x, y

In [22]:
# Build the model
vocab_size = len(vocab)
seq_len = 50
batch_size = 64
embed_size = 512

from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding, CuDNNLSTM

model = Sequential()
model.add(Embedding(vocab_size, 512, batch_input_shape=(batch_size, seq_len)))
model.add(CuDNNLSTM(256, return_sequences=True, stateful=True))
model.add(Dropout(0.5))
model.add(TimeDistributed(Dense(vocab_size)))
model.add(Dense(seq_len, activation='softmax'))

In [23]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (64, 50, 512)             42496     
_________________________________________________________________
cu_dnnlstm_2 (CuDNNLSTM)     (64, 50, 256)             788480    
_________________________________________________________________
dropout_2 (Dropout)          (64, 50, 256)             0         
_________________________________________________________________
time_distributed_2 (TimeDist (64, 50, 83)              21331     
_________________________________________________________________
dense_3 (Dense)              (64, 50, 50)              4200      
Total params: 856,507
Trainable params: 856,507
Non-trainable params: 0
_________________________________________________________________


In [24]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [25]:
for i, (X, Y) in enumerate(get_batches(encoded, batch_size, seq_len)):
    loss, acc = model.train_on_batch(X, Y)

ValueError: Error when checking target: expected dense_3 to have 3 dimensions, but got array with shape (64, 50)

In [20]:
X.shape

(64, 50)

In [21]:
Y.shape

(64, 50)