In [1]:
import sys 
import numpy as np
from keras.models import Sequential
from keras.layers import Dense,Dropout,LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
# Load the whole training corpus into memory and lower-case it so the
# vocabulary does not distinguish between upper- and lower-case letters.
file_name = 'data.txt'
# A context manager guarantees the handle is closed even if read() raises;
# the bare open(...).read() form left the file open until garbage collection.
with open(file_name, 'r', encoding='utf-8') as corpus_file:
    raw_txt = corpus_file.read()
raw_txt = raw_txt.lower()

In [4]:
# Vocabulary: every distinct character found in the corpus, in sorted order.
chars = sorted(set(raw_txt))
# Encoder table (character -> integer index into `chars`).
chars_to_int = {symbol: position for position, symbol in enumerate(chars)}
# Decoder table (integer index -> character), the inverse of the encoder.
int_to_chars = dict(enumerate(chars))

In [5]:
# Corpus length in characters, and the number of distinct characters in it.
n_chars, n_vocab = len(raw_txt), len(chars)

# Report both sizes, one per line.
print(
    'Total characters : {}'.format(n_chars),
    'Total Vocab : {}'.format(n_vocab),
    sep='\n',
)

Total characters : 163817
Total Vocab : 61


In [7]:
# Number of characters the network sees before predicting the next one.
seq_length = 100
# Slide a window of `seq_length` characters over the corpus one step at a
# time. Each window (integer-encoded) is an input pattern, and the character
# immediately after the window is its target.
window_starts = range(n_chars - seq_length)
dataX = [
    [chars_to_int[symbol] for symbol in raw_txt[begin:begin + seq_length]]
    for begin in window_starts
]
dataY = [chars_to_int[raw_txt[begin + seq_length]] for begin in window_starts]
n_pattern = len(dataX)
print('Total Patterns : {}'.format(n_pattern))

Total Patterns : 163717


In [14]:
# Arrange the encoded windows as (patterns, time steps, 1 feature) and scale
# the integer codes by the vocabulary size.
X = np.asarray(dataX).reshape(n_pattern, seq_length, 1) / float(n_vocab)
# One-hot encode the target characters.
# NOTE(review): no num_classes is passed, so the width of `y` is inferred from
# the labels themselves; in the recorded run it is 60 while the vocabulary
# has 61 entries. Left as-is because saved checkpoints depend on this width.
y = np_utils.to_categorical(dataY)

In [18]:
# Sanity-check the tensor dimensions before building the model.
print(
    'shape of X : {}'.format(X.shape),
    'shape of y : {}'.format(y.shape),
    sep='\n',
)

shape of X : (163717, 100, 1)
shape of y : (163717, 60)


In [19]:
# Character-level language model: one 256-unit LSTM reading the input window,
# 20% dropout, then a softmax over as many classes as `y` has columns.
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table.
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 256)               264192    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 60)                15420     
Total params: 279,612
Trainable params: 279,612
Non-trainable params: 0
_________________________________________________________________
None


In [20]:
# File-name template for saved weights; the epoch number and training loss
# are substituted into the placeholders when a checkpoint is written.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
# Monitor the training loss and keep a checkpoint only when it reaches a new
# minimum.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
callbacks_list = [checkpoint]

In [21]:
# Train for 20 epochs in mini-batches of 128 patterns; the checkpoint
# callback is invoked during training via `callbacks_list`.
model.fit(
    X,
    y,
    epochs=20,
    batch_size=128,
    callbacks=callbacks_list,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20

Epoch 00001: loss improved from inf to 3.00834, saving model to weights-improvement-01-3.0083.hdf5
Epoch 2/20

Epoch 00002: loss improved from 3.00834 to 2.82683, saving model to weights-improvement-02-2.8268.hdf5
Epoch 3/20

Epoch 00003: loss improved from 2.82683 to 2.73214, saving model to weights-improvement-03-2.7321.hdf5
Epoch 4/20

Epoch 00004: loss improved from 2.73214 to 2.66948, saving model to weights-improvement-04-2.6695.hdf5
Epoch 5/20

Epoch 00005: loss improved from 2.66948 to 2.60691, saving model to weights-improvement-05-2.6069.hdf5
Epoch 6/20

Epoch 00006: loss improved from 2.60691 to 2.54483, saving model to weights-improvement-06-2.5448.hdf5
Epoch 7/20

Epoch 00007: loss improved from 2.54483 to 2.49033, saving model to weights-improvement-07-2.4903.hdf5
Epoch 8/20

Epoch 00008: loss improved from 2.49033 to 2.44013, saving model to weights-improvement-08-2.4401.hdf5
Epoch 9/20

Epoch 00009: loss improv

<keras.callbacks.History at 0x247e9e542e8>

In [22]:
# Restore the weights from a saved checkpoint and recompile the model.
# NOTE(review): the checkpoint name is hard-coded and embeds the loss of one
# particular training run; a fresh run will produce differently named files,
# so update this value to match a checkpoint that actually exists on disk.
filename = "weights-improvement-20-2.0397.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [31]:
# Choose a random training window to seed text generation.
# np.random.randint excludes its upper bound, so passing len(dataX) covers
# every index; the previous `len(dataX)-1` could never select the last
# pattern and raised on a single-pattern dataset.
start = np.random.randint(0, len(dataX))
# Copy the window so that later edits to `pattern` cannot alter the training
# data stored in `dataX`.
pattern = list(dataX[start])
print('Seed :')
print("\"", ''.join([int_to_chars[value] for value in pattern]), "\"")

Seed :
" under this paragraph to the
     project gutenberg literary archive foundation.  royalty payments
   "


In [32]:
# Generate 1000 characters greedily: feed the current window to the model,
# emit the most probable next character, then slide the window forward by
# dropping its first element and appending the predicted index.
scale = float(n_vocab)  # same normalisation that was applied to the training input
for step in range(1000):
    x = np.reshape(pattern, (1, len(pattern), 1)) / scale
    prediction = model.predict(x, verbose=0)
    # Cast to a plain int so the sliding window stays a list of Python ints.
    index = int(np.argmax(prediction))
    result = int_to_chars[index]
    sys.stdout.write(result)
    # Build a new list instead of append(): `pattern` may still be the very
    # list object held in `dataX`, and appending to it would lengthen that
    # training pattern and break a later re-run of the reshape cell.
    pattern = pattern[1:] + [index]
print("\nDone.")

   eedertonc to dnd the perseo of the project gutenberg-tm electronic works on protede the parsen of the project gutenberg-tm electronic works on poo cno cisiri the parss of this agreement to the project gutenberg-tm electronic works on poo cno cisiri th the porject gutenberg-tm electronic works on poo in any piree trohect gutenberg-tm electronic works on poo cno cisiri th the prrject gutenberg-tm electronic works on poo in any pir crolai in the porject gutenberg-tm electronic works on poo in any pir crola ti the porject gutenberg-tm electronic works on poo in aly pir crol an the work on the project gutenberg-tm electronic works on poo in aly pir crol an the work on the project gutenberg-tm electronic works on poo in aly pir crol an the work on the project gutenberg-tm electronic works on poo in aly pir crol an the work on the project gutenberg-tm electronic works on poo in aly pir crol an the work on the project gutenberg-tm electronic works on poo in aly pir crol an the work on the p