In [1]:
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense,Dropout,LSTM
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the text corpus and convert it to lowercase.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the corpus when running on another machine.
filename = "/home/akshay/Desktop/Jupyter notebook/LSTM Text Generation/wonderland"
# A context manager closes the file handle deterministically, and the explicit
# encoding makes the curly quotes in the corpus decode the same way regardless
# of the platform's default locale.
with open(filename, encoding="utf-8") as text_file:
    raw_text = text_file.read().lower()

In [3]:
# Build the vocabulary (every distinct character, in sorted order) and the two
# lookup tables: character -> integer id, and integer id -> character.
chars = sorted(set(raw_text))
char_to_int = {symbol: position for position, symbol in enumerate(chars)}
int_to_char = dict(enumerate(chars))

In [4]:
# Show the character -> integer id lookup table.
print(char_to_int)

{'\n': 0, ' ': 1, '!': 2, '#': 3, '$': 4, '%': 5, '(': 6, ')': 7, '*': 8, ',': 9, '-': 10, '.': 11, '/': 12, '0': 13, '1': 14, '2': 15, '3': 16, '4': 17, '5': 18, '6': 19, '7': 20, '8': 21, '9': 22, ':': 23, ';': 24, '?': 25, '@': 26, '[': 27, ']': 28, '_': 29, 'a': 30, 'b': 31, 'c': 32, 'd': 33, 'e': 34, 'f': 35, 'g': 36, 'h': 37, 'i': 38, 'j': 39, 'k': 40, 'l': 41, 'm': 42, 'n': 43, 'o': 44, 'p': 45, 'q': 46, 'r': 47, 's': 48, 't': 49, 'u': 50, 'v': 51, 'w': 52, 'x': 53, 'y': 54, 'z': 55, '‘': 56, '’': 57, '“': 58, '”': 59}


In [5]:
# Show the integer id -> character lookup table.
print(int_to_char)

{0: '\n', 1: ' ', 2: '!', 3: '#', 4: '$', 5: '%', 6: '(', 7: ')', 8: '*', 9: ',', 10: '-', 11: '.', 12: '/', 13: '0', 14: '1', 15: '2', 16: '3', 17: '4', 18: '5', 19: '6', 20: '7', 21: '8', 22: '9', 23: ':', 24: ';', 25: '?', 26: '@', 27: '[', 28: ']', 29: '_', 30: 'a', 31: 'b', 32: 'c', 33: 'd', 34: 'e', 35: 'f', 36: 'g', 37: 'h', 38: 'i', 39: 'j', 40: 'k', 41: 'l', 42: 'm', 43: 'n', 44: 'o', 45: 'p', 46: 'q', 47: 'r', 48: 's', 49: 't', 50: 'u', 51: 'v', 52: 'w', 53: 'x', 54: 'y', 55: 'z', 56: '‘', 57: '’', 58: '“', 59: '”'}


In [6]:
# Report the size of the corpus (in characters) and of the vocabulary
# (number of distinct characters).
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  163816
Total Vocab:  60


In [7]:
# Prepare the dataset of input -> output pairs encoded as integers.
# Every window of `seq_length` consecutive characters is one input pattern,
# and the character immediately following the window is its target.
seq_length = 100

# Encode the corpus once up front instead of re-encoding each character
# inside every overlapping window it belongs to.
encoded_text = [char_to_int[char] for char in raw_text]

# Slicing produces a fresh list per window, so no two patterns share storage.
dataX = [encoded_text[i:i + seq_length] for i in range(n_chars - seq_length)]
# The target of window i is the character at position i + seq_length, which is
# the encoded text with its first `seq_length` entries dropped.
dataY = encoded_text[seq_length:]

n_patterns = len(dataX)
print(n_patterns)

163716


In [8]:
# Arrange the patterns as a 3-D array laid out as [samples, time steps, features],
# with a single feature (the character id) per time step.
X = numpy.array(dataX).reshape(n_patterns, seq_length, 1)
print(X.shape)

(163716, 100, 1)


In [9]:
# Normalize: divide the integer character ids by the vocabulary size.
# NOTE: X is rebound from itself, so re-running this cell rescales the data again.
X = numpy.true_divide(X, float(n_vocab))

In [10]:
# One-hot encode the output variable.
# The number of classes is pinned to the vocabulary size: left to itself,
# to_categorical infers the width from the largest id present in dataY, which
# would yield a narrower encoding if the highest-id character never occurs as
# a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [11]:
# Define the network: a single 256-unit LSTM reading the input sequence,
# 20% dropout, then a softmax layer with one unit per one-hot output column.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])

In [12]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 256)               264192    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 60)                15420     
Total params: 279,612
Trainable params: 279,612
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Load previously trained network weights from a checkpoint file.
# These weights were generated using a single LSTM layer (author's note).
# NOTE: this rebinds `filename`, which earlier held the path of the text corpus.
filename = "weights-improvement-20-2.1934.hdf5"
model.load_weights(filename)
# Compile with categorical cross-entropy loss and the Adam optimizer.
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [14]:
# Pick a random seed pattern to prime text generation.
# numpy.random.randint excludes its upper bound, so len(dataX) itself is
# passed; subtracting 1 would make the final pattern impossible to select.
start = numpy.random.randint(0, len(dataX))
# Take a copy so that extending the pattern during generation cannot modify
# the stored training pattern inside dataX.
pattern = list(dataX[start])
print("Seed:")
print(''.join(int_to_char[value] for value in pattern))

Seed:
e all mad here. i’m mad.
you’re mad.’

‘how do you know i’m mad?’ said alice.

‘you must be,’ said t


In [15]:
# Generate 1000 characters, one at a time: predict the next character from the
# current window, emit it, then slide the window forward by one position.
for i in range(1000):
    # Shape the window as [1 sample, time steps, 1 feature] and scale it the
    # same way the training inputs were scaled.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x)
    # Greedy decoding: always take the most probable next character.
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new window (drop the oldest id, add the predicted one) rather
    # than appending in place, so the list the window was seeded from is
    # never modified.
    pattern = pattern[1:] + [index]
print("\nDone.")

he daterpillar.

‘ie course ’hu soot if the was i taid then i sant to the toie, 
‘he you dan no woil ii a cat,’ said the daterpillar.

‘ie course ’hu soot if the was i taid then i sant to the woie  and the woils sa then io the wai io the wai an iere  the had neter her here to beri to tee thet  the was aolig to the tabted and the while was io the wab aut oo th the toeee, and the thi gort of the sooee oh the care th the whit th the wooe  ‘he you den toe to be a date io toe oo the toie ’fu ’ou do wote ’hu  io you do woe oo tooe to teee ’hut thee ’huh the war  she mare thin the wai ano oo the woile  and the woile ta then io the was aow toee in the wai an iere  and the dooko so the corre sh the cade an the cane and toen  the was aolng on the was ao the cadd  an the dade oo the tabte, and the toieg th the caden if the care and the woiee th the thbte was a long tuiee the was aow oo the tai anone the white tabbit  and the wai autt on the tab int the was aow aali to the table, and the thi gort 