In [1]:
# Load Larger LSTM network and generate text
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
# load ascii text and covert to lowercase
filename = "d:/machine learning/wonderland.txt"
raw_text = open(filename).read()
raw_text = raw_text.lower()
# create mapping of unique chars to integers, and a reverse mapping
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))
# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)


Using TensorFlow backend.


Total Characters:  143550
Total Vocab:  42


In [2]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
	seq_in = raw_text[i:i + seq_length]
	seq_out = raw_text[i + seq_length]
	dataX.append([char_to_int[char] for char in seq_in])
	dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print( "Total Patterns: ", n_patterns)


Total Patterns:  143450


In [3]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

In [5]:
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
#model.add(LSTM(256))
#model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
# load the network weights
filename = "d:/machine learning/weights-improvement-02-2.6854.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')


W0730 08:05:29.860978 15764 deprecation_wrapper.py:119] From C:\Users\Tony Diana\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0730 08:05:30.109389 15764 deprecation_wrapper.py:119] From C:\Users\Tony Diana\anaconda3\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



In [7]:
# pick a random seed
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print( "Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")


Seed:
" frame, or something of the sort.

next came an angry voice--the rabbit's--'pat! pat! where are you?' "


In [13]:
# generate characters
for i in range(1000):
	x = numpy.reshape(pattern, (1, len(pattern), 1))
	x = x / float(n_vocab)
	prediction = model.predict(x, verbose=0)
	#index = numpy.argmax(prediction)
	#result = int_to_char[value]
	#seq_in = [int_to_char[value] for value in pattern]
	#sys.stdout.write(result)
	#pattern.append(index)
	#pattern = pattern[1:len(pattern)]
print(prediction)

[[[1.31128095e-02 3.12280916e-02 3.00940406e-02 ... 1.90802068e-02
   2.24800818e-02 1.96601376e-02]
  [1.72385685e-02 9.80789065e-02 3.67218181e-02 ... 4.47185151e-03
   2.49972958e-02 1.12032574e-02]
  [2.77334396e-02 1.88526228e-01 1.67463124e-02 ... 7.06118240e-04
   2.88389251e-02 3.44211841e-03]
  ...
  [1.66888945e-02 3.63601327e-01 3.89477820e-03 ... 1.17283089e-04
   2.70461943e-02 1.05317379e-03]
  [1.46762645e-02 1.48606196e-01 2.23134086e-03 ... 2.83287925e-04
   8.84336513e-03 1.05503155e-03]
  [9.83386412e-02 1.20523885e-01 8.58272891e-04 ... 1.27915887e-03
   1.38617000e-02 2.60110566e-04]]]


In [14]:
print(str(prediction))

[[[1.31128095e-02 3.12280916e-02 3.00940406e-02 ... 1.90802068e-02
   2.24800818e-02 1.96601376e-02]
  [1.72385685e-02 9.80789065e-02 3.67218181e-02 ... 4.47185151e-03
   2.49972958e-02 1.12032574e-02]
  [2.77334396e-02 1.88526228e-01 1.67463124e-02 ... 7.06118240e-04
   2.88389251e-02 3.44211841e-03]
  ...
  [1.66888945e-02 3.63601327e-01 3.89477820e-03 ... 1.17283089e-04
   2.70461943e-02 1.05317379e-03]
  [1.46762645e-02 1.48606196e-01 2.23134086e-03 ... 2.83287925e-04
   8.84336513e-03 1.05503155e-03]
  [9.83386412e-02 1.20523885e-01 8.58272891e-04 ... 1.27915887e-03
   1.38617000e-02 2.60110566e-04]]]
