In [19]:
import sys
from pathlib import Path

import numpy
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.utils import np_utils

In [3]:
# Attach Google Drive to this Colab runtime so files stored there can be read
# through the regular filesystem.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Read the training corpus from the mounted Drive and lowercase it so that
# upper- and lower-case letters map to the same symbol.
filename = "/content/drive/MyDrive/Colab Notebooks/TUKL summer internship/week1/pg11.txt"
# The context manager closes the file handle as soon as the text is read;
# a bare open(...).read() leaves the handle open until garbage collection.
with open(filename, 'r', encoding='utf-8') as corpus_file:
    raw_text = corpus_file.read().lower()

In [4]:
# Build the vocabulary: every distinct character of the corpus in sorted
# order, plus a lookup from each character to its position in that order.
chars = list(set(raw_text))
chars.sort()
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

In [6]:
# Report the corpus length (in characters) and the vocabulary size; both
# values are reused by the dataset-building and normalization cells.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  163781
Total Vocab:  59


In [8]:
# Slide a window of seq_length characters over the corpus, one character at
# a time. Each window (encoded as integers) is an input pattern and the
# character immediately following it is the target.
seq_length = 100
dataX, dataY = [], []
for window_start in range(n_chars - seq_length):
    target_pos = window_start + seq_length
    window_text = raw_text[window_start:target_pos]
    dataX.append([char_to_int[char] for char in window_text])
    dataY.append(char_to_int[raw_text[target_pos]])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  163681


In [9]:
# Arrange the integer patterns as [samples, time steps, features] and scale
# them by the vocabulary size so every input value lies in [0, 1).
sample_shape = (n_patterns, seq_length, 1)
X = numpy.reshape(dataX, sample_shape) / float(n_vocab)
# Targets become one-hot rows, one column per character class.
y = np_utils.to_categorical(dataY)

In [10]:
# Single-layer LSTM: 256 recurrent units, 20% dropout, then a softmax over
# the output classes. Input shape is taken from X (time steps, features).
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [12]:
# define the checkpoint
# The file name template embeds the epoch number and the training loss, and
# a file is written only when the monitored loss improves (lower is better).
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
callbacks_list = [checkpoint]

In [13]:
# Train for 20 epochs in mini-batches of 128, checkpointing via the callbacks.
model.fit(
    X,
    y,
    epochs=20,
    batch_size=128,
    callbacks=callbacks_list,
)

Epoch 1/20

Epoch 00001: loss improved from inf to 2.98374, saving model to weights-improvement-01-2.9837.hdf5
Epoch 2/20

Epoch 00002: loss improved from 2.98374 to 2.80501, saving model to weights-improvement-02-2.8050.hdf5
Epoch 3/20

Epoch 00003: loss improved from 2.80501 to 2.71232, saving model to weights-improvement-03-2.7123.hdf5
Epoch 4/20

Epoch 00004: loss improved from 2.71232 to 2.64213, saving model to weights-improvement-04-2.6421.hdf5
Epoch 5/20

Epoch 00005: loss improved from 2.64213 to 2.58932, saving model to weights-improvement-05-2.5893.hdf5
Epoch 6/20

Epoch 00006: loss improved from 2.58932 to 2.53660, saving model to weights-improvement-06-2.5366.hdf5
Epoch 7/20

Epoch 00007: loss improved from 2.53660 to 2.48912, saving model to weights-improvement-07-2.4891.hdf5
Epoch 8/20

Epoch 00008: loss improved from 2.48912 to 2.44576, saving model to weights-improvement-08-2.4458.hdf5
Epoch 9/20

Epoch 00009: loss improved from 2.44576 to 2.40713, saving model to weig

<keras.callbacks.History at 0x7fba70440950>

In [15]:
# Load the network weights from the most recent training checkpoint.
# Checkpoint names embed the epoch and the loss value, so the exact name
# changes from run to run; discover the file instead of hard-coding the
# loss that one particular run happened to reach.
checkpoint_paths = list(Path(".").glob("weights-improvement-*.hdf5"))
if not checkpoint_paths:
    raise FileNotFoundError(
        "No weights-improvement-*.hdf5 checkpoint found in the working "
        "directory; run the training cell first."
    )
# Checkpoints are only written when the loss improves, so the newest file
# holds the best weights of the latest run.
newest_checkpoint = max(checkpoint_paths, key=lambda path: path.stat().st_mtime)
filename = str(newest_checkpoint)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [16]:
# Reverse lookup: integer index -> character, used to decode predictions.
int_to_char = dict(enumerate(chars))

In [20]:
# Generate 1000 characters: seed the model with a random training window,
# then repeatedly predict the next character and slide the window forward.

# pick a random seed
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: appending to dataX[start] itself would leave that training
# row one element too long and break any later reshape of dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
    # Same preprocessing as the training inputs: [1, time steps, 1], scaled.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable class.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest character, append the predicted one.
    pattern = pattern[1:] + [index]
print("\nDone.")

Seed:
" 
here the queen put on her spectacles, and began staring at the hatter,
who turned pale and fidgeted "
 an the could, 
and the whrt hn the care an the could so the thate whs oo whth the sooe. and the whit hn sae soted an anl, and she wordd har ine toiee an the could, and saed to the kure and the was oot in tie toiee  and the whrt ho the moote so the toies oo the toils. 

the word  she pooe turtle harden an the could, and saed to the kure and the wordd hard oe the crrre of the tar oo the toiee  and toe teit to tee the gorse if thetee an the corse. 

the word  she pooe turtle harden an the cade  and the whrt hn the care thth a lore fno ano an the cadl, and the white was a lintle toiee of the car, and then the woide of the carc th tene the car  she woile toine oo the toils oo the toids. 

''bhdpee tiit il whe horse sf the tae--                               *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    *    

In [21]:
# Deeper variant: two stacked 256-unit LSTM layers, each followed by 20%
# dropout. The first LSTM returns the full sequence so the second one
# receives an input at every time step.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [23]:
# Train the stacked model for 50 epochs in mini-batches of 64 (no callbacks).
model.fit(
    X,
    y,
    epochs=50,
    batch_size=64,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fba29527a10>

In [24]:
# Reverse lookup: integer index -> character, used to decode predictions.
int_to_char = {position: symbol for position, symbol in enumerate(chars)}

In [25]:
# Generate 1000 characters with the current model: seed it with a random
# training window, then repeatedly predict the next character and slide the
# window forward.

# pick a random seed
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: appending to dataX[start] itself would leave that training
# row one element too long and break any later reshape of dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
    # Same preprocessing as the training inputs: [1, time steps, 1], scaled.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable class.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest character, append the predicted one.
    pattern = pattern[1:] + [index]
print("\nDone.")

Seed:
" ),
and sometimes she scolded herself so severely as to bring tears into
her eyes; and once she remem "
bered herself up to the door and see that had gallen into a little bertat one of the lort of the shope. 
'it was a very curious beau? i shink you'd better not ' said the mouse, and the gatter was variing the table to the sueen side of the roof, 
'i dan tell you the bane uas that ' said the mouse, and the gatter was a little shriek, and said to herself, 'i must be gatd to the sea. they're doesn't be a little thing!'

'i dan't ae a vall ' said the mouse, and the gatter was a little shriek, and said, 'if you don't know what they're a catcu?' she said to herself, 'i must be gatd to the sea- the doomouse was she was oot of the same againtt the door the way of the ooeert of the soees of the same and a little breatures of the coor, and she was going to be lost that they could be a little bertat some at the cook had and become the sabbit was an incoudd, and the door and she was now an