### Import Libraries and Dependencies

In [1]:
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


### Mount Drive to Import Text Data

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

%cd /content/gdrive/My Drive/Colab Notebooks/EIP Assignments/Assignment-2/

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive
/content/gdrive/My Drive/Colab Notebooks/EIP Assignments/Assignment-2


### Load the data and convert it to lowercase

 Load the ASCII text for the book into memory and convert all of the characters to lowercase to reduce the vocabulary that the network must learn.

In [0]:
# load ascii text and covert to lowercase
filename = "wonderland_without_punctuations.txt"
raw_text = open(filename).read()
raw_text = raw_text.lower()

### Removing Tailing Newline from data

In [4]:
raw_text = raw_text.replace('\n','')
print(raw_text)



### Converting the Data

- We must prepare the data for modeling by the neural network. 
- We cannot model the characters directly, instead we must convert the characters to integers.
- We can do this easily by first creating a set of all of the distinct characters in the book, then creating a map of each character to a unique integer.

In [0]:
# create mapping of unique chars to integers
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

In [6]:
print(char_to_int)

{' ': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}


### Summarize the data

In [7]:
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  132674
Total Vocab:  27


### Prepare the dataset

In [8]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
  seq_in = raw_text[i:i + seq_length]
  seq_out = raw_text[i + seq_length]
  dataX.append([char_to_int[char] for char in seq_in])
  dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  132574


### Reshaping, Normalizing and One-Hot Encoding

In [0]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

In [10]:
print(X.shape[:])
# print(X)

(132574, 100, 1)


### Model Architecture

In [12]:
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(256))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

W0727 05:27:31.049972 140594308777856 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0727 05:27:31.087101 140594308777856 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0727 05:27:31.093406 140594308777856 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0727 05:27:31.408314 140594308777856 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0727 05:27:31.417314 

### Callbacks

In [0]:
# define the checkpoint
filepath="Models/weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

### Model Training

In [15]:
model.fit(X, y, epochs=100, batch_size=128, callbacks=callbacks_list)

Epoch 1/100

Epoch 00001: loss improved from inf to 2.69276, saving model to Models/weights-improvement-01-2.6928.hdf5
Epoch 2/100

Epoch 00002: loss improved from 2.69276 to 2.28786, saving model to Models/weights-improvement-02-2.2879.hdf5
Epoch 3/100

Epoch 00003: loss improved from 2.28786 to 2.07023, saving model to Models/weights-improvement-03-2.0702.hdf5
Epoch 4/100

Epoch 00004: loss improved from 2.07023 to 1.92853, saving model to Models/weights-improvement-04-1.9285.hdf5
Epoch 5/100

Epoch 00005: loss improved from 1.92853 to 1.82564, saving model to Models/weights-improvement-05-1.8256.hdf5
Epoch 6/100

Epoch 00006: loss improved from 1.82564 to 1.74302, saving model to Models/weights-improvement-06-1.7430.hdf5
Epoch 7/100

Epoch 00007: loss improved from 1.74302 to 1.67368, saving model to Models/weights-improvement-07-1.6737.hdf5
Epoch 8/100

Epoch 00008: loss improved from 1.67368 to 1.61114, saving model to Models/weights-improvement-08-1.6111.hdf5
Epoch 9/100

Epoch 0

In [16]:
model.fit(X, y, epochs=41, batch_size=128, callbacks=callbacks_list)

Epoch 1/41

Epoch 00001: loss improved from 0.67511 to 0.67227, saving model to Models/weights-improvement-01-0.6723.hdf5
Epoch 2/41

Epoch 00002: loss improved from 0.67227 to 0.66931, saving model to Models/weights-improvement-02-0.6693.hdf5
Epoch 3/41

Epoch 00003: loss improved from 0.66931 to 0.66731, saving model to Models/weights-improvement-03-0.6673.hdf5
Epoch 4/41

Epoch 00004: loss improved from 0.66731 to 0.65470, saving model to Models/weights-improvement-04-0.6547.hdf5
Epoch 5/41

Epoch 00005: loss improved from 0.65470 to 0.65154, saving model to Models/weights-improvement-05-0.6515.hdf5
Epoch 6/41

Epoch 00006: loss did not improve from 0.65154
Epoch 7/41

Epoch 00007: loss improved from 0.65154 to 0.64724, saving model to Models/weights-improvement-07-0.6472.hdf5
Epoch 8/41

Epoch 00008: loss improved from 0.64724 to 0.63871, saving model to Models/weights-improvement-08-0.6387.hdf5
Epoch 9/41

Epoch 00009: loss did not improve from 0.63871
Epoch 10/41

Epoch 00010: lo

<keras.callbacks.History at 0x7fde4a1407f0>

### Load the Trained model

In [0]:
# # load the network weights
# filename = "weights-improvement-19-1.9435.hdf5"
# model.load_weights(filename)
# model.compile(loss='categorical_crossentropy', optimizer='adam')

### Assign integers to character

In [0]:
int_to_char = dict((i, c) for i, c in enumerate(chars))

### Text generated by the model

In [19]:
# pick a random seed
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(500):
  x = numpy.reshape(pattern, (1, len(pattern), 1))
  x = x / float(n_vocab)
  prediction = model.predict(x, verbose=0)
  index = numpy.argmax(prediction)
#   print(index)
  result = int_to_char[index]
  seq_in = [int_to_char[value] for value in pattern]
  sys.stdout.write(result)
  pattern.append(index)
  pattern = pattern[1:len(pattern)]
print("\nDone.")

Seed:
" e kings argument was that anything that had a head could bebeheaded and that you werent to talk nons "
ensethe king and she was puite dianged oo itll seem to come on and ransi dont because when they make oh littas when she caul soon for the fxecutioner fetch ne ht andbehind himdi bant he peopon it hsom its feet at the end of the least not said alice indignant it i shinke she fane was i dont know the jurors were oicelate so said the duchess and the moral of that is but it did not sneeze off and rarsed oarricd the gryphon and alice was solet mobusion that she was now and the cookfouptaris there wer
Done.
