In [1]:
#! pip install --upgrade keras
# Run this command in a terminal: conda install cudnn   (or from a notebook cell: ! conda install cudnn)

In [2]:
import tensorflow as tf
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
# The list is empty on a CPU-only machine, so iterate rather than index element 0;
# iterating also applies the same setting to every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import re

# Imports for visualisations
from IPython.display import HTML as html_print
from IPython.display import display
import keras.backend as K

In [3]:
# Read data
filename = "ggmarques.txt"
# Use a context manager so the file handle is closed as soon as the text is read.
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
# Collapse every run of spaces into a single space.
raw_text = re.sub(r'[ ]+', ' ', raw_text)

# create mapping of unique chars to integers (and the inverse mapping);
# sorting makes the character -> index assignment deterministic.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = dict(enumerate(chars))

n_chars = len(raw_text)   # total number of characters in the corpus
n_vocab = len(chars)      # number of distinct characters

In [4]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100

# Encode the corpus once; every training window is then a plain slice of this
# list instead of being re-encoded character by character.
encoded_text = [char_to_int[char] for char in raw_text]

# Input i is the window of seq_length characters starting at i; its target is
# the character that immediately follows that window.
dataX = [encoded_text[i:i + seq_length] for i in range(n_chars - seq_length)]
dataY = encoded_text[seq_length:]

n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))

# normalize
X = X / float(n_vocab)

# one hot encode the output variable
# num_classes is given explicitly: otherwise the width is inferred from the
# largest target index, which is smaller than n_vocab whenever the highest
# character index never occurs as a target, and the softmax output would then
# not line up with int_to_char.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

# define the checkpoint
# Saves the weights whenever the training loss improves. It only takes effect
# when callbacks_list is passed to model.fit(..., callbacks=callbacks_list).
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]


Total Patterns:  832410


In [5]:
# define the LSTM model
# Two stacked 512-unit LSTM layers, each followed by 50% dropout, then a softmax
# layer with one output per column of the one-hot target matrix y.
model = Sequential([
    # input_shape is (time steps, features) taken from X; return_sequences=True
    # passes the full sequence on to the second LSTM layer.
    LSTM(512, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.5),
    LSTM(512),
    Dropout(0.5),
    Dense(y.shape[1], activation='softmax'),
])

# build the LSTM model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the built model
# (add callbacks=callbacks_list to the call below to save checkpoints during training)
model.fit(X, y, epochs=300, batch_size=2048)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 512)          1052672   
_________________________________________________________________
dropout (Dropout)            (None, 100, 512)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 512)               2099200   
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 85)                43605     
Total params: 3,195,477
Trainable params: 3,195,477
Non-trainable params: 0
_________________________________________________________________
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoc

<tensorflow.python.keras.callbacks.History at 0x7f96b6380850>

In [6]:
#---- Training takes a long time, so persist the trained model; a later session
#---- can read the saved files back instead of training again.

# Architecture: serialised to JSON.
model_json = model.to_json()
with open("LSTM1-marqoues-300epoch.json", mode="w") as architecture_file:
    architecture_file.write(model_json)

# Weights: serialised to HDF5.
model.save_weights("LSTM1-marqoues-300epoch-weights.h5")

print("Saved model to disk")


Saved model to disk


In [7]:
# NOTE(review): the triple-quoted string below is disabled example code, not a
# comment; as the only expression in this cell, its value is echoed as the cell
# output. Before enabling it:
#   - `model_from_json` is not among the imports at the top of this notebook
#     (presumably `from keras.models import model_from_json` -- confirm);
#   - the file names it reads differ from the ones written by the save cell above.
''' load model from the Github
json_file = open('sample_LSTM1-100epoch.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")
'''

' load model from the Github\njson_file = open(\'sample_LSTM1-100epoch.json\', \'r\')\nloaded_model_json = json_file.read()\njson_file.close()\nloaded_model = model_from_json(loaded_model_json)\n# load weights into new model\nloaded_model.load_weights("model.h5")\nprint("Loaded model from disk")\n'

In [13]:

#--------- Now the model is ready and we start to focus on Visualization -----------

# Expose the output of an intermediate layer so its activations can be visualised.
# model.layers[2] is the second LSTM layer; its output shape is (Batch_Size, 512).
lstm = model.layers[2]
print(lstm.output)

# Backend function mapping the model's input tensor to that LSTM layer's output.
attn_func = K.function([model.inputs[0]], [lstm.output])

# get html element
def cstr(s, color='black'):
    """Return an HTML fragment showing `s` on a `color` background.

    Args:
        s: the text (here a single character) to display.
        color: CSS colour used as the background colour.

    Returns:
        A `<text ...>` element as a string. A single space is rendered with
        left padding so its background colour stays visible; any other text
        is HTML-escaped and followed by one space.
    """
    # Imported locally so the helper does not depend on a notebook-level import.
    from html import escape

    if s == ' ':
        return "<text style=color:#000;padding-left:10px;background-color:{}> </text>".format(color)
    # Escape '<', '>', '&' and quotes so generated characters cannot break the markup.
    return "<text style=color:#000;background-color:{}>{} </text>".format(color, escape(s))

# print html
def print_color(t):
    """Display a sequence of (text, colour) pairs as one block of coloured HTML."""
    fragments = []
    for piece, colour in t:
        fragments.append(cstr(piece, color=colour))
    display(html_print(''.join(fragments)))

# get appropriate color for value
def get_clr(value):
    """Map an activation value to a background colour.

    The range [0, 1] is split into 20 buckets of width 0.05, ordered from blue
    (low values) through near-white to red (high values).

    Args:
        value: activation, expected in [0, 1]. Values outside that range are
            clamped to the first or last colour.

    Returns:
        A '#rrggbb' colour string.
    """
    colors = ('#85c2e1', '#89c4e2', '#95cae5', '#99cce6', '#a1d0e8',
              '#b2d9ec', '#baddee', '#c2e1f0', '#eff7fb', '#f9e8e8',
              '#f9e8e8', '#f9d4d4', '#f9bdbd', '#f8a8a8', '#f68f8f',
              '#f47676', '#f45f5f', '#f34343', '#f33b3b', '#f42e2e')
    bucket = int((value * 100) / 5)
    # value == 1.0 lands in bucket 20, one past the end of the palette; clamp
    # so that boundary (and any out-of-range) values still get a colour.
    bucket = max(0, min(bucket, len(colors) - 1))
    return colors[bucket]

# sigmoid function
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), computed with NumPy.

    Accepts anything np.exp accepts; arrays are transformed element-wise.
    """
    return 1 / (1 + np.exp(-x))

Tensor("lstm_1/PartitionedCall:0", shape=(None, 512), dtype=float32)


In [14]:
def visualize(output_values, result_list, cell_no):
    """Display the generated characters coloured by one unit's activation.

    Args:
        output_values: one activation vector per generated character.
        result_list: the generated characters, in the same order.
        cell_no: index of the unit within each activation vector to visualise.
    """
    print("\nCell Number:", cell_no, "\n")
    # Pair each character with the colour for that unit's activation at the
    # step where the character was generated.
    text_colours = [
        (result_list[step], get_clr(activations[cell_no]))
        for step, activations in enumerate(output_values)
    ]
    print_color(text_colours)
    
# Get Predictions from random sequence
def get_predictions(data, n_generate=1000):
    """Generate text from a randomly chosen seed and record LSTM activations.

    A seed pattern is picked at random from `data`. The model then repeatedly
    predicts the most likely next character, which is appended to a sliding
    window used as the next input.

    Args:
        data: sequence of seed patterns, each a sequence of integer character
            indices (e.g. `dataX`). It is not modified.
        n_generate: number of characters to generate (default 1000).

    Returns:
        (output_values, result_list): for every generated character, the
        sigmoid of the activation vector returned by `attn_func`, and the
        character itself.

    Raises:
        ValueError: if `data` is empty.
    """
    if len(data) == 0:
        raise ValueError("data must contain at least one seed pattern")

    # randint's upper bound is exclusive, so len(data) makes every pattern
    # (including the last one) eligible as a seed.
    start = np.random.randint(0, len(data))
    # Work on a copy: the window is extended below, and the caller's training
    # data must not be modified.
    pattern = list(data[start])
    result_list, output_values = [], []
    print("Seed:")
    print("\"" + ''.join([int_to_char[value] for value in pattern]) + "\"")
    print("\nGenerated:")

    for _ in range(n_generate):
        # Reshaping input array for predicting next character
        # ([samples, time steps, features], scaled like the training inputs)
        x = np.reshape(pattern, (1, len(pattern), 1))
        x = x / float(n_vocab)

        # Prediction
        prediction = model.predict(x, verbose=0)

        # LSTM Activations (first output, first sample of the batch)
        output = attn_func([x])[0][0]
        output_values.append(sigmoid(output))

        # Predicted Character (greedy: the most probable class)
        index = int(np.argmax(prediction))
        result_list.append(int_to_char[index])

        # Preparing input for next character: drop the oldest index and
        # append the newly predicted one.
        pattern = pattern[1:] + [index]
    return output_values, result_list

In [26]:
# Generate text once from a random seed, then colour that same text by the
# activation of several hand-picked units of the LSTM layer.
output_values, result_list = get_predictions(dataX)

cells_to_show = (500, 435, 463, 470, 475, 480, 350, 400)
for cell_no in cells_to_show:
    visualize(output_values, result_list, cell_no)

Seed:
" would never marry a man who was so simple that he had 
wasted almost an hour and even went without "

Generated:

Cell Number: 500 




Cell Number: 200 




Cell Number: 435 




Cell Number: 463 




Cell Number: 470 




Cell Number: 475 




Cell Number: 480 




Cell Number: 350 




Cell Number: 400 

