In [6]:
import numpy as np
import re
from keras.models import Sequential
from keras.models import Model
from keras.layers import LSTM, Dense, Dropout, Input, concatenate, BatchNormalization, Activation
from keras.layers.embeddings import Embedding
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from pickle import dump
from pickle import load 
from keras.models import load_model
import pickle
from keras.utils import plot_model

In [7]:
# Load the ready-to-run Shakespeare training data.
# NOTE: despite the ".h5" extension, both files are read with pickle's `load`,
# i.e. they are treated as pickles, not HDF5 containers.
# SECURITY: unpickling can execute arbitrary code -- only load files you trust.
# The `with` blocks make sure each file handle is closed right after reading.
# X is later indexed as X.shape[1] / X.shape[2] to define the model input shape
# and y is used as the training target for both model outputs.
with open("X_training_shakespeare_final.h5", "rb") as features_file:
    X = load(features_file)
with open("y_training_shakespeare_final.h5", "rb") as targets_file:
    y = load(targets_file)

In [8]:
# Load the pickled mapping; its number of entries is the vocabulary size,
# which is used as the width of the softmax output layers.
# (presumably a character -> integer index mapping -- TODO confirm)
# SECURITY: unpickling can execute arbitrary code -- only load files you trust.
# The `with` block closes the file handle as soon as the mapping is read.
with open("mapping.pkl", "rb") as mapping_file:
    mapping = load(mapping_file)
vocab_size = len(mapping)

In [None]:
# Restore the saved Model 5 network: some of its pretrained layers are reused
# to initialize the matching layers of Model 7.
prev_model = load_model(filepath="model5_6.h5")

In [12]:
# Model 7: dual-input LSTM + Dense architecture (see the model_dense diagram).
# Both inputs have the same shape and are processed by two separate channels:
#   * channel 1 (char_input_1): a new LSTM followed by 3 Dense layers, each one
#     followed by batch normalization and a ReLU activation; this channel also
#     feeds its own softmax head named 'extra';
#   * channel 2 (char_input_2): a stack of LSTMs, same architecture as Model 5.
# The two channels are concatenated, passed through another 120-unit Dense
# layer and then through the final softmax Dense layer.
# Layers are created in a fixed order on purpose: Keras numbers unnamed layers
# (dropout, batch normalization, activation) by creation order.

sequence_shape = (X.shape[1], X.shape[2])
char_1_input = Input(shape=sequence_shape, name='char_input_1')
char_2_input = Input(shape=sequence_shape, name="char_input_2")

# --- Channel 1: LSTM -> Dropout -> 3 x (Dense -> BatchNorm -> ReLU) ---
x_1 = LSTM(120, name='x_1')(char_1_input)
a_3 = Dropout(0.1)(x_1)
for dense_name in ('dense_1', 'dense_2', 'dense_3'):
    a_3 = Dense(120, name=dense_name)(a_3)
    a_3 = BatchNormalization()(a_3)
    a_3 = Activation('relu')(a_3)

# Auxiliary softmax head attached directly to channel 1.
extra_output = Dense(vocab_size, activation='softmax', name='extra')(a_3)

# --- Channel 2: LSTM -> Dropout -> LSTM -> LSTM ---
# The first two LSTMs return full sequences so the next LSTM can consume them;
# only the last one collapses the sequence into a single vector.
y_1 = LSTM(120, return_sequences=True, name='y_1')(char_2_input)
d_2 = Dropout(0.1)(y_1)
y_2 = LSTM(120, return_sequences=True, name='y_2')(d_2)
y_3 = LSTM(120, name='y_3')(y_2)

# --- Merge both channels and produce the main prediction ---
z = concatenate([y_3, a_3])
z_1 = Dense(120, activation='relu', name='dense_4')(z)
main_output = Dense(vocab_size, activation='softmax', name='softmax')(z_1)

model = Model(
    inputs=[char_1_input, char_2_input],
    outputs=[main_output, extra_output],
)

In [13]:
# Draw the Model 7 graph and write it to model_dense.png
plot_model(model=model, to_file="model_dense.png")

In [14]:
# Pick the Model 5 layers whose weights will be reused: positions 0, 2 and 3
# of prev_model.layers (position 1 is skipped -- presumably a weight-less
# Dropout layer; confirm with prev_model.summary()).
prev_LSTM_1, prev_LSTM_2, prev_Dense = (
    prev_model.layers[position] for position in (0, 2, 3)
)

In [15]:
# Initialize weights: copy the pretrained Model 5 weights into the matching
# Model 7 layers, looked up by layer name. Only 'y_1', 'y_2' and 'softmax'
# are initialized this way; every other layer keeps its default initialization.
for target_name, source_layer in (
    ('y_1', prev_LSTM_1),
    ('y_2', prev_LSTM_2),
    ('softmax', prev_Dense),
):
    model.get_layer(target_name).set_weights(source_layer.get_weights())

In [16]:
# Model 7 training conditions
# One execution of this cell trains for 5 epochs. The original notes record
# 20 epochs in total, with the model saved every 5 to 10 epochs -- the saving
# step is not part of this cell.
# Both outputs use categorical cross-entropy; the main output is weighted 1
# and the extra output 0.2 in the total loss.
# The same X is fed to both inputs and the same y is the target of both outputs.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    loss_weights=[1, 0.2],
)
model.fit(
    x=[X, X],
    y=[y, y],
    batch_size=256,
    epochs=5,
)

Epoch 1/1


<keras.callbacks.History at 0xb31deef98>