In [1]:
# Base name of the pickled training history; the next cell opens data/<filename>.history.pkl.
filename = "train"

### Load the training history

In [2]:
import pickle

# Read the pickled training history from data/<filename>.history.pkl.
# The plotting cell below indexes the result with 'loss' and 'val_loss'.
# NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source.
with open("data/{}.history.pkl".format(filename), "rb") as fp:
    history = pickle.load(fp)

### Plot the loss (`Log`) and validation loss (`Val`) per epoch

In [3]:
import matplotlib.pyplot as plt

# Draw both loss curves from the loaded history on a single set of axes.
fig, ax = plt.subplots()
ax.plot(history['loss'])
ax.plot(history['val_loss'])
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
# Legend entries follow plotting order: 'Log' -> history['loss'], 'Val' -> history['val_loss'].
ax.legend(['Log', 'Val'], loc='upper right')
plt.show()

<Figure size 640x480 with 1 Axes>

## Rebuild the Model

### Variable initialization

In [4]:
# Run name. NOTE(review): not referenced by any cell visible here -- confirm it is still needed.
FN = 'train'

In [5]:
# Base name of the pickle in data/ that holds the embedding matrix and vocabulary mappings.
FN0 = 'vocabulary-embedding'

In [6]:
# Base name of the HDF5 weights file (data/<FN1>.hdf5) loaded into the rebuilt model.
# An empty string skips the weight loading step.
FN1 = 'train'
#FN1 = ''

In [7]:
# Sequence layout: the first maxlend positions are the desc part, the remaining maxlenh the head part.
maxlend=25 # 0 - if we don't want to use the description at all
maxlenh=25
maxlen = maxlend + maxlenh  # total input length given to the Embedding layer
# Units per LSTM layer.
rnn_size = 512 # must be same as 160330-word-gen
# Number of stacked LSTM layers.
rnn_layers = 3  # match FN1
# Only referenced by a commented-out LSTM argument in the model cell.
batch_norm=False

In [8]:
# Number of leading LSTM output features that simple_context uses to score head/desc pairs;
# 0 (when maxlend is 0) means the context Lambda layer is not added to the model.
activation_rnn_size = 40 if maxlend else 0

In [9]:
# training parameters
seed=42  # fed to random.seed / np.random.seed
# Dropout rates: p_W -> LSTM `dropout`, p_U -> LSTM `recurrent_dropout`, p_dense -> Dropout layers.
# weight_decay is the l2 factor; 0 disables regularization.
p_W, p_U, p_dense, weight_decay = 0, 0, 0, 0
optimizer = 'adam'  # passed by name to model.compile
LR = 1e-4  # written into model.optimizer.lr after compile
# NOTE(review): batch_size and nflips are not used in the cells visible here.
batch_size=64
nflips=10

In [10]:
# NOTE(review): neither value is used in the cells visible here.
nb_train_samples = 640 # orig was 30k
nb_val_samples = 6 # orig was 3k

### Read the word embedding

In [11]:
import pickle

# Load the embedding matrix and vocabulary mappings pickled under data/<FN0>.pkl.
# NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source.
with open('data/%s.pkl'%FN0, 'rb') as fp:
    embedding, idx2word, word2idx, glove_idx2idx = pickle.load(fp)
# embedding is 2-D; its two dimensions are later passed to Embedding as vocabulary size and vector size.
vocab_size, embedding_size = embedding.shape

In [12]:
# Number of placeholder tokens ('<0>', '<1>', ...) written into the last slots below vocab_size.
nb_unknown_words = 10

In [13]:
# Label the last nb_unknown_words vocabulary slots '<0>', '<1>', ..., counting down from vocab_size-1.
for i in range(nb_unknown_words):
    placeholder_slot = vocab_size - 1 - i
    idx2word[placeholder_slot] = '<%d>' % i

In [14]:
# First index of the placeholder range; every idx2word entry from here upward gets a '^' suffix.
oov0 = vocab_size-nb_unknown_words

In [15]:
# Append '^' to the printable form of every index from oov0 upward.
# NOTE: re-running this cell appends one more '^' to each entry every time.
for i in range(oov0, len(idx2word)):
    idx2word[i] += '^'

In [16]:
# Reserved indices and their printable stand-ins.
empty = 0  # index 0; the Embedding layer is built with mask_zero=True
eos = 1  # presumably the end-of-sequence index -- TODO confirm
idx2word[empty] = '_'
idx2word[eos] = '~'

In [17]:
import numpy as np
from tensorflow.python.keras.preprocessing import sequence
from tensorflow.python.keras.utils import np_utils
import random, sys

### Model

In [18]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers.core import Dense, Activation, Dropout, RepeatVector
from tensorflow.python.keras.layers.wrappers import TimeDistributed
from tensorflow.python.keras.layers.recurrent import LSTM
from tensorflow.python.keras.layers.embeddings import Embedding
from tensorflow.python.keras.regularizers import l2

In [19]:
# Seed the Python and NumPy random generators (original intent: seed weight initialization).
# NOTE(review): nothing here seeds TensorFlow's own RNG -- confirm whether the layer
# initializers are actually covered by these two calls.
random.seed(seed)
np.random.seed(seed)

In [20]:
# One l2 regularizer shared by every layer below, or None when weight_decay is 0.
regularizer = l2(weight_decay) if weight_decay else None

In [21]:
# Embedding front-end initialised from the pre-loaded `embedding` matrix, followed by
# `rnn_layers` LSTM layers that each return the full sequence and are followed by Dropout.
model = Sequential()
model.add(
    Embedding(
        vocab_size,
        embedding_size,
        input_length=maxlen,
        embeddings_regularizer=regularizer,
        weights=[embedding],
        mask_zero=True,
        name='embedding_1',
    )
)

for i in range(rnn_layers):
    layer_no = i + 1  # layer names are 1-based: lstm_1, dropout_1, ...
    # (a batch_norm=batch_norm argument was commented out in the original call)
    lstm = LSTM(
        rnn_size,
        return_sequences=True,
        kernel_regularizer=regularizer,
        recurrent_regularizer=regularizer,
        bias_regularizer=regularizer,
        dropout=p_W,
        recurrent_dropout=p_U,
        name='lstm_%d' % layer_no,
    )
    model.add(lstm)
    model.add(Dropout(p_dense, name='dropout_%d' % layer_no))

In [22]:
from tensorflow.python.keras.layers.core import Lambda
import tensorflow.python.keras.backend as K

def simple_context(X, mask, n=activation_rnn_size, maxlend=maxlend, maxlenh=maxlenh):
    """Mix a softmax-weighted average of the desc positions into every head position.

    X is sliced as a rank-3 tensor indexed [batch, time, feature]: the first
    `maxlend` time steps form the desc part and the rest the head part.  On the
    feature axis the first `n` channels are used only to score head/desc pairs;
    the remaining channels are the values that get averaged and returned.

    The defaults of `n`, `maxlend` and `maxlenh` are bound to the notebook
    globals of the same meaning at the moment this function is defined.

    Args:
        X: output of the preceding layer, indexed as [batch, time, feature].
        mask: rank-2 mask indexed as [batch, time]; desc positions whose mask
            casts to 0 have -1e20 added to their score before the softmax.
        n: number of leading feature channels used for scoring.
        maxlend: number of desc time steps.
        maxlenh: number of head time steps.

    Returns:
        For every head position, the weighted desc average concatenated
        (K.concatenate with its default axis) with that position's own
        remaining feature channels.
    """
    # Time axis: desc first, head after.
    desc = X[:, :maxlend, :]
    head = X[:, maxlend:, :]

    # Feature axis: scoring channels first, value channels after.
    desc_activations = desc[:, :, :n]
    desc_words = desc[:, :, n:]
    head_activations = head[:, :, :n]
    head_words = head[:, :, n:]

    # Score of every head position against every desc position.
    energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))

    # Push masked-out desc positions to a hugely negative score so softmax ignores them.
    masked_out = 1. - K.cast(mask[:, :maxlend], 'float32')
    energies = energies + -1e20 * K.expand_dims(masked_out, 1)

    # Softmax over the desc positions: flatten to rows of length maxlend, normalise, restore shape.
    flat_energies = K.reshape(energies, (-1, maxlend))
    flat_weights = K.softmax(flat_energies)
    weights = K.reshape(flat_weights, (-1, maxlenh, maxlend))

    # Weighted average of the desc value channels for every head position.
    desc_avg_word = K.batch_dot(weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))

In [23]:
# Optional context layer: only added when some LSTM features are reserved for scoring.
if activation_rnn_size:
    context_layer = Lambda(
        simple_context,
        # keep only the mask entries from position maxlend onward
        mask=lambda inputs, mask: mask[:, maxlend:],
        # maxlenh time steps, each with twice the non-scoring feature count
        output_shape=lambda input_shape: (input_shape[0], maxlenh, 2*(rnn_size - activation_rnn_size)),
        name='simplecontext_1',
    )
    model.add(context_layer)

# Per-time-step projection onto the vocabulary, then softmax.
# Note: name='timedistributed_1' is given to the wrapped Dense, not to the TimeDistributed wrapper.
output_projection = Dense(
    vocab_size,
    kernel_regularizer=regularizer,
    bias_regularizer=regularizer,
    name='timedistributed_1',
)
model.add(TimeDistributed(output_projection))
model.add(Activation('softmax', name='activation_1'))

In [24]:
from tensorflow.python.keras.optimizers import Adam, RMSprop # usually I prefer Adam but article used rmsprop
# opt = Adam(lr=LR)  # keep calm and reduce learning rate
# `optimizer` is the string set in the training-parameters cell, so the optimizer is
# selected by name here; the learning rate is written separately with K.set_value afterwards.
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [25]:
# Overwrite the compiled optimizer's learning rate with LR (as float32).
K.set_value(model.optimizer.lr,np.float32(LR))

In [26]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 50, 100)           4000000   
_________________________________________________________________
lstm_1 (LSTM)                (None, 50, 512)           1255424   
_________________________________________________________________
dropout_1 (Dropout)          (None, 50, 512)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 50, 512)           2099200   
_________________________________________________________________
dropout_2 (Dropout)          (None, 50, 512)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 50, 512)           2099200   
_________________________________________________________________
dropout_3 (Dropout)          (None, 50, 512)           0

### Load weights

In [27]:
# Restore trained weights from data/<FN1>.hdf5; skipped when FN1 is empty.
if FN1:
    model.load_weights('data/%s.hdf5'%FN1)