In [39]:
%run env_setup.py
import lessdeep as ld
import numpy as np
import keras

Using TensorFlow backend.


## Prepare data

In [2]:
src_path = ld.utils.download_file("https://s3.amazonaws.com/text-datasets/nietzsche.txt")

Downloading https://s3.amazonaws.com/text-datasets/nietzsche.txt
Successfully downloaded nietzsche.txt 600901 bytes.


In [3]:
with open(src_path) as f:
    text = f.read()
print('Length:', len(text))

Length: 600893


In [5]:
chars = sorted(list(set(text)))
vocab_size = len(chars)
print('Vocabulary size:', vocab_size)
chars[:10]

Vocabulary size: 84


['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.']

In [10]:
# Add '\0' for padding
chars.insert(0, '\0')
vocab_size += 1
print('Vocabulary size:', vocab_size)

Vocabulary size: 85


In [11]:
char2idx = {c:i for i, c in enumerate(chars)}

In [13]:
# real input
idx = [char2idx[c] for c in text]

In [15]:
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [17]:
''.join([chars[i] for i in idx[:80]])

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not ground\nfor su'

## 3 char model

Create inputdata of length 3

In [31]:
def generate_input(length):
    return [np.array([idx[i + offset] for i in range(0, len(idx)-1-length, length)]) for offset in range(length+1)]
all_data = generate_input(3)

In [140]:
x = all_data[:-1]
y = all_data[-1]

In [37]:
[a.shape for a in x]

[(200297,), (200297,), (200297,)]

In [38]:
feature_size = 42

In [90]:
def embedding_input(name, input_size, output_size, **kwargs):
    inp = keras.layers.Input(shape=(1,), dtype=np.int32, name=name)
    emb = keras.layers.Embedding(input_dim=input_size, output_dim=output_size, input_length=1, **kwargs)(inp)
    
    return inp, keras.layers.Flatten()(emb)
embeddings = [embedding_input('c{0}'.format(i), vocab_size, feature_size) for i in range(3)]
c_in, c_emb = list(zip(*embeddings))

In [48]:
dense_size = 256

In [49]:
input_dense = keras.layers.Dense(dense_size, activation='relu', name='input_dense')
res_dense = keras.layers.Dense(dense_size, activation='tanh', name='res_dense')

In [107]:
def merge_dense(a, b):
    return keras.layers.add([res_dense(a), input_dense(b)])
out1 = merge_dense(input_dense(c_emb[0]), c_emb[1])
out2 = merge_dense(out1, c_emb[2])
final_out = keras.layers.Dense(vocab_size, activation='softmax', name='output_dense')(out2)
model = keras.models.Model(inputs=c_in, outputs=final_out)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
c0 (InputLayer)                 (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_10 (Embedding)        (None, 1, 42)        3570        c0[0][0]                         
__________________________________________________________________________________________________
c1 (InputLayer)                 (None, 1)            0                                            
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 42)           0           embedding_10[0][0]               
__________________________________________________________________________________________________
embedding_

In [128]:
lr = 0.0001
model.compile(optimizer=keras.optimizers.Adam(lr=lr), loss=keras.losses.sparse_categorical_crossentropy)

In [129]:
model.fit(x, y, batch_size=256, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1b2f88f5160>

In [217]:
def get_next(seq, seq_len):
    seq_idx = [char2idx[c] for c in seq[-seq_len:]]
    prop = model.predict([np.array(c)[np.newaxis] for c in seq_idx])[0]
    return chars[np.argmax(prop)]

In [None]:
get_next(' th', seq_len)

## Keras RNN

In [181]:
seq_len = 8
all_data = generate_input(seq_len)
x = np.stack(all_data[:-1], axis=1)
y = all_data[-1]

In [134]:
model = keras.Sequential([
    keras.layers.Embedding(input_dim=vocab_size, output_dim=feature_size, input_length=seq_len),
    keras.layers.SimpleRNN(dense_size, recurrent_initializer='identity', activation='relu'),
    keras.layers.Dense(vocab_size, activation='softmax')
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, 8, 42)             3570      
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 256)               76544     
_________________________________________________________________
dense_1 (Dense)              (None, 85)                21845     
Total params: 101,959
Trainable params: 101,959
Non-trainable params: 0
_________________________________________________________________


In [135]:
model.compile(optimizer='adam', loss=keras.losses.sparse_categorical_crossentropy)

In [200]:
model.fit(np.array(x), y, batch_size=64, epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1b2fe07d518>

In [221]:
def get_next(seq, seq_len):
    seq_idx = [char2idx[c] for c in seq[-seq_len:]]
    prop = model.predict(np.array([seq_idx]))[0]
    return chars[np.argmax(prop)]

In [226]:
get_next('this is ti', seq_len)

'm'

## Return Sequence

In [246]:
# Create data
seq_len = 8
x = [np.array([idx[i + offset] for i in range(0, len(idx)-1-seq_len, seq_len)]) for offset in range(seq_len)]
y = [np.array([idx[i + offset] for i in range(1, len(idx)-1-seq_len, seq_len)]) for offset in range(seq_len)]

In [249]:
[x[i][:10] for i in range(seq_len)]

[array([40,  1, 33,  2, 72, 67, 73,  2, 68, 57]),
 array([42,  1, 38, 44,  2,  9, 61, 73, 73,  1]),
 array([29, 43, 31, 71, 54,  9, 58, 61,  2, 59]),
 array([30, 45,  2, 74,  2, 76, 67, 58, 60, 68]),
 array([25, 40, 73, 73, 76, 61, 24, 71, 71, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58, 68,  2]),
 array([29, 39, 54,  2, 66, 73, 33,  2, 74, 72]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67, 67, 74])]

In [250]:
[y[i][:10] for i in range(seq_len)]

[array([42,  1, 38, 44,  2,  9, 61, 73, 73,  1]),
 array([29, 43, 31, 71, 54,  9, 58, 61,  2, 59]),
 array([30, 45,  2, 74,  2, 76, 67, 58, 60, 68]),
 array([25, 40, 73, 73, 76, 61, 24, 71, 71, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58, 68,  2]),
 array([29, 39, 54,  2, 66, 73, 33,  2, 74, 72]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67, 67, 74]),
 array([ 1, 33,  2, 72, 67, 73,  2, 68, 57, 72])]

In [253]:
# Generate model
embeddings = [embedding_input('c{0}'.format(i), vocab_size, feature_size) for i in range(seq_len)]
c_in, c_emb = list(zip(*embeddings))

In [255]:
# dense layers
dense_in = keras.layers.Dense(dense_size,activation='relu', name='seq_dense_in')
dense_res = keras.layers.Dense(dense_size, activation='relu', name='seq_dense_res', kernel_initializer='identity')
dense_out = keras.layers.Dense(vocab_size, activation='softmax')

In [260]:
c_out = []
zero_in = keras.layers.Input(shape=(feature_size,), name='zeros')
last_out = dense_in(zero_in)
for l_in, l_emb in embeddings:
    last_out = keras.layers.add([dense_in(l_emb), dense_res(last_out)])
    c_out.append(dense_out(last_out))

model = keras.Model([zero_in,] + list(c_in), c_out)
model.compile(optimizer='Adam', loss=keras.losses.sparse_categorical_crossentropy)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
c0 (InputLayer)                 (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_17 (Embedding)        (None, 1, 42)        3570        c0[0][0]                         
__________________________________________________________________________________________________
flatten_7 (Flatten)             (None, 42)           0           embedding_17[0][0]               
__________________________________________________________________________________________________
zeros (InputLayer)              (None, 42)           0                                            
__________________________________________________________________________________________________
seq_dense_

In [269]:
zero_pad = np.tile(np.zeros(feature_size), (len(x[0]), 1))
zero_pad.shape, np.array(x).shape, np.array(y).shape

((75111, 42), (8, 75111), (8, 75111))

In [317]:
model.fit([zero_pad] + x, y, batch_size=64, epochs=6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x1b30141ebe0>

In [313]:
def test_model(seqs):
    seq_idx = [char2idx[c] for c in seqs[-seq_len:]]
    props = model.predict([np.zeros(feature_size)[np.newaxis, :]] + [np.array(i)[np.newaxis] for i in seq_idx])
    return ''.join([chars[np.argmax(p)] for p in props])

In [316]:
test_model(" this is")

'thet tn '

### Use same embedding

In [389]:
# Create data
seq_len = 8
x = [np.array([idx[i + offset] for i in range(0, len(idx)-1-seq_len, seq_len)]) for offset in range(seq_len)]
y = [np.array([idx[i + offset] for i in range(1, len(idx)-1-seq_len, seq_len)]) for offset in range(seq_len)]

np.array(x).shape, np.array(y).shape

((8, 75111), (8, 75111))

In [390]:
[x[i][:10] for i in range(seq_len)]

[array([40,  1, 33,  2, 72, 67, 73,  2, 68, 57]),
 array([42,  1, 38, 44,  2,  9, 61, 73, 73,  1]),
 array([29, 43, 31, 71, 54,  9, 58, 61,  2, 59]),
 array([30, 45,  2, 74,  2, 76, 67, 58, 60, 68]),
 array([25, 40, 73, 73, 76, 61, 24, 71, 71, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58, 68,  2]),
 array([29, 39, 54,  2, 66, 73, 33,  2, 74, 72]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67, 67, 74])]

In [391]:
[y[i][:10] for i in range(seq_len-1)]

[array([42,  1, 38, 44,  2,  9, 61, 73, 73,  1]),
 array([29, 43, 31, 71, 54,  9, 58, 61,  2, 59]),
 array([30, 45,  2, 74,  2, 76, 67, 58, 60, 68]),
 array([25, 40, 73, 73, 76, 61, 24, 71, 71, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58, 68,  2]),
 array([29, 39, 54,  2, 66, 73, 33,  2, 74, 72]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67, 67, 74])]

In [392]:
seq_emb = keras.layers.Embedding(input_dim=vocab_size, output_dim=feature_size, input_length=1)
def create_input(name):
    inp = keras.layers.Input(shape=(1,), dtype=np.int32, name=name)
    return inp, keras.layers.Flatten()(seq_emb(inp))

In [393]:
embeddings = [create_input('c{0}'.format(i)) for i in range(seq_len + 1)]
c_in, c_emb = list(zip(*embeddings))

# dense layers
dense_in = keras.layers.Dense(dense_size,activation='relu', name='seq_dense_in')
dense_res = keras.layers.Dense(dense_size, activation='relu', name='seq_dense_res', kernel_initializer='identity')
dense_out = keras.layers.Dense(vocab_size, activation='softmax')

# Model
c_out = []
last_out = dense_in(c_emb[0])
for l_in, l_emb in embeddings[1:]:
    last_out = keras.layers.add([dense_in(l_emb), dense_res(last_out)])
    c_out.append(dense_out(last_out))

model = keras.Model(c_in, c_out)
model.compile(optimizer='Adam', loss=keras.losses.sparse_categorical_crossentropy)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
c1 (InputLayer)                 (None, 1)            0                                            
__________________________________________________________________________________________________
c0 (InputLayer)                 (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_27 (Embedding)        (None, 1, 42)        3570        c0[0][0]                         
                                                                 c1[0][0]                         
                                                                 c2[0][0]                         
                                                                 c3[0][0]                         
          

In [394]:
model.fit([np.zeros(len(x[0]))] + list(x), y, batch_size=64, epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1b30b40afd0>

In [381]:
def test_model(seqs):
    seq_idx = [char2idx[c] for c in seqs[-seq_len:]]
    props = model.predict([np.zeros(1),] + [np.array(i)[np.newaxis] for i in seq_idx])
    print(seqs[-seq_len+1:] + '\n' + ''.join([chars[np.argmax(p)] for p in props]))

In [400]:
test_model(" this is part of")

part of
torn of 


## Keras RNN Sequence

In [408]:
# Create data
seq_len = 8
x = [np.array([idx[i + offset] for i in range(0, len(idx)-1-seq_len, seq_len)]) for offset in range(seq_len)]
y = [np.array([idx[i + offset] for i in range(1, len(idx)-1-seq_len, seq_len)]) for offset in range(seq_len)]
x = np.stack(x, axis=1)
y = np.stack(y, axis=1)

x.shape, y.shape

((75111, 8), (75111, 8))

In [436]:
# Create model
model = keras.Sequential([
    keras.layers.Embedding(input_dim=vocab_size, output_dim=feature_size, input_length=seq_len),
    keras.layers.SimpleRNN(dense_size,activation='relu', return_sequences=True, recurrent_initializer='identity'),
    keras.layers.TimeDistributed(keras.layers.Dense(dense_size, activation='softmax')),
    #keras.layers.Dense(dense_size, activation='softmax'),
])

model.compile(optimizer='Adam', loss=keras.losses.sparse_categorical_crossentropy)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_35 (Embedding)     (None, 8, 42)             3570      
_________________________________________________________________
simple_rnn_9 (SimpleRNN)     (None, 8, 256)            76544     
_________________________________________________________________
time_distributed_8 (TimeDist (None, 8, 256)            65792     
Total params: 145,906
Trainable params: 145,906
Non-trainable params: 0
_________________________________________________________________


In [437]:
model.fit(x, np.atleast_3d(y), batch_size=128, epochs=8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x1b311a157f0>

In [432]:
def get_next(seq):
    seq_idx = [char2idx[c] for c in seq[-seq_len:]]
    prop = model.predict(np.array([seq_idx])).squeeze()
    return ''.join([chars[np.argmax(p)] for p in prop])

In [444]:
get_next(' this is an ')

' tn t  a'

## Keras LSTM

In [456]:
batch_size = 64
model = keras.Sequential([
    keras.layers.Embedding(input_dim=vocab_size, output_dim=feature_size, input_length=seq_len,
                           batch_input_shape=(batch_size, seq_len)),
    keras.layers.BatchNormalization(),
    keras.layers.LSTM(dense_size, return_sequences=True, stateful=True),
    keras.layers.Dense(vocab_size, activation='softmax'),
])
model.compile(optimizer='Adam', loss=keras.losses.sparse_categorical_crossentropy)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_39 (Embedding)     (64, 8, 42)               3570      
_________________________________________________________________
batch_normalization_1 (Batch (64, 8, 42)               168       
_________________________________________________________________
lstm_4 (LSTM)                (64, 8, 256)              306176    
_________________________________________________________________
dense_34 (Dense)             (64, 8, 85)               21845     
Total params: 331,759
Trainable params: 331,675
Non-trainable params: 84
_________________________________________________________________


In [457]:
fixed_size = (len(x) // batch_size) * batch_size
model.fit(x[:fixed_size], np.atleast_3d(y[:fixed_size]), epochs=4, batch_size=batch_size, shuffle=False)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1b31daa65c0>

## Theano