# Testing basic LSTM
### Creating a mock dataset

In [20]:
# sample data: a handful of fruit names that the model should learn
# to spell out letter by letter
words = [
    'apple',
    'banana',
    'pear',
    'lime',
    'cherry',
    'orange',
    'berry',
    'strawberry',
]

# symbol appended to a target sequence to mark the end of a word
SEQUENCE_STOP_SIGN = '.'

# split every word into a list of its single-character letters
words_arrays = list(map(list, words))

In [40]:
# many-to-many configuration: the target is the input shifted one letter
# to the left, with the stop sign filling the last position
sequences_x, sequences_y = zip(*(
    (letters, [*letters[1:], SEQUENCE_STOP_SIGN])
    for letters in words_arrays
))
sequences_x[0], sequences_y[0]

(['a', 'p', 'p', 'l', 'e'], ['p', 'p', 'l', 'e', '.'])

In [32]:
# one-hot encode letters (and stop sign)
import string
# vocabulary: the lowercase ASCII letters followed by the stop sign;
# a symbol's position in this string is used as its one-hot index
ALPHABET = string.ascii_lowercase + SEQUENCE_STOP_SIGN
# width of every one-hot vector (one slot per symbol in ALPHABET)
ALPHABET_LENGTH = len(ALPHABET)

import numpy as np
def encode_letter(letter):
    """One-hot encode a single symbol of ALPHABET.

    Args:
        letter: a one-character string that occurs in ALPHABET.

    Returns:
        1-D float array of length ALPHABET_LENGTH that is all zeros except
        for a 1 at the position of `letter` in ALPHABET.

    Raises:
        ValueError: if `letter` is not exactly one character long or does
            not occur in ALPHABET.
    """
    # str.index() matches substrings, so '' or 'ab' would otherwise be
    # silently encoded as 'a'; insist on exactly one known character
    if len(letter) != 1 or letter not in ALPHABET:
        raise ValueError(
            'cannot one-hot encode {!r}: expected a single character from {!r}'
            .format(letter, ALPHABET)
        )
    res = np.zeros(ALPHABET_LENGTH)
    res[ALPHABET.index(letter)] = 1
    return res

def decode_letter(ohe_vec):
    """Map a one-hot (or score) vector back to its ALPHABET symbol.

    The symbol at the position of the largest entry is returned.
    Only a single one-dimensional vector is handled for now.
    """
    hottest_index = ohe_vec.argmax()
    return ALPHABET[hottest_index]

# quick check: 'd' is the 4th symbol of ALPHABET, so only index 3 should be 1
encode_letter('d')

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [39]:
def encode_word(word):
    """One-hot encode a sequence of letters.

    Every element of `word` is passed through encode_letter and the
    resulting vectors are stacked as rows, one row per letter, in order.
    """
    letter_rows = list(map(encode_letter, word))
    return np.stack(letter_rows, axis=0)
    
def decode_word(ohe_word):
    """Turn a sequence of one-hot (or score) rows back into a string.

    Each row is decoded with decode_letter and the resulting letters are
    concatenated in order.
    """
    return ''.join(map(decode_letter, ohe_word))

# quick check: expect three rows, with the 1s at the positions of 'a', 's' and 'd'
encode_word('asd')

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [48]:
# one-hot encode every input sequence and every target sequence;
# data_x[i] and data_y[i] belong to the same word
data_x = list(map(encode_word, sequences_x))
data_y = list(map(encode_word, sequences_y))

In [70]:
# data generator
import random
def data_gen():
    """Yield randomly chosen training samples forever.

    Every step picks one index at random and yields the matching
    (data_x[index], data_y[index]) pair as a tuple, each array with a
    leading axis of size 1 added so it forms a batch of one sequence.
    """
    while True:
        pick = random.randint(0, len(data_x) - 1)
        batch_x = np.expand_dims(data_x[pick], axis=0)
        batch_y = np.expand_dims(data_y[pick], axis=0)
        yield batch_x, batch_y

### Creating the model

In [85]:
import tensorflow as tf

# one input slot and one output slot per symbol of the alphabet
INPUT_UNITS = ALPHABET_LENGTH
HIDDEN_UNITS = 128
OUTPUT_UNITS = ALPHABET_LENGTH

# (seq_len, input); seq_len is None because words differ in length
INPUT_SHAPE = (None, INPUT_UNITS)

# the LSTM returns its output at every time step (return_sequences=True),
# and the dense softmax layer maps each of them to scores over the alphabet
model = tf.keras.models.Sequential()
model.add(
    tf.keras.layers.LSTM(
        HIDDEN_UNITS,
        input_shape=INPUT_SHAPE,
        return_sequences=True,
    )
)
model.add(tf.keras.layers.Dense(OUTPUT_UNITS, activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)



In [86]:
# every step trains on one randomly drawn word (a batch of one sequence);
# an epoch is defined as as many steps as there are words in the dataset
gen = data_gen()
STEPS_PER_EPOCH = len(data_x)
EPOCHS = 100

# Model.fit accepts Python generators directly; Model.fit_generator is
# deprecated in TensorFlow 2.x and no longer exists in Keras 3.
# steps_per_epoch is required because the generator never terminates.
model.fit(gen, steps_per_epoch=STEPS_PER_EPOCH, epochs=EPOCHS)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100


Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1fc83e3b4a8>

In [82]:
# feed the prefix 'pe' as a batch of one sequence, then drop the batch axis
# again and decode the most likely next letter predicted at every position
prediction = model.predict(encode_word('pe')[np.newaxis, ...])
decode_word(np.squeeze(prediction, axis=0))

'ea'