In [0]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation, LeakyReLU
import numpy as np

Using TensorFlow backend.


In [0]:
# Toy corpus: the 27-character keyboard-order alphabet repeated 100 times,
# stored as a flat list of single characters.
char_seq = [ch for _ in range(100) for ch in 'qwertyuiopasdfghjklñzxcvbnm']

In [0]:
# Map every distinct character to an integer id.
# The characters are sorted before numbering: iterating a set of strings
# follows hash order, which can change between interpreter sessions, so an
# unsorted mapping would not be reproducible after a kernel restart.
char2id = {char: idx for idx, char in enumerate(sorted(set(char_seq)))}

In [0]:
# Slide a window of `maxlen` characters over the corpus; each window is one
# input sequence and the character right after it is the prediction target.
maxlen = 5
n_windows = len(char_seq) - maxlen

sequences = [char_seq[start:start + maxlen] for start in range(n_windows)]
next_char = [char_seq[start + maxlen] for start in range(n_windows)]

# Number of output classes = number of distinct characters.
y_labels = len(char2id)
print("5 first sequences: {}".format(sequences[:5]))
print("5 first next characters: {}".format(next_char[:5]))
print("Total sequences: {}".format(len(sequences)))
print("Total output labels: {}".format(y_labels))

In [0]:
# NOTE(review): this cell repeats the preceding cell verbatim in effect;
# one of the two can be dropped.
maxlen = 5

# Targets are simply the corpus shifted left by `maxlen`; every target has a
# matching input window that starts `maxlen` characters before it.
next_char = list(char_seq[maxlen:])
sequences = [char_seq[pos:pos + maxlen] for pos in range(len(next_char))]

y_labels = len(char2id)
print("5 first sequences: {}".format(sequences[:5]))
print("5 first next characters: {}".format(next_char[:5]))
print("Total sequences: {}".format(len(sequences)))
print("Total output labels: {}".format(y_labels))


5 first sequences: [['q', 'w', 'e', 'r', 't'], ['w', 'e', 'r', 't', 'y'], ['e', 'r', 't', 'y', 'u'], ['r', 't', 'y', 'u', 'i'], ['t', 'y', 'u', 'i', 'o']]
5 first next characters: ['y', 'u', 'i', 'o', 'p']
Total sequences: 2695
Total output labels: 27


In [0]:

def one_hot_encoder(seq, ids):
    """Return a one-hot matrix with one row per item of ``seq``.

    Parameters
    ----------
    seq : sized iterable
        Items to encode; every item must be a key of ``ids``.
    ids : mapping
        Maps each item to the integer column that should be set to 1.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(seq), len(ids))`` that is zero everywhere
        except for a 1 at ``[i, ids[seq[i]]]`` in every row ``i``.

    Raises
    ------
    KeyError
        If an item of ``seq`` is not present in ``ids``.
    """
    n_rows, n_cols = len(seq), len(ids)
    # Resolve the target column of every row up front ...
    columns = np.fromiter((ids[item] for item in seq), dtype=np.intp, count=n_rows)
    matrix = np.zeros([n_rows, n_cols])
    # ... then set exactly one cell per row in a single indexed assignment.
    matrix[np.arange(n_rows), columns] = 1
    return matrix

In [8]:
# One-hot encode every input window (stacked into a 3-D array) and every
# target character (2-D array), then store both as 32-bit integers.
encoded_windows = [one_hot_encoder(window, char2id) for window in sequences]
x = np.array(encoded_windows).astype(np.int32)
y = one_hot_encoder(next_char, char2id).astype(np.int32)

print("Shape of x: {}".format(x.shape))
print("Shape of y: {}".format(y.shape))

Shape of x: (2695, 5, 27)
Shape of y: (2695, 27)


In [9]:
from sklearn.model_selection import train_test_split

# Hold out the last 20% of the windows for testing; shuffle=False keeps the
# samples in their original order.
splits = train_test_split(x, y, test_size=0.2, shuffle=False)
x_train, x_test, y_train, y_test = splits

print('x_train shape: {}'.format(x_train.shape))
print('y_train shape: {}'.format(y_train.shape))
print('x_test shape: {}'.format(x_test.shape))
print('y_test shape: {}'.format(y_test.shape))

x_train shape: (2156, 5, 27)
y_train shape: (2156, 27)
x_test shape: (539, 5, 27)
y_test shape: (539, 27)


In [10]:
# Network: an 8-unit LSTM over windows of `maxlen` one-hot vectors, followed
# by a dense layer with one output per character and a LeakyReLU activation.
# NOTE(review): the targets are one-hot vectors trained with MSE; a softmax
# output with categorical cross-entropy is the more usual choice -- confirm
# whether the regression-style setup is intentional.
model = Sequential([
    LSTM(8, input_shape=(maxlen, y_labels)),
    Dense(y_labels),
    LeakyReLU(alpha=.01),
])

model.compile(optimizer='rmsprop', loss='mse')

Instructions for updating:
Colocations handled automatically by placer.


In [11]:
# Train on the training split for 25 epochs in mini-batches of 32 samples;
# the returned object is kept in `history`.
history = model.fit(
    x_train,
    y_train,
    epochs=25,
    batch_size=32,
    verbose=1,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [12]:
# Loss (MSE, as configured in compile) on the held-out test split.
test_mse = model.evaluate(x_test, y_test)
print('MSE: {:.5f}'.format(test_mse))

MSE: 0.00137


In [13]:
prediction = model.predict(x_test)

# A sample counts as a hit only when every rounded output value matches the
# corresponding entry of its one-hot target vector.
row_is_hit = np.all(np.around(prediction) == y_test, axis=1)
errors = int(np.count_nonzero(~row_is_hit))
hits = len(prediction) - errors

print("Errors: {}".format(errors))
print("Hits: {}".format(hits))
print("Hit average: {}".format(hits/len(prediction)))

Errors: 20
Hits: 519
Hit average: 0.9628942486085343


In [0]:
def decode(vec):
    """Return the character whose id equals the arg-max position of ``vec``.

    ``vec`` is a score vector with one entry per character id. The lookup is
    a reverse search through the module-level ``char2id`` mapping.

    Raises
    ------
    ValueError
        If no character in ``char2id`` has the winning index as its id.
    """
    winner = np.argmax(vec)
    symbols = list(char2id)
    codes = list(char2id.values())
    # Keys and values of a dict iterate in matching order, so the position of
    # the id in `codes` is also the position of its character in `symbols`.
    position = codes.index(winner)
    return symbols[position]

In [0]:
def pred_seq(seq):
    """Predict the character that follows ``seq`` using the global ``model``.

    ``seq`` is any iterable of characters known to ``char2id`` (a plain
    string works). It is one-hot encoded, wrapped in a batch of size one,
    passed to ``model.predict`` and the output is turned back into a
    character with ``decode``.

    NOTE(review): the model was built with ``input_shape=(maxlen, y_labels)``,
    so ``seq`` presumably has to contain exactly ``maxlen`` characters --
    confirm.
    """
    window = one_hot_encoder(list(seq), char2id)
    # Add a leading batch axis so a single sample can be fed to the model.
    batch = window[np.newaxis, ...]
    scores = model.predict(batch, verbose=0)[0]
    return decode(list(scores))


In [16]:
# Spot check: ask the model for the character after the window 'tyuio'
# (in char_seq's repeating pattern the next character is 'p').
pred_seq('tyuio')

'p'