# import packages

In [1]:
import numpy as np
# Seed NumPy's global RNG so the randomly generated sequences below are repeatable.
# NOTE(review): this seeds NumPy only; Keras weight initialisation presumably draws
# from the backend's own RNG, so training results may still differ between runs - confirm.
seed= 11
np.random.seed( seed )
import pandas as pd
# Raise the pandas display limits (columns, rows, cell width) for notebook output.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 500)

# utility
from tqdm import tqdm

# Deep Learning
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import LSTM, Dense

Using TensorFlow backend.


# Echo Sequence Prediction Problem

In this problem, we have a random sequence of numbers, and we would like to always pick out the number at a fixed index. For example, we can always choose the third number in a sequence. Whatever the sequence is, the classification model must always return its third integer. 

The purpose of investigating this problem with vanilla LSTM models is to demonstrate their capability of memorizing a certain element in a sequence.

As the first step, let's first create a random sequence of numbers:

In [2]:
# generate a sequence of random numbers in [0, n_features)
def generate_sequence(length, n_features):
    """
    Draw `length` random integers, each taken from [0, n_features).

    length is how many numbers the sequence holds
    n_features is the exclusive upper bound of every number

    ex.
    length= 5, n_features= 10
    sequence -> [2, 5, 1, 9, 6]
    """
    sequence= list()
    for _ in range(length):
        # one draw per position, taken from NumPy's global random state
        sequence.append( np.random.randint(0, n_features) )
    return sequence

In [3]:
def one_hot_encode( sequence, n_features):
    """
    One-hot encode a sequence of integer class ids.

    Parameters
    ----------
    sequence : 1-D sequence of ints, each in [0, n_features)
    n_features : width of every one-hot vector (the number of classes)

    Returns
    -------
    Integer np.ndarray of shape (len(sequence), n_features); row i holds a
    single 1 in column sequence[i]. An empty sequence gives shape
    (0, n_features).

    Raises
    ------
    ValueError if sequence is not one-dimensional.
    IndexError if any value lies outside [0, n_features). Negative values are
    rejected too, because they would otherwise wrap around and mark the
    wrong class.

    Note
    ----
    keras.utils.to_categorical produces the same encoding, but unless
    num_classes is passed it infers the width as (largest value present + 1).
    With random sequences the largest possible value may not show up, which
    would give vectors that are too narrow. If all the sequences are encoded
    together and the largest value is present, the result is the same.
    """
    values= np.asarray(sequence)
    if values.ndim != 1:
        raise ValueError('sequence must be one-dimensional, got %d dimensions' % values.ndim)
    if values.size == 0:
        # keep the second axis so that empty results still stack with others
        return np.zeros( (0, n_features), dtype= int )
    if values.min() < 0 or values.max() >= n_features:
        raise IndexError('sequence values must lie in [0, %d)' % n_features)

    encoding= np.zeros( (len(values), n_features), dtype= int )
    # one assignment sets the single 1 of every row
    encoding[np.arange(len(values)), values]= 1
    return encoding
    

In [4]:
# example: draw two independent sequences and show their one-hot encodings
seq1= generate_sequence(length= 5, n_features= 10)
seq2= generate_sequence(length= 5, n_features= 10)

print('Sequence 1: ', seq1)
print('Sequence 2: ', seq2)

print('\nOne hot encoding of seq1: ')
print(one_hot_encode(seq1, n_features= 10))
print('\nOne hot encoding of seq2: ')
print(one_hot_encode(seq2, n_features= 10))

Sequence 1:  [9, 0, 1, 7, 1]
Sequence 1:  [7, 2, 8, 0, 0]

One hot encoding of seq1: 
[[0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 1 0 0 0 0 0 0 0 0]]

One hot encoding of seq2: 
[[0 0 0 0 0 0 0 1 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]]


In [5]:
# to_categorical gives the same encoding. num_classes is passed explicitly because
# otherwise the width is inferred from the largest value that happens to be present.
to_categorical( [seq1, seq2], num_classes= 10)

array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]], dtype=float32)

In [6]:
def one_hot_decode(encoded_seq):
    """
    Turn a sequence of one-hot (or score) vectors back into class ids.

    Every vector in encoded_seq is reduced to the index of its largest
    entry with np.argmax; the indices are returned as a list.

    Example:
    [[0, 0, 1], [1, 0, 0]] -> [2, 0]
    """
    decoded= list()
    for vector in encoded_seq:
        decoded.append( np.argmax(vector) )
    return decoded

In [7]:
# round trip: encode seq1, show the encoding, then decode it back to integers
encoded_seq= one_hot_encode(sequence= seq1, n_features= 10)
print('One hot encoding of sequence 1: \n', encoded_seq)
one_hot_decode(encoded_seq= encoded_seq)

One hot encoding of sequence 1: 
 [[0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 1 0 0 0 0 0 0 0 0]]


[9, 0, 1, 7, 1]

In [8]:
def generate_example(length, n_features, out_index):
    """
    Build one (X, y) pair for the Echo Sequence Prediction problem.

    A random sequence of `length` integers in [0, n_features) is one-hot
    encoded. X is the whole encoded sequence reshaped to
    (1, length, n_features); y is the encoded element found at position
    out_index, reshaped to (1, n_features).
    """
    one_hot= one_hot_encode( generate_sequence(length, n_features), n_features )
    # the leading axis of size 1 is the batch axis holding this single sample
    X= one_hot.reshape( (1, length, n_features) )
    # the target is the encoded value sitting at the position to be echoed
    y= one_hot[out_index].reshape( (1, n_features) )
    return X, y

In [9]:
# problem size: 60-step sequences over 100 possible values, echoing position 43
length, n_features, out_index= 60, 100, 43

# a single example, generated only to inspect the array shapes
X, y= generate_example(length, n_features, out_index)
print('X shape: ', X.shape)
print('y shape: ', y.shape)

X shape:  (1, 60, 100)
y shape:  (1, 100)


In [10]:
# one LSTM layer with 200 units feeding a softmax over the n_features classes
model= Sequential([
    LSTM(200, input_shape= X.shape[1:]),      # (timesteps, features) of one sample
    Dense(n_features, activation= 'softmax'),
])
model.compile(
    loss= 'categorical_crossentropy',
    optimizer= 'adam',
    metrics= ['acc'],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 200)               240800    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               20100     
Total params: 260,900
Trainable params: 260,900
Non-trainable params: 0
_________________________________________________________________


In [11]:
# training set: 5000 independent examples stacked along the batch axis
examples= [generate_example(length, n_features, out_index) for _ in tqdm(range(5000))]
X= np.concatenate( [X_part for X_part, _ in examples] )
y= np.concatenate( [y_part for _, y_part in examples] )

print('X shape', X.shape)
print('y shape', y.shape)

100%|██████████| 5000/5000 [00:09<00:00, 539.05it/s]


X shape (5000, 60, 100)
y shape (5000, 100)


In [12]:
# 50 epochs in mini-batches of 128; verbose= 2 logs one line per epoch
model.fit(x= X, y= y, epochs= 50, batch_size= 128, verbose= 2)

Epoch 1/50
 - 3s - loss: 4.6040 - acc: 0.0106
Epoch 2/50
 - 2s - loss: 4.5811 - acc: 0.0234
Epoch 3/50
 - 2s - loss: 4.5646 - acc: 0.0342
Epoch 4/50
 - 2s - loss: 4.5267 - acc: 0.0374
Epoch 5/50
 - 2s - loss: 4.4848 - acc: 0.0448
Epoch 6/50
 - 2s - loss: 4.4213 - acc: 0.0502
Epoch 7/50
 - 2s - loss: 4.3401 - acc: 0.0570
Epoch 8/50
 - 2s - loss: 4.3225 - acc: 0.0596
Epoch 9/50
 - 2s - loss: 4.2450 - acc: 0.0634
Epoch 10/50
 - 2s - loss: 3.9831 - acc: 0.0958
Epoch 11/50
 - 2s - loss: 3.7369 - acc: 0.1290
Epoch 12/50
 - 2s - loss: 3.5550 - acc: 0.1558
Epoch 13/50
 - 3s - loss: 3.2970 - acc: 0.1994
Epoch 14/50
 - 2s - loss: 3.0604 - acc: 0.2404
Epoch 15/50
 - 3s - loss: 2.7642 - acc: 0.2996
Epoch 16/50
 - 2s - loss: 2.6260 - acc: 0.3220
Epoch 17/50
 - 2s - loss: 2.3608 - acc: 0.3808
Epoch 18/50
 - 2s - loss: 2.1920 - acc: 0.4110
Epoch 19/50
 - 2s - loss: 1.9782 - acc: 0.4650
Epoch 20/50
 - 2s - loss: 1.6221 - acc: 0.5516
Epoch 21/50
 - 2s - loss: 1.5671 - acc: 0.5598
Epoch 22/50
 - 2s - lo

<keras.callbacks.callbacks.History at 0x7fdbd87a1f98>

In [13]:
# test set: 100 freshly generated examples
test_examples= [generate_example(length, n_features, out_index) for _ in tqdm(range(100))]
X_test= np.concatenate( [pair[0] for pair in test_examples] )
y_test= np.concatenate( [pair[1] for pair in test_examples] )

yhat= model.predict(X_test)
# uncomment to inspect individual predictions:
#print('Sequence: %s' % [one_hot_decode(x) for x in X_test])
#print('Expected: %s' % one_hot_decode(y_test))
#print('Predicted: %s' % one_hot_decode(yhat))

# displays [loss, accuracy] on the test set
model.evaluate(x= X_test, y= y_test)

100%|██████████| 100/100 [00:00<00:00, 553.28it/s]




[0.3713809025287628, 0.8799999952316284]

### With 5000 examples, we achieved a test accuracy of 88 %

### To improve test accuracy, we can either change the number of LSTM units or increase the number of samples. Let's try both. 

### First we increase the number of LSTM units from 200 to 400

In [14]:
# same architecture as before, with the LSTM widened from 200 to 400 units
model= Sequential([
    LSTM(400, input_shape= X.shape[1:]),      # (timesteps, features) of one sample
    Dense(n_features, activation= 'softmax'),
])
model.compile(
    loss= 'categorical_crossentropy',
    optimizer= 'adam',
    metrics= ['acc'],
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 400)               801600    
_________________________________________________________________
dense_2 (Dense)              (None, 100)               40100     
Total params: 841,700
Trainable params: 841,700
Non-trainable params: 0
_________________________________________________________________


In [15]:
# same training schedule as for the 200-unit model: 50 epochs, batches of 128
model.fit(x= X, y= y, epochs= 50, batch_size= 128, verbose= 2)

Epoch 1/50
 - 3s - loss: 4.6031 - acc: 0.0116
Epoch 2/50
 - 2s - loss: 4.5750 - acc: 0.0288
Epoch 3/50
 - 2s - loss: 4.5424 - acc: 0.0322
Epoch 4/50
 - 2s - loss: 4.5020 - acc: 0.0468
Epoch 5/50
 - 2s - loss: 4.4824 - acc: 0.0430
Epoch 6/50
 - 2s - loss: 4.4470 - acc: 0.0480
Epoch 7/50
 - 2s - loss: 4.4023 - acc: 0.0548
Epoch 8/50
 - 2s - loss: 4.3469 - acc: 0.0604
Epoch 9/50
 - 2s - loss: 4.2367 - acc: 0.0700
Epoch 10/50
 - 2s - loss: 4.0491 - acc: 0.0964
Epoch 11/50
 - 2s - loss: 3.7631 - acc: 0.1334
Epoch 12/50
 - 2s - loss: 3.4961 - acc: 0.1796
Epoch 13/50
 - 2s - loss: 3.0884 - acc: 0.2346
Epoch 14/50
 - 2s - loss: 2.7901 - acc: 0.2910
Epoch 15/50
 - 3s - loss: 2.4497 - acc: 0.3582
Epoch 16/50
 - 2s - loss: 2.3362 - acc: 0.3846
Epoch 17/50
 - 3s - loss: 1.7745 - acc: 0.5108
Epoch 18/50
 - 2s - loss: 1.3298 - acc: 0.6126
Epoch 19/50
 - 2s - loss: 1.0508 - acc: 0.6910
Epoch 20/50
 - 2s - loss: 1.4129 - acc: 0.6184
Epoch 21/50
 - 2s - loss: 0.5162 - acc: 0.8582
Epoch 22/50
 - 2s - lo

<keras.callbacks.callbacks.History at 0x7fdcaf619358>

In [16]:
# [loss, accuracy] of the 400-unit model on the 100-example test set
model.evaluate(x= X_test, y= y_test)



[1.2929810881614685, 0.699999988079071]

### with 100 LSTM units

In [17]:
# same architecture with a narrower LSTM of 100 units
model= Sequential([
    LSTM(100, input_shape= X.shape[1:]),      # (timesteps, features) of one sample
    Dense(n_features, activation= 'softmax'),
])
model.compile(
    loss= 'categorical_crossentropy',
    optimizer= 'adam',
    metrics= ['acc'],
)
# model.summary()

# same training schedule as the other variants
model.fit(x= X, y= y, epochs= 50, batch_size= 128, verbose= 2)

Epoch 1/50
 - 3s - loss: 4.6056 - acc: 0.0094
Epoch 2/50
 - 2s - loss: 4.5922 - acc: 0.0174
Epoch 3/50
 - 2s - loss: 4.5778 - acc: 0.0208
Epoch 4/50
 - 2s - loss: 4.5584 - acc: 0.0250
Epoch 5/50
 - 2s - loss: 4.5322 - acc: 0.0366
Epoch 6/50
 - 2s - loss: 4.4965 - acc: 0.0298
Epoch 7/50
 - 2s - loss: 4.4456 - acc: 0.0428
Epoch 8/50
 - 2s - loss: 4.3804 - acc: 0.0470
Epoch 9/50
 - 2s - loss: 4.2788 - acc: 0.0532
Epoch 10/50
 - 2s - loss: 4.1701 - acc: 0.0666
Epoch 11/50
 - 2s - loss: 4.0674 - acc: 0.0824
Epoch 12/50
 - 2s - loss: 3.9470 - acc: 0.0984
Epoch 13/50
 - 2s - loss: 3.8448 - acc: 0.1202
Epoch 14/50
 - 2s - loss: 3.6758 - acc: 0.1338
Epoch 15/50
 - 2s - loss: 3.5438 - acc: 0.1634
Epoch 16/50
 - 3s - loss: 3.4209 - acc: 0.1836
Epoch 17/50
 - 2s - loss: 3.3479 - acc: 0.1974
Epoch 18/50
 - 3s - loss: 3.1729 - acc: 0.2270
Epoch 19/50
 - 2s - loss: 3.0172 - acc: 0.2560
Epoch 20/50
 - 2s - loss: 2.9538 - acc: 0.2644
Epoch 21/50
 - 2s - loss: 2.7936 - acc: 0.3044
Epoch 22/50
 - 2s - lo

<keras.callbacks.callbacks.History at 0x7fd7e10f9e48>

In [18]:
# [loss, accuracy] of the 100-unit model on the 100-example test set
model.evaluate(x= X_test, y= y_test)



[0.8910874938964843, 0.7400000095367432]

### with 150 LSTM units

In [19]:
# same architecture with an LSTM of 150 units
model= Sequential([
    LSTM(150, input_shape= X.shape[1:]),      # (timesteps, features) of one sample
    Dense(n_features, activation= 'softmax'),
])
model.compile(
    loss= 'categorical_crossentropy',
    optimizer= 'adam',
    metrics= ['acc'],
)
# model.summary()

# same training schedule as the other variants
model.fit(x= X, y= y, epochs= 50, batch_size= 128, verbose= 2)

Epoch 1/50
 - 3s - loss: 4.6050 - acc: 0.0104
Epoch 2/50
 - 2s - loss: 4.5887 - acc: 0.0160
Epoch 3/50
 - 2s - loss: 4.5687 - acc: 0.0354
Epoch 4/50
 - 2s - loss: 4.5440 - acc: 0.0356
Epoch 5/50
 - 2s - loss: 4.5015 - acc: 0.0428
Epoch 6/50
 - 2s - loss: 4.4195 - acc: 0.0486
Epoch 7/50
 - 2s - loss: 4.2699 - acc: 0.0598
Epoch 8/50
 - 2s - loss: 4.1018 - acc: 0.0874
Epoch 9/50
 - 2s - loss: 3.9224 - acc: 0.1080
Epoch 10/50
 - 2s - loss: 3.7228 - acc: 0.1362
Epoch 11/50
 - 2s - loss: 3.5380 - acc: 0.1698
Epoch 12/50
 - 2s - loss: 3.3333 - acc: 0.1958
Epoch 13/50
 - 2s - loss: 3.1563 - acc: 0.2244
Epoch 14/50
 - 2s - loss: 2.9375 - acc: 0.2654
Epoch 15/50
 - 2s - loss: 2.7373 - acc: 0.3098
Epoch 16/50
 - 2s - loss: 2.5809 - acc: 0.3446
Epoch 17/50
 - 2s - loss: 2.3486 - acc: 0.3916
Epoch 18/50
 - 3s - loss: 2.1302 - acc: 0.4350
Epoch 19/50
 - 2s - loss: 1.9402 - acc: 0.4852
Epoch 20/50
 - 3s - loss: 1.6428 - acc: 0.5616
Epoch 21/50
 - 2s - loss: 1.6072 - acc: 0.5632
Epoch 22/50
 - 2s - lo

<keras.callbacks.callbacks.History at 0x7fd7e0de1d30>

In [20]:
# [loss, accuracy] of the 150-unit model on the 100-example test set
model.evaluate(x= X_test, y= y_test)



[0.13788420885801314, 0.9700000286102295]

### The LSTM with 150 units appears to be the best. Let's give it more test examples

In [21]:
# larger test set: 10000 freshly generated examples
big_test_examples= [generate_example(length, n_features, out_index) for _ in tqdm(range(10000))]
X_test_10000= np.concatenate( [pair[0] for pair in big_test_examples] )
y_test_10000= np.concatenate( [pair[1] for pair in big_test_examples] )

yhat= model.predict(X_test_10000)
# uncomment to inspect individual predictions:
#print('Sequence: %s' % [one_hot_decode(x) for x in X_test_10000])
#print('Expected: %s' % one_hot_decode(y_test_10000))
#print('Predicted: %s' % one_hot_decode(yhat))

# displays [loss, accuracy] on the larger test set
model.evaluate(x= X_test_10000, y= y_test_10000)

100%|██████████| 10000/10000 [00:18<00:00, 534.17it/s]




[0.13183870311379434, 0.970300018787384]

### The test accuracy holds up on the larger test set: about 97 %
### So we built a model which can memorize a certain index of a sequence with about 97 % accuracy 

# Learning the Alphabet

We would like to try different configurations of LSTM in order to predict the alphabet. 

In [22]:
import numpy as np
# re-seed NumPy's global RNG for the alphabet experiments
seed= 27
np.random.seed(seed)

from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.utils import np_utils
from keras.layers.embeddings import Embedding

# only needed for the commented-out pdb.set_trace() calls in the cells below
import pdb

In [23]:
alphabet= "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# lookup tables: letter -> position in the alphabet, and position -> letter
char_to_int= {el: i for i, el in enumerate(alphabet)}
int_to_char= {i: el for i, el in enumerate(alphabet)}

In [24]:
# prepare the dataset of input to output pairs encoded as integers
#
# example, when seq_length= 2:
#   seq_in  -> ['B', 'C']
#   seq_out -> ['D']

seq_length= 1
dataX, dataY= [], []
for i in range(len(alphabet) - seq_length):
    window_end= i + seq_length
    seq_in, seq_out= alphabet[i: window_end], alphabet[window_end]
    # the window becomes a list of integer codes, the target a single code
    dataX.append( [char_to_int[char] for char in seq_in] )
    dataY.append( char_to_int[seq_out] )
    print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [25]:
# one-hot encode the output
y= np_utils.to_categorical(dataY)

# inputs: integer codes shaped [samples, timesteps, features], then divided by
# the alphabet size so that every value falls in [0, 1)
X= np.array(dataX).reshape( (len(dataX), seq_length, 1) ) / len(alphabet)

In [26]:
# report the array shapes before building the model
for label, array in (('X shape:', X), ('y shape:', y)):
    print(label, array.shape)

X shape: (25, 1, 1)
y shape: (25, 26)


In [27]:
# small LSTM (32 units) on the scaled integer input, softmax over the letters
model= Sequential([
    LSTM(32, input_shape= X.shape[1:]),       # (timesteps, features) of one sample
    Dense(y.shape[1], activation= 'softmax'),
])
model.compile(
    loss= 'categorical_crossentropy',
    optimizer= 'adam',
    metrics= ['accuracy'],
)
# batch_size= 1: the weights are updated after every single sample
model.fit(x= X, y= y, epochs= 500, batch_size= 1, verbose= 2)

Epoch 1/500
 - 1s - loss: 3.2667 - accuracy: 0.0400
Epoch 2/500
 - 0s - loss: 3.2588 - accuracy: 0.0400
Epoch 3/500
 - 0s - loss: 3.2557 - accuracy: 0.0400
Epoch 4/500
 - 0s - loss: 3.2533 - accuracy: 0.0400
Epoch 5/500
 - 0s - loss: 3.2502 - accuracy: 0.0400
Epoch 6/500
 - 0s - loss: 3.2475 - accuracy: 0.0000e+00
Epoch 7/500
 - 0s - loss: 3.2444 - accuracy: 0.0400
Epoch 8/500
 - 0s - loss: 3.2420 - accuracy: 0.0000e+00
Epoch 9/500
 - 0s - loss: 3.2383 - accuracy: 0.0000e+00
Epoch 10/500
 - 0s - loss: 3.2354 - accuracy: 0.0400
Epoch 11/500
 - 0s - loss: 3.2320 - accuracy: 0.0000e+00
Epoch 12/500
 - 0s - loss: 3.2284 - accuracy: 0.0000e+00
Epoch 13/500
 - 0s - loss: 3.2245 - accuracy: 0.0400
Epoch 14/500
 - 0s - loss: 3.2205 - accuracy: 0.0000e+00
Epoch 15/500
 - 0s - loss: 3.2164 - accuracy: 0.0400
Epoch 16/500
 - 0s - loss: 3.2117 - accuracy: 0.0000e+00
Epoch 17/500
 - 0s - loss: 3.2067 - accuracy: 0.0800
Epoch 18/500
 - 0s - loss: 3.2017 - accuracy: 0.0800
Epoch 19/500
 - 0s - loss: 

<keras.callbacks.callbacks.History at 0x7fd7e0ae12b0>

In [28]:
# evaluate returns [loss, accuracy]; this is measured on the training data itself
loss, accuracy= model.evaluate(X, y, verbose= 0)
print('Model Accuracy is : %.2f%%' %(accuracy*100) )

Model Accuracy is : 80.00%


In [29]:
# evaluation: feed every training pattern back in and print the predicted next letter
for pattern in dataX:
    # same shaping and scaling as was applied to the training input
    x= np.reshape(pattern, (1, len(pattern), 1) ) / float(len(alphabet))
    prediction= model.predict_classes(x)[0]
    seq_in= [int_to_char[value] for value in pattern]
    print(seq_in, '->', int_to_char[prediction])

['A'] -> B
['B'] -> B
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> U
['U'] -> W
['V'] -> X
['W'] -> Z
['X'] -> Z
['Y'] -> Z


### Solving the same problem with integer-encoded input data and an embedding layer

### Before, we integer encoded X and fed the (scaled) integers directly to the LSTM. Fed directly like that, the model assumes there is an ordinal relationship between characters, while there is none: they are not numbers. Integer encoding is meant to be used together with an embedding layer, which looks up a learned vector for every integer code.

### For this reason, I will try to use an embedding layer and see how the results change.

In [30]:
alphabet= "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# lookup tables: letter -> position in the alphabet, and position -> letter
char_to_int= {el: i for i, el in enumerate(alphabet)}
int_to_char= {i: el for i, el in enumerate(alphabet)}

# windows of three consecutive letters, each paired with the letter that follows
seq_length= 3
dataX, dataY= [], []
for i in range(len(alphabet) - seq_length):
    window_end= i + seq_length
    seq_in, seq_out= alphabet[i: window_end], alphabet[window_end]
    dataX.append( [char_to_int[char] for char in seq_in] )
    dataY.append( char_to_int[seq_out] )
    print(seq_in, '->', seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


In [31]:
# one-hot encode the output
y= np_utils.to_categorical(dataY)

# inputs stay integer coded, shaped [samples, timesteps], for the Embedding layer
X= np.array(dataX).reshape( (len(dataX), seq_length) )

for label, array in (('X shape:', X), ('y shape:', y)):
    print(label, array.shape)

X shape: (23, 3)
y shape: (23, 26)


In [32]:
# NOTE(review): the extra index (26) is not used by this dataset; presumably it is
# kept to match the padded model further down - confirm
vocab_size= len(alphabet) + 1
embedding_vector_length= 4
length= seq_length

# Embedding maps each integer code to a learned 4-dimensional vector before the LSTM
model= Sequential([
    Embedding(vocab_size, embedding_vector_length, input_length= length),
    LSTM(32),
    Dense(y.shape[1], activation= 'softmax'),
])
model.summary()

model.compile(
    loss= 'categorical_crossentropy',
    optimizer= 'adam',
    metrics= ['accuracy'],
)
model.fit(x= X, y= y, epochs= 100, batch_size= 1, verbose= 2)

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 3, 4)              108       
_________________________________________________________________
lstm_6 (LSTM)                (None, 32)                4736      
_________________________________________________________________
dense_6 (Dense)              (None, 26)                858       
Total params: 5,702
Trainable params: 5,702
Non-trainable params: 0
_________________________________________________________________


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/100
 - 1s - loss: 3.2639 - accuracy: 0.0000e+00
Epoch 2/100
 - 0s - loss: 3.2552 - accuracy: 0.0435
Epoch 3/100
 - 0s - loss: 3.2507 - accuracy: 0.0435
Epoch 4/100
 - 0s - loss: 3.2459 - accuracy: 0.0435
Epoch 5/100
 - 0s - loss: 3.2401 - accuracy: 0.0870
Epoch 6/100
 - 0s - loss: 3.2329 - accuracy: 0.0870
Epoch 7/100
 - 0s - loss: 3.2235 - accuracy: 0.0870
Epoch 8/100
 - 0s - loss: 3.2102 - accuracy: 0.1304
Epoch 9/100
 - 0s - loss: 3.1907 - accuracy: 0.0870
Epoch 10/100
 - 0s - loss: 3.1601 - accuracy: 0.1739
Epoch 11/100
 - 0s - loss: 3.1142 - accuracy: 0.2174
Epoch 12/100
 - 0s - loss: 3.0417 - accuracy: 0.2174
Epoch 13/100
 - 0s - loss: 2.9309 - accuracy: 0.2174
Epoch 14/100
 - 0s - loss: 2.7995 - accuracy: 0.2174
Epoch 15/100
 - 0s - loss: 2.6572 - accuracy: 0.2174
Epoch 16/100
 - 0s - loss: 2.5307 - accuracy: 0.2174
Epoch 17/100
 - 0s - loss: 2.4216 - accuracy: 0.1739
Epoch 18/100
 - 0s - loss: 2.3247 - accuracy: 0.2174
Epoch 19/100
 - 0s - loss: 2.2147 - accuracy: 0.260

<keras.callbacks.callbacks.History at 0x7fd7e0b9ad68>

In [33]:
# [loss, accuracy], measured on the same samples the model was trained on
model.evaluate(x= X, y= y)



[0.07985977828502655, 1.0]

In [34]:
# evaluation: print each training window next to the predicted next letter
for x in X:
    # a single sample still needs the leading batch axis
    x= x.reshape( (1, seq_length) )
    predicted= model.predict_classes(x)
    seq= [int_to_char[el] for el in x[0]]
    print(seq, '->', int_to_char[predicted[0]])

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> V
['T', 'U', 'V'] -> W
['U', 'V', 'W'] -> X
['V', 'W', 'X'] -> Y
['W', 'X', 'Y'] -> Z


### As we can see, the performance with the embedding layer is perfect. Even with fewer epochs, we achieved an accuracy of 1.0 on the training sequences.

### Considering a more elaborate dataset

In [35]:
class SpecialString:
    """
    A string wrapper with circular (wrap-around) indexing and slicing.

    Out-of-bound indices continue from the beginning of the sequence. For ex. if
    the length of the sequence is 26, index 25 is the last character and index 26,
    which normally gives an error, returns the first character again. Slicing
    follows the same logic, so a slice may run past the end of the string.
    """
    def __init__(self, string_):
        self.string= string_
        self.index= 0   # cursor used only by __next__
    
    def __getitem__(self, key):
        """
        Return the character at a wrapped index, or the characters of a
        wrapped slice.

        A= SpecialString('abcdefghi')
        A[9]    -> 'a'
        A[7:9]  -> 'hi'
        A[7:11] -> 'hiab'
        A[0:9]  -> 'abcdefghi'
        A[5:3]  -> 'fghiabc'   (stop not after start: wraps round to reach it)

        A missing start means 0 and a missing stop means len(self). A step, if
        given, is applied to the wrapped result. Indexing an empty string
        raises IndexError; slicing it gives an empty result.
        """
        size= len(self.string)
        
        if isinstance(key, slice):
            start= 0 if key.start is None else key.start
            stop= size if key.stop is None else key.stop
            if size == 0:
                return self.string[0:0]
            if stop > start:
                # forward slice: keep its true length, even beyond one full cycle
                count= stop - start
            else:
                # stop is not after start: walk forward round the cycle up to it
                count= (stop - start) % size
            offset= start % size
            # the string rotated so that the first requested character comes first
            rotated= self.string[offset:] + self.string[:offset]
            full_turns, remainder= divmod(count, size)
            result= rotated * full_turns + rotated[:remainder]
            return result if key.step is None else result[::key.step]
        
        if size == 0:
            raise IndexError('SpecialString index out of range: the string is empty')
        return self.string[key % size]
    
    def __repr__(self):
        return self.string 
    
    def __len__(self):
        return len(self.string)
    
    def __iter__(self):
        # A fresh iterator per loop: nested loops and loops left early with
        # `break` no longer share (and corrupt) one cursor.
        return iter(self.string)
    
    def __next__(self):
        # Kept for callers that call next() on the object directly; it walks the
        # string once with self.index and rewinds when the end is reached.
        if self.index < len(self.string):
            result= self.string[self.index]
            self.index += 1
            return result
        else:
            self.index= 0
            raise StopIteration

In [36]:
alphabet= SpecialString( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" )
# lookup tables: letter -> position in the alphabet, and position -> letter
char_to_int= {el: i for i, el in enumerate(alphabet)}
int_to_char= {i: el for i, el in enumerate(alphabet)}


# every window of 1 to 25 letters, starting at every letter and wrapping past 'Z',
# paired with the letter that follows it
dataX, dataY= [], []
for seq_length in range(1, 26):
    for i in range(len(alphabet)):
        window_end= i + seq_length
        seq_in, seq_out= alphabet[i: window_end], alphabet[window_end]
        dataX.append( [char_to_int[char] for char in seq_in] )
        dataY.append( char_to_int[seq_out] )
        print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z
Z -> A
AB -> C
BC -> D
CD -> E
DE -> F
EF -> G
FG -> H
GH -> I
HI -> J
IJ -> K
JK -> L
KL -> M
LM -> N
MN -> O
NO -> P
OP -> Q
PQ -> R
QR -> S
RS -> T
ST -> U
TU -> V
UV -> W
VW -> X
WX -> Y
XY -> Z
YZ -> A
ZA -> B
ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z
XYZ -> A
YZA -> B
ZAB -> C
ABCD -> E
BCDE -> F
CDEF -> G
DEFG -> H
EFGH -> I
FGHI -> J
GHIJ -> K
HIJK -> L
IJKL -> M
JKLM -> N
KLMN -> O
LMNO -> P
MNOP -> Q
NOPQ -> R
OPQR -> S
PQRS -> T
QRST -> U
RSTU -> V
STUV -> W
TUVW -> X
UVWX -> Y
VWXY -> Z
WXYZ -> A
XYZA -> B
YZAB -> C
ZABC -> D
ABCDE -> F
BCDEF -> G
CDEFG -> H
DEFGH -> I
EFGHI -> J
FGHIJ -> K
GHIJK -> L
HIJKL -> M
IJKLM -> N
JKLMN -> O
KLMNO 

In [37]:
# padding the sequences
from keras.preprocessing import sequence

In [38]:
# 0 cannot serve as the padding value because it already stands for "A",
# so the next free integer, len(alphabet) = 26, is used instead.
# It decodes to an empty string so that padding disappears when printing.
pad_value= len(alphabet)
int_to_char[pad_value]= ''

# every window is padded to the longest window length
seq_length= len(alphabet) - 1
dataX_padded= sequence.pad_sequences(dataX, maxlen= seq_length, value= pad_value)

# input X in the format [samples, timesteps]
X= dataX_padded.reshape( (len(dataX_padded), seq_length) )

# one-hot encode the output
y= np_utils.to_categorical(dataY)

for label, array in (('X shape:', X), ('y shape:', y)):
    print(label, array.shape)

X shape: (650, 25)
y shape: (650, 26)


In [39]:
# modelling
# one embedding row per letter plus one for the padding value
vocab_size= len(alphabet) + 1
embedding_vector_length= 4


model= Sequential([
    Embedding(vocab_size, embedding_vector_length, input_length= seq_length),
    LSTM(32),
    Dense(y.shape[1], activation= 'softmax'),
])
model.summary()


model.compile(
    loss= 'categorical_crossentropy',
    optimizer= 'adam',
    metrics= ['accuracy'],
)
model.fit(x= X, y= y, epochs= 100, batch_size= 10, verbose= 2)

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 25, 4)             108       
_________________________________________________________________
lstm_7 (LSTM)                (None, 32)                4736      
_________________________________________________________________
dense_7 (Dense)              (None, 26)                858       
Total params: 5,702
Trainable params: 5,702
Non-trainable params: 0
_________________________________________________________________


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/100
 - 2s - loss: 3.2528 - accuracy: 0.0754
Epoch 2/100
 - 2s - loss: 3.0283 - accuracy: 0.1354
Epoch 3/100
 - 2s - loss: 2.5554 - accuracy: 0.1569
Epoch 4/100
 - 2s - loss: 2.2808 - accuracy: 0.2400
Epoch 5/100
 - 2s - loss: 2.1129 - accuracy: 0.3446
Epoch 6/100
 - 2s - loss: 1.9658 - accuracy: 0.3862
Epoch 7/100
 - 2s - loss: 1.8553 - accuracy: 0.4262
Epoch 8/100
 - 2s - loss: 1.7257 - accuracy: 0.5292
Epoch 9/100
 - 2s - loss: 1.6098 - accuracy: 0.5754
Epoch 10/100
 - 2s - loss: 1.5379 - accuracy: 0.6677
Epoch 11/100
 - 2s - loss: 1.4326 - accuracy: 0.6846
Epoch 12/100
 - 2s - loss: 1.3502 - accuracy: 0.7677
Epoch 13/100
 - 2s - loss: 1.2713 - accuracy: 0.8062
Epoch 14/100
 - 2s - loss: 1.2049 - accuracy: 0.8400
Epoch 15/100
 - 2s - loss: 1.1417 - accuracy: 0.8462
Epoch 16/100
 - 2s - loss: 1.0734 - accuracy: 0.8769
Epoch 17/100
 - 2s - loss: 1.0166 - accuracy: 0.8954
Epoch 18/100
 - 2s - loss: 0.9661 - accuracy: 0.9062
Epoch 19/100
 - 2s - loss: 0.9215 - accuracy: 0.9169
Ep

<keras.callbacks.callbacks.History at 0x7fd7f04be6a0>

In [40]:
# [loss, accuracy], measured on the same padded samples the model was trained on
model.evaluate(x= X, y= y)



[0.01830563293913236, 1.0]

In [41]:
# evaluation: print each (unpadded) training window next to the predicted next letter
for x in X:
    x= x.reshape( (1, seq_length) )
    predicted= model.predict_classes(x)
    decoded= (int_to_char[el] for el in x[0])
    # the padding value decodes to '' and is dropped here
    seq= [char for char in decoded if char]
    print(seq, '->', int_to_char[predicted[0]])

['A'] -> B
['B'] -> C
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> U
['U'] -> V
['V'] -> W
['W'] -> X
['X'] -> Y
['Y'] -> Z
['Z'] -> A
['A', 'B'] -> C
['B', 'C'] -> D
['C', 'D'] -> E
['D', 'E'] -> F
['E', 'F'] -> G
['F', 'G'] -> H
['G', 'H'] -> I
['H', 'I'] -> J
['I', 'J'] -> K
['J', 'K'] -> L
['K', 'L'] -> M
['L', 'M'] -> N
['M', 'N'] -> O
['N', 'O'] -> P
['O', 'P'] -> Q
['P', 'Q'] -> R
['Q', 'R'] -> S
['R', 'S'] -> T
['S', 'T'] -> U
['T', 'U'] -> V
['U', 'V'] -> W
['V', 'W'] -> X
['W', 'X'] -> Y
['X', 'Y'] -> Z
['Y', 'Z'] -> A
['Z', 'A'] -> B
['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O'

In [42]:
# let's give it some unusual sequences 

sample= list('FGGGHHHHHLLLLMNOPR')
x= [char_to_int[el] for el in sample]
x= np.reshape(x, (1, len(x)))
# pad it exactly like the training data: same length (seq_length) and same
# padding value (len(alphabet)) that the model was built and trained with
x= sequence.pad_sequences(x, maxlen= seq_length, value= len(alphabet))
output= model.predict_classes(x)
output= int_to_char[output[0]]
print( sample, '->', output)


['F', 'G', 'G', 'G', 'H', 'H', 'H', 'H', 'H', 'L', 'L', 'L', 'L', 'M', 'N', 'O', 'P', 'R'] -> R


### Finally we can see that we built a model which can learn the whole alphabet with 100 % accuracy on the training sequences.

### Even when we feed the model with strange sequences such as the one above, its output is quite reasonable. 