In [38]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Number of unique addition problems to generate.
TRAINING_SIZE = 50000
# Maximum number of digits in each operand.
DIGITS = 3
# When True, each padded query string is reversed before it is encoded.
REVERSE = True

# Longest possible query: two DIGITS-wide operands joined by a single "+".
MAXLEN = 2 * DIGITS + 1

# Vocabulary: the ten digits, the plus sign, and a space used for padding.
CHARS = "0123456789+ "

In [39]:
class CharacterTable:
    """Two-way mapping between characters and their one-hot / index encodings.

    Given a set of characters:
    + Encode strings to a one-hot representation
    + Decode a one-hot or integer representation to its character output
    + Decode a vector of probabilities to its character output
    """

    def __init__(self, chars):
        """Initialize character table.
        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the rest are sorted, so a character's index is
                its rank in sorted order.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {c: i for i, c in enumerate(self.chars)}
        self.indices_char = {i: c for i, c in enumerate(self.chars)}

    def encode(self, C, num_rows):
        """One-hot encode given string C.
        # Arguments
            C: string, to be encoded. Every character must be in the table
                and `len(C)` must not exceed `num_rows`.
            num_rows: Number of rows in the returned one-hot encoding. This is
                used to keep the # of rows for each data the same.
        # Returns
            A float array of shape `(num_rows, len(self.chars))`. Rows past
            the end of `C` are left all-zero.
        """
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        """Decode the given vector or 2D array to their character output.
        # Arguments
            x: A vector or a 2D array of probabilities or one-hot representations;
                or a vector of character indices (used with `calc_argmax=False`).
            calc_argmax: Whether to find the character index with maximum
                probability, defaults to `True`.
        # Returns
            The decoded string: one character per row of a 2D input, a single
            character for a 1D probability vector, or one character per index
            when `calc_argmax=False`.
        """
        if calc_argmax:
            # argmax over the last axis turns a 1D probability vector into a
            # 0-d scalar, which cannot be iterated; atleast_1d restores a
            # length-1 vector so a single character is decoded instead of
            # raising TypeError. asarray lets plain sequences be passed too.
            x = np.atleast_1d(np.asarray(x).argmax(axis=-1))
        return "".join(self.indices_char[index] for index in x)


def get_data():
    """Generate unique random addition problems and their answers.

    Reads the module-level settings TRAINING_SIZE, DIGITS, MAXLEN, REVERSE
    and CHARS.

    # Returns
        A tuple `(questions, expected, ctable)`:
        questions: list of TRAINING_SIZE query strings such as "12+345",
            space-padded to MAXLEN and reversed when REVERSE is set.
        expected: list of answer strings, space-padded to DIGITS + 1.
        ctable: a CharacterTable built from CHARS.
    # Raises
        ValueError: if TRAINING_SIZE exceeds the number of distinct problems
            that can be generated, which would otherwise loop forever.
    """
    # Operands range over 0 .. 10**DIGITS - 1 and (a, b) / (b, a) count as the
    # same problem, so there are n * (n + 1) / 2 distinct problems in total.
    num_values = 10 ** DIGITS
    max_unique = num_values * (num_values + 1) // 2
    if TRAINING_SIZE > max_unique:
        raise ValueError(
            "TRAINING_SIZE={} exceeds the {} unique problems available with "
            "DIGITS={}".format(TRAINING_SIZE, max_unique, DIGITS)
        )

    digit_chars = list("0123456789")

    def random_operand():
        """Draw an integer written with 1 to DIGITS random digits."""
        return int(
            "".join(
                np.random.choice(digit_chars)
                for _ in range(np.random.randint(1, DIGITS + 1))
            )
        )

    ctable = CharacterTable(CHARS)
    questions = []
    expected = []
    seen = set()
    print("Generating data...")
    while len(questions) < TRAINING_SIZE:
        a, b = random_operand(), random_operand()
        # Skip any addition questions we've already seen.
        # Also skip any such that x+y == y+x (hence the sorting).
        key = tuple(sorted((a, b)))
        if key in seen:
            continue
        seen.add(key)
        # Pad the data with spaces such that it is always MAXLEN.
        query = "{}+{}".format(a, b).ljust(MAXLEN)
        # Answers can be of maximum size DIGITS + 1.
        ans = str(a + b).ljust(DIGITS + 1)
        if REVERSE:
            # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note the
            # space used for padding.)
            query = query[::-1]
        questions.append(query)
        expected.append(ans)

    return questions, expected, ctable

In [40]:
# Build the dataset once; later cells reuse these three objects.
questions, expected, ctable = get_data()
num_questions = len(questions)
print("Total questions:", num_questions)

Generating data...
Total questions: 50000


In [41]:
print("Vectorization...")
num_samples = len(questions)
vocab_size = len(CHARS)
answer_len = DIGITS + 1

# One-hot tensors: (sample, position in string, character index).
x = np.zeros((num_samples, MAXLEN, vocab_size), dtype=np.bool_)
y = np.zeros((num_samples, answer_len, vocab_size), dtype=np.bool_)
for row, (question, answer) in enumerate(zip(questions, expected)):
    x[row] = ctable.encode(question, MAXLEN)
    y[row] = ctable.encode(answer, answer_len)

# Apply one shared random permutation so every question stays aligned with
# its answer after shuffling.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold out the last 10% as validation data that is never trained on.
split_at = len(x) - len(x) // 10
x_train, x_val = x[:split_at], x[split_at:]
y_train, y_val = y[:split_at], y[split_at:]

for title, inputs, targets in (
    ("Training Data:", x_train, y_train),
    ("Validation Data:", x_val, y_val),
):
    print(title)
    print(inputs.shape)
    print(targets.shape)

Vectorization...
Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [42]:
# Number of stacked LSTM layers on the decoder side.
num_layers = 1

# Layers in the order they are stacked:
#   1. an LSTM that reads the whole query and emits a single vector,
#   2. RepeatVector, which copies that vector once per output position,
#   3. `num_layers` LSTMs that return the full output sequence,
#   4. a Dense softmax giving a distribution over CHARS at every position.
stack = [
    layers.LSTM(128, input_shape=(MAXLEN, len(CHARS))),
    layers.RepeatVector(DIGITS + 1),
    *(layers.LSTM(128, return_sequences=True) for _ in range(num_layers)),
    layers.Dense(len(CHARS), activation="softmax"),
]

model = keras.Sequential()
for layer in stack:
    model.add(layer)

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 128)               72192     
                                                                 
 repeat_vector_4 (RepeatVec  (None, 4, 128)            0         
 tor)                                                            
                                                                 
 lstm_9 (LSTM)               (None, 4, 128)            131584    
                                                                 
 dense_4 (Dense)             (None, 4, 12)             1548      
                                                                 
Total params: 205324 (802.05 KB)
Trainable params: 205324 (802.05 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [43]:
epochs = 50
batch_size = 32

# Train one epoch at a time so sample predictions can be inspected after each
# pass. range(1, epochs + 1) runs exactly `epochs` iterations numbered from 1;
# the previous range(1, epochs) stopped one iteration short.
for epoch in range(1, epochs + 1):
    print()
    print("Iteration", epoch)
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=1,
        validation_data=(x_val, y_val),
    )
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = np.argmax(model.predict(rowx), axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Queries are stored reversed when REVERSE is set; flip them back
        # for display.
        print("Q", q[::-1] if REVERSE else q, end=" ")
        print("T", correct, end=" ")
        mark = "☑ " if correct == guess else "☒ "
        print(mark + guess)



Iteration 1
Q 612+604 T 1216 ☒ 1133
Q 184+94  T 278  ☒ 496 
Q 295+71  T 366  ☒ 223 
Q 855+675 T 1530 ☒ 1503
Q 6+558   T 564  ☒ 656 
Q 713+435 T 1148 ☒ 1133
Q 81+3    T 84   ☒ 11  
Q 650+58  T 708  ☒ 662 
Q 821+123 T 944  ☒ 102 
Q 31+24   T 55   ☒ 12  

Iteration 2
Q 848+87  T 935  ☒ 931 
Q 22+406  T 428  ☒ 459 
Q 2+838   T 840  ☒ 838 
Q 4+465   T 469  ☑ 469 
Q 524+957 T 1481 ☒ 1539
Q 581+178 T 759  ☒ 668 
Q 93+522  T 615  ☒ 597 
Q 97+49   T 146  ☒ 138 
Q 981+7   T 988  ☒ 999 
Q 46+21   T 67   ☒ 50  

Iteration 3
Q 94+11   T 105  ☒ 10  
Q 83+65   T 148  ☒ 144 
Q 423+683 T 1106 ☒ 1118
Q 29+485  T 514  ☒ 518 
Q 76+719  T 795  ☒ 806 
Q 67+38   T 105  ☒ 10  
Q 811+79  T 890  ☒ 896 
Q 477+54  T 531  ☒ 538 
Q 3+505   T 508  ☒ 504 
Q 634+640 T 1274 ☒ 1276

Iteration 4
Q 42+367  T 409  ☒ 404 
Q 984+2   T 986  ☑ 986 
Q 795+836 T 1631 ☒ 1620
Q 64+112  T 176  ☒ 174 
Q 63+162  T 225  ☒ 222 
Q 54+520  T 574  ☒ 572 
Q 635+35  T 670  ☒ 677 
Q 30+712  T 742  ☒ 747 
Q 603+32  T 635  ☒ 636 
Q 26+917  T 

In [89]:
# Run a single hand-written query through the model.
# '   22+1' is the space-padded question '1+22   ' reversed, matching the
# order used for the generated queries when REVERSE is set.
xen = ctable.encode('   22+1', MAXLEN)

# NOTE: this rebinds the global `x` to a one-sample batch.
x = np.zeros((1, MAXLEN, len(CHARS)), dtype=np.bool_)
# Copy the float one-hot rows into the boolean batch in one assignment.
x[0] = xen > 0

probabilities = model.predict(x)
result = np.argmax(probabilities, axis=-1)
guess = ctable.decode(result[0], calc_argmax=False)

print(guess)

[[4 5 0 0]]
23  
