In [1]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Number of unique addition questions to generate.
TRAINING_SIZE = 50_000
# Upper bound on the number of digit characters drawn for each operand.
DIGITS = 3
# When True, each padded query string is reversed before it is encoded.
REVERSE = True

# Longest possible query: two DIGITS-long operands joined by a single '+'.
MAXLEN = 2 * DIGITS + 1

# Vocabulary: the ten digits, the plus sign, and a space used for padding.
CHARS = "0123456789" "+ "

2023-09-17 08:20:55.563663: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-17 08:20:55.737902: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-17 08:20:55.738423: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
class CharacterTable:
    """Two-way mapping between a fixed character set and one-hot rows.

    Provides:
    + Encoding of a string into a 2D one-hot array
    + Decoding of one-hot (or probability) rows back into characters
    + Decoding of a vector of integer indices back into characters
    """

    def __init__(self, chars):
        """Build the character <-> index lookup tables.
        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the rest is sorted, which fixes the index order.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """Return the one-hot encoding of string C.
        # Arguments
            C: string, to be encoded.
            num_rows: Number of rows in the returned array. Rows past
                `len(C)` are left as all zeros, so every sample can share
                the same shape.
        # Returns
            A float array of shape `(num_rows, len(self.chars))`.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            col = self.char_indices[ch]
            onehot[row, col] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn an encoded array back into its string.
        # Arguments
            x: A vector or a 2D array of probabilities or one-hot rows;
                or a vector of character indices (with `calc_argmax=False`).
            calc_argmax: Whether to first reduce `x` over its last axis to
                the index of the largest entry, defaults to `True`.
        # Returns
            The decoded characters joined into a single string.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[idx] for idx in indices)


def get_data():
    """Generate unique random addition questions with their answers.

    Reads the module-level settings TRAINING_SIZE, DIGITS, MAXLEN, REVERSE
    and CHARS.

    # Returns
        A tuple `(questions, expected, ctable)`:
            questions: list of TRAINING_SIZE query strings of the form
                "a+b", space-padded to MAXLEN and reversed when REVERSE
                is true.
            expected: list of answer strings `str(a + b)`, space-padded on
                the right to DIGITS + 1 characters.
            ctable: a CharacterTable built from CHARS.

    # Raises
        ValueError: if TRAINING_SIZE is larger than the number of distinct
            questions that can exist, in which case the generation loop
            below could never finish.
    """
    # Operands range over 0 .. 10**DIGITS - 1, and (a, b) / (b, a) count as
    # the same question, so only n * (n + 1) / 2 distinct questions exist.
    operand_count = 10 ** DIGITS
    max_questions = operand_count * (operand_count + 1) // 2
    if TRAINING_SIZE > max_questions:
        raise ValueError(
            "TRAINING_SIZE={} exceeds the {} unique questions available "
            "with DIGITS={}".format(TRAINING_SIZE, max_questions, DIGITS)
        )

    def random_operand():
        """Return an int parsed from 1 to DIGITS random digit characters."""
        digit_count = np.random.randint(1, DIGITS + 1)
        return int(
            "".join(
                np.random.choice(list("0123456789")) for _ in range(digit_count)
            )
        )

    ctable = CharacterTable(CHARS)
    questions = []
    expected = []
    seen = set()
    print("Generating data...")
    while len(questions) < TRAINING_SIZE:
        a, b = random_operand(), random_operand()
        # Skip any addition questions we've already seen.
        # Also skip any such that x+Y == Y+x (hence the sorting).
        key = tuple(sorted((a, b)))
        if key in seen:
            continue
        seen.add(key)
        # Pad the data with spaces such that it is always MAXLEN.
        query = "{}+{}".format(a, b).ljust(MAXLEN)
        # Answers can be of maximum size DIGITS + 1.
        ans = str(a + b).ljust(DIGITS + 1)
        if REVERSE:
            # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note
            # the space used for padding.)
            query = query[::-1]
        questions.append(query)
        expected.append(ans)

    return questions, expected, ctable

In [3]:
# Build the question strings, the answer strings and the character table.
(questions, expected, ctable) = get_data()
question_count = len(questions)
print("Total questions:", question_count)

Generating data...
Total questions: 50000


In [4]:
print("Vectorization...")
# One-hot tensors indexed as (sample, character position, vocabulary index).
vocab_size = len(CHARS)
answer_len = DIGITS + 1
x = np.zeros((len(questions), MAXLEN, vocab_size), dtype=np.bool_)
y = np.zeros((len(questions), answer_len, vocab_size), dtype=np.bool_)

# questions and expected are parallel lists, so fill both tensors in one pass.
for row, (question, answer) in enumerate(zip(questions, expected)):
    x[row] = ctable.encode(question, MAXLEN)
    y[row] = ctable.encode(answer, answer_len)

# Apply one shared permutation to x and y so the pairs stay aligned; the
# original note says the later parts of x will almost all be larger digits.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold out the last 10% as validation data that is never trained on.
split_at = len(x) - len(x) // 10
x_train, x_val = x[:split_at], x[split_at:]
y_train, y_val = y[:split_at], y[split_at:]

for title, (inputs, targets) in (
    ("Training Data:", (x_train, y_train)),
    ("Validation Data:", (x_val, y_val)),
):
    print(title)
    print(inputs.shape)
    print(targets.shape)

Vectorization...
Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [2]:
# Number of stacked decoder LSTM layers and the width shared by every LSTM.
num_layers = 2
hidden_size = 128

model = keras.Sequential()
# Declare the input shape with an explicit Input object instead of the legacy
# `input_shape=` layer argument, which newer Keras releases warn about.
model.add(keras.Input(shape=(MAXLEN, len(CHARS))))
# Encoder: without return_sequences the LSTM emits only its final output,
# a single vector per query.
model.add(layers.LSTM(hidden_size))
# Feed that vector to the decoder once per answer character.
model.add(layers.RepeatVector(DIGITS + 1))
# Decoder: return_sequences=True keeps one output per answer position.
for _ in range(num_layers):
    model.add(layers.LSTM(hidden_size, return_sequences=True))
# Per-position softmax over the character vocabulary.
model.add(layers.Dense(len(CHARS), activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

NameError: name 'keras' is not defined

In [7]:
epochs = 10
batch_size = 32

# Train one epoch at a time so sample predictions can be printed in between.
# The range ends at epochs + 1 so that exactly `epochs` passes are run;
# range(1, epochs) stopped one pass short.
for epoch in range(1, epochs + 1):
    print()
    print("Iteration", epoch)
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=1,
        validation_data=(x_val, y_val),
    )
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for _ in range(10):
        ind = np.random.randint(0, len(x_val))
        # Slice rather than index so the leading batch axis of size 1 is kept.
        rowx, rowy = x_val[ind : ind + 1], y_val[ind : ind + 1]
        # verbose=0 keeps predict() from printing a progress bar per sample.
        preds = np.argmax(model.predict(rowx, verbose=0), axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Undo the input reversal so the question reads naturally.
        print("Q", q[::-1] if REVERSE else q, end=" ")
        print("T", correct, end=" ")
        if correct == guess:
            print("OK " + guess)
        else:
            print(":( " + guess)



Iteration 1
Q 8+999   T 507  :( 476 
Q 75+708  T 429  :( 496 
Q 338+65  T 370  :( 456 
Q 97+411  T 302  :( 496 
Q 61+11   T 66   :( 10  
Q 225+235 T 342  :( 456 
Q 30+461  T 260  :( 290 
Q 349+484 T 591  :( 676 
Q 710+2   T 711  :( 776 
Q 592+504 T 844  :( 906 

Iteration 2
Q 620+40  T 640  :( 642 
Q 33+87   T 76   :( 70  
Q 24+63   T 55   :( 61  
Q 264+698 T 613  :( 577 
Q 6+587   T 299  :( 201 
Q 39+604  T 341  :( 331 
Q 78+381  T 268  :( 281 
Q 6+46    T 29   :( 11  
Q 67+980  T 557  :( 575 
Q 65+476  T 303  :( 391 

Iteration 3
Q 65+740  T 435  :( 433 
Q 815+60  T 845  :( 841 
Q 750+69  T 784  :( 781 
Q 799+714 T 1156 :( 1191
Q 346+63  T 377  :( 371 
Q 522+3   T 523  :( 524 
Q 713+94  T 760  :( 763 
Q 313+0   T 313  OK 313 
Q 44+753  T 420  :( 433 
Q 13+87   T 56   :( 54  

Iteration 4
Q 4+144   T 76   :( 67  
Q 111+83  T 152  :( 147 
Q 583+932 T 1049 :( 1058
Q 8+491   T 253  :( 205 
Q 316+1   T 316  :( 314 
Q 799+714 T 1156 :( 1172
Q 913+3   T 914  :( 910 
Q 95+36   T 113  :( 104

In [13]:
# Ask the trained model a single hand-written question.
question = "66+22"

# Prepare the query the same way get_data() does: pad with spaces to MAXLEN,
# then reverse only when REVERSE is set, instead of hard-coding the already
# reversed string.
query = question + " " * (MAXLEN - len(question))
if REVERSE:
    query = query[::-1]

# encode() returns a float (MAXLEN, vocabulary) array; add the batch axis and
# cast to bool to match the training tensors. A separate name is used so the
# dataset array `x` is not overwritten.
x_query = ctable.encode(query, MAXLEN)[np.newaxis].astype(np.bool_)

# verbose=0 keeps predict() from printing a progress bar.
result = np.argmax(model.predict(x_query, verbose=0), axis=-1)
guess = ctable.decode(result[0], calc_argmax=False)

print(guess)

73  
