# Sequence to sequence learning for performing number addition

### Setup

In [2]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


# Hyper-parameters shared by the data generator and the model.
TRAINING_SIZE = 50000  # number of unique addition questions to generate
DIGITS = 3  # upper bound on the number of digit characters per operand
REVERSE = True  # when True, each padded query string is stored reversed

# A query has the form 'int+int' (e.g., '345+678'): two operands of at most
# DIGITS characters each, joined by a single '+' sign.
MAXLEN = 2 * DIGITS + 1


2022-09-27 14:23:50.996216: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


### Generate the data

In [3]:
class CharacterTable:
    """Bidirectional mapping between characters and one-hot rows.

    Given a set of characters:
    + Encode them to a one-hot integer representation
    + Decode the one-hot or integer representation to their character output
    + Decode a vector of probabilities to their character output
    """

    def __init__(self, chars):
        """Initialize character table.

        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the remaining characters are sorted, so a
                character's index is its position in that sorted order.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {c: i for i, c in enumerate(self.chars)}
        self.indices_char = {i: c for i, c in enumerate(self.chars)}

    def encode(self, C, num_rows):
        """One-hot encode given string C.

        # Arguments
            C: string, to be encoded.
            num_rows: Number of rows in the returned one-hot encoding. This is
                used to keep the # of rows for each data the same.

        # Returns
            A NumPy array of shape `(num_rows, len(self.chars))`. Row `i` has
            a 1 in the column of character `C[i]`; rows past `len(C)` stay
            all-zero.

        # Raises
            KeyError: if C contains a character that is not in the table.
            IndexError: if C is longer than `num_rows`.
        """
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        """Decode the given vector or 2D array to their character output.

        # Arguments
            x: A vector or a 2D array of probabilities or one-hot representations;
                or a vector of character indices (used with `calc_argmax=False`).
            calc_argmax: Whether to find the character index with maximum
                probability, defaults to `True`.

        # Returns
            The decoded string, one character per index.
        """
        if calc_argmax:
            # argmax over a single probability/one-hot vector yields a 0-d
            # scalar, which cannot be iterated; atleast_1d turns it into a
            # length-1 array so a lone vector decodes to a single character.
            x = np.atleast_1d(x.argmax(axis=-1))
        return "".join(self.indices_char[index] for index in x)


# Vocabulary: the ten digits, the plus sign, and a space used for padding.
chars = "0123456789+ "
ctable = CharacterTable(chars)


def f():
    """Draw a random operand.

    Joins between 1 and DIGITS randomly chosen digit characters and parses the
    result as an int (leading zeros are possible, so the value may have fewer
    digits than characters drawn).
    """
    return int(
        "".join(
            np.random.choice(list("0123456789"))
            for i in range(np.random.randint(1, DIGITS + 1))
        )
    )


questions = []
expected = []
seen = set()
print("Generating data...")
while len(questions) < TRAINING_SIZE:
    a, b = f(), f()
    # Sorting the pair makes a+b and b+a share one key, so a question and its
    # mirrored twin are only ever generated once.
    key = tuple(sorted((a, b)))
    if key not in seen:
        seen.add(key)
        # Right-pad the question with spaces up to MAXLEN characters.
        q = "{}+{}".format(a, b)
        query = q.ljust(MAXLEN)
        # Right-pad the answer up to DIGITS + 1 characters, the longest
        # possible sum.
        ans = str(a + b).ljust(DIGITS + 1)
        if REVERSE:
            # Store the query back-to-front, e.g., '12+345  ' becomes
            # '  543+21' (the padding ends up on the left).
            query = query[::-1]
        questions.append(query)
        expected.append(ans)
print("Total questions:", len(questions))


Generating data...
Total questions: 50000


### Print the first 10 questions

In [6]:
from pprint import pprint

# Preview the first ten generated question strings.
first_ten = questions[:10]
pprint(first_ten)

['  8+107',
 '   7+38',
 '   3+21',
 '    2+7',
 '  02+48',
 '661+238',
 '  779+9',
 '   5+99',
 ' 41+364',
 '  5+497']


### Vectorize the data

In [7]:
# Pre-allocate boolean tensors for the one-hot data: one slice per question,
# one row per character position, one column per vocabulary character.
num_samples = len(questions)
vocab_size = len(chars)

x = np.zeros((num_samples, MAXLEN, vocab_size), dtype=bool)
y = np.zeros((num_samples, DIGITS + 1, vocab_size), dtype=bool)

In [13]:
# Report the shapes of the freshly allocated input and target tensors.
x_shape, y_shape = x.shape, y.shape
print("dimension of x : ", x_shape)
print("dimension of y : ", y_shape)

dimension of x :  (50000, 7, 12)
dimension of y :  (50000, 4, 12)


### Encode the data

In [14]:
# Fill the pre-allocated tensors: each question/answer string becomes one
# one-hot matrix produced by the character table.
for row, question in enumerate(questions):
    x[row] = ctable.encode(question, MAXLEN)
for row, answer in enumerate(expected):
    y[row] = ctable.encode(answer, DIGITS + 1)

### Shuffle x and y

In [15]:
# Build one random ordering and apply it to both tensors so that every
# question stays aligned with its answer.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Shuffling only reorders the first axis, so the shapes should be unchanged.
x_shape, y_shape = x.shape, y.shape
print("dimension of x : ", x_shape)
print("dimension of y : ", y_shape)

dimension of x :  (50000, 7, 12)
dimension of y :  (50000, 4, 12)


In [None]:
# Disabled alternative: manually set apart the last 10% as validation data that
# we never train over. The `train_test_split` call below is used instead.
# split_at = len(x) - len(x) // 10
# (x_train, x_val) = x[:split_at], x[split_at:]
# (y_train, y_val) = y[:split_at], y[split_at:]

### Split the data into train and test sets

In [17]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set. The fixed random_state makes the
# partition deterministic for a given x and y.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [19]:
# Report the shapes of the four arrays produced by the train/test split.
train_shapes = (x_train.shape, y_train.shape)
test_shapes = (x_test.shape, y_test.shape)
print("dimension of x_train : ", train_shapes[0])
print("dimension of y_train : ", train_shapes[1])
print("dimension of x_test : ", test_shapes[0])
print("dimension of y_test : ", test_shapes[1])

dimension of x_train :  (40000, 7, 12)
dimension of y_train :  (40000, 4, 12)
dimension of x_test :  (10000, 7, 12)
dimension of y_test :  (10000, 4, 12)


### Create model

In [23]:
import tensorflow

# NOTE(review): the output recorded for this cell is
# "NotImplementedError: Cannot convert a symbolic Tensor ... to a numpy array".
# That message is commonly reported when an older TensorFlow 2.x build is
# paired with NumPy >= 1.20 -- presumably an environment/version mismatch
# rather than a bug in the code below; confirm the installed versions.

num_layers = 1

# Encoder: an LSTM that reads the whole input sequence and emits a single
# vector of size 128.
# Note: In a situation where your input sequences have a variable length,
# use input_shape=(None, num_feature).
encoder = layers.LSTM(128, input_shape=(MAXLEN, len(chars)))

# Feed the encoder's final output to the decoder at every time step. It is
# repeated 'DIGITS + 1' times because that is the maximum length of the
# output, e.g., when DIGITS=3, max output is 999+999=1998.
repeater = layers.RepeatVector(DIGITS + 1)

# Decoder: one or more stacked LSTMs. return_sequences=True makes each layer
# return its output at every time step, in the form (num_samples, timesteps,
# output_dim), instead of only the last one, so the layer that follows
# receives one vector per output position.
decoder_stack = [
    layers.LSTM(128, return_sequences=True) for _ in range(num_layers)
]

# Output head: a dense softmax applied to every temporal slice of its input,
# i.e., for each step of the output sequence, decide which character should
# be chosen.
char_classifier = layers.Dense(len(chars), activation="softmax")

model = tensorflow.keras.Sequential()
for layer in [encoder, repeater] + decoder_stack + [char_classifier]:
    model.add(layer)

NotImplementedError: Cannot convert a symbolic Tensor (lstm_1/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported