<a href="https://colab.research.google.com/github/KBVKarthik/Keras-Natural-Language-Processing/blob/main/Sequence_to_sequence_learning_for_performing_number_addition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np


# Number of unique addition problems to generate.
TRAINING_SIZE = 50000
# Maximum number of digits in each operand.
DIGITS = 3
# When true, every padded question string is reversed before it is encoded.
REVERSE = True

# Longest possible question string: two DIGITS-long operands joined by "+",
# e.g. "999+999" when DIGITS is 3.
MAXLEN = 2 * DIGITS + 1


In [12]:
class CharacterTable:
    """Two-way mapping between a fixed character set and one-hot rows.

    Given a set of characters, the table can:
      * encode a string into a one-hot matrix, and
      * decode a one-hot matrix (or a vector of indices) back into a string.
    """

    def __init__(self, chars):
        """Build the lookup tables.

        Args:
            chars: Iterable of characters that may appear in the input.
                Duplicates are dropped and the remainder is sorted, so the
                index of a character is its rank in sorted order.
        """
        alphabet = sorted(set(chars))
        self.chars = alphabet
        self.char_indices = {ch: pos for pos, ch in enumerate(alphabet)}
        self.indices_char = dict(enumerate(alphabet))

    def encode(self, C, num_rows):
        """One-hot encode the string ``C``.

        Args:
            C: String to encode; every character must be in the table.
            num_rows: Number of rows in the returned matrix. Rows beyond
                ``len(C)`` are left as all zeros.

        Returns:
            Float array of shape ``(num_rows, len(self.chars))`` with a single
            1 in each of the first ``len(C)`` rows.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            one_hot[row, self.char_indices[ch]] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn a one-hot/probability matrix or an index vector into a string.

        Args:
            x: Array of per-position scores (when ``calc_argmax`` is true) or
                an iterable of character indices (when it is false).
            calc_argmax: Whether to reduce ``x`` along its last axis with
                ``argmax`` before looking up characters.

        Returns:
            The decoded string, one character per position.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[idx] for idx in indices)



# All characters that can appear in a question or an answer; the space is
# used for padding.
chars = "0123456789+ "
ctable = CharacterTable(chars)


def _random_operand():
    """Return a random non-negative integer with at most DIGITS digits.

    A digit count is drawn first, then that many digit characters are drawn
    independently. Leading zeros are possible and are dropped by ``int``.
    """
    num_digits = np.random.randint(1, DIGITS + 1)
    return int(
        "".join(np.random.choice(list("0123456789")) for _ in range(num_digits))
    )


# a+b and b+a are treated as the same problem, so only this many distinct
# problems exist. Asking for more would make the loop below spin forever.
max_unique_problems = 10 ** DIGITS * (10 ** DIGITS + 1) // 2
if TRAINING_SIZE > max_unique_problems:
    raise ValueError(
        "TRAINING_SIZE={} exceeds the {} unique problems available "
        "with DIGITS={}".format(TRAINING_SIZE, max_unique_problems, DIGITS)
    )

questions = []
expected = []
seen = set()
print("Generating data...")
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()

    # Skip any addition question we have already seen, including the
    # commuted form (x+y is considered the same as y+x).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)

    # Pad the question with spaces so it is always MAXLEN characters long.
    q = "{}+{}".format(a, b)
    query = q.ljust(MAXLEN)

    # The largest possible sum has DIGITS + 1 digits; pad answers to that.
    ans = str(a + b).ljust(DIGITS + 1)

    if REVERSE:
        # Reverse the padded question, e.g. "12+345 " becomes " 543+21".
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print("Total questions:", len(questions))


Generating data...
Total questions: 50000


In [13]:
print("Vectorization...")
# One-hot tensors: one row per character position, one column per entry in
# `chars`. The builtin `bool` is used as the dtype because the `np.bool`
# alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

# Shuffle x and y in unison, so the split below is not tied to the order in
# which the problems were generated.
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# Explicitly set apart the last 10% of the shuffled data for validation.
split_at = len(x) - len(x) // 10
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]

print("Training Data:")
print(x_train.shape)
print(y_train.shape)

print("Validation Data:")
print(x_val.shape)
print(y_val.shape)


Vectorization...
Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [14]:
print("Build model...")
num_layers = 1  # number of LSTM layers stacked after the RepeatVector
hidden_size = 128  # units in every LSTM layer

model = keras.Sequential(
    [
        # Encoder: reads the whole question and emits a single vector of
        # size hidden_size.
        layers.LSTM(hidden_size, input_shape=(MAXLEN, len(chars))),
        # Feed that vector to the decoder once per output position; the
        # answer is DIGITS + 1 characters long.
        layers.RepeatVector(DIGITS + 1),
        # Decoder: return_sequences=True keeps one output per position.
        *[
            layers.LSTM(hidden_size, return_sequences=True)
            for _ in range(num_layers)
        ],
        # Per-position softmax over the characters in `chars`.
        layers.Dense(len(chars), activation="softmax"),
    ]
)
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()


Build model...
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               72192     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 4, 128)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
dense (Dense)                (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [15]:
epochs = 30
batch_size = 32

# Train one epoch at a time so that sample predictions on the validation set
# can be printed after each one. The range ends at epochs + 1 so that exactly
# `epochs` iterations run (labelled 1..epochs); range(1, epochs) stopped one
# iteration short.
for epoch in range(1, epochs + 1):
    print()
    print("Iteration", epoch)
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=1,
        validation_data=(x_val, y_val),
    )

    # Pick 10 validation samples at random and show question, truth and guess.
    for _ in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = np.argmax(model.predict(rowx), axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds character indices, so skip the argmax in decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions were reversed before encoding when REVERSE is set; undo
        # that for display.
        print("Q", q[::-1] if REVERSE else q, end=" ")
        print("T", correct, end=" ")
        marker = "☑ " if correct == guess else "☒ "
        print(marker + guess)


Iteration 1
Q 9+414   T 423  ☒ 144 
Q 35+14   T 49   ☒ 12  
Q 700+765 T 1465 ☒ 1366
Q 973+2   T 975  ☒ 900 
Q 99+412  T 511  ☒ 100 
Q 95+251  T 346  ☒ 510 
Q 513+971 T 1484 ☒ 1566
Q 787+521 T 1308 ☒ 1360
Q 455+2   T 457  ☒ 550 
Q 381+12  T 393  ☒ 390 

Iteration 2
Q 0+784   T 784  ☒ 788 
Q 18+54   T 72   ☒ 88  
Q 401+30  T 431  ☒ 435 
Q 626+12  T 638  ☒ 636 
Q 723+981 T 1704 ☒ 1798
Q 870+439 T 1309 ☒ 1396
Q 9+152   T 161  ☒ 159 
Q 128+917 T 1045 ☒ 1018
Q 552+29  T 581  ☒ 587 
Q 873+3   T 876  ☒ 878 

Iteration 3
Q 7+913   T 920  ☒ 929 
Q 28+45   T 73   ☒ 79  
Q 42+929  T 971  ☒ 970 
Q 29+3    T 32   ☒ 39  
Q 461+9   T 470  ☒ 469 
Q 14+617  T 631  ☒ 638 
Q 143+13  T 156  ☒ 151 
Q 8+204   T 212  ☒ 214 
Q 477+225 T 702  ☒ 701 
Q 190+9   T 199  ☒ 190 

Iteration 4
Q 417+744 T 1161 ☒ 1150
Q 12+84   T 96   ☒ 95  
Q 38+88   T 126  ☒ 123 
Q 120+21  T 141  ☒ 148 
Q 146+6   T 152  ☒ 151 
Q 40+532  T 572  ☒ 578 
Q 486+55  T 541  ☒ 542 
Q 76+12   T 88   ☒ 85  
Q 11+891  T 902  ☑ 902 
Q 44+801  T 