In [2]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, SimpleRNN, RepeatVector, TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

In [3]:
vocabulary ='0123456789+-*/.'
print('Vocabulary: ', vocabulary)
num_features = len(vocabulary)   
print('Number of features:', num_features)   

Vocabulary:  0123456789+-*/.
Number of features: 15


In [4]:
char_to_index = dict((c, i) for i, c in enumerate(vocabulary))
index_to_char = dict((i, c) for i, c in enumerate(vocabulary))
print('char_to_index : ', char_to_index)
print('index_to_char : ', index_to_char)

char_to_index :  {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '+': 10, '-': 11, '*': 12, '/': 13, '.': 14}
index_to_char :  {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: '+', 11: '-', 12: '*', 13: '/', 14: '.'}


In [5]:
import nltk
rawData = []
for i in range(101):
    for j in range(1,101):
        k = i + j
        l = i - j
        m = round(i / j,3)
        n = i * j
        rawData.append(str(i) + " + " + str(j) + " = " + str(k))
        rawData.append(str(i) + " - " + str(j) + " = " + str(l))
        rawData.append(str(i) + " / " + str(j) + " = " + str(m))
        rawData.append(str(i) + " * " + str(j) + " = " + str(n))

sentence_data = np.array(rawData)
raw_tokens = np.array([nltk.word_tokenize(i) for i in sentence_data])
tokens = np.array([nltk.word_tokenize(i) for i in sentence_data]).flatten()

enum_tokens = dict((c, i) for i, c in enumerate(tokens))

splitData = []
for i in rawData:
    splitData.append(i.split(" ="))

print(splitData)


+ 71', ' 169'], ['98 - 71', ' 27'], ['98 / 71', ' 1.38'], ['98 * 71', ' 6958'], ['98 + 72', ' 170'], ['98 - 72', ' 26'], ['98 / 72', ' 1.361'], ['98 * 72', ' 7056'], ['98 + 73', ' 171'], ['98 - 73', ' 25'], ['98 / 73', ' 1.342'], ['98 * 73', ' 7154'], ['98 + 74', ' 172'], ['98 - 74', ' 24'], ['98 / 74', ' 1.324'], ['98 * 74', ' 7252'], ['98 + 75', ' 173'], ['98 - 75', ' 23'], ['98 / 75', ' 1.307'], ['98 * 75', ' 7350'], ['98 + 76', ' 174'], ['98 - 76', ' 22'], ['98 / 76', ' 1.289'], ['98 * 76', ' 7448'], ['98 + 77', ' 175'], ['98 - 77', ' 21'], ['98 / 77', ' 1.273'], ['98 * 77', ' 7546'], ['98 + 78', ' 176'], ['98 - 78', ' 20'], ['98 / 78', ' 1.256'], ['98 * 78', ' 7644'], ['98 + 79', ' 177'], ['98 - 79', ' 19'], ['98 / 79', ' 1.241'], ['98 * 79', ' 7742'], ['98 + 80', ' 178'], ['98 - 80', ' 18'], ['98 / 80', ' 1.225'], ['98 * 80', ' 7840'], ['98 + 81', ' 179'], ['98 - 81', ' 17'], ['98 / 81', ' 1.21'], ['98 * 81', ' 7938'], ['98 + 82', ' 180'], ['98 - 82', ' 16'], ['98 / 82', ' 1.195'

In [6]:
hidden_units = 128
max_time_steps = 20    # maximum length of input sequence
def rnn_model():
  rnn_model = Sequential()
  rnn_model.add(SimpleRNN(hidden_units, input_shape=(None, num_features)))
  rnn_model.add(RepeatVector(max_time_steps))
  rnn_model.add(SimpleRNN(hidden_units, return_sequences=True))
  rnn_model.add(TimeDistributed(Dense(num_features, activation='softmax')))
  
  return rnn_model

model = rnn_model()
model.summary()
 
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 128)               18432     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 15)             1935      
Total params: 53,263
Trainable params: 53,263
Non-trainable params: 0
_________________________________________________________________


In [7]:

def vectorize_example(expression, result): 
    
    x = np.zeros((max_time_steps, num_features))                   
    y = np.zeros((max_time_steps, num_features))
    
    diff_x = max_time_steps - len(expression)                         
    diff_y = max_time_steps - len(result)                           
    
    for i, c in enumerate(expression):           
        x[diff_x+i, char_to_index[c]] = 1     
    for i in range(diff_x):                   
        x[i, char_to_index['0']] = 1         
   
    for i, c in enumerate(result):
        y[diff_y+i, char_to_index[c]] = 1
    for i in range(diff_y):
        y[i, char_to_index['0']] = 1
        
    return x, y

In [8]:
def devectorize_example(example):
    result = [index_to_char[np.argmax(vec)] for i, vec in enumerate(example)]
    return ''.join(result)

In [9]:
def strip_padding(example):
    encountered_non_zero = False
    output = ''
    for c in example:
        if not encountered_non_zero and c == '0':
            continue
        if c == '+' or c == '-' or c=='*' or c=='/' or c=='.':
            encountered_non_zero = False
        else:
            encountered_non_zero = True
        output += c
    return output