# Deep solutions task
### By Koren Gast

In [1]:
import sys
sys.path.append("../src")
import pandas as pd
from utils import get_vocabulary, get_max,\
    get_varsAndEqn_str, pad_and_vectorize, to_wolfram_format
from models.models import EncoderDecoder_model
import load_data


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [17]:
# Load each split and immediately pass it through get_varsAndEqn_str.
# The training split comes from the project loader; dev and test are read
# from JSON files addressed relative to the notebook's working directory.
train_data = get_varsAndEqn_str(load_data.load_alldata())
dev_data = get_varsAndEqn_str(pd.read_json("data/dev_data.json"))
test_data = get_varsAndEqn_str(pd.read_json("data/test_data.json"))


In [18]:
# Stack the train and dev frames; this combined frame is what the sequence
# lengths and vocabularies are derived from further down.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0. Original row labels are kept (no ignore_index) and
# sort=False leaves the column order unsorted, matching the previous call.
train_dev = pd.concat([train_data, dev_data], sort=False)

In [19]:
# Fixed sequence lengths: the get_max value of each column over train+dev,
# scaled by a headroom factor and truncated to an int.
# NOTE(review): the 1.5 / 1.5 / 2 factors presumably leave room for longer
# test examples -- confirm the intent.
txt_length, var_length, eqn_length = (
    int(get_max(train_dev, column) * headroom)
    for column, headroom in (('text', 1.5), ('str_vars', 1.5), ('str_eqn', 2))
)


In [20]:
# Two vocabularies are built from the same train+dev frame: one with
# is_text=True (used for the text side) and one with is_text=False (used for
# the variables/equations side).
txt_vocab, eqn_vocab = (
    get_vocabulary(train_dev, is_text=True),
    get_vocabulary(train_dev, is_text=False),
)



In [21]:

# Every split is padded/vectorized with the same lengths and vocabularies.
# NOTE(review): each name is rebound to its processed version, so re-running
# this cell would feed already-processed frames back in -- re-run the loading
# cell first.
vectorize_args = (txt_length, var_length, eqn_length, txt_vocab, eqn_vocab)
train_data = pad_and_vectorize(train_data, *vectorize_args)
dev_data = pad_and_vectorize(dev_data, *vectorize_args)
test_data = pad_and_vectorize(test_data, *vectorize_args)


In [22]:
# Vocabulary sizes passed to the model constructor.
txt_vocab_size, eqn_vocab_size = len(txt_vocab), len(eqn_vocab)

# The model input is the padded text length; the output sequence holds the
# variables part, the equations part and two extra positions.
# NOTE(review): the extra 2 presumably covers marker/separator tokens added
# during vectorization -- confirm against pad_and_vectorize.
input_shape = txt_length
output_shape = var_length + eqn_length + 2

In [23]:
# Training hyperparameters.
EPOCHS = 20
BATCH_SIZE = 50

# Gather the constructor arguments in one mapping, then build the
# encoder-decoder model from it.
model_config = dict(
    input_shape=input_shape,
    output_shape=output_shape,
    txt_vocab_size=txt_vocab_size,
    eqn_vocab_size=eqn_vocab_size,
    var_length=var_length,
    eqn_vocab=eqn_vocab,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)
model = EncoderDecoder_model(**model_config)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 160)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 160, 128)          962688    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 256)               263168    
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 288, 256)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 288, 128)          197120    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 288, 53)           6837      
Total params: 1,429,813
Trainable params: 1,429,813
Non-trainable params: 0
_________________________________________________________________


In [24]:


# Train on the vectorized training split; the dev split is passed alongside
# (the recorded log reports it as the validation set).
model.fit(
    df=train_data,
    dev_data=dev_data,
)

Train on 3220 samples, validate on 374 samples
Epoch 1/20


  50/3220 [..............................] - ETA: 7:43 - loss: 3.9658 - acc: 0.0037

 100/3220 [..............................] - ETA: 6:01 - loss: 3.9308 - acc: 0.4376

 150/3220 [>.............................] - ETA: 5:22 - loss: 3.8932 - acc: 0.5933

 200/3220 [>.............................] - ETA: 5:00 - loss: 3.8418 - acc: 0.6698

 250/3220 [=>............................] - ETA: 4:46 - loss: 3.7679 - acc: 0.7157

 300/3220 [=>............................] - ETA: 4:45 - loss: 3.6560 - acc: 0.7462

 350/3220 [==>...........................] - ETA: 4:45 - loss: 3.4798 - acc: 0.7655

 400/3220 [==>...........................] - ETA: 4:39 - loss: 3.2212 - acc: 0.7810

 450/3220 [===>..........................] - ETA: 4:32 - loss: 2.9461 - acc: 0.7946

 500/3220 [===>..........................] - ETA: 4:23 - loss: 2.7302 - acc: 0.8043

 550/3220 [====>.........................] - ETA: 4:15 - loss: 2.5532 - acc: 0.8130

 600/3220 [====>.........................] - ETA: 4:07 - loss: 2.4120 - acc: 0.8197

 650/3220 [=====>........................] - ETA: 4:00 - loss: 2.2929 - acc: 0.8255

 700/3220 [=====>........................] - ETA: 3:54 - loss: 2.1796 - acc: 0.8316

 750/3220 [=====>........................] - ETA: 3:47 - loss: 2.0845 - acc: 0.8364





































































































Epoch 2/20


  50/3220 [..............................] - ETA: 4:35 - loss: 0.6783 - acc: 0.8972

 100/3220 [..............................] - ETA: 4:34 - loss: 0.7119 - acc: 0.8918

 150/3220 [>.............................] - ETA: 4:37 - loss: 0.6761 - acc: 0.8978

 200/3220 [>.............................] - ETA: 4:40 - loss: 0.6491 - acc: 0.9025

 250/3220 [=>............................] - ETA: 4:39 - loss: 0.6658 - acc: 0.8995

 300/3220 [=>............................] - ETA: 4:39 - loss: 0.6599 - acc: 0.9005

 350/3220 [==>...........................] - ETA: 4:40 - loss: 0.6669 - acc: 0.8992

 400/3220 [==>...........................] - ETA: 4:33 - loss: 0.6594 - acc: 0.9004

 450/3220 [===>..........................] - ETA: 4:27 - loss: 0.6534 - acc: 0.9013

 500/3220 [===>..........................] - ETA: 4:20 - loss: 0.6582 - acc: 0.9004

 550/3220 [====>.........................] - ETA: 4:18 - loss: 0.6625 - acc: 0.8997

 600/3220 [====>.........................] - ETA: 4:15 - loss: 0.6659 - acc: 0.8991

 650/3220 [=====>........................] - ETA: 4:10 - loss: 0.6639 - acc: 0.8995

 700/3220 [=====>........................] - ETA: 4:04 - loss: 0.6629 - acc: 0.8995

 750/3220 [=====>........................] - ETA: 4:03 - loss: 0.6663 - acc: 0.8990





































































































Epoch 3/20


  50/3220 [..............................] - ETA: 4:34 - loss: 0.6647 - acc: 0.8953

 100/3220 [..............................] - ETA: 4:28 - loss: 0.6485 - acc: 0.8989

 150/3220 [>.............................] - ETA: 4:26 - loss: 0.6312 - acc: 0.9023

 200/3220 [>.............................] - ETA: 4:34 - loss: 0.6241 - acc: 0.9033

 250/3220 [=>............................] - ETA: 4:31 - loss: 0.6331 - acc: 0.9021

 300/3220 [=>............................] - ETA: 4:27 - loss: 0.6253 - acc: 0.9036

 350/3220 [==>...........................] - ETA: 4:26 - loss: 0.6290 - acc: 0.9030

 400/3220 [==>...........................] - ETA: 4:23 - loss: 0.6168 - acc: 0.9050

 450/3220 [===>..........................] - ETA: 4:20 - loss: 0.6195 - acc: 0.9045

 500/3220 [===>..........................] - ETA: 4:15 - loss: 0.6201 - acc: 0.9045

 550/3220 [====>.........................] - ETA: 4:11 - loss: 0.6259 - acc: 0.9035

 600/3220 [====>.........................] - ETA: 4:09 - loss: 0.6260 - acc: 0.9034

 650/3220 [=====>........................] - ETA: 4:05 - loss: 0.6273 - acc: 0.9031

 700/3220 [=====>........................] - ETA: 4:02 - loss: 0.6350 - acc: 0.9018

 750/3220 [=====>........................] - ETA: 4:04 - loss: 0.6415 - acc: 0.9006







In [67]:
# Run the trained model over the vectorized test split.
# NOTE(review): in the saved notebook the execution counter jumps from 24 to
# 67 at this cell, so this result may depend on kernel state that is no longer
# visible -- confirm it reproduces under Restart & Run All.
preds = model.predict(test_data)

# Leave the shape as the cell's last expression so the notebook displays it,
# instead of routing it through print().
preds.shape

(1504, 2)


In [68]:
# Convert each prediction row with to_wolfram_format (applied row-wise) and
# store the result in a new 'equations' column of preds.
wolfram_equations = preds.apply(to_wolfram_format, axis=1)
preds['equations'] = wolfram_equations

In [69]:
# Put the reference equations next to the predicted ones for inspection.
# Both columns are taken as-is from their frames, so pandas lines them up by
# index label.
comparison_columns = {
    'real_equations': test_data['equations'],
    'predicted_equations': preds['equations'],
}
comparison = pd.DataFrame(comparison_columns)

In [70]:
# Preview the first ten reference/prediction pairs. The frame is left as the
# cell's last expression so the notebook renders it as a table rather than the
# plain-text dump produced by print().
# NOTE(review): in the saved output every one of these ten predictions was the
# empty "[unkn: , equ: ]" -- the model appears to emit no variable or equation
# tokens; investigate before relying on these results.
comparison.head(10)

                                      real_equations predicted_equations
0            [unkn: x,y, equ: x-y=9, equ: 5*y=3*x+7]     [unkn: , equ: ]
1            [unkn: x,y, equ: x-y=3, equ: 4*y-3*x=7]     [unkn: , equ: ]
2  [unkn: x,y, equ: x = y + 9, equ: 7*y - 2*x = 157]     [unkn: , equ: ]
3   [unkn: x,y, equ: x-y=(-38), equ: 2*x-5*y=(-217)]     [unkn: , equ: ]
4  [unkn: x, y, equ: x = y - 4, equ: 2*x - 5*y = ...     [unkn: , equ: ]
5               [unkn: x,y, equ: x+y=18, equ: x-y=4]     [unkn: , equ: ]
6               [unkn: x,y, equ: x+y=22, equ: x-y=4]     [unkn: , equ: ]
7              [unkn: x,y, equ: x-y=10, equ: x+y=34]     [unkn: , equ: ]
8              [unkn: x,y, equ: x+y=38, equ: x-y=12]     [unkn: , equ: ]
9               [unkn: x,y, equ: x+y=39, equ: x-y=7]     [unkn: , equ: ]
