<a href="https://colab.research.google.com/github/AliAkbarAhmadiDaryab/natural-language-processing/blob/master/week4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import random


In [0]:
def generate_equations(allowed_operators, dataset_size, min_value, max_value):
    """Generates pairs of equations and solutions to them.

    Each equation is two integers with an operator in between, each solution
    is the integer result of that operation.

      allowed_operators: a sequence of strings, operators to sample from;
                         only '+' and '-' are supported.
      dataset_size: an integer, number of equations to generate.
      min_value: an integer, smallest operand value (inclusive).
      max_value: an integer, largest operand value (inclusive).

      result: a list of (equation, solution) tuples of strings.

      raises: ValueError if allowed_operators contains an unsupported symbol.
    """
    unsupported = [symbol for symbol in allowed_operators if symbol not in ('+', '-')]
    if unsupported:
        raise ValueError("Unsupported operators: {!r}".format(unsupported))

    sample = []
    for _ in range(dataset_size):
        first = random.randint(min_value, max_value)
        second = random.randint(min_value, max_value)
        # Pick the operator itself (not a hard-coded index) so the solution
        # always matches the symbol written into the equation, whatever the
        # order or number of entries in allowed_operators.
        symbol = random.choice(allowed_operators)
        solution = first + second if symbol == '+' else first - second
        sample.append((str(first) + symbol + str(second), str(solution)))
    return sample
    
    
    

In [0]:
def test_generate_equations():
    """Sanity-checks generate_equations on a small '+'/'-' sample.

    Returns a status message (it never raises on a failed check): the first
    problem found, or "Tests passed." when every pair is consistent.
    """
    operators = ['+', '-']
    n_samples = 10
    pairs = generate_equations(operators, n_samples, 0, 100)
    for equation, solution in pairs:
        both_are_strings = type(equation) is str and type(solution) is str
        if not both_are_strings:
            return "Both parts should be strings."
        # eval only ever sees equations produced by generate_equations above.
        if eval(equation) != int(solution):
            return "The (equation: {!r}, solution: {!r}) pair is incorrect.".format(equation, solution)
    return "Tests passed."

In [5]:
# Run the generate_equations self-check and display the status string it returns.
print(test_generate_equations())

Tests passed.


In [0]:
from sklearn.model_selection import train_test_split


In [0]:
# Seed Python's RNG so the generated equations are identical on every run;
# without it the fixed random_state below still splits different data each time.
random.seed(42)

allowed_operators = ['+', '-']
dataset_size = 100000
data = generate_equations(allowed_operators, dataset_size, min_value=0, max_value=9999)

# Hold out 20% of the (equation, solution) pairs for evaluation.
train_set, test_set = train_test_split(data, test_size=0.2, random_state=42)

In [0]:
# A symbol's position in the vocabulary string is its id: padding, start and
# end markers first, then the two operators, then the digits.
word2id = dict((symbol, i) for i, symbol in enumerate('#^$+-1234567890'))
# Inverse mapping, built from the forward one so the two can never disagree.
id2word = dict(zip(word2id.values(), word2id.keys()))

In [0]:
# Special symbols: sequence start marker, sequence end marker, padding filler.
start_symbol, end_symbol, padding_symbol = '^', '$', '#'

In [0]:
import numpy as np

In [0]:
def sentence_to_ids(sentence, word2id, padded_len):
    """Encodes a string as exactly `padded_len` ids, terminated by the end symbol.

    The sentence is cut to fit, the end symbol is written right after the
    last kept symbol (overwriting the final slot when the sentence is too
    long), and the remaining slots hold the padding id. The end and padding
    symbols are read from the module-level `end_symbol` / `padding_symbol`.

      sentence: a string, input/output sequence of symbols.
      word2id: a dict, a mapping from original symbols to ids.
      padded_len: an integer, a desirable length of the sequence.

      result: a tuple of (a list of ids, an actual length of sentence),
              where the actual length counts the end symbol.
    """
    # Symbols that fit into the padded window (none for non-positive lengths).
    visible = sentence[:max(padded_len, 0)]
    sent_ids = [word2id[symbol] for symbol in visible]
    sent_ids += [word2id[padding_symbol]] * (padded_len - len(visible))

    # The end symbol directly follows the sentence, or takes the last slot
    # when the sentence alone already fills the window.
    sent_len = min(len(sentence) + 1, padded_len)
    sent_ids[sent_len - 1] = word2id[end_symbol]
    return sent_ids, sent_len

In [0]:
def test_sentence_to_ids():
    """Checks sentence_to_ids for a too-short, an exact and a roomy padded length.

    Returns a status message: the first mismatch found, or "Tests passed.".
    """
    # (sentence, padded_len, expected ids, expected actual length)
    cases = [
        ("123+123", 7, [5, 6, 7, 3, 5, 6, 2], 7),
        ("123+123", 8, [5, 6, 7, 3, 5, 6, 7, 2], 8),
        ("123+123", 10, [5, 6, 7, 3, 5, 6, 7, 2, 0, 0], 8),
    ]
    for sentence, padded_len, expected_ids, expected_length in cases:
        output, length = sentence_to_ids(sentence, word2id, padded_len)
        if output != expected_ids:
            message = "Convertion of '{}' for padded_len={} to {} is incorrect."
            return message.format(sentence, padded_len, output)
        if length != expected_length:
            message = "Convertion of '{}' for padded_len={} has incorrect actual length {}."
            return message.format(sentence, padded_len, length)
    return "Tests passed."

In [14]:
# Run the sentence_to_ids self-check and display the status string it returns.
print(test_sentence_to_ids())

Tests passed.


In [0]:
def ids_to_sentence(ids, id2word):
    """Maps every id back to its symbol, keeping the order.

      ids: an iterable of ids (padding and end ids included, if present).
      id2word: a dict, a mapping from ids to original symbols.

      result: a list of symbols, one per id.
    """
    symbols = []
    for token_id in ids:
        symbols.append(id2word[token_id])
    return symbols

In [0]:
def batch_to_ids(sentences, word2id, max_len):
    """Prepares batches of indices.

       Sequences are padded to match the longest sequence in the batch
       (plus one slot for the end symbol); if that is longer than max_len,
       then max_len is used instead.

        sentences: an iterable of strings, original sequences.
        word2id: a dict, a mapping from original symbols to ids.
        max_len: an integer, max len of sequences allowed.

        result: a list of lists of ids, a list of actual lengths.
                Both lists are empty for an empty batch.
    """
    # Materialise once: the batch is traversed twice below, which would
    # silently yield nothing on the second pass for a one-shot iterator.
    sentences = list(sentences)
    # An empty batch has no longest sentence; max() would raise on it.
    if not sentences:
        return [], []

    max_len_in_batch = min(max(len(s) for s in sentences) + 1, max_len)
    batch_ids, batch_ids_len = [], []
    for sentence in sentences:
        ids, ids_len = sentence_to_ids(sentence, word2id, max_len_in_batch)
        batch_ids.append(ids)
        batch_ids_len.append(ids_len)
    return batch_ids, batch_ids_len

In [0]:
def generate_batches(samples, batch_size=64):
    """Yields (inputs, targets) list pairs of at most `batch_size` samples.

      samples: an iterable of (x, y) pairs.
      batch_size: an integer, number of pairs per full batch.

      result: a generator of (list of x, list of y); the last batch may be
              smaller when the sample count is not a multiple of batch_size.
    """
    inputs, targets = [], []
    seen = 0
    for x, y in samples:
        seen += 1
        inputs.append(x)
        targets.append(y)
        if seen % batch_size == 0:
            yield inputs, targets
            # Fresh lists, so batches already handed out are never mutated.
            inputs, targets = [], []
    # Flush the incomplete trailing batch, if any.
    if inputs and targets:
        yield inputs, targets

In [18]:
# Demo of batch_to_ids: train_set[0] is one (equation, solution) pair, so the
# two strings are encoded together as a batch of two sentences.
sentences = train_set[0]
ids, sent_lens = batch_to_ids(sentences, word2id, max_len=10)
print('Input:', sentences)
print('Ids: {}\nSentences lengths: {}'.format(ids, sent_lens))

Input: ('3928+6016', '9944')
Ids: [[7, 13, 6, 12, 3, 10, 14, 5, 10, 2], [13, 13, 8, 8, 2, 0, 0, 0, 0, 0]]
Sentences lengths: [10, 5]


In [0]:
import tensorflow as tf

In [20]:
# Show the installed TensorFlow version; the cells below rely on
# tf.placeholder, tf.Session and tf.contrib.
print (tf.__version__)

1.13.1


In [0]:
class Seq2SeqModel(object):
    """Empty shell for the sequence-to-sequence model.

    The class is declared without members; its methods are attached
    afterwards by assigning functions to attributes of the class.
    """


In [0]:
def declare_placeholders(self):
    """Creates every placeholder the graph is fed through and stores it on `self`.

      input_batch, ground_truth: int32 id matrices of shape (None, None).
      input_batch_lengths, ground_truth_lengths: int32 vectors of shape (None,).
      dropout_ph: float32 scalar; evaluates to 1.0 when it is not fed.
      learning_rate_ph: float32 scalar without a default.
    """
    # Encoder side: token ids and the real length of each row.
    self.input_batch = tf.placeholder(
        dtype=tf.int32, shape=(None, None), name='input_batch')
    self.input_batch_lengths = tf.placeholder(
        dtype=tf.int32, shape=(None,), name='input_batch_lengths')

    # Decoder side: target ids and the real length of each row.
    self.ground_truth = tf.placeholder(
        dtype=tf.int32, shape=(None, None), name='ground_truth')
    self.ground_truth_lengths = tf.placeholder(
        dtype=tf.int32, shape=(None,), name='ground_truth_lengths')

    # Hyper-parameters fed per run.
    default_keep_prob = tf.cast(1.0, tf.float32)
    self.dropout_ph = tf.placeholder_with_default(default_keep_prob, shape=[])
    self.learning_rate_ph = tf.placeholder(dtype=tf.float32, shape=[])

In [0]:
# Attach the function under the literal name `__declare_placeholders` (no name
# mangling happens outside a class body). Because of classmethod, the function's
# `self` argument receives the Seq2SeqModel class itself, not an instance.
Seq2SeqModel.__declare_placeholders = classmethod(declare_placeholders)

In [0]:

def create_embeddings(self, vocab_size, embeddings_size):
    """Creates the embedding matrix and embeds the input batch.

      vocab_size: an integer, number of rows of the embedding matrix.
      embeddings_size: an integer, number of columns of the embedding matrix.

    Sets `self.embeddings` (a float32 variable initialised uniformly in
    [-1, 1)) and `self.input_batch_embedded` (lookup of self.input_batch).
    """
    initial_values = tf.random_uniform(
        (vocab_size, embeddings_size), minval=-1.0, maxval=1.0)
    self.embeddings = tf.Variable(
        initial_values, dtype=tf.float32, name='embeddings')

    # Replace every id of the input batch by its embedding row.
    self.input_batch_embedded = tf.nn.embedding_lookup(
        self.embeddings, self.input_batch)

In [0]:
# Bound as a classmethod: `self` inside create_embeddings is the Seq2SeqModel
# class, so the embedding tensors become class attributes.
Seq2SeqModel.__create_embeddings = classmethod(create_embeddings)

In [0]:
def build_encoder(self, hidden_size):
    """Builds the GRU encoder and keeps its final state.

      hidden_size: an integer, number of units of the GRU cell.

    Sets `self.final_encoder_state`; the per-step encoder outputs are
    discarded.
    """
    # GRU cell with the same keep probability applied to its inputs,
    # outputs and state.
    gru_cell = tf.nn.rnn_cell.GRUCell(hidden_size)
    encoder_cell = tf.nn.rnn_cell.DropoutWrapper(
        gru_cell,
        input_keep_prob=self.dropout_ph,
        output_keep_prob=self.dropout_ph,
        state_keep_prob=self.dropout_ph)

    # Unroll over the embedded input, honouring each row's real length.
    _, final_state = tf.nn.dynamic_rnn(
        encoder_cell,
        self.input_batch_embedded,
        sequence_length=self.input_batch_lengths,
        dtype=tf.float32)
    self.final_encoder_state = final_state
    
    

In [0]:
# Bound as a classmethod: `self` inside build_encoder is the Seq2SeqModel class,
# so final_encoder_state is stored on the class.
Seq2SeqModel.__build_encoder= classmethod(build_encoder)


In [0]:

def build_decoder(self, hidden_size, vocab_size, max_iter, start_symbol_id, end_symbol_id):
    """Specifies decoder architecture and computes the output.

        Two decoders are built from the same cell definition in the same
        variable scope ('decode'); the second is created with reuse=True.
        They differ only in the helper that supplies the inputs:
          - for train: feeding ground truth (TrainingHelper)
          - for inference: feeding generated output (GreedyEmbeddingHelper)

          hidden_size: an integer, number of units of the decoder GRU cell.
          vocab_size: an integer, size of the output projection.
          max_iter: an integer, passed as maximum_iterations to dynamic_decode.
          start_symbol_id: an integer, id fed at the first decoding step.
          end_symbol_id: an integer, id given to the greedy helper as end_token.

        As a result, self.train_outputs and self.infer_outputs are created.
        Each of them contains two fields:
          rnn_output (predicted logits)
          sample_id (predictions).

        Also sets self.ground_truth_embedded.
    """

    # Use start symbols as the decoder inputs at the first time step:
    # a column of start ids is prepended to the ground truth.
    batch_size = tf.shape(self.input_batch)[0]
    start_tokens = tf.fill([batch_size], start_symbol_id)
    ground_truth_as_input = tf.concat([tf.expand_dims(start_tokens, 1), self.ground_truth], 1)

    # Use the embedding layer defined before to look up embeddings for ground_truth_as_input.
    self.ground_truth_embedded = tf.nn.embedding_lookup(self.embeddings,ground_truth_as_input)

    # Create TrainingHelper for the train stage; it is given the ground-truth
    # lengths as sequence lengths.
    train_helper = tf.contrib.seq2seq.TrainingHelper(self.ground_truth_embedded, 
                                                     self.ground_truth_lengths)

    # Create GreedyEmbeddingHelper for the inference stage.
    # It needs the embedding layer, start_tokens and the index of the end symbol.
    infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(self.embeddings,start_tokens= start_tokens,end_token=end_symbol_id)


    def decode(helper, scope, reuse=None):
        """Creates a decoder in `scope` and returns the result of decoding with `helper`.

        `reuse` is forwarded to the variable scope, the GRU cell and the
        projection wrapper.
        """

        with tf.variable_scope(scope, reuse=reuse):
            # Create GRUCell with dropout; the keep probability is passed
            # positionally, i.e. as the wrapper's first keep-prob argument.
            decoder_cell = tf.contrib.rnn.DropoutWrapper(tf.nn.rnn_cell.GRUCell(hidden_size,reuse = reuse), self.dropout_ph)

            # Create a projection wrapper mapping cell outputs to vocab_size values.
            decoder_cell = tf.contrib.rnn.OutputProjectionWrapper(decoder_cell, vocab_size, reuse=reuse)

            # Create BasicDecoder, pass the defined cell, a helper, and initial state.
            # The initial state is the final state of the encoder.
            decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper=helper, initial_state=self.final_encoder_state)

            # The first returning argument of dynamic_decode contains two fields:
            #   rnn_output (predicted logits)
            #   sample_id (predictions)
            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder, maximum_iterations=max_iter, 
                                                              output_time_major=False, impute_finished=True)

            return outputs

    # The train decoder creates the variables; the inference decoder reuses them.
    self.train_outputs = decode(train_helper, 'decode')
    self.infer_outputs = decode(infer_helper, 'decode', reuse=True)

In [0]:
# Bound as a classmethod: `self` inside build_decoder is the Seq2SeqModel class,
# so train_outputs / infer_outputs are stored on the class.
Seq2SeqModel.__build_decoder = classmethod(build_decoder)

In [0]:
def compute_loss(self):
    """Defines `self.loss` as the masked sequence loss of the training decoder.

    Positions are weighted by a float32 mask built from
    self.ground_truth_lengths; the logits come from
    self.train_outputs.rnn_output and the targets are self.ground_truth.
    """
    length_mask = tf.sequence_mask(self.ground_truth_lengths)
    weights = tf.cast(length_mask, dtype=tf.float32)

    self.loss = tf.contrib.seq2seq.sequence_loss(
        logits=self.train_outputs.rnn_output,
        targets=self.ground_truth,
        weights=weights)

In [0]:
# Bound as a classmethod: `self` inside compute_loss is the Seq2SeqModel class,
# so `loss` is stored on the class.
Seq2SeqModel.__compute_loss = classmethod(compute_loss)

In [0]:
def perform_optimization(self):
    """Defines `self.train_op`, the Adam update step for `self.loss`.

    The learning rate is read from self.learning_rate_ph and
    clip_gradients is set to 1.0.
    """
    self.train_op = tf.contrib.layers.optimize_loss(
        self.loss,
        global_step=tf.train.get_global_step(),
        learning_rate=self.learning_rate_ph,
        optimizer='Adam',
        clip_gradients=1.0)

In [0]:
# Bound as a classmethod: `self` inside perform_optimization is the Seq2SeqModel
# class, so `train_op` is stored on the class.
Seq2SeqModel.__perform_optimization = classmethod(perform_optimization)

In [0]:
def init_model(self, vocab_size, embeddings_size, hidden_size, 
               max_iter, start_symbol_id, end_symbol_id, padding_symbol_id):
    """Builds the whole graph: placeholders, embeddings, encoder, decoder, loss, train op.

      vocab_size, embeddings_size: forwarded to the embedding layer.
      hidden_size: forwarded to the encoder and the decoder.
      max_iter, start_symbol_id, end_symbol_id: forwarded to the decoder.
      padding_symbol_id: accepted but not used in this function.

    The steps run in dependency order; each one reads attributes that an
    earlier step stored on `self`. Finally exposes the decoders' sample ids
    as self.train_predictions and self.infer_predictions.
    """

    # The `__`-prefixed names are not mangled here, since this function is
    # defined outside of a class body.
    self.__declare_placeholders()
    self.__create_embeddings(vocab_size, embeddings_size)
    self.__build_encoder(hidden_size)
    self.__build_decoder(hidden_size, vocab_size, max_iter, start_symbol_id, end_symbol_id)

    # Compute loss and back-propagate.
    self.__compute_loss()
    self.__perform_optimization()

    # Get predictions for evaluation.
    self.train_predictions = self.train_outputs.sample_id
    self.infer_predictions = self.infer_outputs.sample_id

In [0]:
# NOTE: __init__ is installed as a classmethod, so `self` inside init_model is
# the Seq2SeqModel class. Everything init_model assigns therefore lives on the
# class and is overwritten each time Seq2SeqModel(...) is called again.
Seq2SeqModel.__init__ = classmethod(init_model)

In [0]:
def train_on_batch(self, session, X, X_seq_len, Y, Y_seq_len, learning_rate, dropout_keep_probability):
    """Runs one optimisation step on a batch.

      session: the session used to run the graph.
      X, X_seq_len: input ids and their actual lengths.
      Y, Y_seq_len: ground-truth ids and their actual lengths.
      learning_rate: value fed to self.learning_rate_ph.
      dropout_keep_probability: value fed to self.dropout_ph.

      result: a tuple of (training predictions, loss); the value returned
              for the train op is discarded.
    """
    feed_dict = {}
    feed_dict[self.input_batch] = X
    feed_dict[self.input_batch_lengths] = X_seq_len
    feed_dict[self.ground_truth] = Y
    feed_dict[self.ground_truth_lengths] = Y_seq_len
    feed_dict[self.learning_rate_ph] = learning_rate
    feed_dict[self.dropout_ph] = dropout_keep_probability

    # train_op is fetched only for its side effect of updating the weights.
    fetches = [self.train_predictions, self.loss, self.train_op]
    pred, loss, _ = session.run(fetches, feed_dict=feed_dict)
    return pred, loss

In [0]:
# Bound as a classmethod, matching __init__: the tensors train_on_batch reads
# through `self` are attributes of the Seq2SeqModel class.
Seq2SeqModel.train_on_batch = classmethod(train_on_batch)

In [0]:
def predict_for_batch(self, session, X, X_seq_len):
    """Runs the inference decoder on a batch.

      session: the session used to run the graph.
      X, X_seq_len: input ids and their actual lengths.

      result: the value fetched for self.infer_predictions.
    """
    feeds = {}
    feeds[self.input_batch] = X
    feeds[self.input_batch_lengths] = X_seq_len
    # A one-element fetch list comes back as a one-element result.
    results = session.run([self.infer_predictions], feed_dict=feeds)
    return results[0]

def predict_for_batch_with_loss(self, session, X, X_seq_len, Y, Y_seq_len):
    """Runs the inference decoder on a batch and also evaluates the loss.

      session: the session used to run the graph.
      X, X_seq_len: input ids and their actual lengths.
      Y, Y_seq_len: ground-truth ids and their actual lengths.

      result: a tuple of (inference predictions, loss). Dropout and the
              learning rate are not fed here.
    """
    feeds = {}
    feeds[self.input_batch] = X
    feeds[self.input_batch_lengths] = X_seq_len
    feeds[self.ground_truth] = Y
    feeds[self.ground_truth_lengths] = Y_seq_len

    fetches = [self.infer_predictions, self.loss]
    pred, loss = session.run(fetches, feed_dict=feeds)
    return pred, loss

In [0]:

# Bound as classmethods, matching __init__: the tensors both prediction
# functions read through `self` are attributes of the Seq2SeqModel class.
Seq2SeqModel.predict_for_batch = classmethod(predict_for_batch)
Seq2SeqModel.predict_for_batch_with_loss = classmethod(predict_for_batch_with_loss)

In [39]:
# Clear the default graph before the model is built.
tf.reset_default_graph()

model = Seq2SeqModel(
    vocab_size=len(word2id),
    embeddings_size=20,
    hidden_size=512,
    max_iter=7,
    start_symbol_id=word2id['^'],
    end_symbol_id=word2id['$'],
    padding_symbol_id=word2id['#'],
)

# Training hyper-parameters for this run.
batch_size = 128
n_epochs = 10
learning_rate = 0.001
dropout_keep_probability = 0.5
max_len = 20

# Number of full batches per epoch (used for progress messages).
n_step = len(train_set) // batch_size

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [40]:

session = tf.Session()
session.run(tf.global_variables_initializer())

# Per-epoch evaluation results, filled at the end of every epoch.
invalid_number_prediction_counts = []
all_model_predictions = []
all_ground_truth = []

end_token = '$'


def _decode_number(token_ids):
    """Turns a row of ids into the integer written before the end token.

    Everything from the first end token on is dropped. A row without an
    end token is parsed as a whole instead of losing its last symbol.
    Raises ValueError when the remaining text is not an integer.
    """
    text = ''.join(ids_to_sentence(token_ids, id2word))
    return int(text.partition(end_token)[0])


print('Start training... \n')
for epoch in range(n_epochs):
    random.shuffle(train_set)
    random.shuffle(test_set)

    print('Train: Epoch', epoch + 1)
    for n_iter, (X_batch, Y_batch) in enumerate(generate_batches(train_set, batch_size=batch_size)):

        # Prepare the data (X_batch and Y_batch) for training.
        x, x_len = batch_to_ids(X_batch, word2id, max_len=max_len)
        y, y_len = batch_to_ids(Y_batch, word2id, max_len=max_len)

        predictions, loss = model.train_on_batch(session=session,
                                                 X=x, Y=y,
                                                 X_seq_len=x_len,
                                                 Y_seq_len=y_len,
                                                 learning_rate=learning_rate,
                                                 dropout_keep_probability=dropout_keep_probability)

        if n_iter % 200 == 0:
            print("Epoch: [%d/%d], step: [%d/%d], Loss: %f" % (epoch + 1, n_epochs, n_iter + 1, n_step, loss))

    # Loss and a few sample predictions on the first test batch.
    X_sent, Y_sent = next(generate_batches(test_set, batch_size=batch_size))

    X, X_sent_lens = batch_to_ids(X_sent, word2id, max_len=max_len)
    Y, Y_sent_lens = batch_to_ids(Y_sent, word2id, max_len=max_len)

    predictions, loss = model.predict_for_batch_with_loss(session, X, X_sent_lens, Y, Y_sent_lens)

    print('\nTest: Epoch', epoch + 1, 'Loss:', loss,)
    for x, y, p  in list(zip(X, Y, predictions))[:3]:
        print('X:',''.join(ids_to_sentence(x, id2word)))
        print('Y:',''.join(ids_to_sentence(y, id2word)))
        print('O:',''.join(ids_to_sentence(p, id2word)))
        print('')

    model_predictions = []
    ground_truth = []
    invalid_number_prediction_count = 0

    # For the whole test set calculate ground-truth values (as integer numbers)
    # and prediction values (also as integers) to calculate metrics.
    # If the number generated by the model is not valid (e.g. '1-1'),
    # increase invalid_number_prediction_count and append neither it nor the
    # corresponding ground-truth value to the arrays.
    for X_batch, Y_batch in generate_batches(test_set, batch_size=batch_size):

        X, X_len = batch_to_ids(X_batch, word2id, max_len=max_len)
        Y, Y_len = batch_to_ids(Y_batch, word2id, max_len=max_len)
        predictions = model.predict_for_batch(session, X, X_len)

        for Y_true, Y_pred in zip(Y, predictions):
            # Only a failed integer parse counts as an invalid prediction;
            # any other error (or a kernel interrupt) must surface.
            try:
                predicted_value = _decode_number(Y_pred)
                true_value = _decode_number(Y_true)
            except ValueError:
                invalid_number_prediction_count = invalid_number_prediction_count + 1
                continue
            # Append both together so the two lists always stay aligned.
            model_predictions.append(predicted_value)
            ground_truth.append(true_value)

    # Final results
    all_model_predictions.append(model_predictions)
    all_ground_truth.append(ground_truth)
    invalid_number_prediction_counts.append(invalid_number_prediction_count)

print('\n...training finished.')

Start training... 

Train: Epoch 1
Epoch: [1/10], step: [1/625], Loss: 2.736478
Epoch: [1/10], step: [201/625], Loss: 1.904754
Epoch: [1/10], step: [401/625], Loss: 1.823560
Epoch: [1/10], step: [601/625], Loss: 1.789265

Test: Epoch 1 Loss: 1.7626752
X: 5531+573$#
Y: 6104$#
O: 10149$

X: 9251-40$##
Y: 9211$#
O: 3398$#

X: 9472+4728$
Y: 14200$
O: 13149$

Train: Epoch 2
Epoch: [2/10], step: [1/625], Loss: 1.788871
Epoch: [2/10], step: [201/625], Loss: 1.731059
Epoch: [2/10], step: [401/625], Loss: 1.707521
Epoch: [2/10], step: [601/625], Loss: 1.689400

Test: Epoch 2 Loss: 1.6315792
X: 9273-8435$
Y: 838$##
O: 186$##

X: 1429+4271$
Y: 5700$#
O: 7066$#

X: 7133+9039$
Y: 16172$
O: 16666$

Train: Epoch 3
Epoch: [3/10], step: [1/625], Loss: 1.649334
Epoch: [3/10], step: [201/625], Loss: 1.670164
Epoch: [3/10], step: [401/625], Loss: 1.635076
Epoch: [3/10], step: [601/625], Loss: 1.577487

Test: Epoch 3 Loss: 1.5412343
X: 6366+2210$
Y: 8576$#
O: 8800$#

X: 621-8714$#
Y: -8093$
O: -7000$

X: 7

In [0]:
from sklearn.metrics import mean_absolute_error

In [42]:

# Per-epoch report: mean absolute error over the parsed test predictions and
# the number of predictions that could not be parsed as integers.
epoch_results = zip(all_ground_truth,
                    all_model_predictions,
                    invalid_number_prediction_counts)
for i, (gts, predictions, invalid_number_prediction_count) in enumerate(epoch_results, 1):
    mae = mean_absolute_error(gts, predictions)
    report = "Epoch: %i, MAE: %f, Invalid numbers: %i" % (i, mae, invalid_number_prediction_count)
    print(report)

Epoch: 1, MAE: 2518.912200, Invalid numbers: 0
Epoch: 2, MAE: 1094.060500, Invalid numbers: 0
Epoch: 3, MAE: 644.395350, Invalid numbers: 0
Epoch: 4, MAE: 527.084500, Invalid numbers: 0
Epoch: 5, MAE: 478.802150, Invalid numbers: 0
Epoch: 6, MAE: 519.953850, Invalid numbers: 0
Epoch: 7, MAE: 427.954800, Invalid numbers: 0
Epoch: 8, MAE: 354.241200, Invalid numbers: 0
Epoch: 9, MAE: 428.570200, Invalid numbers: 0
Epoch: 10, MAE: 331.852650, Invalid numbers: 0


In [40]:
# Clear the default graph before the second model is built.
tf.reset_default_graph()

model = Seq2SeqModel(
    vocab_size=len(word2id),
    embeddings_size=20,
    hidden_size=512,
    max_iter=7,
    start_symbol_id=word2id['^'],
    end_symbol_id=word2id['$'],
    padding_symbol_id=word2id['#'],
)

# Second experiment: twice as many epochs and a higher keep probability.
batch_size = 128
n_epochs = 20
learning_rate = 0.001
dropout_keep_probability = 0.7
max_len = 20

# Number of full batches per epoch (used for progress messages).
n_step = len(train_set) // batch_size

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [41]:

session = tf.Session()
session.run(tf.global_variables_initializer())

# Per-epoch evaluation results, filled at the end of every epoch.
invalid_number_prediction_counts = []
all_model_predictions = []
all_ground_truth = []

end_token = '$'


def _decode_number(token_ids):
    """Turns a row of ids into the integer written before the end token.

    Everything from the first end token on is dropped. A row without an
    end token is parsed as a whole instead of losing its last symbol.
    Raises ValueError when the remaining text is not an integer.
    """
    text = ''.join(ids_to_sentence(token_ids, id2word))
    return int(text.partition(end_token)[0])


print('Start training... \n')
for epoch in range(n_epochs):
    random.shuffle(train_set)
    random.shuffle(test_set)

    print('Train: Epoch', epoch + 1)
    for n_iter, (X_batch, Y_batch) in enumerate(generate_batches(train_set, batch_size=batch_size)):

        # Prepare the data (X_batch and Y_batch) for training.
        x, x_len = batch_to_ids(X_batch, word2id, max_len=max_len)
        y, y_len = batch_to_ids(Y_batch, word2id, max_len=max_len)

        predictions, loss = model.train_on_batch(session=session,
                                                 X=x, Y=y,
                                                 X_seq_len=x_len,
                                                 Y_seq_len=y_len,
                                                 learning_rate=learning_rate,
                                                 dropout_keep_probability=dropout_keep_probability)

        if n_iter % 200 == 0:
            print("Epoch: [%d/%d], step: [%d/%d], Loss: %f" % (epoch + 1, n_epochs, n_iter + 1, n_step, loss))

    # Loss and a few sample predictions on the first test batch.
    X_sent, Y_sent = next(generate_batches(test_set, batch_size=batch_size))

    X, X_sent_lens = batch_to_ids(X_sent, word2id, max_len=max_len)
    Y, Y_sent_lens = batch_to_ids(Y_sent, word2id, max_len=max_len)

    predictions, loss = model.predict_for_batch_with_loss(session, X, X_sent_lens, Y, Y_sent_lens)

    print('\nTest: Epoch', epoch + 1, 'Loss:', loss,)
    for x, y, p  in list(zip(X, Y, predictions))[:3]:
        print('X:',''.join(ids_to_sentence(x, id2word)))
        print('Y:',''.join(ids_to_sentence(y, id2word)))
        print('O:',''.join(ids_to_sentence(p, id2word)))
        print('')

    model_predictions = []
    ground_truth = []
    invalid_number_prediction_count = 0

    # For the whole test set calculate ground-truth values (as integer numbers)
    # and prediction values (also as integers) to calculate metrics.
    # If the number generated by the model is not valid (e.g. '1-1'),
    # increase invalid_number_prediction_count and append neither it nor the
    # corresponding ground-truth value to the arrays.
    for X_batch, Y_batch in generate_batches(test_set, batch_size=batch_size):

        X, X_len = batch_to_ids(X_batch, word2id, max_len=max_len)
        Y, Y_len = batch_to_ids(Y_batch, word2id, max_len=max_len)
        predictions = model.predict_for_batch(session, X, X_len)

        for Y_true, Y_pred in zip(Y, predictions):
            # Only a failed integer parse counts as an invalid prediction;
            # any other error (or a kernel interrupt) must surface.
            try:
                predicted_value = _decode_number(Y_pred)
                true_value = _decode_number(Y_true)
            except ValueError:
                invalid_number_prediction_count = invalid_number_prediction_count + 1
                continue
            # Append both together so the two lists always stay aligned.
            model_predictions.append(predicted_value)
            ground_truth.append(true_value)

    # Final results
    all_model_predictions.append(model_predictions)
    all_ground_truth.append(ground_truth)
    invalid_number_prediction_counts.append(invalid_number_prediction_count)

print('\n...training finished.')

Start training... 

Train: Epoch 1
Epoch: [1/20], step: [1/625], Loss: 2.705762
Epoch: [1/20], step: [201/625], Loss: 1.826759
Epoch: [1/20], step: [401/625], Loss: 1.777515
Epoch: [1/20], step: [601/625], Loss: 1.671082

Test: Epoch 1 Loss: 1.5962147
X: 2654-6564$
Y: -3910$
O: -3100$

X: 3552-588$#
Y: 2964$#
O: 1116$#

X: 2513+7820$
Y: 10333$
O: 10188$

Train: Epoch 2
Epoch: [2/20], step: [1/625], Loss: 1.656287
Epoch: [2/20], step: [201/625], Loss: 1.635415
Epoch: [2/20], step: [401/625], Loss: 1.563997
Epoch: [2/20], step: [601/625], Loss: 1.548381

Test: Epoch 2 Loss: 1.4862914
X: 8559+7098$
Y: 15657$
O: 16180$

X: 5727-4720$
Y: 1007$#
O: 1088$#

X: 3167-3451$
Y: -284$#
O: -108$#

Train: Epoch 3
Epoch: [3/20], step: [1/625], Loss: 1.558989
Epoch: [3/20], step: [201/625], Loss: 1.471212
Epoch: [3/20], step: [401/625], Loss: 1.477180
Epoch: [3/20], step: [601/625], Loss: 1.530802

Test: Epoch 3 Loss: 1.3846678
X: 7314-1372$
Y: 5942$#
O: 5999$#

X: 6502-2083$
Y: 4419$#
O: 4076$#

X: 1

In [0]:
#Number of Epochs is 20 and Keep probability is 0.7
from sklearn.metrics import mean_absolute_error

In [43]:

# Per-epoch report for the 20-epoch run: mean absolute error over the parsed
# test predictions and the number of predictions that failed to parse.
epoch_results = zip(all_ground_truth,
                    all_model_predictions,
                    invalid_number_prediction_counts)
for i, (gts, predictions, invalid_number_prediction_count) in enumerate(epoch_results, 1):
    mae = mean_absolute_error(gts, predictions)
    report = "Epoch: %i, MAE: %f, Invalid numbers: %i" % (i, mae, invalid_number_prediction_count)
    print(report)

Epoch: 1, MAE: 1140.300150, Invalid numbers: 0
Epoch: 2, MAE: 491.827100, Invalid numbers: 0
Epoch: 3, MAE: 338.826100, Invalid numbers: 0
Epoch: 4, MAE: 274.013550, Invalid numbers: 0
Epoch: 5, MAE: 270.604450, Invalid numbers: 0
Epoch: 6, MAE: 220.413650, Invalid numbers: 0
Epoch: 7, MAE: 212.035750, Invalid numbers: 0
Epoch: 8, MAE: 182.017550, Invalid numbers: 0
Epoch: 9, MAE: 176.339000, Invalid numbers: 0
Epoch: 10, MAE: 168.941900, Invalid numbers: 0
Epoch: 11, MAE: 162.965600, Invalid numbers: 0
Epoch: 12, MAE: 156.563000, Invalid numbers: 0
Epoch: 13, MAE: 161.817300, Invalid numbers: 0
Epoch: 14, MAE: 153.902100, Invalid numbers: 0
Epoch: 15, MAE: 138.465100, Invalid numbers: 0
Epoch: 16, MAE: 146.804650, Invalid numbers: 0
Epoch: 17, MAE: 148.412250, Invalid numbers: 0
Epoch: 18, MAE: 141.421150, Invalid numbers: 0
Epoch: 19, MAE: 125.738450, Invalid numbers: 0
Epoch: 20, MAE: 139.743500, Invalid numbers: 0
