# Creating a Spell Checker

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from os import listdir
from os.path import isfile, join
from collections import namedtuple
from tensorflow.python.layers.core import Dense
from tensorflow.python.ops.rnn_cell_impl import _zero_state_tensors
import time
import re
from sklearn.model_selection import train_test_split
!pip install tensorflow-addons
import tensorflow_addons as tfa

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-addons
  Downloading tensorflow_addons-0.17.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 4.5 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.17.1


## Loading the Data

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the sentence pairs from Drive and discard rows with missing values.
df = pd.read_csv('drive/MyDrive/NPL-Project-Data/dataset.csv').dropna()

In [None]:
# Preview the first ten rows
df.head(10)

Unnamed: 0,noise_sentence,label
0,AYandon abandoned or abandonment may reffr to ...,Abandon abandoned or abandonment may refer to ...
1,Abandoned an episode of the TV secies Smallvil...,Abandoned an episode of the TV series Smallvil...
2,Thy include common abilities like walking and ...,They include common abilities like walking and...
3,Abilities aer intelligent powers they are guid...,Abilities are intelligent powers they are guid...
4,They are closely related to but not identical ...,They are closely related to but not identical ...
5,Theories of ability aim to articulate the natu...,Theories of ability aim to articulate the natu...
6,TraditioWally hhe conditional anplysis has bee...,Traditionally the conditional analysis has bee...
7,AccoNding to it baving an ability means one wo...,According to it having an ability means one wo...
8,On thus visw Michael Phelps has the ability to...,On this view Michael Phelps has the ability to...
9,Thie appCoach ahs been criticized ni various ways,This approach has been criticized in various ways


In [None]:
# One [noisy sentence, clean sentence] list per row of the frame
samples = df.to_numpy().tolist()
print(f"There are {len(samples)} samples.")

There are 225884 samples.


In [None]:
# Show the first ten [noisy sentence, clean sentence] rows
samples[:10]

[['AYandon abandoned or abandonment may reffr to Common uses Abandonment emotional a subjectiv2 emotional state in which peole feel undesired lrft behind insecure ro discarded bandonment elgal a legal term regarding propertK Child abandonment the extralegal abandonment of children Los6 misalid and ababdoned property legal status of property after abaTdonment and reciscovery Abqndonment mysticism Art entertainmwnt and media Film Abandon film a gilm starrinn Katie HolLes Rbandoned film stadring Dennis Abandoned film the nEglish language title of the Italian war film Gli Sbandati Abandoned film a Hungarian film Abandoned filh starring Bgittany Murphy Abandoned film a television Yovie about the shipwreck of the Rose Noelle in The Abandoned film a Mexican tilm The AbZndoned film by Nacho Th AbandoneS film starring Mira Furlan The Abandoned film starring Louisa K5ause The Abandonment a silent short f9lm Litera4ure Abandon a thriller novl by Blake Cro7ch Abandonmetn a play by Kate AtkinsoU Mu

In [None]:
# Build the character -> integer id lookup used to encode every sentence
vocab_to_int = {}
count = 0
for noisy_sen, sen in samples:
    # The clean sentence is scanned before its noisy counterpart, so ids are
    # handed out in first-seen order: clean text first, then noisy text.
    for string in (sen, noisy_sen):
        for character in string:
            if character in vocab_to_int:
                continue
            vocab_to_int[character] = count
            count += 1

# The special tokens receive the ids that follow the last real character
codes = ['<PAD>','<EOS>','<GO>']
for code in codes:
    vocab_to_int[code] = count
    count += 1

In [None]:
# Check the size of the vocabulary and list all of its entries.
# The count includes the special tokens added to vocab_to_int above the characters.
vocab_size = len(vocab_to_int)
print("The vocabulary contains {} characters.".format(vocab_size))
print(sorted(vocab_to_int))

The vocabulary contains 78 characters.
[' ', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '<EOS>', '<GO>', '<PAD>', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{']


In [None]:
# Convert sentences to integers
int_sentences = []
int_noise_sentences = []

for sample in samples:
    noisy_sen, sentence = sample

    # Clean (target) sentence as a list of character ids.
    int_sentences.append([vocab_to_int[character] for character in sentence])

    # Noisy (input) sentence as a list of character ids. This must be built from
    # `noisy_sen`: encoding `sentence` here would make every input identical to
    # its target and the model would never see a misspelling.
    int_noise_sentences.append([vocab_to_int[character] for character in noisy_sen])

In [None]:
# Peek at the first three encoded clean sentences, then the first three encoded noisy ones
print(int_sentences[:3])
print(int_noise_sentences[:3])

[[0, 1, 2, 3, 4, 5, 3, 6, 2, 1, 2, 3, 4, 5, 3, 7, 4, 6, 5, 8, 6, 2, 1, 2, 3, 4, 5, 3, 9, 7, 3, 10, 6, 9, 2, 11, 6, 8, 7, 12, 7, 8, 6, 10, 5, 6, 13, 5, 9, 9, 5, 3, 6, 14, 15, 7, 15, 6, 0, 1, 2, 3, 4, 5, 3, 9, 7, 3, 10, 6, 7, 9, 5, 10, 16, 5, 3, 2, 17, 6, 2, 6, 15, 14, 1, 18, 7, 19, 10, 16, 20, 7, 6, 7, 9, 5, 10, 16, 5, 3, 2, 17, 6, 15, 10, 2, 10, 7, 6, 16, 3, 6, 21, 22, 16, 19, 22, 6, 23, 7, 5, 23, 17, 7, 6, 12, 7, 7, 17, 6, 14, 3, 4, 7, 15, 16, 8, 7, 4, 6, 17, 7, 12, 10, 6, 1, 7, 22, 16, 3, 4, 6, 16, 3, 15, 7, 19, 14, 8, 7, 6, 5, 8, 6, 4, 16, 15, 19, 2, 8, 4, 7, 4, 6, 0, 1, 2, 3, 4, 5, 3, 9, 7, 3, 10, 6, 17, 7, 24, 2, 17, 6, 2, 6, 17, 7, 24, 2, 17, 6, 10, 7, 8, 9, 6, 8, 7, 24, 2, 8, 4, 16, 3, 24, 6, 23, 8, 5, 23, 7, 8, 10, 11, 6, 13, 22, 16, 17, 4, 6, 2, 1, 2, 3, 4, 5, 3, 9, 7, 3, 10, 6, 10, 22, 7, 6, 7, 25, 10, 8, 2, 17, 7, 24, 2, 17, 6, 2, 1, 2, 3, 4, 5, 3, 9, 7, 3, 10, 6, 5, 12, 6, 19, 22, 16, 17, 4, 8, 7, 3, 6, 26, 5, 15, 10, 6, 9, 16, 15, 17, 2, 16, 4, 6, 2, 3, 4, 6, 2, 1, 2, 3, 4

In [None]:
# Number of character ids in each clean sentence, as a one-column frame
lengths = pd.DataFrame([len(sentence) for sentence in int_sentences], columns=["counts"])

In [None]:
# Summary statistics of the sentence lengths (used to pick min/max length limits)
lengths.describe()

Unnamed: 0,counts
count,225884.0
mean,151.98934
std,213.41225
min,1.0
25%,84.0
50%,123.0
75%,177.0
max,17646.0


In [None]:
# Limit the data we will use to train our model
max_length = 200 # 92
min_length = 10

# The two lists are parallel (same index == same sample); refuse to pair them otherwise.
if len(int_sentences) != len(int_noise_sentences):
    raise Exception("can not map samples")

# Filter the pairs together. Filtering each list on its own can drop the noisy
# half of one sample and the clean half of another, which shifts every later
# pair out of alignment even when the two resulting lists have equal length.
data = [
    (noisy, clean)
    for noisy, clean in zip(int_noise_sentences, int_sentences)
    if min_length <= len(noisy) <= max_length and min_length <= len(clean) <= max_length
]

# Keep the individual lists consistent with the filtered pairs.
int_noise_sentences = [noisy for noisy, _ in data]
int_sentences = [clean for _, clean in data]

print("We will use {} to train and test our model.".format(len(data)))

We will use 183720 to train and test our model.


In [None]:
# Inspect one (noisy ids, clean ids) pair
print(data[2])

([40, 22, 7, 11, 6, 2, 8, 7, 6, 19, 17, 5, 15, 7, 17, 11, 6, 8, 7, 17, 2, 10, 7, 4, 6, 10, 5, 6, 1, 14, 10, 6, 3, 5, 10, 6, 16, 4, 7, 3, 10, 16, 19, 2, 17, 6, 21, 16, 10, 22, 6, 20, 2, 8, 16, 5, 14, 15, 6, 5, 10, 22, 7, 8, 6, 19, 5, 3, 19, 7, 23, 10, 15, 6, 15, 14, 19, 22, 6, 2, 15, 6, 4, 16, 15, 23, 5, 15, 16, 10, 16, 5, 3, 6, 2, 23, 10, 16, 10, 14, 4, 7, 6, 10, 2, 17, 7, 3, 10, 6, 23, 5, 10, 7, 3, 10, 16, 2, 17, 6, 2, 3, 4, 6, 15, 37, 16, 17, 17], [40, 22, 7, 11, 6, 2, 8, 7, 6, 19, 17, 5, 15, 7, 17, 11, 6, 8, 7, 17, 2, 10, 7, 4, 6, 10, 5, 6, 1, 14, 10, 6, 3, 5, 10, 6, 16, 4, 7, 3, 10, 16, 19, 2, 17, 6, 21, 16, 10, 22, 6, 20, 2, 8, 16, 5, 14, 15, 6, 5, 10, 22, 7, 8, 6, 19, 5, 3, 19, 7, 23, 10, 15, 6, 15, 14, 19, 22, 6, 2, 15, 6, 4, 16, 15, 23, 5, 15, 16, 10, 16, 5, 3, 6, 2, 23, 10, 16, 10, 14, 4, 7, 6, 10, 2, 17, 7, 3, 10, 6, 23, 5, 10, 7, 3, 10, 16, 2, 17, 6, 2, 3, 4, 6, 15, 37, 16, 17, 17])


In [None]:
# Split the data into training and testing sentences
# (80% / 20%; random_state is fixed so the split is the same on every run)
training, testing = train_test_split(data, test_size = 0.2, random_state = 2)

print("Number of training sentences:", len(training))
print("Number of testing sentences:", len(testing))

Number of training sentences: 146976
Number of testing sentences: 36744


In [None]:
# First training pair: (noisy ids, clean ids)
training[0]

([40,
  22,
  7,
  6,
  9,
  5,
  4,
  7,
  8,
  3,
  6,
  15,
  10,
  14,
  4,
  11,
  6,
  5,
  12,
  6,
  22,
  16,
  15,
  10,
  5,
  8,
  11,
  6,
  16,
  15,
  6,
  2,
  3,
  4,
  6,
  16,
  3,
  19,
  17,
  14,
  4,
  7,
  15,
  6,
  10,
  22,
  7,
  6,
  15,
  10,
  14,
  4,
  11,
  6,
  5,
  12,
  6,
  15,
  23,
  7,
  19,
  16,
  12,
  16,
  19,
  6,
  8,
  7,
  24,
  16,
  5,
  3,
  15,
  6,
  2,
  3,
  4,
  6,
  10,
  22,
  7,
  6,
  15,
  10,
  14,
  4,
  11,
  6,
  5,
  12,
  6,
  19,
  7,
  8,
  10,
  2,
  16,
  3,
  6,
  10,
  5,
  23,
  16,
  19,
  2,
  17,
  6,
  5,
  8,
  6,
  10,
  22,
  7,
  9,
  2,
  10,
  16,
  19,
  6,
  7,
  17,
  7,
  9,
  7,
  3,
  10,
  15,
  6,
  5,
  12,
  6,
  22,
  16,
  15,
  10,
  5,
  8,
  16,
  19,
  2,
  17,
  6,
  16,
  3,
  20,
  7,
  15,
  10,
  16,
  24,
  2,
  10,
  16,
  5,
  3],
 [40,
  22,
  7,
  6,
  9,
  5,
  4,
  7,
  8,
  3,
  6,
  15,
  10,
  14,
  4,
  11,
  6,
  5,
  12,
  6,
  22,
  16,
  15,
  10,
  5,
  8,
  11,
  

# Building the Model

## Build model

In [None]:
def model_inputs():
    '''Create the placeholders that feed the model.

    Returns a tuple (inputs, targets, keep_prob, inputs_length, targets_length,
    max_target_length); the last entry is the maximum of targets_length.
    '''

    placeholder = tf.compat.v1.placeholder

    with tf.name_scope('inputs'):
        inputs = placeholder(tf.int32, [None, None], name='inputs')
    with tf.name_scope('targets'):
        targets = placeholder(tf.int32, [None, None], name='targets')

    keep_prob = placeholder(tf.float32, name='keep_prob')
    inputs_length = placeholder(tf.int32, (None,), name='inputs_length')
    targets_length = placeholder(tf.int32, (None,), name='targets_length')

    # Largest value fed through targets_length for the current batch
    max_target_length = tf.reduce_max(targets_length, name='max_target_len')

    return inputs, targets, keep_prob, inputs_length, targets_length, max_target_length

In [None]:
def process_encoding_input(targets, vocab_to_int, batch_size):
    '''Build the decoder input: drop the last id of every target row and put <GO> in front'''

    with tf.name_scope("process_encoding"):
        # Every row without its final column
        without_last = tf.strided_slice(targets, [0, 0], [batch_size, -1], [1, 1])
        # One <GO> id per row, shaped [batch_size, 1]
        go_column = tf.fill([batch_size, 1], vocab_to_int['<GO>'])
        return tf.concat([go_column, without_last], 1)

In [None]:
def encoding_layer(rnn_size, sequence_length, num_layers, rnn_inputs, keep_prob, direction):
    '''Create the encoding layer.

    Args:
        rnn_size: number of units in every LSTM cell.
        sequence_length: per-example lengths passed to the dynamic RNN.
        num_layers: number of stacked encoder layers (must be >= 1).
        rnn_inputs: embedded encoder inputs.
        keep_prob: keep probability applied to each cell's inputs.
        direction: 1 for a unidirectional encoder, 2 for a bidirectional one.

    Returns:
        (enc_output, enc_state) of the last layer. For direction == 2 the
        forward and backward outputs are concatenated on the last axis and
        only the forward state is returned.

    Raises:
        ValueError: if direction is not 1 or 2, or num_layers < 1.
    '''

    if direction not in (1, 2):
        raise ValueError("direction must be 1 or 2, got {!r}".format(direction))
    if num_layers < 1:
        raise ValueError("num_layers must be at least 1, got {!r}".format(num_layers))

    rnn_cell = tf.compat.v1.nn.rnn_cell

    # Each layer consumes the output of the layer below it. Feeding `rnn_inputs`
    # to every layer would throw away all layers except the last one.
    layer_inputs = rnn_inputs

    if direction == 1:
        with tf.name_scope("RNN_Encoder_Cell_1D"):
            for layer in range(num_layers):
                with tf.compat.v1.variable_scope('encoder_{}'.format(layer)):
                    lstm = rnn_cell.LSTMCell(rnn_size)

                    drop = rnn_cell.DropoutWrapper(lstm,
                                                   input_keep_prob = keep_prob)

                    # tf.nn.dynamic_rnn is not available in TF2; use the
                    # compat.v1 symbol like the rest of this notebook.
                    enc_output, enc_state = tf.compat.v1.nn.dynamic_rnn(drop,
                                                                        layer_inputs,
                                                                        sequence_length,
                                                                        dtype=tf.float32)
                    layer_inputs = enc_output

            return enc_output, enc_state

    with tf.name_scope("RNN_Encoder_Cell_2D"):
        for layer in range(num_layers):
            with tf.compat.v1.variable_scope('encoder_{}'.format(layer)):
                cell_fw = rnn_cell.LSTMCell(rnn_size)
                cell_fw = rnn_cell.DropoutWrapper(cell_fw,
                                                  input_keep_prob = keep_prob)

                cell_bw = rnn_cell.LSTMCell(rnn_size)
                cell_bw = rnn_cell.DropoutWrapper(cell_bw,
                                                  input_keep_prob = keep_prob)

                enc_output, enc_state = tf.compat.v1.nn.bidirectional_dynamic_rnn(cell_fw,
                                                                                  cell_bw,
                                                                                  layer_inputs,
                                                                                  sequence_length,
                                                                                  dtype=tf.float32)
                # Join the forward and backward outputs since we are using a
                # bidirectional RNN; the joined tensor feeds the next layer.
                enc_output = tf.concat(enc_output, 2)
                layer_inputs = enc_output

        # Use only the forward state because the model can't use both states at once
        return enc_output, enc_state[0]

In [None]:
def training_decoding_layer(dec_embed_input, targets_length, dec_cell, initial_state, output_layer, 
                            vocab_size, max_target_length):
    '''Create the training logits.

    Decodes with teacher forcing: the embedded target sequence is fed to the
    decoder cell step by step.

    Args:
        dec_embed_input: embedded decoder inputs (batch-major).
        targets_length: per-example target lengths.
        dec_cell: decoder cell (already wrapped with attention by the caller).
        initial_state: initial state for dec_cell.
        output_layer: layer projecting cell outputs to vocabulary logits.
        vocab_size: unused; kept for interface compatibility.
        max_target_length: maximum number of decoding steps.

    Returns:
        The decoder's final outputs (exposes .rnn_output and .sample_id).
    '''

    with tf.name_scope("Training_Decoder"):
        # tf.compat.v1.contrib does not exist in TF2; tensorflow_addons provides
        # the seq2seq API, with helpers renamed to samplers.
        training_sampler = tfa.seq2seq.TrainingSampler(time_major=False)

        training_decoder = tfa.seq2seq.BasicDecoder(dec_cell,
                                                    training_sampler,
                                                    output_layer=output_layer)

        # With the Addons decoder, the inputs, initial state and sequence
        # lengths are supplied at decode time instead of at construction time.
        decode_result = tfa.seq2seq.dynamic_decode(
            training_decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=max_target_length,
            decoder_init_input=dec_embed_input,
            decoder_init_kwargs={'initial_state': initial_state,
                                 'sequence_length': targets_length})

        # dynamic_decode returns (outputs, final_state, final_sequence_lengths)
        training_logits = decode_result[0]
        return training_logits

In [None]:
def inference_decoding_layer(embeddings, start_token, end_token, dec_cell, initial_state, output_layer,
                             max_target_length, batch_size):
    '''Create the inference logits.

    Decodes greedily: at every step the arg-max id is embedded and fed back
    as the next input, starting from start_token.

    Args:
        embeddings: decoder embedding matrix used to embed sampled ids.
        start_token: id fed at the first step (same for every batch row).
        end_token: id that marks a finished sequence.
        dec_cell: decoder cell (already wrapped with attention by the caller).
        initial_state: initial state for dec_cell.
        output_layer: layer projecting cell outputs to vocabulary logits.
        max_target_length: maximum number of decoding steps.
        batch_size: number of rows decoded in parallel.

    Returns:
        The decoder's final outputs (exposes .rnn_output and .sample_id).
    '''

    with tf.name_scope("Inference_Decoder"):
        start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32), [batch_size], name='start_tokens')

        # tf.compat.v1.contrib does not exist in TF2; tensorflow_addons provides
        # the seq2seq API, with helpers renamed to samplers.
        inference_sampler = tfa.seq2seq.GreedyEmbeddingSampler()

        inference_decoder = tfa.seq2seq.BasicDecoder(dec_cell,
                                                     inference_sampler,
                                                     output_layer=output_layer)

        # The embedding matrix, start/end tokens and initial state are supplied
        # at decode time with the Addons decoder.
        decode_result = tfa.seq2seq.dynamic_decode(
            inference_decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=max_target_length,
            decoder_init_input=embeddings,
            decoder_init_kwargs={'start_tokens': start_tokens,
                                 'end_token': end_token,
                                 'initial_state': initial_state})

        # dynamic_decode returns (outputs, final_state, final_sequence_lengths)
        inference_logits = decode_result[0]
        return inference_logits

In [None]:
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size, inputs_length, targets_length, 
                   max_target_length, rnn_size, vocab_to_int, keep_prob, batch_size, num_layers, direction):
    '''Create the decoding cell and attention for the training and inference decoding layers.

    Args:
        dec_embed_input: embedded decoder inputs used for training.
        embeddings: decoder embedding matrix used for inference.
        enc_output: encoder outputs; used as the attention memory.
        enc_state: encoder final state; seeds the decoder cell state.
        vocab_size: size of the output projection.
        inputs_length: encoder input lengths (masks the attention memory).
        targets_length: target lengths for the training decoder.
        max_target_length: maximum number of decoding steps.
        rnn_size: number of LSTM units / attention size.
        vocab_to_int: vocabulary lookup providing the <GO> and <EOS> ids.
        keep_prob: keep probability applied to the decoder cell inputs.
        batch_size: number of rows per batch.
        num_layers: number of iterations of the cell-construction loop.
        direction: unused here; kept for interface compatibility.

    Returns:
        (training_logits, inference_logits) decoder outputs.
    '''

    with tf.name_scope("RNN_Decoder_Cell"):
        # NOTE(review): each iteration rebinds dec_cell, so only the cell built
        # in the last iteration is used — the decoder is effectively one layer.
        for layer in range(num_layers):
            with tf.compat.v1.variable_scope('decoder_{}'.format(layer)):
                lstm = tf.compat.v1.nn.rnn_cell.LSTMCell(rnn_size)
                dec_cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(lstm, 
                                                         input_keep_prob = keep_prob)

    output_layer = Dense(vocab_size,
                         kernel_initializer = tf.compat.v1.truncated_normal_initializer(mean = 0.0, stddev=0.1))

    attn_mech = tfa.seq2seq.BahdanauAttention(rnn_size,
                                              enc_output,
                                              inputs_length,
                                              normalize=False,
                                              name='BahdanauAttention')

    with tf.name_scope("Attention_Wrapper"):
        dec_cell = tfa.seq2seq.AttentionWrapper(dec_cell,
                                                attn_mech,
                                                rnn_size)

    # Start from the wrapper's zero state and replace only the wrapped cell's
    # state with the encoder state. Calling AttentionWrapper(enc_state, ...)
    # here would construct another wrapper cell, not an initial state.
    initial_state = dec_cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)
    initial_state = initial_state.clone(cell_state=enc_state)

    with tf.compat.v1.variable_scope("decode"):
        training_logits = training_decoding_layer(dec_embed_input, 
                                                  targets_length, 
                                                  dec_cell, 
                                                  initial_state,
                                                  output_layer,
                                                  vocab_size, 
                                                  max_target_length)
    with tf.compat.v1.variable_scope("decode", reuse=True):
        inference_logits = inference_decoding_layer(embeddings,  
                                                    vocab_to_int['<GO>'], 
                                                    vocab_to_int['<EOS>'],
                                                    dec_cell, 
                                                    initial_state, 
                                                    output_layer,
                                                    max_target_length,
                                                    batch_size)

    return training_logits, inference_logits

In [None]:
def seq2seq_model(inputs, targets, keep_prob, inputs_length, targets_length, max_target_length, 
                  vocab_size, rnn_size, num_layers, vocab_to_int, batch_size, embedding_size, direction):
    '''Wire the encoder and decoder together and return (training_logits, inference_logits)'''

    embedding_shape = [vocab_size, embedding_size]

    # Encoder side: embed the input ids and run them through the encoder
    enc_embeddings = tf.Variable(tf.random.uniform(embedding_shape, -1, 1))
    enc_output, enc_state = encoding_layer(
        rnn_size,
        inputs_length,
        num_layers,
        tf.nn.embedding_lookup(enc_embeddings, inputs),
        keep_prob,
        direction)

    # Decoder side: separate embedding matrix, inputs are the <GO>-shifted targets
    dec_embeddings = tf.Variable(tf.random.uniform(embedding_shape, -1, 1))
    dec_input = process_encoding_input(targets, vocab_to_int, batch_size)
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    return decoding_layer(dec_embed_input,
                          dec_embeddings,
                          enc_output,
                          enc_state,
                          vocab_size,
                          inputs_length,
                          targets_length,
                          max_target_length,
                          rnn_size,
                          vocab_to_int,
                          keep_prob,
                          batch_size,
                          num_layers,
                          direction)

In [None]:
def pad_sentence_batch(sentence_batch):
    """Right-pad every sentence with <PAD> ids up to the length of the longest sentence in the batch"""
    longest = max(len(sentence) for sentence in sentence_batch)
    pad_id = vocab_to_int['<PAD>']

    padded = []
    for sentence in sentence_batch:
        missing = longest - len(sentence)
        padded.append(sentence + [pad_id] * missing)
    return padded

In [None]:
def get_batches(sentences, batch_size, threshold):
    """Yield padded batches of (noisy ids, clean ids) pairs together with their lengths.

    Args:
        sentences: sequence of (noisy_ids, clean_ids) pairs.
        batch_size: number of pairs per batch; a trailing partial batch is dropped.
        threshold: unused; kept for interface compatibility.

    Yields:
        (padded noisy batch, padded clean batch with <EOS> appended,
         noisy lengths, clean lengths). The lengths are taken from the padded
        rows, so every entry of a lengths list equals the padded width.
    """

    eos_id = vocab_to_int['<EOS>']

    for batch_i in range(0, len(sentences)//batch_size):
        start_i = batch_i * batch_size
        sentences_batch = sentences[start_i:start_i + batch_size]

        sentences_batch_noisy = [noisy for noisy, _ in sentences_batch]

        # Build a new list for each target instead of appending in place:
        # the pairs are shared with the dataset, so an in-place append would
        # add one more <EOS> to every target each time the data is iterated.
        sentences_batch_eos = [list(clean) + [eos_id] for _, clean in sentences_batch]

        pad_sentences_batch = np.array(pad_sentence_batch(sentences_batch_eos))
        pad_sentences_noisy_batch = np.array(pad_sentence_batch(sentences_batch_noisy))

        # Need the lengths for the _lengths parameters
        pad_sentences_lengths = [len(sentence) for sentence in pad_sentences_batch]
        pad_sentences_noisy_lengths = [len(sentence) for sentence in pad_sentences_noisy_batch]

        yield pad_sentences_noisy_batch, pad_sentences_batch, pad_sentences_noisy_lengths, pad_sentences_lengths

*Note: This set of values achieved the best results.*

In [None]:
# The default parameters
epochs = 100             # upper bound on training epochs (training may stop early)
batch_size = 128         # pairs per batch; also baked into the graph by build_graph
num_layers = 2           # value handed to the encoder/decoder layer loops
rnn_size = 512           # units in each LSTM cell
embedding_size = 128     # width of the character embeddings
learning_rate = 0.0005   # Adam learning rate
direction = 2            # 1 = unidirectional encoder, 2 = bidirectional encoder
threshold = 0.95         # passed to get_batches and written into the log/checkpoint name
keep_probability = 0.75  # fed to the keep_prob placeholder while training

In [None]:
def build_graph(keep_prob, rnn_size, num_layers, batch_size, learning_rate, embedding_size, direction):
    '''Reset the default graph and build the full training/inference graph.

    Args:
        keep_prob: ignored — it is immediately replaced by the keep_prob
            placeholder created in model_inputs(); kept for interface
            compatibility.
        rnn_size: units in each LSTM cell.
        num_layers: number of layers requested from the encoder/decoder.
        batch_size: fixed batch size the graph is built for.
        learning_rate: Adam learning rate.
        embedding_size: width of the character embeddings.
        direction: 1 for a unidirectional encoder, 2 for bidirectional.

    Returns:
        A `Graph` namedtuple with the fields inputs, targets, keep_prob, cost,
        inputs_length, targets_length, predictions, merged, train_op, optimizer.
    '''

    tf.compat.v1.reset_default_graph()

    # Load the model inputs
    inputs, targets, keep_prob, inputs_length, targets_length, max_target_length = model_inputs()

    # Create the training and inference logits (the encoder reads the inputs reversed)
    training_logits, inference_logits = seq2seq_model(tf.reverse(inputs, [-1]),
                                                      targets,
                                                      keep_prob,
                                                      inputs_length,
                                                      targets_length,
                                                      max_target_length,
                                                      len(vocab_to_int)+1,
                                                      rnn_size,
                                                      num_layers,
                                                      vocab_to_int,
                                                      batch_size,
                                                      embedding_size,
                                                      direction)

    # Create tensors for the training logits and inference logits
    training_logits = tf.identity(training_logits.rnn_output, 'logits')

    with tf.name_scope('predictions'):
        predictions = tf.identity(inference_logits.sample_id, name='predictions')
        # Graph-mode (TF1-style) summaries live under tf.compat.v1 in TF2
        tf.compat.v1.summary.histogram('predictions', predictions)

    # Create the weights for sequence_loss
    masks = tf.sequence_mask(targets_length, max_target_length, dtype=tf.float32, name='masks')

    with tf.name_scope("cost"):
        # Loss function (tf.compat.v1.contrib is gone in TF2; Addons provides it)
        cost = tfa.seq2seq.sequence_loss(training_logits,
                                         targets,
                                         masks)
        tf.compat.v1.summary.scalar('cost', cost)

    with tf.name_scope("optimze"):
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

    # Merge all of the summaries
    merged = tf.compat.v1.summary.merge_all()

    # Export the nodes explicitly instead of pulling them out of locals()
    export_nodes = ['inputs', 'targets', 'keep_prob', 'cost', 'inputs_length', 'targets_length',
                    'predictions', 'merged', 'train_op','optimizer']
    Graph = namedtuple('Graph', export_nodes)
    graph = Graph(inputs=inputs,
                  targets=targets,
                  keep_prob=keep_prob,
                  cost=cost,
                  inputs_length=inputs_length,
                  targets_length=targets_length,
                  predictions=predictions,
                  merged=merged,
                  train_op=train_op,
                  optimizer=optimizer)

    return graph

## Training the Model

In [None]:
def train(model, epochs, log_string):
    '''Train the RNN.

    Runs up to `epochs` passes over the global `training` pairs, evaluates on
    the global `testing` pairs a few times per epoch, saves a checkpoint named
    "./<log_string>.ckpt" whenever the testing loss reaches a new minimum, and
    stops early after `stop` consecutive evaluations without improvement.

    Args:
        model: the Graph namedtuple returned by build_graph.
        epochs: maximum number of epochs.
        log_string: label used for the log directories and the checkpoint name.
    '''

    # Graph-mode sessions, savers and summary writers live under tf.compat.v1 in TF2
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        # Used to determine when to stop the training early
        testing_loss_summary = []

        # Keep track of which batch iteration is being trained
        iteration = 0

        display_step = 30 # The progress of the training will be displayed after every 30 batches
        stop_early = 0 
        stop = 3 # If the batch_loss_testing does not decrease in 3 consecutive checks, stop training
        per_epoch = 3 # Test the model 3 times per epoch
        # Clamp to 1 so a small training set cannot produce a zero/negative modulus
        testing_check = max((len(training)//batch_size//per_epoch)-1, 1)

        print()
        print("Training Model: {}".format(log_string))

        train_writer = tf.compat.v1.summary.FileWriter('./logs/1/train/{}'.format(log_string), sess.graph)
        test_writer = tf.compat.v1.summary.FileWriter('./logs/1/test/{}'.format(log_string))

        # Create the saver once; building one per checkpoint keeps adding save ops to the graph
        checkpoint = "./{}.ckpt".format(log_string)
        saver = tf.compat.v1.train.Saver()

        try:
            for epoch_i in range(1, epochs+1): 
                batch_loss = 0
                batch_time = 0

                for batch_i, (input_batch, target_batch, input_length, target_length) in enumerate(
                        get_batches(training, batch_size, threshold)):
                    start_time = time.time()

                    summary, loss, _ = sess.run([model.merged,
                                                 model.cost, 
                                                 model.train_op], 
                                                 {model.inputs: input_batch,
                                                  model.targets: target_batch,
                                                  model.inputs_length: input_length,
                                                  model.targets_length: target_length,
                                                  model.keep_prob: keep_probability})

                    batch_loss += loss
                    end_time = time.time()
                    batch_time += end_time - start_time

                    # Record the progress of training
                    train_writer.add_summary(summary, iteration)

                    iteration += 1

                    if batch_i % display_step == 0 and batch_i > 0:
                        print('Epoch {:>3}/{} Batch {:>4}/{} - Loss: {:>6.3f}, Seconds: {:>4.2f}'
                              .format(epoch_i,
                                      epochs, 
                                      batch_i, 
                                      len(training) // batch_size, 
                                      batch_loss / display_step, 
                                      batch_time))
                        batch_loss = 0
                        batch_time = 0

                    #### Testing ####
                    if batch_i % testing_check == 0 and batch_i > 0:
                        batch_loss_testing = 0
                        batch_time_testing = 0
                        n_batches_testing = 0
                        # Distinct names so the evaluation loop does not clobber
                        # the training loop's batch index and batch tensors
                        for (test_inputs, test_targets,
                             test_input_length, test_target_length) in get_batches(testing, batch_size, threshold):
                            start_time_testing = time.time()
                            summary, loss = sess.run([model.merged,
                                                      model.cost], 
                                                         {model.inputs: test_inputs,
                                                          model.targets: test_targets,
                                                          model.inputs_length: test_input_length,
                                                          model.targets_length: test_target_length,
                                                          model.keep_prob: 1})

                            batch_loss_testing += loss
                            end_time_testing = time.time()
                            batch_time_testing += end_time_testing - start_time_testing
                            n_batches_testing += 1

                            # Record the progress of testing
                            test_writer.add_summary(summary, iteration)

                        # max(..., 1) avoids dividing by zero when `testing` is smaller than one batch
                        print('Testing Loss: {:>6.3f}, Seconds: {:>4.2f}'
                              .format(batch_loss_testing / max(n_batches_testing, 1), 
                                      batch_time_testing))

                        # If the batch_loss_testing is at a new minimum, save the model
                        testing_loss_summary.append(batch_loss_testing)
                        if batch_loss_testing <= min(testing_loss_summary):
                            print('New Record!') 
                            stop_early = 0
                            saver.save(sess, checkpoint)

                        else:
                            print("No Improvement.")
                            stop_early += 1
                            if stop_early == stop:
                                break

                if stop_early == stop:
                    print("Stopping Training.")
                    break
        finally:
            # Flush and release the event files even if training is interrupted
            train_writer.close()
            test_writer.close()

## Train

In [None]:
tf.compat.v1.disable_eager_execution()
# Train one model per (keep_probability, num_layers, threshold) combination.
# The loop variables deliberately rebind the module-level settings of the same
# names, which train() reads as globals.
for keep_probability, num_layers, threshold in [(0.75, 2, 0.95)]:
    log_string = 'kp={},nl={},th={}'.format(keep_probability, num_layers, threshold)
    model = build_graph(keep_probability,
                        rnn_size,
                        num_layers,
                        batch_size,
                        learning_rate,
                        embedding_size,
                        direction)
    train(model, epochs, log_string)

  partitioner=maybe_partitioner)
  initializer=initializer)
  


TypeError: ignored

## Fixing Custom Sentences

In [None]:
def text_to_ints(text):
    '''Clean the text and map each of its characters to its vocabulary id'''

    # NOTE(review): clean_text is not defined in the visible part of this
    # notebook — confirm it exists before running this cell.
    cleaned = clean_text(text)

    ids = []
    for character in cleaned:
        ids.append(vocab_to_int[character])
    return ids

In [None]:
# Create your own sentence or use one from the dataset
text = "Spellin is difficult, whch is wyh you need to study everyday."
text = text_to_ints(text)

# Reverse lookup (integer id -> character / special token) for decoding the output
int_to_vocab = {index: token for token, index in vocab_to_int.items()}

checkpoint = "./kp=0.75,nl=2,th=0.95.ckpt"

model = build_graph(keep_probability, rnn_size, num_layers, batch_size, learning_rate, embedding_size, direction) 

# Graph-mode sessions and savers live under tf.compat.v1 in TF2
with tf.compat.v1.Session() as sess:
    # Load saved model
    saver = tf.compat.v1.train.Saver()
    saver.restore(sess, checkpoint)

    # The graph is built for a fixed batch size, so repeat the single sentence
    # (and its lengths) batch_size times and keep only the first prediction.
    answer_logits = sess.run(model.predictions, {model.inputs: [text]*batch_size, 
                                                 model.inputs_length: [len(text)]*batch_size,
                                                 model.targets_length: [len(text)+1]*batch_size, 
                                                 model.keep_prob: 1.0})[0]

# Remove the padding from the generated sentence
pad = vocab_to_int["<PAD>"] 

print('\nText')
print('  Word Ids:    {}'.format([i for i in text]))
print('  Input Words: {}'.format("".join([int_to_vocab[i] for i in text])))

print('\nSummary')
print('  Word Ids:       {}'.format([i for i in answer_logits if i != pad]))
print('  Response Words: {}'.format("".join([int_to_vocab[i] for i in answer_logits if i != pad])))

In [None]:
import os

def find_parent(input_directory, input_file):
  """Print every directory under input_directory that directly contains input_file.

  The tree is searched recursively. Entries whose name starts with '.' are not
  descended into (they are still compared against input_file).

  Args:
    input_directory: directory where the search starts.
    input_file: entry name to look for (exact match).
  """
  for p in os.listdir(input_directory):
    if p == input_file:
      print(input_directory)
    child = os.path.join(input_directory, p)
    # Descend only into real directories. Treating "not a regular file" as a
    # directory makes os.listdir fail on dangling symlinks and special files.
    if not p.startswith('.') and os.path.isdir(child):
      find_parent(child, input_file)

# Ask for the search root first, then the file name, and run the search
input_directory = input('directory: ')
input_file = input('file: ')
find_parent(input_directory, input_file)