# Train Neural Network to behave like a support agent

In [1]:
import os
data_dir = './data/examples4.txt'
def load_data(path):
    """
    Load Dataset from File
    """
    input_file = os.path.join(path)
    with open(input_file, "r") as f:
        data = f.read()
    return data
# raw_text = load_data(data_dir)

### Lookup Table
To create a word embedding, we first need to transform the words to ids.  In this function, we create two dictionaries:
- Dictionary to go from the words to an id, we'll call `vocab_to_int`
- Dictionary to go from the id to word, we'll call `int_to_vocab`

Return these dictionaries in the following tuple `(vocab_to_int, int_to_vocab)`

In [2]:
import numpy as np
from collections import Counter

def create_lookup_tables(text):
    """
    Create lookup tables for vocabulary
    :param text: The text of tv scripts split into words
    :return: A tuple of dicts (vocab_to_int, int_to_vocab)
    """
    counts = Counter(text)
    vocab = sorted(counts, key=counts.get, reverse=True)
    vocab = [word for word in vocab if counts[word] > 15]
    vocab_to_int = {word: ii for ii, word in enumerate(vocab)}
    int_to_vocab = {ii: word for ii, word in enumerate(vocab)}
    last_element = len(vocab)
    vocab_to_int['||UNKNOWN||'] = last_element
    int_to_vocab[last_element] = '||UNKNOWN||'
    return vocab_to_int, int_to_vocab


## Preprocess all the data and save it
Running the code cell below will preprocess all the data and save it to file.

In [4]:
import pickle
def preprocess_and_save_data(dataset_path):
    """
    Preprocess Text Data
    """
    text = load_data(dataset_path)

    lines = text.split('\n')
    
    text = text.replace('\n', ' ').split()

    vocab_to_int, int_to_vocab = create_lookup_tables(text)
    int_lines = [[vocab_to_int.get(word, len(vocab_to_int) - 1) for word in line.split()] for line in lines]
    pickle.dump((int_lines, vocab_to_int, int_to_vocab), open('preprocess.p', 'wb'))
    
preprocess_and_save_data(data_dir)

# Check Point
This is your first checkpoint. If you ever decide to come back to this notebook or have to restart the notebook, you can start from here. The preprocessed data has been saved to disk.

In [3]:
import helper
import numpy as np
import pickle

def load_preprocess():
    """
    Load the Preprocessed Training data and return them in batches of <batch_size> or less
    """
    return pickle.load(open('preprocess.p', mode='rb'))

int_lines, vocab_to_int, int_to_vocab = load_preprocess()

## Build the Neural Network
Build the components necessary to build a RNN by implementing the following functions below:
- get_inputs
- get_init_cell
- get_embed
- build_rnn
- build_nn
- get_batches

### Check the Version of TensorFlow and Access to GPU

In [2]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer'
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.0.0




### Input
Implementing the `get_inputs()` function to create TF Placeholders for the Neural Network.  It should create the following placeholders:
- Input text placeholder named "input" using the [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder) `name` parameter.
- Targets placeholder
- Learning Rate placeholder

Return the placeholders in the following tuple `(Input, Targets, LearningRate)`

In [5]:
import tensorflow as tf

def get_inputs():
    """
    Create TF Placeholders for input, targets, and learning rate.
    :return: Tuple (input, targets, learning rate)
    """
    input_placeholder = tf.placeholder(tf.int32, [None, None], name="input")
    targets_placeholder = tf.placeholder(tf.int32, [None, None], name="targets")
    learning_rate_placeholder = tf.placeholder(tf.float32, name="learning_rate")
    keep_prob_placeholder = tf.placeholder(tf.float32, name='keep_prob')
    return input_placeholder, targets_placeholder, learning_rate_placeholder, keep_prob_placeholder



### Build RNN Cell and Initialize
Stack one or more [`BasicLSTMCells`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/BasicLSTMCell) in a [`MultiRNNCell`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/MultiRNNCell).
- The Rnn size should be set using `rnn_size`
- Initalize Cell State using the MultiRNNCell's [`zero_state()`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/MultiRNNCell#zero_state) function
    - Apply the name "initial_state" to the initial state using [`tf.identity()`](https://www.tensorflow.org/api_docs/python/tf/identity)

Return the cell and initial state in the following tuple `(Cell, InitialState)`

In [6]:
def get_init_cell(batch_size, rnn_size, keep_prob):
    """
    Create an RNN Cell and initialize it.
    :param batch_size: Size of batches
    :param rnn_size: Size of RNNs
    :return: Tuple (cell, initialize state)
    """
    lstm_layers = 5
    
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
    cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
    
    initial_state = cell.zero_state(batch_size, tf.float32)
    named_state = tf.identity(initial_state, name="initial_state")
    return cell, named_state


### Word Embedding
Apply embedding to `input_data` using TensorFlow.  Return the embedded sequence.

In [7]:
def get_embed(input_data, vocab_size, embed_dim):
    """
    Create embedding for <input_data>.
    :param input_data: TF placeholder for text input.
    :param vocab_size: Number of words in vocabulary.
    :param embed_dim: Number of embedding dimensions
    :return: Embedded input.
    """
    
    embedding = tf.Variable(tf.random_uniform((vocab_size, embed_dim), -1, 1))
    embed = tf.nn.embedding_lookup(embedding, input_data)
    return embed



### Build RNN
You created a RNN Cell in the `get_init_cell()` function.  Time to use the cell to create a RNN.
- Build the RNN using the [`tf.nn.dynamic_rnn()`](https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn)
 - Apply the name "final_state" to the final state using [`tf.identity()`](https://www.tensorflow.org/api_docs/python/tf/identity)

Return the outputs and final_state state in the following tuple `(Outputs, FinalState)` 

In [8]:
def build_rnn(cell, inputs):
    """
    Create a RNN using a RNN Cell
    :param cell: RNN Cell
    :param inputs: Input text data
    :return: Tuple (Outputs, Final State)
    """
    
    outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    named_state = tf.identity(final_state, name="final_state")
    return outputs, named_state


### Build the Neural Network
Apply the functions you implemented above to:
- Apply embedding to `input_data` using your `get_embed(input_data, vocab_size, embed_dim)` function.
- Build RNN using `cell` and your `build_rnn(cell, inputs)` function.
- Apply a fully connected layer with a linear activation and `vocab_size` as the number of outputs.

Return the logits and final state in the following tuple (Logits, FinalState) 

In [9]:
def build_nn(cell, rnn_size, input_data, vocab_size, embed_dim):
    """
    Build part of the neural network
    :param cell: RNN cell
    :param rnn_size: Size of rnns
    :param input_data: Input data
    :param vocab_size: Vocabulary size
    :param embed_dim: Number of embedding dimensions
    :return: Tuple (Logits, FinalState)
    """
    embeding = get_embed(input_data, vocab_size, embed_dim)
    outputs, final_state = build_rnn(cell, embeding)
    logits = tf.contrib.layers.fully_connected(outputs, vocab_size, activation_fn=None)
    return logits, final_state


### Batches
Implement `get_batches` to create batches of input and targets using `int_text`.  The batches should be a Numpy array with the shape `(number of batches, 2, batch size, sequence length)`. Each batch contains two elements:
- The first element is a single batch of **input** with the shape `[batch size, sequence length]`
- The second element is a single batch of **targets** with the shape `[batch size, sequence length]`

If you can't fill the last batch with enough data, drop the last batch.

For exmple, `get_batches([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], 2, 3)` would return a Numpy array of the following:
```
[
  # First Batch
  [
    # Batch of Input
    [[ 1  2  3], [ 7  8  9]],
    # Batch of targets
    [[ 2  3  4], [ 8  9 10]]
  ],
 
  # Second Batch
  [
    # Batch of Input
    [[ 4  5  6], [10 11 12]],
    # Batch of targets
    [[ 5  6  7], [11 12 13]]
  ]
]
```

In [10]:
import math
end_char = vocab_to_int['||END||']
def transform_line(line, seq_length):
    should_be_length = math.ceil(len(line) / seq_length) * seq_length
    split_points = np.arange(seq_length, should_be_length, seq_length)
    seqs = np.array_split(line, split_points)
    return [np.lib.pad(seq, (0, seq_length - len(seq)), 'constant', constant_values=(0, end_char)) for seq in seqs]
    
def get_batches(int_lines, batch_size, seq_length):
    """
    Return batches of input and target
    :param int_text: Text with the words replaced by their ids
    :param batch_size: The size of batch
    :param seq_length: The length of sequence
    :return: Batches as a Numpy array
    """
    np.random.shuffle(int_lines)
    n_examples = 0
    for line in int_lines:
        n_examples += math.ceil(len(line) / seq_length)
    
    n_batches = n_examples // batch_size
#     int_lines = np.array([transform_line(np.array(line), seq_length) for line in int_lines])
    int_text = np.zeros((n_examples + 1, seq_length), dtype=int)
    i = 0
    for line in int_lines:
        sequences = transform_line(np.array(line), seq_length)
        for seq in sequences:
            i+=1
    i = 0
    for line in int_lines:
        sequences = transform_line(np.array(line), seq_length)
        for seq in sequences:
            int_text[i] = seq
            i+=1
            
    int_text = int_text.flatten()
    should_have_length = n_batches * 2 * batch_size * seq_length
    diff_in_length = should_have_length - len(int_text)
    if diff_in_length > 0:
        int_text = np.append(int_text, [end_char] * diff_in_length)
    
    total_elements = n_batches * batch_size * seq_length
    result = np.zeros((n_batches, 2, batch_size, seq_length), dtype=int)
    for batch_i in range(0, n_batches):
        for type_i in range(0, 2):
            for subbatch_i in range(0, batch_size):
                from_i = seq_length * n_batches * subbatch_i + seq_length * batch_i + type_i
                int_range = int_text[from_i:from_i + seq_length]
                end_index = np.where(int_range==end_char)[0]
#                 if len(end_index) > 0:
#                     int_range[end_index[0]:seq_length] = [end_char] * (seq_length - end_index[0])
                result[batch_i][type_i][subbatch_i] = int_range
        
    return result



example_result_1 = get_batches([[1, 2, 3, 4, 5], [6, 7, 8], [9, 10, 11, 12, 13, 14, 15]], 3, 3)
print(example_result_1)

[[[[ 9 10 11]
   [15 26 26]
   [ 4  5 26]]

  [[10 11 12]
   [26 26  1]
   [ 5 26  6]]]


 [[[12 13 14]
   [ 1  2  3]
   [ 6  7  8]]

  [[13 14 15]
   [ 2  3  4]
   [ 7  8  0]]]]


## Neural Network Training
### Hyperparameters
Tune the following parameters:

- Set `num_epochs` to the number of epochs.
- Set `batch_size` to the batch size.
- Set `rnn_size` to the size of the RNNs.
- Set `embed_dim` to the size of the embedding.
- Set `seq_length` to the length of sequence.
- Set `learning_rate` to the learning rate.
- Set `show_every_n_batches` to the number of batches the neural network should print progress.

In [11]:
# Number of Epochs
num_epochs = 10
# Batch Size
batch_size = 150
# RNN Size
rnn_size = 512
# Embedding Dimension Size
embed_dim = 400
# Sequence Length
seq_length = 60
# Learning Rate
learning_rate = 0.001
# Show stats for every n number of batches
show_every_n_batches = 100
save_every_n_batches = 500

"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
save_dir = './save'

### Build the Graph
Build the graph using the neural network you implemented.

In [14]:
from tensorflow.contrib import seq2seq

train_graph = tf.Graph()
with train_graph.as_default():
    vocab_size = len(int_to_vocab)
    input_text, targets, lr, keep_prob = get_inputs()
    input_data_shape = tf.shape(input_text)
    cell, initial_state = get_init_cell(input_data_shape[0], rnn_size, keep_prob)
    logits, final_state = build_nn(cell, rnn_size, input_text, vocab_size, embed_dim)

    # Probabilities for generating words
    probs = tf.nn.softmax(logits, name='probs')

    # Loss function
    cost = seq2seq.sequence_loss(
        logits,
        targets,
        tf.ones([input_data_shape[0], input_data_shape[1]]))

    # Optimizer
    optimizer = tf.train.AdamOptimizer(lr)

    # Gradient Clipping
    gradients = optimizer.compute_gradients(cost)
    capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients]
    train_op = optimizer.apply_gradients(capped_gradients)

In [15]:
batches = get_batches(int_lines, batch_size, seq_length)

## Train
Train the neural network on the preprocessed data.

In [None]:
# batches = get_batches(int_lines, batch_size, seq_length)

with tf.Session(graph=train_graph) as sess:
    saver = tf.train.Saver()
#     sess.run(tf.global_variables_initializer())
    saver.restore(sess, save_dir)
    
    for epoch_i in range(num_epochs):
        state = sess.run(initial_state, {input_text: batches[0][0]})

        for batch_i, (x, y) in enumerate(batches):
            # Show every <show_every_n_batches> batches
            if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:
                feed = {
                    input_text: x,
                    targets: y,
                    keep_prob: 1,
                    initial_state: state,
                    lr: learning_rate}
                train_loss, state, _ = sess.run([cost, final_state, train_op], feed)
                print('Epoch {:>3} Batch {:>4}/{}   train_loss = {:.3f}'.format(
                    epoch_i,
                    batch_i,
                    len(batches),
                    train_loss))
            else:
                feed = {
                    input_text: x,
                    targets: y,
                    keep_prob: 0.80,
                    initial_state: state,
                    lr: learning_rate}
                state, _ = sess.run([final_state, train_op], feed)
            if (epoch_i * len(batches) + batch_i) % save_every_n_batches == 0:
                saver.save(sess, save_dir)
                print("Saved")

    # Save Model
    saver.save(sess, save_dir)
    print('Model Trained and Saved')

Epoch   0 Batch    0/10562   train_loss = 1.238
Saved
Epoch   0 Batch  100/10562   train_loss = 1.267
Epoch   0 Batch  200/10562   train_loss = 1.266
Epoch   0 Batch  300/10562   train_loss = 1.205
Epoch   0 Batch  400/10562   train_loss = 1.272
Epoch   0 Batch  500/10562   train_loss = 1.369
Saved
Epoch   0 Batch  600/10562   train_loss = 1.247
Epoch   0 Batch  700/10562   train_loss = 1.226
Epoch   0 Batch  800/10562   train_loss = 1.304
Epoch   0 Batch  900/10562   train_loss = 1.364
Epoch   0 Batch 1000/10562   train_loss = 1.241
Saved
Epoch   0 Batch 1100/10562   train_loss = 1.259
Epoch   0 Batch 1200/10562   train_loss = 1.262
Epoch   0 Batch 1300/10562   train_loss = 1.229
Epoch   0 Batch 1400/10562   train_loss = 1.415
Epoch   0 Batch 1500/10562   train_loss = 1.179
Saved
Epoch   0 Batch 1600/10562   train_loss = 1.210
Epoch   0 Batch 1700/10562   train_loss = 1.178
Epoch   0 Batch 1800/10562   train_loss = 1.129
Epoch   0 Batch 1900/10562   train_loss = 1.237
Epoch   0 Batch 

## Save Parameters
Save `seq_length` and `save_dir` for generating a new TV script.

In [20]:
# Save parameters for checkpoint
helper.save_params((seq_length, save_dir))

# Checkpoint

In [3]:
import tensorflow as tf
import numpy as np
import helper

int_lines, vocab_to_int, int_to_vocab = load_preprocess()
seq_length, load_dir = helper.load_params()

NameError: name 'load_preprocess' is not defined

In [22]:
def get_tensors(loaded_graph):
    """
    Get input, initial state, final state, and probabilities tensor from <loaded_graph>
    :param loaded_graph: TensorFlow graph loaded from file
    :return: Tuple (InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)
    """
    return loaded_graph.get_tensor_by_name("input:0"),\
           loaded_graph.get_tensor_by_name("initial_state:0"),\
           loaded_graph.get_tensor_by_name("final_state:0"),\
        loaded_graph.get_tensor_by_name("keep_prob:0"),\
           loaded_graph.get_tensor_by_name("probs:0")


### Choose Word
Implement the `pick_word()` function to select the next word using `probabilities`.

In [23]:
def symbol_lookup(word):
    table = {
        '.': '||DOT||',
        ',': '||COMMA||',
        ':': '||COLON||',
        ';': '||SEMICOLON||',
        '-': '||DASH||',
        '_': '||UNDERSCORE||',
        '!': '||EXCLAMATION||',
        '?': '||QUESTION||',
        '(': '||LEFTPARENTHESIS||',
        ')': '||RIGHTPARENTHESIS||'
    }
    try:
        key = next(key for key, value in table.items() if value == word)
        return key
    except:
        return word

In [30]:
def pick_word(probabilities, int_to_vocab):
    """
    Pick the next word in the generated text
    :param probabilities: Probabilites of the next word
    :param int_to_vocab: Dictionary of word ids as the keys and words as the values
    :return: String of the predicted word
    """
    choice = np.where(probabilities==max(probabilities))[0][0]
#     choice = np.random.choice(len(probabilities), 1, p=probabilities)[0]
    return int_to_vocab[choice]


## Conversation

This will generate the TV script for you.  Set `gen_length` to the length of TV script you want to generate.

In [41]:
gen_length = 200

prime_sentence = "||ADMIN:|| one hour until your appointment with ||CLIENT_FIRST_NAME|| ||CLIENT_LAST_NAME|| ||LEFTPARENTHESIS|| ||PHONENUMBER|| ||DOT|| ||ADMIN:|| hi ||GEEK_FIRST_NAME|| ||COMMA|| heelotech here ||DOT|| a client in san jose has requested for your service for computer repair & help ||DOT|| pay is ||OTHERPRICE|| ||DOT|| can you help ||QUESTION|| ||TECH:|| can you give me the time and date and details of the repair ||START|| ||ADMIN:|| it was one of your previous client ||COMMA|| judy arvidson ||DOT|| it's for setting up email on computer ||DOT|| ||END|| ||ADMIN:|| one hour until your appointment with ||CLIENT_FIRST_NAME|| ||CLIENT_LAST_NAME|| ||LEFTPARENTHESIS|| ||PHONENUMBER|| ||DOT|| ||ADMIN:|| hi ||GEEK_FIRST_NAME|| ||COMMA|| heelotech here ||DOT|| a client in san jose has requested for your service for computer repair & help ||DOT|| pay is ||OTHERPRICE|| ||DOT|| can you help ||QUESTION|| ||TECH:|| can you give me the time and date and details of the repair ||ADMIN:|| it was one of your previous client ||COMMA|| judy arvidson ||DOT|| it's for setting up email on computer ||DOT|| ||TECH:|| okay ||DOT|| sure ||DOT|| i'll take it ||START|| ||ADMIN:||"

def make_initial_sentences(sentence):
    words = sentence.split(' ')
    results = []
    for word in words:
        if word in vocab_to_int:
            results.append(word)
        else:
            results.append('||UNKNOWN||')
    return results

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_dir + '.meta')
    loader.restore(sess, load_dir)

    # Get Tensors from loaded model
    input_text, initial_state, final_state, keep_prob, probs = get_tensors(loaded_graph)

    # Sentences generation setup
    gen_sentences = make_initial_sentences(prime_sentence)
    prev_state = sess.run(initial_state, {input_text: np.array([[1]]), keep_prob: 1.0})

    # Generate sentences
    for n in range(gen_length):
        # Dynamic Input
        dyn_input = [[vocab_to_int[word] for word in gen_sentences[-seq_length:]]]
        dyn_seq_length = len(dyn_input[0])

        # Get Prediction
        probabilities, prev_state = sess.run(
            [probs, final_state],
            {input_text: dyn_input, initial_state: prev_state, keep_prob: 1.0})
        
        pred_word = pick_word(probabilities[dyn_seq_length-1], int_to_vocab)
        gen_sentences.append(pred_word)
        if pred_word == '||END||':
            break
    
    # Remove tokens
    admin_reply = ' '.join([symbol_lookup(word) for word in gen_sentences])
    print(admin_reply)

||ADMIN:|| one hour until your appointment with ||CLIENT_FIRST_NAME|| ||CLIENT_LAST_NAME|| ( ||PHONENUMBER|| . ||ADMIN:|| hi ||GEEK_FIRST_NAME|| , heelotech here . a client in san jose has requested for your service for computer repair & help . pay is ||OTHERPRICE|| . can you help ? ||TECH:|| can you give me the time and date and details of the repair ||START|| ||ADMIN:|| it was one of your previous client , judy arvidson . it's for setting up email on computer . ||END|| ||ADMIN:|| one hour until your appointment with ||CLIENT_FIRST_NAME|| ||CLIENT_LAST_NAME|| ( ||PHONENUMBER|| . ||ADMIN:|| hi ||GEEK_FIRST_NAME|| , heelotech here . a client in san jose has requested for your service for computer repair & help . pay is ||OTHERPRICE|| . can you help ? ||TECH:|| can you give me the time and date and details of the repair ||ADMIN:|| it was one of your previous client , judy arvidson . it's for setting up email on computer . ||TECH:|| okay . sure . i'll take it ||START|| ||ADMIN:|| great ! 