# Generating Text with the Chatbot Model

In [1]:
# import libs
import pickle
import warnings
import nltk
import os
import numpy as np
import tensorflow as tf
from tensorflow.python.layers.core import Dense
from distutils.version import LooseVersion

  from ._conv import register_converters as _register_converters


### Retrieve Preprocessed Data and Parameters

In [2]:
# Batch size used at prediction time: predict_replay repeats the single input
# comment batch_size times when it builds the feed dict for the restored graph.
# NOTE(review): presumably this must equal the batch size the saved model was
# built with — confirm against the training notebook.
batch_size = 256

In [3]:
def load_preprocess(path='models/preprocess.p'):
    """
    Load the preprocessed training data and return it.

    Args:
        path: Location of the pickle file to read. Defaults to
            'models/preprocess.p', the file this notebook has always used.

    Returns:
        The object stored in the pickle file, exactly as it was saved.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were produced by this project's own preprocessing step.
    with open(path, mode='rb') as in_file:
        return pickle.load(in_file)

In [4]:
# Unpack the three pairs returned by load_preprocess():
#   1. the source and target texts (integer-encoded, judging by the names),
#   2. the source vocabulary maps (word -> id and id -> word),
#   3. the target vocabulary maps (word -> id and id -> word).
((source_int_text, target_int_text),
(source_vocab_to_int, source_int_to_vocab),
(target_vocab_to_int, target_int_to_vocab)) = load_preprocess()

In [5]:
# Number of entries in the source (comment) id -> word map.
print('Vocabulary size of comments:', len(source_int_to_vocab))

Vocabulary size of comments: 13949


In [6]:
# Number of entries in the target (reply) word -> id map.
# NOTE(review): the comments cell measures the id -> word map instead; the two
# counts are only comparable if both maps of a vocabulary have the same size.
print('Vocabulary size of replays:', len(target_vocab_to_int))

Vocabulary size of replays: 13456


In [7]:
def load_params(path='models/params.p'):
    """
    Load the saved parameters from a pickle file.

    Args:
        path: Location of the pickle file to read. Defaults to
            'models/params.p', the file this notebook has always used.

    Returns:
        The object stored in the pickle file. This notebook assigns it to
        load_path and uses it as the checkpoint path when restoring the model.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were written by this project's own training step.
    with open(path, mode='rb') as in_file:
        return pickle.load(in_file)

# Checkpoint path of the trained model: predict_replay passes load_path + '.meta'
# to tf.train.import_meta_graph and load_path itself to the saver's restore().
load_path = load_params()

### Sentence to Sequence
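To feed a sentence into the chatbot model, I first need to preprocess it: lowercase it, split it into tokens, and map each token to its vocabulary id (words that are not in the vocabulary get the id of `<UNK>`).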

In [10]:
def sentence_to_seq(sentence, vocab_to_int):
    ''' Function to turn a raw sentence into a list of vocabulary ids
            *args:
                sentence: raw string; it is lower-cased and split into tokens
                          with nltk.word_tokenize
                vocab_to_int: Dictionary mapping each known word to its id
            *return:
                List with one id per token; a token missing from vocab_to_int
                gets the id stored under '<UNK>' (None if there is no such key)
    '''
    tokens = nltk.word_tokenize(sentence.lower())
    # Resolve the fallback id once instead of once per token.
    unknown_id = vocab_to_int.get('<UNK>')

    word_ids = []
    for token in tokens:
        word_ids.append(vocab_to_int.get(token, unknown_id))
    return word_ids

## Start Predicting

In [12]:


def predict_replay(comment_sentence):
    ''' Function to print the chatbot's reply to a single comment
            *args:
                comment_sentence: raw comment string
            *return:
                None; the comment tokens and the predicted reply are printed

        The checkpoint at load_path is restored into a fresh graph on every call.
    '''
    comment_ids = sentence_to_seq(comment_sentence, source_vocab_to_int)
    comment_len = len(comment_ids)

    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        # Rebuild the saved graph from its .meta file, then restore the weights.
        saver = tf.train.import_meta_graph(load_path + '.meta')
        saver.restore(sess, load_path)

        # Fetch the tensors needed for inference by their saved names.
        fetch_tensor = loaded_graph.get_tensor_by_name
        input_data = fetch_tensor('input:0')
        predictions = fetch_tensor('predictions:0')
        target_sequence_length = fetch_tensor('target_sequence_length:0')
        source_sequence_length = fetch_tensor('source_sequence_length:0')
        keep_prob = fetch_tensor('keep_prob:0')

        # The same comment is repeated batch_size times; the target length fed
        # to the graph is twice the comment length, and keep_prob is 1.0.
        feed = {
            input_data: [comment_ids] * batch_size,
            target_sequence_length: [comment_len * 2] * batch_size,
            source_sequence_length: [comment_len] * batch_size,
            keep_prob: 1.0,
        }
        # Every row of the batch holds the same comment, so keep only row 0.
        reply_ids = sess.run(predictions, feed)[0]

    comment_words = [source_int_to_vocab[word_id] for word_id in comment_ids]
    print('  Comment Words: {}'.format(comment_words))
    reply_words = [target_int_to_vocab[word_id] for word_id in reply_ids]
    print('  replay Words: {}'.format(" ".join(reply_words)))
    print('\n')

### Predict on the test set

In [13]:
# Files holding the raw source and target text that load_data reads.
# NOTE(review): source_path points at the *training* file while target_path
# points at the test file, and this section is about the test set — confirm
# whether 'data/test.from' was intended.
source_path = 'data/train.from'
target_path = 'data/test.to'

In [14]:
# read file data
def load_data(path):
    ''' Function to read training and testing files
            *args:
                path: file path as string
            *return:
                data: raw string text (the whole file, decoded as UTF-8)
            *raises:
                OSError: if the file cannot be opened
                UnicodeDecodeError: if the file is not valid UTF-8
    '''
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()

In [15]:
# Load the raw source (comment) and target (reply) texts from their own files.
source_test = load_data(source_path)
target_test = load_data(target_path)

In [20]:
# Run the chatbot on five lines of the loaded source text: list indices 100-104
# after splitting on newlines.
for sentance in source_test.split("\n")[100:105]:
    # predict_replay prints the tokenized comment and the predicted reply itself
    predict_replay(sentance)