In [None]:
import os
import tensorflow as tf
import numpy as np
import collections

# Turn eager execution off: the cells below build the model with
# tf.compat.v1 placeholders and evaluate it through a tf.compat.v1 session.
tf.compat.v1.disable_eager_execution()

In [None]:
# Folder containing the corpus, and the zero-padded story file names
# 001.txt ... 100.txt expected inside it.
dir_name = 'stories'
filenames = ['{:03d}.txt'.format(story_no) for story_no in range(1, 101)]

def read_data(filename):
    """Read a text file and return its lower-cased content as a list of characters.

    :param filename: path of the file to read (opened in text mode with the
        platform's default encoding)
    :return: list of one-character strings, in file order
    """
    with open(filename) as handle:
        text = tf.compat.as_str(handle.read())
    return list(text.lower())

# Load every story and tokenise it into non-overlapping two-character
# strings (bigrams); `documents` ends up holding one bigram list per file.
documents = []
num_files = 100
for i, story_file in enumerate(filenames[:num_files]):
    story_path = os.path.join(dir_name, story_file)
    print("processing file %s" % story_path)
    chars = read_data(story_path)

    # Walk the characters two at a time and glue each pair into one token.
    # NOTE(review): with the stop value len(chars)-2 the final pair of an
    # even-length text is never emitted. Left unchanged because altering it
    # changes the token counts (and possibly the vocabulary) - confirm
    # against whatever data the saved model was trained on before touching.
    two_grams = [''.join(chars[start:start + 2])
                 for start in range(0, len(chars) - 2, 2)]
    documents.append(two_grams)
    print("Data size (chars) (document %d) %d" % (i, len(two_grams)))
    print("Sample string %s\n" % (two_grams[:50]))

In [None]:
def build_dataset(documents, min_count=10):
    """Encode tokenised documents as integer ids over a frequency-filtered vocabulary.

    Tokens that occur strictly more than ``min_count`` times across all
    documents receive the ids 1, 2, ... in descending order of frequency.
    Every other token is encoded as id 0, which is reserved for 'UNK'.

    :param documents: sequence of token lists, one list per document (it is
        traversed twice, so it must not be a one-shot iterator)
    :param min_count: frequency threshold; a token needs more than this many
        occurrences to get its own id (default 10)
    :return: tuple ``(data_list, count, dictionary, reverse_dictionary)``:
        ``data_list`` is one list of ids per document, ``count`` is the list
        of ``(token, frequency)`` pairs sorted by descending frequency (all
        tokens, rare ones included), ``dictionary`` maps token -> id and
        ``reverse_dictionary`` maps id -> token
    """
    chars = [token for doc in documents for token in doc]
    print('%d character found.' % len(chars))

    # (token, frequency) pairs, most frequent first
    count = collections.Counter(chars).most_common()

    # Ids are handed out in frequency order; id 0 stays reserved for 'UNK'.
    dictionary = {'UNK': 0}
    for token, freq in count:
        # A corpus token that happens to be spelled 'UNK' must not overwrite
        # the reserved entry, otherwise id 0 would disappear from the mapping.
        if token != 'UNK' and freq > min_count:
            dictionary[token] = len(dictionary)

    # Replace each token by its id, falling back to the 'UNK' id.
    unk_id = dictionary['UNK']
    data_list = [[dictionary.get(token, unk_id) for token in doc]
                 for doc in documents]

    reverse_dictionary = {idx: token for token, idx in dictionary.items()}
    return data_list, count, dictionary, reverse_dictionary

# Encode the corpus as ids and obtain the token <-> id lookup tables.
data_list, count, dictionary, reverse_dictionary = build_dataset(documents)

# Short summary of the frequency table and the first two encoded documents.
print('Most common words (+UNK)', count[:5])
print('Least common words (+UNK)', count[-15:])
print('Sample data', data_list[0][:10])
print('Sample data', data_list[1][:10])

# The vocabulary size fixes the width of every one-hot vector used below.
vocabulary_size = len(dictionary)
print('Vocabulary: ', vocabulary_size)

# The raw bigram strings are not used again; drop them to free memory.
del documents

In [None]:
# Width of the LSTM hidden state and cell state.
num_nodes = 128

# Leading dimension of the training placeholders and saved training state.
batch_size = 64

# Number of time steps unrolled for truncated BPTT.
num_unrolling = 50

# Dropout rate (regularisation); not referenced by any cell visible here.
dropout = 0.0

In [None]:

# Training data: one (input, label) placeholder pair per unrolled time step,
# each a batch of vocabulary-sized rows.
train_inputs, train_labels = [], []
for ui in range(num_unrolling):
    step_input = tf.compat.v1.placeholder(
        tf.float32,
        shape=(batch_size, vocabulary_size),
        name='train_inputs_%d' % ui)
    step_label = tf.compat.v1.placeholder(
        tf.float32,
        shape=(batch_size, vocabulary_size),
        name='train_label_%d' % ui)
    train_inputs.append(step_input)
    train_labels.append(step_label)

# Validation data: a single row at a time.
valid_inputs = tf.compat.v1.placeholder(
    tf.float32, shape=(1, vocabulary_size), name='valid_inputs')
valid_labels = tf.compat.v1.placeholder(
    tf.float32, shape=(1, vocabulary_size), name='valid_labels')

# Test (generation) data: a single row at a time, no label.
test_input = tf.compat.v1.placeholder(
    tf.float32, shape=(1, vocabulary_size), name='test_input')


# Model parameters and persistent LSTM state.
#
# All initialisers go through tf.random.* (the tf.compat.v1.truncated_normal /
# tf.compat.v1.random_uniform spellings used previously for some of them are
# aliases of the same functions).
#
# NOTE(review): the weight variables carry no explicit name, so presumably the
# checkpoint restored later identifies them by auto-generated names that
# depend on creation order - keep the order of these statements unchanged.

# Input gate - how much of the candidate is written to the cell state
# maps the current input to the input gate
ix = tf.Variable(tf.random.truncated_normal((vocabulary_size, num_nodes), stddev=0.02))
# maps the previous hidden state to the input gate
im = tf.Variable(tf.random.truncated_normal((num_nodes, num_nodes), stddev=0.02))
# input gate bias
ib = tf.Variable(tf.random.uniform((1, num_nodes), minval=-0.02, maxval=0.02))

# Forget gate - how much of the previous cell state is kept
# maps the current input to the forget gate
fx = tf.Variable(tf.random.truncated_normal((vocabulary_size, num_nodes), stddev=0.02))
# maps the previous hidden state to the forget gate
fm = tf.Variable(tf.random.truncated_normal((num_nodes, num_nodes), stddev=0.02))
# forget gate bias
fb = tf.Variable(tf.random.uniform((1, num_nodes), minval=-0.02, maxval=0.02))

# Candidate - the proposed update to the cell state
# maps the current input to the candidate
cx = tf.Variable(tf.random.truncated_normal((vocabulary_size, num_nodes), stddev=0.02))
# maps the previous hidden state to the candidate
cm = tf.Variable(tf.random.truncated_normal((num_nodes, num_nodes), stddev=0.02))
# candidate bias
cb = tf.Variable(tf.random.uniform((1, num_nodes), minval=-0.02, maxval=0.02))

# Output gate - how much of the cell state is exposed as the hidden state
# maps the current input to the output gate
ox = tf.Variable(tf.random.truncated_normal((vocabulary_size, num_nodes), stddev=0.02))
# maps the previous hidden state to the output gate
om = tf.Variable(tf.random.truncated_normal((num_nodes, num_nodes), stddev=0.02))
# output gate bias
ob = tf.Variable(tf.random.uniform((1, num_nodes), minval=-0.02, maxval=0.02))

# Softmax classifier weights and biases (hidden state -> vocabulary scores)
w = tf.Variable(tf.random.truncated_normal((num_nodes, vocabulary_size), stddev=0.02))
b = tf.Variable(tf.random.uniform((vocabulary_size,), minval=-0.02, maxval=0.02))

# Non-trainable state carried across calls during training
# hidden state
saved_output = tf.Variable(tf.zeros((batch_size, num_nodes)), trainable=False, name="train_hidden")
# cell state
saved_state = tf.Variable(tf.zeros((batch_size, num_nodes)), trainable=False, name="train_cell")

# Non-trainable state for validation (single row)
saved_valid_output = tf.Variable(tf.zeros((1, num_nodes)), trainable=False, name='valid_hidden')
saved_valid_state = tf.Variable(tf.zeros((1, num_nodes)), trainable=False, name='valid_cell')

# Non-trainable state for testing / generation (single row)
saved_test_output = tf.Variable(tf.zeros((1, num_nodes)), trainable=False, name="test_hidden")
saved_test_state = tf.Variable(tf.zeros((1, num_nodes)), trainable=False, name='test_cell')


In [None]:
def lstm_cell(i, o, state):
    """Advance the LSTM by one time step.

    Uses the module-level weights (ix/im/ib, fx/fm/fb, cx/cm/cb, ox/om/ob):

        input_gate  = sigmoid(i @ ix + o @ im + ib)
        forget_gate = sigmoid(i @ fx + o @ fm + fb)
        candidate   = tanh(i @ cx + o @ cm + cb)
        new_state   = forget_gate * state + input_gate * candidate
        output_gate = sigmoid(i @ ox + o @ om + ob)
        new_output  = output_gate * tanh(new_state)

    :param i: current input
    :param o: hidden state (output) of the previous step
    :param state: cell state of the previous step
    :return: tuple ``(new_output, new_state)``
    """
    def _preactivation(w_input, w_hidden, bias):
        # Shared affine form: input projection + recurrent projection + bias.
        return tf.matmul(i, w_input) + tf.matmul(o, w_hidden) + bias

    write_gate = tf.sigmoid(_preactivation(ix, im, ib))
    keep_gate = tf.sigmoid(_preactivation(fx, fm, fb))
    proposal = tf.tanh(_preactivation(cx, cm, cb))
    next_state = keep_gate * state + write_gate * proposal
    read_gate = tf.sigmoid(_preactivation(ox, om, ob))
    next_output = read_gate * tf.tanh(next_state)
    return next_output, next_state


In [None]:
# One LSTM step on the test input, starting from the saved test state.
test_output, test_state = lstm_cell(test_input, saved_test_output, saved_test_state)

# Writing the new hidden/cell state back is made a control dependency of the
# softmax, so evaluating `test_prediction` also stores the state for the
# next step.
persist_test_state = [
    saved_test_output.assign(test_output),
    saved_test_state.assign(test_state),
]
with tf.control_dependencies(persist_test_state):
    test_prediction = tf.nn.softmax(tf.matmul(test_output, w) + b)

# Op that overwrites the saved test state with small random normal values.
reset_test_state = tf.group(
    saved_test_output.assign(tf.random.normal((1, num_nodes), stddev=.05)),
    saved_test_state.assign(tf.random.normal((1, num_nodes), stddev=.05)),
)


In [None]:
def sample(distribution, top_k=3):
    """Draw one index at random from the ``top_k`` most probable entries.

    This is top-k sampling, not a purely greedy pick: the ``top_k`` largest
    probabilities are renormalised to sum to one and a single index is drawn
    from them with ``np.random.choice``, so the most probable entry is the
    most likely - but not the guaranteed - result.

    :param distribution: 1-D array-like of probabilities
    :param top_k: number of highest-probability candidates to draw from
        (default 3, the previously hard-coded value)
    :return: the index of the chosen entry
    :raises ValueError: if ``top_k`` is smaller than 1
    """
    if top_k < 1:
        # A slice of [-0:] would silently select every entry instead of none.
        raise ValueError("top_k must be at least 1")

    distribution = np.asarray(distribution)
    # argsort is ascending, so the last top_k positions hold the best indices
    best_indices = np.argsort(distribution)[-top_k:]
    best_probs = distribution[best_indices] / np.sum(distribution[best_indices])
    return np.random.choice(best_indices, p=best_probs)


In [None]:

saver = tf.compat.v1.train.Saver()

segments_to_generate = 1
chars_in_segment = 500
new_session = tf.compat.v1.InteractiveSession()

# Load the variable values from the saved checkpoint into the new session.
saver.restore(new_session, "././my_model/my_saved_variable-20")


def _one_hot(token_id):
    """Return a (1, vocabulary_size) float32 row with a single 1.0 at token_id."""
    row = np.zeros((1, vocabulary_size), dtype=np.float32)
    row[0, token_id] = 1.0
    return row


for _ in range(segments_to_generate):
    print("=" * 24 + "New text" + "=" * 24)

    # Start from a token picked at random from a randomly chosen document.
    rand_doc = data_list[np.random.randint(0, num_files)]
    seed_id = rand_doc[np.random.randint(0, len(rand_doc))]
    test_word = _one_hot(seed_id)
    print(reverse_dictionary[seed_id], end='')

    # Generate token by token, feeding each sampled prediction back in as
    # the next input.
    for _step in range(chars_in_segment):
        sample_pred = new_session.run(test_prediction, feed_dict={test_input: test_word})
        next_ind = sample(sample_pred.ravel())
        test_word = _one_hot(next_ind)
        print(reverse_dictionary[next_ind], end='')

    # Re-randomise the saved test (generation) state before the next segment.
    new_session.run(reset_test_state)
    print("-" * 28)
