In [12]:
import tensorflow as tf
import numpy as np

#set hyperparameters
max_len = 40
step = 2
num_units = 128
learning_rate = 0.001
batch_size = 200
epoch = 30
temperature = 0.5

In [13]:
def read_data(file_name):
    '''
     open and read text file
    '''
    text = open(file_name, 'r').read()
    return text.lower()

In [14]:
def featurize(text):
    '''
     featurize the text to train and target dataset
    '''
    unique_chars = list(set(text))
    len_unique_chars = len(unique_chars)

    input_chars = []
    output_char = []

    for i in range(0, len(text) - max_len, step):
        input_chars.append(text[i:i+max_len])
        output_char.append(text[i+max_len])

    train_data = np.zeros((len(input_chars), max_len, len_unique_chars))
    target_data = np.zeros((len(input_chars), len_unique_chars))

    for i , each in enumerate(input_chars):
        for j, char in enumerate(each):
            train_data[i, j, unique_chars.index(char)] = 1
        target_data[i, unique_chars.index(output_char[i])] = 1
    return train_data, target_data, unique_chars, len_unique_chars


In [15]:
def rnn(x, weight, bias, len_unique_chars):
    '''
     define rnn cell and prediction
    '''
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, len_unique_chars])
    x = tf.split(x, max_len, 0)

    cell = tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)
    outputs, states = tf.contrib.rnn.static_rnn(cell, x, dtype=tf.float32)
    prediction = tf.matmul(outputs[-1], weight) + bias
    return prediction

In [16]:
def sample(predicted):
    '''
     helper function to sample an index from a probability array
    '''
    exp_predicted = np.exp(predicted/temperature)
    predicted = exp_predicted / np.sum(exp_predicted)
    probabilities = np.random.multinomial(1, predicted, 1)
    return probabilities


In [17]:
def run(train_data, target_data, unique_chars, len_unique_chars):
    '''
     main run function
    '''
    x = tf.placeholder("float", [None, max_len, len_unique_chars])
    y = tf.placeholder("float", [None, len_unique_chars])
    weight = tf.Variable(tf.random_normal([num_units, len_unique_chars]))
    bias = tf.Variable(tf.random_normal([len_unique_chars]))

    prediction = rnn(x, weight, bias, len_unique_chars)
    softmax = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)
    cost = tf.reduce_mean(softmax)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()    
    sess = tf.Session()
    sess.run(init_op)

    num_batches = int(len(train_data)/batch_size)

    for i in range(epoch):
        print "----------- Epoch {0}/{1} -----------".format(i+1, epoch)
        count = 0
        for _ in range(num_batches):
            train_batch, target_batch = train_data[count:count+batch_size], target_data[count:count+batch_size]
            count += batch_size
            sess.run([optimizer] ,feed_dict={x:train_batch, y:target_batch})
        #get on of training set as seed
        seed = train_batch[:1:]
#         save_path = saver.save(sess, "./tmp/rnn_model" + str(i) + ".ckpt")
        save_path = saver.save(sess, "./tmp/p_and_p/rnn_model" + str(i) + ".ckpt")

        #to print the seed 40 characters
        seed_chars = ''
        for each in seed[0]:
                seed_chars += unique_chars[np.where(each == max(each))[0][0]]
        print "Seed:", seed_chars

        #predict next 1000 characters
        for i in range(1000):
            if i > 0:
                remove_fist_char = seed[:,1:,:]
                seed = np.append(remove_fist_char, np.reshape(probabilities, [1, 1, len_unique_chars]), axis=1)
            predicted = sess.run([prediction], feed_dict = {x:seed})
            predicted = np.asarray(predicted[0]).astype('float64')[0]
            probabilities = sample(predicted)
            predicted_chars = unique_chars[np.argmax(probabilities)]
            seed_chars += predicted_chars
        print 'Result:', seed_chars
    sess.close()


In [18]:
def convert_to_seed(text, unique_chars):
    len_uniques = len(unique_chars)
    letters = []
    for i in text:
        letter = np.zeros((1, len_uniques))
        for j in range(len(unique_chars)):
            if i == unique_chars[j]:
                letter[0, j] = 1.
        letters.append(letter)
    return np.array([np.vstack(letters)])
    

In [19]:
def restore_model():
    text = read_data('p_and_p.txt')
#     text = read_data('shakespeare.txt')
    train_data, target_data, unique_chars, len_unique_chars = featurize(text)
    x = tf.placeholder("float", [None, max_len, len_unique_chars])
    y = tf.placeholder("float", [None, len_unique_chars])
    weight = tf.Variable(tf.random_normal([num_units, len_unique_chars]))
    bias = tf.Variable(tf.random_normal([len_unique_chars]))

    prediction = rnn(x, weight, bias, len_unique_chars)
    softmax = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)
    cost = tf.reduce_mean(softmax)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()    
    sess = tf.Session()
    sess.run(init_op)
    saver.restore(sess, "./tmp/rnn_model1.ckpt")
    
    return unique_chars, prediction, sess, x, len_unique_chars

In [20]:
def predict(text, unique_chars, prediction, sess, x, len_unique_chars):
    
    text_head = ''
    pad_head = 0
    if len(text) > 40:
        text_head = text[:-40]        
        text = text[-40:]
    elif len(text) < 40:
        pad_head  = 40 - len(text)
        text = ' ' * pad_head + text
        
    seed = convert_to_seed(text, unique_chars)
    #to print the seed 40 characters
    seed_chars = ''
    for each in seed[0]:
            seed_chars += unique_chars[np.where(each == max(each))[0][0]]
    print "Seed:", seed_chars

    
    #predict next 1000 characters
    for i in range(50):
        if i > 0:
            remove_fist_char = seed[:,1:,:]
            seed = np.append(remove_fist_char, 
                             np.reshape(probabilities, [1, 1, len_unique_chars]), axis=1)
        predicted = sess.run([prediction], feed_dict = {x:seed})
        predicted = np.asarray(predicted[0]).astype('float64')[0]
        probabilities = sample(predicted)
        predicted_chars = unique_chars[np.argmax(probabilities)]
        seed_chars += predicted_chars
        if predicted_chars == '.':
            return (text_head + seed_chars)[pad_head:]

    

In [21]:
# unique_chars, predictions, sess, x, len_unique_chars = restore_model()
# predict('out fane with you ', unique_chars, predictions, sess, x, len_unique_chars)

In [22]:
# text = read_data('shakespeare.txt')
# train_data, target_data, unique_chars, len_unique_chars = featurize(text)
# run(train_data, target_data, unique_chars, len_unique_chars)

In [23]:
# text = read_data('shakespeare.txt')
# train_data, target_data, unique_chars, len_unique_chars = featurize(text)
# train_batch, target_batch = train_data[1:5], target_data[1:5]
# seed = train_batch[:1:]
# print seed

In [24]:
# np.zeros((1, 5))

In [25]:
# def convert_to_seed(text, unique_chars):
#     len_uniques = len(unique_chars)
#     letters = []
#     for i in text:
#         letter = np.zeros((1, len_uniques))
#         for j in range(len(unique_chars)):
#             if i == unique_chars[j]:
#                 letter[0, j] = 1.
#         letters.append(letter)
#     return np.array([np.vstack(letters)])
    

In [26]:
# convert_to_seed('hello', unique_chars).shape

In [27]:
text = read_data('p_and_p.txt')
train_data, target_data, unique_chars, len_unique_chars = featurize(text)
run(train_data, target_data, unique_chars, len_unique_chars)


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

----------- Epoch 1/30 -----------
Seed:  a little further resistance on the part
Result:  a little further resistance on the partinel that bot har suster and wich of her bet in the wish as the her ting in the perting the
se on the pronted with the the prestered for the wener her enor sien the macthing the sersteren the the suth the the the frectons of her peling, ho her of her apppesse to the nors will to suas will in wellige sher were to siss preassine, and net her peressed his ot in the rever her wiss mish of her assint was sis the wast in every assen to her cantonten of her to then of her at and her son wish pitton an the fer. she shat was of sering the resiented the frome sent of to ghe prise to the her as the percementing the ser and what her was not re ther in the the was the pessed her seany 

Seed:  a little further resistance on the part
Result:  a little further resistance on the parter of her attention which had never the propes of the sister, of the lives with a miss bennet had she supposting the stantions. she is on the with the connection of her talking of her father that the fame of her that her two free him the perfect of her speak and frever than she hoped the proper of the last in the coust marriage her favour to any of the family before the parteness to leave to her lively perfectly and to mary sent, and merely by the propes of their will spoke of the provose of the propering to merety which her way. i do your really not on with the prositions, and any accept and such a proper of his acquaintance of i can of her family in her family was very of her prevent the place of the place really for her anxious to her sister resolute and some of the propose she convinced to be the supposing where to her refeither all the recopes. i shad ter miss bennet, with a more a visit

Seed:  a little further resistance on the part
Result:  a little further resistance on the particular and her mother is not love really the most alraid
of the actions. the expression of his instance of a was affection for the place to the family as each other with miss darcy's thought as her that for you will be expected her engagement her the being the occasions, the party as to receive and the assurance of her with repented in the conversation of the misfortuness of her sisters in the particular in her was never taken where she was
expressing her the entaring to sisters as the party of her father that her she could not by the particulars as the particular promose, and a was affected to me, and her favourite the place the earliest and
was affected. mrs. bennet was the particulars of the present to the librer was merely the more of many with him in to the destrance of her all marry and aunt, and the more said the was with the subject. it will not be exactly entreation and all the wish


Seed:  a little further resistance on the part
Result:  a little further resistance on the particular of her lately and deren a very little proper misery, and what darcy make him, and did you will addrace the confusions man as they were to make her the particular to her mother,
what do you like the present sincere with the confusion, i shall general relations, and not the man who was merely prevented on the first part of the ladies was of their own dear aunt, and probable to all the conversation as her confusion, and she said the proper place of the letter as the vight, that her sister with the possible of the beging of the subject of her life to instant and any person who ever spoke her man any of the presence was sitting the two should be in the name of her friend her sister than perhaps in a great deal mistaken a the other manner at least than the part of the proper more any self-inther in the wild in the places to the contempt or my power to make her friend had been really a genera