In [11]:
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import rnn
import os
import time
import math
import numpy as np
import my_txtutils as txt
import sys

tf.set_random_seed(0)
tf.reset_default_graph()

SEQLEN = 30
BATCHSIZE = 20
ALPHABETSIZE = txt.ALPHASIZE # 98 characters in vocab being used
INTERNALSIZE = 128
NLAYERS = 1

keep_prob = 0.8



In [24]:
# Load Shakespeare Data 

shakedir = "shakespeare/*.txt"

codetext, valitext, bookranges = txt.read_data_files(shakedir, validation=True)

# My_txtuils.py

    # uses glob for pathnames
    # convert_from_alphabet() - convert to ASCII values
    

Loading file shakespeare/2kinghenryvi.txt
Loading file shakespeare/asyoulikeit.txt
Loading file shakespeare/hamlet.txt
Loading file shakespeare/kingjohn.txt
Loading file shakespeare/loverscomplaint.txt
Loading file shakespeare/merchantofvenice.txt
Loading file shakespeare/othello.txt
Loading file shakespeare/sonnets.txt
Loading file shakespeare/titusandronicus.txt
Loading file shakespeare/various.txt


In [27]:
# Some statistics

epoch_size = len(codetext) // BATCHSIZE * SEQLEN

txt.print_data_stats(len(codetext), len(valitext), epoch_size)

Training text size is 0.93MB with 139.61KB set aside for validation. There will be 1458420 batches per epoch


In [None]:
# Placeholders

    # URL: https://learningtensorflow.com/lesson4/
    
    """Placeholder: a variable that we will assign data to at a later date. 
    It allows us to create our operations and build our computation
    graph, without needing the data. 
    
    In TensorFlow terminology, we then feed data into the graph
    through these placeholders.""""

keep_prob_placeholder(tf.float32, name = 'keep_prob') # dropout param
batchsoze = tf.placeholder(tf.int32, name = 'batchsize')

# inputs 

X = tf.placeholder(tf.uint8, [None, None], name = 'X')
Xo = tf.one_hot(X, ALPHABETSIZE, 1.0, 0.0)

# expected outputs = same sequence shifted by 1 since we are trying
# to predict next character

Y_ = tf.placeholder(tf.uint8, [None, None], name = 'Y_')
Yo_ = tf.one_hot(Y_, ALPHABETSIZE, 1.0, 0.0)

# Input state

Hin = tf.placeholder(tf.float32, [None, INTERNALSIZE*NLAYERS], name = 'Hin')

In [None]:
# using NLAYERS of GRU cells, unrolled SEQLEN=30 times
# dynamic_rnn infers SEQLEN from the size of the inputs Xo

cells = [rnn.GRUCell(INTERNALSIZE) for _ in range(NLAYERS)]

# "naive dropout" implementaiton
dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=keep_prob_placeholder) for cell in cells]

multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)

#dropout for the softmax layer

multicell = rnn.DropoutWrapper(multicell, output_keep_prob = keep_prob_placeholder)

Yr, H = tf.nn.dynamic_rnn(cell, Xo, dype=tf.float32, initial_state = Hin)



In [None]:
# Softma layer implementation
# Flatten the first two dimension of the output
    # [BATCHSIZE, SEQLEN, ALPHABETSIZE] => [ BATCHSIZE*SEQLEN, ALPHASIZE]
# then apply softmax laye.
    # Weights and biases are shared across unrolled time steps
    
W = tf.Variable(tf.random_normal([INTERNALSIZE, ALPHABETSIZE]))
B = tf.Variable(tf.random_normal([ALPHABETSIZE]))

Yflat = tf.reshape(Yr, [-1, INTERNALSIZE]) # [BATCHSIZE*SEQLEN, INTERNALSIZE]
Ylogits = tf.matmul(Yflat, W) + B # [BATCHSIZE*SEQLEN, ALPHASIZE]

Yflat = tf.reshape(Yo_, [-1, ALPHABETSIZE])

loss = tf.nn.softmax_cross_entropy_with_logits(logits = Ylogits, labels = Yflat)
loss = tf.reshape(loss, [BATCHSIZE, -1])

Yo = tf.nn.softmax(Ylogits, name = 'Yo')
Y = tf.argmax(Yo, 1)
Y = tf.reshape(Y, [BATCHSIZE, -1], name = "Y")

train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

In [None]:
# Stats for display

seqloss = tf.reduce_mean(loss,1)
batchloss = tf.reduce_mean(seqloss)
accuracy = tf.reduce_mean(tf.cast(tf.equal(Y_, tf.cast(Y, tf.uint8)), tf.float32))

# Proress bar

DISPLAY_FREQ = 50
_50_BATCHES = DISPLAY_FREQ * BATCHSIZE * SEQLEN

progress = text.Progress(DISPLAY_FREQ, size = 111+2,
                         msg = "Training on next" + str(DISPLAY_FREQ) +"batches")

In [None]:
# init

istate = np.zeroes([BATCHSIZE, INTERNALSIZE * NLAYERS])

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
step = 0

