In [28]:
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import rnn
import os
import time
import math
import numpy as np
import my_txtutils as txt
import sys

tf.set_random_seed(0)
tf.reset_default_graph()

SEQLEN = 30
BATCHSIZE = 20
ALPHABETSIZE = txt.ALPHASIZE # 98 characters in vocab being used
INTERNALSIZE = 128
NLAYERS = 1

keep_prob = 0.8



In [29]:
# Load Shakespeare Data 

shakedir = "shakespeare/*.txt"

codetext, valitext, bookranges = txt.read_data_files(shakedir, validation=True)

# My_txtuils.py

    # uses glob for pathnames
    # convert_from_alphabet() - convert to ASCII values
    

Loading file shakespeare/2kinghenryvi.txt
Loading file shakespeare/asyoulikeit.txt
Loading file shakespeare/hamlet.txt
Loading file shakespeare/kingjohn.txt
Loading file shakespeare/loverscomplaint.txt
Loading file shakespeare/merchantofvenice.txt
Loading file shakespeare/othello.txt
Loading file shakespeare/sonnets.txt
Loading file shakespeare/titusandronicus.txt
Loading file shakespeare/various.txt


In [30]:
# Some statistics

epoch_size = len(codetext) // BATCHSIZE * SEQLEN

txt.print_data_stats(len(codetext), len(valitext), epoch_size)

Training text size is 0.93MB with 139.61KB set aside for validation. There will be 1458420 batches per epoch


In [45]:
# Placeholders

    # URL: https://learningtensorflow.com/lesson4/
    
"""Placeholder: a variable that we will assign data to at a later date. 
It allows us to create our operations and build our computation
graph, without needing the data. 

In TensorFlow terminology, we then feed data into the graph
through these placeholders."""

# Define placeholders 

keep_prob_placeholder = tf.placeholder(tf.float32, name = 'keep_prob') # dropout param
batchsize = tf.placeholder(tf.int32, name = 'batchsize')

# inputs 

X = tf.placeholder(tf.uint8, [None, None], name = 'X')
Xo = tf.one_hot(X, ALPHABETSIZE, 1.0, 0.0)

# expected outputs = same sequence shifted by 1 since we are trying
# to predict next character

Y_ = tf.placeholder(tf.uint8, [None, None], name = 'Y_')
Yo_ = tf.one_hot(Y_, ALPHABETSIZE, 1.0, 0.0)

# Input state

Hin = tf.placeholder(tf.float32, [None, INTERNALSIZE*NLAYERS], name = 'Hin')

In [58]:
# using NLAYERS of GRU cells, unrolled SEQLEN=30 times
# dynamic_rnn infers SEQLEN from the size of the inputs Xo

cells = [rnn.GRUCell(INTERNALSIZE) for _ in range(NLAYERS)]

# "naive dropout" implementaiton
dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=keep_prob_placeholder) for cell in cells]

multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False) 

multicell = rnn.DropoutWrapper(multicell,
                               output_keep_prob = keep_prob_placeholder)
                                #dropout for the softmax layer

Yr, H = tf.nn.dynamic_rnn(multicell, Xo, dtype=tf.float32, initial_state = Hin)



In [59]:
# Softmax layer implementation
# Flatten the first two dimension of the output
    # [BATCHSIZE, SEQLEN, ALPHABETSIZE] => [ BATCHSIZE*SEQLEN, ALPHASIZE]
# then apply softmax laye.
    # Weights and biases are shared across unrolled time steps
    
W = tf.Variable(tf.random_normal([INTERNALSIZE, ALPHABETSIZE]))
B = tf.Variable(tf.random_normal([ALPHABETSIZE]))

Yflat = tf.reshape(Yr, [-1, INTERNALSIZE]) # [BATCHSIZE*SEQLEN, INTERNALSIZE]
Ylogits = tf.matmul(Yflat, W) + B # [BATCHSIZE*SEQLEN, ALPHASIZE]

Yflat = tf.reshape(Yo_, [-1, ALPHABETSIZE])

loss = tf.nn.softmax_cross_entropy_with_logits(logits = Ylogits, labels = Yflat)
loss = tf.reshape(loss, [batchsize, -1])

Yo = tf.nn.softmax(Ylogits, name = 'Yo')
Y = tf.argmax(Yo, 1)
Y = tf.reshape(Y, [batchsize, -1], name = "Y")

train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

In [60]:
# Stats for display

seqloss = tf.reduce_mean(loss,1)
batchloss = tf.reduce_mean(seqloss)
accuracy = tf.reduce_mean(tf.cast(tf.equal(Y_, tf.cast(Y, tf.uint8)), tf.float32))

# Proress bar

DISPLAY_FREQ = 50
_50_BATCHES = DISPLAY_FREQ * BATCHSIZE * SEQLEN

progress = txt.Progress(DISPLAY_FREQ, size = 111+2,
                         msg = "Training on next" + str(DISPLAY_FREQ) +"batches")

In [61]:
# init

istate = np.zeros([BATCHSIZE, INTERNALSIZE * NLAYERS])

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
step = 0



In [64]:
# training loop

for x, y_, epoch in txt.rnn_minibatch_sequencer(codetext, BATCHSIZE,
                                               SEQLEN, nb_epochs =2):
    
    # train on one minibatch
    
    feed_dict = {X: x, Y_: y_, Hin: istate, keep_prob_placeholder: keep_prob, batchsize: BATCHSIZE}

    _, y, ostate = sess.run([train_step, Y, H], feed_dict=feed_dict)
    
    # display a short text generated with current weights and biases
    
    if step // 3 % _50_BATCHES == 0:
        print
        print "Generating some random text..."
        
        ry = np.array([[txt.convert_from_alphabet(ord("K"))]])
        rh = np.zeros([1, INTERNALSIZE*NLAYERS])
        
        for k in range(1000):
            ryo, rh = sess.run([Yo, H], feed_dict = {X: ry,
                                                     keep_prob_placeholder: 1.0,
                                                    Hin: rh, batchsize: 1})
            
            rc = txt.sample_from_probabilities(ryo, topn=10 if epoch <= 1 else 2)
            
            sys.stdout.write(chr(txt.convert_to_alphabet(rc)))
            
            ry = np.array([[rc]])
            
        print
        print "FINISHED GENERATING RANDOM TEXT"
        print
        
    # display progress bar
    progress.step(reset = step % _50_BATCHES == 0)
    
    # loop state around
    istate = ostate
    step += BATCHSIZE * SEQLEN
    
print 
print "----FINISHED TRAINING"
            
            

0%                                         Training on next50batches                                         100%
0%                                         Training on next50batches                                         100%
Generating some random text...
osT oo mirenande higleaver morserstlen, ho titsit hor oourele sheare thy oue wang owaldalinors.

THeer this thesellatr thout our me an tlis wnd hithr ool ther how day howsthangordie ted mars, myorr to th r mesime aysuray.

OAnI mind sheslere, thear old the brathes. hie show, thet woure hive at he tlisoungt aser ase, ingear, aner the dies and sa litr hom on tured
	Wous ord aritirit.

AUSSIIA	O	Art som me heallsa and hand some ie seda andiglligin teere tingher womestis, fath wishy sourled if merart, ar tly sot of an ad ane at onaso ros.
	Lart at tht bay sallited ain ter chavellomar aderow art, my ar is, hourede hir at ime and ang t he s airle tur teee mon eryourdooull sampe lerd wary or sowhthe deighre or ie. he weratlig dad shathiss

	That shied stotles ne wall brinestligen
	Wo lee of henomathance mpradies fes than tot ast hene os a n t mat there, spore hal byee trase the s ar is totluthie s mand head, hremsen ling or ane and no thel sot ligherin, bess,nothoud dus sa t ardind nowr h nge in  ast aut ar moresith werd. on the melt, and lors heses ine,
	Bart sar tee, Itr
FINISHED GENERATING RANDOM TEXT

()
0%                                         Training on next50batches                                         100%
0%                                         Training on next50batches                                         100%
0%                                         Training on next50batches                                         100%
Generating some random text...
 monsther terattre te thor wothe bong,

	Sour thown d wher of ithen ins on, thaus.

SERLENCO	Be me me dy. I my thongistinot thist saved sadiord.

DES AERIN	Th my an you mesco dies, be tord, worte wriss, fuld hor wos  heamedard and ollithice is butt, i

SATENO	A a thim sary swis thant here, not me hir duce fieen sedran alice;
	Tie tout a lord anteso ardellie,
	Tis de liede felo tre sere, way and thin the cand,
	Houglle beld mon io seiven and, ave lie hat areas ias froce siee.
	CERe, not have and,
	Thay so m shall wiled on this hal stice selow,s noth oft of hise humerd on the fotrd ingsed
	Hes an ouedabl stiof the parsoulf to maie,
	Sonder have net bet, weth, bers s and in thromed mens howathot do shis ain hang das in will, an  o lert herd nom an savill nit henold no se in tot sting hut,
	And hat the cormale timy,
	A wir to than brigo hin whur, surencincedily ath inteant
	Foredomeron wir tea mone have tielinst,
	Aad, wat thteed, it than m thers heven ous, must bllich.

SEMNARDIANO CETIUINA Tim  IANG beat heas, thy chuse, ind, will and of liventer isiofe is ane,
	Moding a thas onese mink, the dee sofess ario,
	Ather her chonear,
	Mash of sion an what my hravengen oughis,
	As dred thee istit haveng wis and herd
FINISHED GENERATING RANDOM

0%                                         Training on next50batches                                         100%
0%                                         Training on next50batches                                         100%
Generating some random text...
No Grow;
For ar woll;
When e to dive mance ofe here and thear hen, and to hee.

	I GORIA	Tue mestad.


AMONS	And show may you with?

ACLIA	I wildong a dong ty sund
	And my lire, stand ber ar he to blest
	And it, wheminter ir io thue thes fourd atam.

	[Rexet to sa maks
HeMaris thoth the mongot with mures

And spil to s are shat ie ffilen

	Thay, word there this this aneenofe tore.

DODE	Or tis thos ffietones there have that fir thear bre frithay
	Aldens thy wirhtree.

HUMLET	To hene thee bet tead tee.


CETSA	The kistsemole ba sanot:
	In mome biss then hath reath



LONCRA	To tont her me,
	Andy mather wishounse as wase stotheroune.

	[Exit Ex]








	An                                   lir a  hen mine,

	A ngre the aspir oo herr

	A dund is soo de bid st do by wither wooks astit,
	Sperees st an mor is in amys my fare.

B[Sant]

	Whel the tortiong, is fort tinsed, I merstreee.

COSIA	And the hordsen,
	Iants all be owes ar ir sole sam.

LART	An    am hame bed dest wass is boulds
	Whrchionit im of the sord
	for mith do tiere and frt hil ds frre whel

	And be the pesiou madenthen

IA ther cenouly mo to beereat aits in hathess
	And if my lam salf and the lrenck
	The proods in m
FINISHED GENERATING RANDOM TEXT

()
0%                                         Training on next50batches                                         100%
0%                                         Training on next50batches                                         100%
0%                                         Training on next50batches                                         100%
Generating some random text...
oins, to to night
	Ad streine fay sase solat,
	Thirkither soors muse thould.

ORADONIPO	When the mort hill the fortother brealiens hir the 