In [1]:
import tensorflow as tf
import numpy as np
import time

with open('anna.txt', 'r') as f:
    text=f.read()
    
vocab=sorted(set(text))
vocab2int={j:i for i, j in enumerate(vocab)}
int2vocab=dict(enumerate(vocab))
encoded=np.array([vocab2int[c] for c in text])

In [2]:
def get_batches(arr, batch_size, n_steps):
    
    char_per_batch=batch_size*n_steps
    n_batches=len(arr)//char_per_batch
    arr=arr[:n_batches*char_per_batch] #no. of elements to keep in array to get full batches
    arr=arr.reshape((batch_size, -1)) # reshaping linear array to N X M type
    
    for i in range(0, arr.shape[1], n_steps):
        x=arr[:, i:i+n_steps]
        y_tmp=arr[:, i+1:i+n_steps+1]
        y=np.zeros(x.shape, dtype=x.dtype)
        y[:, :y_tmp.shape[1]]=y_tmp
        yield x, y

batches=get_batches(encoded, 12, 3)
x,y=next(batches)
z,p=next(batches)
type(x)
    

numpy.ndarray

In [3]:
def build_inputs(batch_size, n_steps):
    inputs=tf.placeholder(tf.int32, [batch_size, n_steps])
    targets=tf.placeholder(tf.int32, [batch_size, n_steps])
    keep_prob=tf.placeholder(tf.float32)
    
    return inputs, targets, keep_prob

def build_lstm(lstm_size, n_layers, batch_size, keep_prob):
    
    def build_cell(lstm_size, keep_prob):
        lstm_cells=tf.contrib.rnn.BasicLSTMCell(lstm_size)
        dropOut=tf.contrib.rnn.DropoutWrapper(lstm_cells, output_keep_prob=keep_prob)
        
        return dropOut
    
    #stacks up multiple LSTM cells
    cells=tf.contrib.rnn.MultiRNNCell([build_cell(lstm_size, keep_prob) for _ in range(n_layers)])
    initial_state=cells.zero_state(batch_size, tf.float32)
    
    return cells, initial_state

In [4]:
def build_outputs(lstm_output, in_size, out_size):
    #in_size=size of lstm cells
    #out_size=size of o/p softmax layer
    
    seq_out=tf.concat(lstm_output, axis=1)#concatenate NXMXL (3D) to (MXN)XL(2D)
    x=tf.reshape(seq_out, [-1, in_size])#reshapes seq_out such that 4 each (MXN rows) a column of lstm output
                                         #(in_size) occurs
    
    #connect RNN output to softmax layers
    with tf.variable_scope('softmax'):
        softmax_w=tf.Variable(tf.truncated_normal([in_size, out_size], stddev=0.1))
        softmax_b=tf.Variable(tf.zeros(out_size))
        
    logits=tf.add(tf.matmul(x, softmax_w), softmax_b)
    out= tf.nn.softmax(logits, name='predicctions')
    
    return logits, out
                            

In [5]:
def build_loss(logits, targets, lstm_size, n_classes):
    y_onehot=tf.one_hot(targets, n_classes)
    y_reshaped=tf.reshape(y_onehot, logits.get_shape())
    
    loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped))
    return loss


def build_optm(loss, learn_rate, grad_clip):
    tvars=tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learn_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    return optimizer

def pickTop_n(pred, vocab_size, top_n=5):
    p=np.squeeze(pred)
    p[np.argsort(p)[:-top_n]]=0
    p=p/np.sum(p)
    char=np.random.choice(vocab_size, 1, p=p)[0]
    return char

In [6]:
class CharRNN:
    def __init__(self, num_classes, batch_size=64, num_step=50, lstm_size=128,
                num_layers=2, learning_rate=1e-3, grad_clip=5, sampling=False):
        
        if sampling==True:
            num_step=1
            batch_size=1
        else:
            num_step=num_step
            batch_size=batch_size
            
            
        tf.reset_default_graph()
        
        self.inputs, self.targets, self.keep_prob=build_inputs(batch_size, num_step)
        cell, self.initial_state=build_lstm(lstm_size,num_layers,batch_size, keep_prob)
        
        x_oneHot=tf.one_hot(self.inputs, num_classes)
        
        outputs, state= tf.nn.dynamic_rnn(cell, x_oneHot, initial_state=self.initial_state)
        self.final_state=state
        
        self.logits, self.prediction=build_outputs(outputs, lstm_size, num_classes)
        self.loss=build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optmizer=build_optm(self.loss, learning_rate, grad_clip)
        
   
        

In [7]:
batch_size = 100        # Sequences per batch
num_steps = 100         # Number of sequence steps per batch
lstm_size = 512         # Size of hidden layers in LSTMs
num_layers = 2          # Number of LSTM layers
learning_rate = 0.001   # Learning rate
keep_prob = 0.5         # Dro_enter__pout keep probability
num_classes=len(vocab)

In [8]:
epochs=20
print_every=50
save_every=200
model=CharRNN(num_classes, batch_size, num_steps, lstm_size, num_layers, learning_rate)
saver=tf.train.Saver(max_to_keep=100)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    counter=0
    for epoch in range(epochs):
        new_state=sess.run(model.initial_state)
        batch_loss=0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter+=1
            start=time.time()
            feed={model.inputs:x, 
                  model.targets:y, 
                  model.keep_prob:keep_prob,
                  model.initial_state:new_state}
            
            batch_loss, new_state, _= sess.run([model.loss, 
                                                model.final_state, 
                                                model.optmizer], feed_dict=feed)
            
            
            if (counter%print_every==0):
                end=time.time()
                print('Epoch: {}/{}... '.format(epoch+1, epochs),
                      'Training Step: {}... '.format(counter),
                      'Training loss: {:.4f}... '.format(batch_loss),
                      '{:.4f} sec/batch'.format((end-start)))


            if(counter%save_every==0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))







Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Epoch: 1/20...  Training Step: 50...  Training loss: 3.1757...  0.3157 sec/batch
Epoch: 1/20...  Training Step: 100...  Training loss: 3.1016...  0.3139 sec/batch
Epoch: 1/20...  Training Step: 150...  Training loss: 2.7698...  0.3164 sec/batch
Epoch: 2/20...  Training Step: 200...  Training loss: 2.4531...  0.3134 sec/batch
Epoch: 2/20...  Training Step: 250...  Training loss: 2.3361...  0.3130 sec/batch
Epoch: 2/20...  Training Step: 300...  Training loss: 2.2126...  0.3119 sec/batch
Epoch: 2/20...  Training Step: 350...  Training loss: 2.1740...  0.3162 sec/batch
Epoch: 3/20...  Training Step: 400...  Training loss: 2.0461...  0.3163 sec/batch
Epoch: 3/20...  Training Step: 450...  Training loss: 1.9830...  0.3169 sec/batch
Epoch: 3/20...  Training Step: 500...  Training loss: 1.9207...  0.3163

In [8]:
def generateSamples(checkpoints, n_samples, lstm_size, vocab_size, prime='the '):
    samples=[i for i in prime]
    model=CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)
    saver=tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
        new_state=sess.run(model.initial_state)
        for c in prime:
            x=np.zeros((1,1))
            x[0]=vocab2int[c]
            feed={model.inputs:x,
                 model.keep_prob:1.0,
                 model.initial_state:new_state}
            pred, new_state=sess.run([model.prediction,model.final_state], feed_dict=feed)
            
        c=pickTop_n(pred, len(vocab))
        samples.append(int2vocab[c])
        
        for i in range(n_samples):
            x[0,0]=c
            feed={model.inputs:x,
                 model.keep_prob:1.0,
                 model.initial_state:new_state}
            pred, new_state=sess.run([model.prediction,model.final_state], feed_dict=feed)
            c=pickTop_n(pred, len(vocab))
            samples.append(int2vocab[c])
    
    return ''.join(samples)

In [9]:
checkpoints=tf.train.latest_checkpoint('checkpoints')
samples=generateSamples(checkpoints, 2000, lstm_size, len(vocab), prime='may')
print(samples)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

INFO:tensorflow:Restoring parameters from checkpoints/i3960_l512.ckpt
may told you in a meaning show, that sudeend hange to meet the
trouble with all his which he stepped on the posision of the peasantry.
He had thenesely to beach the superations of childhe had been already
satts.

"When? You could not live human in the form in the counhonach the
subjects
of the project will, what is it you was looking always be there.
Wheleres all the district asparronsed the mensaigs, is in
a complation and morning. He sent that, to do it wan a sort
with which they had been the face in the children, and she was already,
she saw that this would secret her an absamety for shale to that in the
sight. She hed out that her long coulded, try off any, though the
peanant fingers were, she higher he had been an evening a