In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
tf.test.gpu_device_name()

'/device:GPU:0'

In [0]:
## Reading and processing text

with open('Shakespeare.txt', 'r', encoding='utf-8') as f: 
    text=f.read()

In [3]:
chars = set(text)
print(len(text))

178707


In [0]:
char2int = {ch:i for i,ch in enumerate(chars)}
int2char = dict(enumerate(chars))
text_ints = np.array([char2int[ch] for ch in text], 
                     dtype=np.int32)

In [6]:
def reshape_data(sequence, batch_size, num_steps):
    mini_batch_length = batch_size * num_steps
    num_batches = int(len(sequence) / mini_batch_length)
    print(num_batches)
    if num_batches*mini_batch_length + 1 > len(sequence):
        num_batches = num_batches - 1
    
    ## Truncate the sequence at the end to get rid of 
    ## remaining charcaters that do not make a full batch
    x = sequence[0 : num_batches*mini_batch_length]
    y = sequence[1 : num_batches*mini_batch_length + 1]
    
    ## Split x & y into a list batches of sequences: 
    x_batch_splits = np.split(x, batch_size)
    y_batch_splits = np.split(y, batch_size)
 
    
    ## Stack the batches together
    ## batch_size x mini_batch_length
    x = np.stack(x_batch_splits)
    y = np.stack(y_batch_splits)
    
    return x, y

## Testing:
train_x, train_y = reshape_data(text_ints, 64, 10)
print(train_x.shape)
print(train_x[0, :10])
print(train_y[0, :10])
print(''.join(int2char[i] for i in train_x[0, :50]))

279
(64, 2790)
[33 33 33 67 88 14 76 64 35 77]
[33 33 67 88 14 76 64 35 77  2]
***The Project Gutenberg's Etext of Shakespeare's 


In [9]:
np.random.seed(123)

def create_batch_generator(data_x, data_y, num_steps):
    batch_size, tot_batch_length = data_x.shape    
    num_batches = int(tot_batch_length/num_steps)

    for b in range(num_batches):
        yield (data_x[:, b*num_steps: (b+1)*num_steps], 
               data_y[:, b*num_steps: (b+1)*num_steps])
        
bgen = create_batch_generator(train_x[:,:100], train_y[:,:100], 15)
for b in bgen:
    print(b[0].shape, b[1].shape, end='  ')
    print(''.join(int2char[i] for i in b[0][0,:]).replace('\n', '*'), '    ',
          ''.join(int2char[i] for i in b[1][0,:]).replace('\n', '*'))

(64, 15) (64, 15)  ***The Project       **The Project G
(64, 15) (64, 15)  Gutenberg's Ete      utenberg's Etex
(64, 15) (64, 15)  xt of Shakespea      t of Shakespear
(64, 15) (64, 15)  re's First Foli      e's First Folio
(64, 15) (64, 15)  o**************      ***************
(64, 15) (64, 15)  ***********The       **********The T


In [0]:
class CharRNN(object):
  
    def __init__(self, num_classes, batch_size=64, 
                 num_steps=100, lstm_size=128, 
                 num_layers=2, learning_rate=0.001, 
                 keep_prob=0.5, grad_clip=5, 
                 sampling=False):
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.lstm_size = lstm_size
        self.num_layers = num_layers
        self.learning_rate = learning_rate
        self.keep_prob = keep_prob
        self.grad_clip = grad_clip
        
        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)

            self.build(sampling=sampling)
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()
    
    
    def build(self, sampling):
        if sampling == True:
            batch_size, num_steps = 1, 1
        else:
            batch_size = self.batch_size
            num_steps = self.num_steps

        tf_x = tf.placeholder(tf.int32, 
                              shape=[batch_size, num_steps], 
                              name='tf_x')
        tf_y = tf.placeholder(tf.int32, 
                              shape=[batch_size, num_steps], 
                              name='tf_y')
        tf_keepprob = tf.placeholder(tf.float32, 
                              name='tf_keepprob')

        # One-hot encoding:
        x_onehot = tf.one_hot(tf_x, depth=self.num_classes)
        y_onehot = tf.one_hot(tf_y, depth=self.num_classes)
        
        ### Build the multi-layer RNN cells
        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(self.lstm_size), 
                output_keep_prob=tf_keepprob) 
            for _ in range(self.num_layers)])
        
        ## Define the initial state
        self.initial_state = cells.zero_state(
                    batch_size, tf.float32)

        ## Run each sequence step through the RNN 
        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
                    cells, x_onehot, 
                    initial_state=self.initial_state)
        
        print('  << lstm_outputs  >>', lstm_outputs)

        seq_output_reshaped = tf.reshape(
                    lstm_outputs, 
                    shape=[-1, self.lstm_size],
                    name='seq_output_reshaped')
        print("reshaped output",seq_output_reshaped)
        
        logits = tf.layers.dense(
                    inputs=seq_output_reshaped, 
                    units=self.num_classes,
                    activation=None,
                    name='logits')

        proba = tf.nn.softmax(
                    logits, 
                    name='probabilities')
        print("Prob",proba)

        y_reshaped = tf.reshape(
                    y_onehot, 
                    shape=[-1, self.num_classes],
                    name='y_reshaped')
        
        print("y",y_reshaped)
        cost = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, 
                        labels=y_reshaped),
                        name='cost')
        
        # Gradient clipping to avoid "exploding gradients"
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
                    tf.gradients(cost, tvars), 
                    self.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.apply_gradients(
                    zip(grads, tvars),
                    name='train_op')
        
        
    def train(self, train_x, train_y, 
          num_epochs, ckpt_dir='./model/'):
        
          ## Create the checkpoint directory
          ## if does not exists
        
          if not os.path.exists(ckpt_dir):
            os.mkdir(ckpt_dir)
          
          print("in training")
          with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)

            n_batches = int(train_x.shape[1]/self.num_steps)
            iterations = n_batches * num_epochs

            for epoch in range(num_epochs):

                # Train network
                new_state = sess.run(self.initial_state)
                loss = 0
                
                ## Minibatch generator:
                bgen = create_batch_generator(
                        train_x, train_y, self.num_steps)
                for b, (batch_x, batch_y) in enumerate(bgen, 1):
                    
                    iteration = epoch*n_batches + b
                    
                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': self.keep_prob,
                            self.initial_state : new_state}
                    batch_cost, _, new_state = sess.run(
                            ['cost:0', 'train_op', 
                                self.final_state],
                            feed_dict=feed)
                    if iteration % 10 == 0:
                        print('Epoch %d/%d Iteration %d'
                              '| Training loss: %.4f' % (
                              epoch + 1, num_epochs, 
                              iteration, batch_cost))

                ## Save the trained model    
                self.saver.save(
                        sess, os.path.join(
                            ckpt_dir, 'language_modeling.ckpt'))
                
        
    def sample(self, output_length, ckpt_dir, starter_seq="The "):
          
            observed_seq = [ch for ch in starter_seq]        
            
            with tf.Session(graph=self.g) as sess:
              self.saver.restore(
                sess, 
                tf.train.latest_checkpoint(ckpt_dir))
            
              ## 1: run the model using the starter sequence
              new_state = sess.run(self.initial_state)
              for ch in starter_seq:
                x = np.zeros((1, 1))
                x[0,0] = char2int[ch]
                feed = {'tf_x:0': x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state: new_state}
                proba, new_state = sess.run(
                        ['probabilities:0', self.final_state], 
                        feed_dict=feed)
                print("prob",proba)
              ch_id = get_top_char(proba, len(chars))
             
              observed_seq.append(int2char[ch_id])
              print("obsss",observed_seq)     
              ## 2: run the model using the updated observed_seq
              for i in range(output_length):
                x[0,0] = ch_id
                feed = {'tf_x:0': x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state: new_state}
                proba, new_state = sess.run(
                        ['probabilities:0', self.final_state], 
                        feed_dict=feed)

                ch_id = get_top_char(proba, len(chars))
                observed_seq.append(int2char[ch_id])

              return ''.join(observed_seq)


In [0]:
def get_top_char(probas, char_size, top_n=5):
    p = np.squeeze(probas)
    p[np.argsort(p)[:-top_n]] = 0.0
    p = p / np.sum(p)
    ch_id = np.random.choice(char_size, 1, p=p)[0]
    return ch_id

In [0]:
batch_size = 64
num_steps = 100 
train_x, train_y = reshape_data(text_ints, 
                                batch_size, 
                                num_steps)

rnn = CharRNN(num_classes=len(chars), batch_size=batch_size)
print("instantiated done")
rnn.train(train_x, train_y, 
          num_epochs=200,
          ckpt_dir='./model-100/')

27
  << lstm_outputs  >> Tensor("rnn/transpose_1:0", shape=(64, 100, 128), dtype=float32)
reshaped output Tensor("seq_output_reshaped:0", shape=(6400, 128), dtype=float32)
Prob Tensor("probabilities:0", shape=(6400, 90), dtype=float32)
y Tensor("y_reshaped:0", shape=(6400, 90), dtype=float32)
instantiated done
in training
Epoch 1/200 Iteration 10| Training loss: 3.8259
Epoch 1/200 Iteration 20| Training loss: 3.4339
Epoch 2/200 Iteration 30| Training loss: 3.3509
Epoch 2/200 Iteration 40| Training loss: 3.3265
Epoch 2/200 Iteration 50| Training loss: 3.3058
Epoch 3/200 Iteration 60| Training loss: 3.3111
Epoch 3/200 Iteration 70| Training loss: 3.2488
Epoch 3/200 Iteration 80| Training loss: 3.3018
Epoch 4/200 Iteration 90| Training loss: 3.2563
Epoch 4/200 Iteration 100| Training loss: 3.2235
Epoch 5/200 Iteration 110| Training loss: 3.2337
Epoch 5/200 Iteration 120| Training loss: 3.2486
Epoch 5/200 Iteration 130| Training loss: 3.2199
Epoch 6/200 Iteration 140| Training loss: 3.2491

In [0]:
np.random.seed(123)
rnn = CharRNN(len(chars), sampling=True)

print(rnn.sample(ckpt_dir='./model-100/', 
                 output_length=500))

  << lstm_outputs  >> Tensor("rnn/transpose_1:0", shape=(1, 1, 128), dtype=float32)
reshaped output Tensor("seq_output_reshaped:0", shape=(1, 128), dtype=float32)
Prob Tensor("probabilities:0", shape=(1, 90), dtype=float32)
y Tensor("y_reshaped:0", shape=(1, 90), dtype=float32)
INFO:tensorflow:Restoring parameters from ./model-100/language_modeling.ckpt
The Selfe and she she dight, but word of and terperie thes seefes,
Thou thoughts to the past of the to see the was somes other
thy sheaths.

  Hor. That he so that shill shall haue the Prayes and sight,
Whos'd this will that heauen to thing all:
I that it is a Creature to so the mouth:
Both hare a fante it them there almows, to but,
Is the sonne to this, to to sill the what his whole in shate
I so some a more to his tread and meare

   King. I that to shald

   Ham. With me?
  Ophe. What thy stance t
