# Text generator based on RNN

## Brief
Generate fake abstract with RNN model under tensorflow r1.3.

### Import libraries

In [1]:
import tensorflow as tf
import numpy as np
import random
import os

### Configurations 

In [2]:
vocab = (" $%'()+,-./0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "\\^_abcdefghijklmnopqrstuvwxyz{|}\n")
graph_path=r"./graphs"
test_text_path=os.path.normpath(r"../DataSet/arvix_abstracts.txt")
batch_size=5
model_param_path=os.path.normpath(r"./model_checkpoints")

### Data encoding
#### Basic Assumption

* A full string sequence consists $START$ & $STOP$ signal with characters in the middle. 

#### Encoding policy
* A set $\mathcal{S}$ that consists of many characters is utilized to encode the characters.
* The $1^{st}$ entry of the vector corresponds to $UNKNOWN$ characters(l.e. characters that are beyond $\mathcal{S}$). 
* The last entry of the vector corresponds to $STOP$ signal of the sequence. 
* The entries in the middle corresponds to the indices of the characters within $\mathcal{S}$. 
* The $START$ signal is represented as a zero vector. 

#### Implementation & Test
##### Declaration

In [3]:
class TextCodec:
    def __init__(self, vocab):
        self.__vocab = vocab
        
    def encode(self, string, sess = None, start=True, stop=True):
        """
        Encode string.
        Each character is represented as a N-dimension one hot vector. 
        N = len(self.__vocab)+ 2
        
        Note:
        The first entry of the vector corresponds to unknown character. 
        The last entry of the vector corresponds to STOP signal of the sequence. 
        The entries in the middle corresponds to the index of the character. 
        The START signal is represented as a zero vector. 
        """
        tensor=[vocab.find(ch)+1 for ch in string]
        if stop:
             tensor.append(len(vocab)+1)  # String + STOP
        tensor=tf.one_hot(tensor,depth=len(vocab)+2,on_value=1.0,off_value=0.0,axis=-1, dtype=tf.float32)
        if start:
            tensor=tf.concat([tf.zeros([1, len(vocab)+2],dtype=tf.float32),tensor],axis=0)  # String + START
        if sess is None:
            with tf.Session() as sess:
                nparray=tensor.eval()
        elif type(sess) == tf.Session:
            nparray = tensor.eval(session=sess)
        else:
            raise TypeError('"sess" must be {}, got {}'.format(tf.Session, type(sess)))    
        return nparray

    def decode(self, nparray, default="[UNKNOWN]",start="[START]",stop="[STOP]",strip=False):
        text_list=[]
        indices=np.argmax(nparray,axis=1)
        for v, ch_i in zip(nparray,indices):
            if np.all(v==0):
                text_list.append(start if not strip else "")
            elif ch_i==0:
                text_list.append(default)
            elif ch_i==len(self.__vocab)+1:
                text_list.append(stop if not strip else "")
            else:
                text_list.append(vocab[ch_i-1])
        return "".join(text_list)

##### Test
See how encoding and decoding work. 

In [4]:
test_codec=TextCodec(vocab)
test_text_encoded=test_codec.encode("Hello world!")
print("Encoded text looks like:\n{}".format(test_text_encoded))
test_text_decoded=test_codec.decode(nparray=test_text_encoded,strip=False)
print("Decoded text looks like:\n{}".format(test_text_decoded))

Encoded text looks like:
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  1.]]
Decoded text looks like:
[START]Hello world[UNKNOWN][STOP]


### Batch Generator
#### Implementation

In [5]:
def batch_generator(file_path, batch_size, sequence_length, overlap=0.5, text_codec=None):
    step_size = int(sequence_length * (1-overlap))
    with open(os.path.normpath(file_path),"r") as f:
        text = "".join(f.readlines())
    batch = []
    for i in range(0,len(text)-sequence_length+1,step_size):
        if text_codec is None:
            batch.append(text[i:i+sequence_length])
        else:
            batch.append(text_codec.encode(text[i:i+sequence_length]))
        if len(batch) == batch_size:
            yield batch if text_codec is None else np.array(batch)
            batch = []

#### Test
See what each batch looks like. 

In [6]:
batches=batch_generator(test_text_path,2,30,0.5)
encoded_batches=batch_generator(test_text_path, batch_size, 30, 0.5, test_codec)  # For later use
print("The first 5 batches are:")
for batch, _ in zip(batches,range(5)):
    print(batch)
del batches

The first 5 batches are:
['In science and engineering, in', 'engineering, intelligent proce']
['telligent processing of comple', 'ssing of complex signals such ']
['x signals such as images, soun', 'as images, sound or language i']
['d or language is often perform', 's often performed by a paramet']
['ed by a parameterized hierarch', 'erized hierarchy of nonlinear ']


### Define model class

In [7]:
class MyRNN():
    def __init__(self, input_vector_dim, model_checkpoint_path, state_size=100, scope="RNN", output_vector_dim=None):
        if output_vector_dim is None:
            output_vector_dim=input_vector_dim
        self.__input_vector_dim=input_vector_dim
        self.__output_vector_dim=output_vector_dim
        self.__state_size=state_size
        self.__mdl_ckpt_path=model_checkpoint_path
        with tf.variable_scope(scope) as model_scope:
            with tf.variable_scope("structure") as structure_scope:
                # One batch at a time
                self.__inputs = tf.placeholder(dtype=tf.float32,shape=[None,None,input_vector_dim])
                self.__expected_outputs = tf.placeholder(dtype=tf.float32,shape=[None,None,output_vector_dim])
                self.__cell = tf.nn.rnn_cell.GRUCell(num_units=state_size)
                self.__init_state = tf.placeholder_with_default(tf.zeros([1,state_size],dtype=tf.float32),
                                                              [None,state_size])
                self.__wI2S = tf.get_variable(name="Weight_i2s",shape=[input_vector_dim,state_size],
                                            initializer=tf.truncated_normal_initializer())
                self.__bI2S = tf.get_variable(name="Bias_i2s",shape=[1,state_size],
                            initializer=tf.truncated_normal_initializer())
                self.__rnn_inputs=tf.add(tf.einsum("aij,jk->aik",self.__inputs,self.__wI2S), self.__bI2S)  
                # Didn't know why the broadcast mechanism work on tf.matmul(self.__inputs,self.__wI2S)
                
                # S20:State to output
                self.__wS2O = tf.get_variable(name="Weight_s2o",shape=[state_size,output_vector_dim],
                                            initializer=tf.truncated_normal_initializer())
                self.__bS2O = tf.get_variable(name="Bias_s2o",shape=[1,output_vector_dim],
                                            initializer=tf.truncated_normal_initializer())
                self.__state_output, self.final_state = tf.nn.dynamic_rnn(cell=self.__cell,inputs=self.__rnn_inputs,
                                                     dtype=tf.float32,initial_state=self.init_state)
                
                self.__net_outputs=tf.add(tf.einsum("aij,jk->aik",self.__state_output,self.__wS2O), self.__bS2O)
                # Same reason for usint tf.einsum here.
            with tf.name_scope("training"):
                # Need to utilize new loss function
                loss=tf.losses.softmax_cross_entropy(logits=self.__net_outputs,onehot_labels=self.__expected_outputs)
                loss=tf.reduce_mean(loss,name="loss")
                self.__global_step=tf.Variable(0,dtype=tf.int32,trainable=False,name='globalStep')
                self.optimizer=tf.train.AdamOptimizer(learning_rate=0.001,
                                                      name="optimizer").minimize(loss,global_step=self.__global_step)
            with tf.name_scope("summary") as sumScope:
                lossSum=tf.summary.scalar(tensor=loss,name="loss")
                self.summary_op=tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES,sumScope))

    def online_inference(self,timeSteps=100,state=None,seed=None,modelParamPath=None):
        feedDict={}
        saver=tf.train.Saver()
        if model_param_path is None:
            model_param_path=self.__mdl_ckpt_path
        if state is not None:
            feedDict[self.init_state]=state
        if seed is None:
            #Create a random seed
            seed=tf.one_hot([random.randint(0,self.__input_vector_dim)],
                            depth=self.__input_vector_dim, on_value=1.0, off_value=0.0, axis=-1, dtype=tf.float32)
            with tf.Session() as sess:
                seed=sess.run(seed)
        feed_dict[self.__inputs]=seed
        seq = []
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(self.__mdl_ckpt_path)
            print("Loading model")
            if ckpt and ckpt.model_checkpoint_path:  
                saver.restore(sess,ckpt.model_checkpoint_path)
                print("Successfully loaded")
            for length in range(timeSteps):
                input_,state=sess.run(fetches=[self.netOutputs,self.finalState],
                                      feed_dict=feedDict)
                #  Since the input vectors are discrete, so we need to convert it into a
                #  new one hot vector
                input_=tf.one_hot(indices=[int(np.argmax(input_[0]))],depth=self.__outputVetorDim,
                                  on_value=1.0,off_value=0.0,axis=-1,dtype=tf.float32).eval()
                seq.append(input_)
                feedDict[self.inputs],feedDict[self.initState]=input_,state
            seq=tf.concat(values=seq,axis=0).eval()
        return seq, state
    
    @property
    def input_vector_dim(self):
        return self.__input_vector_dim
    
    @property
    def output_vector_dim(self):
        return self.__output_vector_dim
    
    @property
    def global_step(self):
        return self.__global_step
    
    @property
    def inputs(self):
        return self.__inputs
    
    @property
    def expected_outputs(self):
        return self.__expected_outputs
    
    @property
    def init_state(self):
        return self.__init_state
    
    @property
    def net_outputs(self):
        return self.__net_outputs
    
    @property
    def init_state(self):
        return self.__init_state
    
    @classmethod
    def train(cls, model, n_epochs, batches, model_param_path, graph_path):  # To be fixed.
        with tf.Session() as sess:
            writer=tf.summary.FileWriter(logdir=graph_path,graph=sess.graph)
            saver=tf.train.Saver()
            sess.run(tf.global_variables_initializer())
            ckpt=tf.train.get_checkpoint_state(model_param_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess,ckpt.model_checkpoint_path)
                iteration=Test.globalStep.eval()
                print("Successfully loaded session at iteration {}. Continue training now. ".format(iteration))
            else:
                iteration=0
            for i in range(n_epochs):
                for batch in batches:
                    print(batch.shape)
                    X=batch[:,:-1,:]
                    Y=batch[:,1:,:]
                    iteration+=1
                    feed_dict={}
                    feed_dict[model.__init_state]=tf.zeros([np.shape(batch)[0], model.__state_size]).eval()
                    print(feed_dict[model.__init_state].shape)
                    feed_dict[model.__inputs]=X
                    feed_dict[model.__expected_outputs]=Y
                    _, summary, iteration=sess.run([model.optimizer, model.summary_op, model.__global_step], feed_dict=feed_dict)
                    if (iteration+1)%50==0:
                        writer.add_summary(summary,global_step=iteration)
                        saver.save(sess=sess,save_path=model_param_path+r"\rnnMDL",global_step=iteration)
                saver.save(sess=sess,save_path=modelParamPath+r"\rnnMDL",global_step=iteration)
            print("Session terminated. ")
            writer.close()
            saver.save(sess=sess,save_path=model_param_path+r"\rnnMDL", global_step=model.global_step)

### Make an instance

In [8]:
test_rnn=MyRNN(scope="RNN",input_vector_dim=len(vocab)+2,model_checkpoint_path=model_param_path,state_size=200)

### Training

In [9]:
n_epochs=6
MyRNN.train(test_rnn, n_epochs, encoded_batches, model_param_path, graph_path)

(5, 32, 86)
(5, 200)


InternalError: Blas GEMM launch failed : a.shape=(155, 86), b.shape=(86, 200), m=155, n=200, k=86
	 [[Node: RNN/structure/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](RNN/structure/Reshape, RNN/structure/transpose_1)]]
	 [[Node: RNN/training/loss/_27 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1082_RNN/training/loss", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'RNN/structure/MatMul', defined at:
  File "e:\python35\lib\runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "e:\python35\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "e:\python35\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "e:\python35\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "e:\python35\lib\site-packages\ipykernel\kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "e:\python35\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "e:\python35\lib\site-packages\tornado\ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "e:\python35\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "e:\python35\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "e:\python35\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "e:\python35\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "e:\python35\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "e:\python35\lib\site-packages\ipykernel\kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "e:\python35\lib\site-packages\ipykernel\kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "e:\python35\lib\site-packages\ipykernel\kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "e:\python35\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "e:\python35\lib\site-packages\ipykernel\zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "e:\python35\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "e:\python35\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "e:\python35\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-0cab419dad85>", line 1, in <module>
    test_rnn=MyRNN(scope="RNN",input_vector_dim=len(vocab)+2,model_checkpoint_path=model_param_path,state_size=200)
  File "<ipython-input-7-fa1c54eb1b2b>", line 21, in __init__
    self.__rnn_inputs=tf.add(tf.einsum("aij,jk->aik",self.__inputs,self.__wI2S), self.__bI2S)
  File "e:\python35\lib\site-packages\tensorflow\python\ops\special_math_ops.py", line 203, in einsum
    axes_to_sum)
  File "e:\python35\lib\site-packages\tensorflow\python\ops\special_math_ops.py", line 325, in _einsum_reduction
    product = math_ops.matmul(t0, t1)
  File "e:\python35\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1844, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "e:\python35\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1289, in _mat_mul
    transpose_b=transpose_b, name=name)
  File "e:\python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "e:\python35\lib\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "e:\python35\lib\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(155, 86), b.shape=(86, 200), m=155, n=200, k=86
	 [[Node: RNN/structure/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](RNN/structure/Reshape, RNN/structure/transpose_1)]]
	 [[Node: RNN/training/loss/_27 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1082_RNN/training/loss", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]


### Test online inference

In [None]:
result,_=Test.onlineInference(timeSteps=500)
print(Decode(result))

### Evaluate transition performance of the model

In [None]:
with tf.Session() as sess:
    print("Loading model")
    ckpt=tf.train.get_checkpoint_state(modelParamPath)
    if ckpt and ckpt.model_checkpoint_path:  
        saver.restore(sess,ckpt.model_checkpoint_path)
        print("Successfully loaded")
    result=sess.run(Test.netOutputs,{Test.inputs:X})
print(Decode(result))