<h1> Character based text generation using a GRU </h1>

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import codecs
import re
EMBED_DIMENSION = 50
HIDDEN_SIZE = 256

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


In [2]:
with codecs.open('/tmp/kernel.txt', 'r', encoding='utf-8', errors='ignore') as kernel_file:
    raw_text = kernel_file.read()
kernel_words = re.split('(\-\>)|([\-\>+\=\<\/\&\|\(\)\:\*])',raw_text)
kernel_words = [w for w in kernel_words if w is not None]
kernel_words = kernel_words[0:300000]
kernel_words = set(kernel_words)
kword_to_int = dict((word, i) for i, word in enumerate(kernel_words))
int_to_kword = dict((i, word) for i, word in enumerate(kernel_words))
v_size = len(kword_to_int)
kword_to_int['<UNK>'] = v_size
int_to_kword[v_size] = '<UNK>'
v_size += 1
X_train = [kword_to_int[word] for word in kernel_words]
y_train = X_train[1:]
y_train.append(kword_to_int['<UNK>'])
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
X_train = np.expand_dims(X_train,axis=1)
y_train = np.expand_dims(y_train,axis=1)
print(X_train.shape, y_train.shape)

(52657, 1) (52657, 1)


In [3]:
def estimator_spec_for_generation(flayer_op, lbls, md):
    preds_cls = tf.argmax(flayer_op, 1)
    if md == tf.estimator.ModeKeys.PREDICT:
        prev_op = tf.reshape(flayer_op, [-1, 1, v_size])[:, -1, :]
        preds_op = tf.nn.softmax(prev_op)
        return tf.estimator.EstimatorSpec(
        mode=md,
        predictions={
            'preds_probs': preds_op
        })
    trng_loss = tf.losses.sparse_softmax_cross_entropy(labels=lbls, logits=flayer_op)
    if md == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        trng_op = optimizer.minimize(trng_loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(md, loss=trng_loss, train_op=trng_op)
    ev_met_ops = {'accy': tf.metrics.accuracy(labels=lbls, predictions=preds_cls)}
    return tf.estimator.EstimatorSpec(md, loss=trng_loss, train_op=trng_op)

In [4]:
def rnn_model_fn(features, labels, mode):
    embedding = tf.Variable(tf.truncated_normal([v_size, EMBED_DIMENSION], 
                                                    stddev=1.0/np.sqrt(EMBED_DIMENSION)), 
                                name="word_embeddings")
    word_emb = tf.nn.embedding_lookup(embedding, features['word'])
    rnn_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
    
    outputs, _ = tf.nn.dynamic_rnn(rnn_cell, word_emb, dtype=tf.float32)
    outputs = tf.reshape(outputs, [-1, HIDDEN_SIZE])
    flayer_op = tf.layers.dense(outputs, v_size, name="linear")
    return estimator_spec_for_generation(flayer_op, labels, mode)

In [5]:
run_config = tf.contrib.learn.RunConfig()
run_config = run_config.replace(model_dir='/tmp/models/',save_summary_steps=10,log_step_count_steps=10)
generator = tf.estimator.Estimator(model_fn=rnn_model_fn,config=run_config)

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x119822390>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_secs': 600, '_log_step_count_steps': 10, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/models/'}


In [6]:
train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={'word': X_train},
      y=y_train,
      batch_size=1024,
      num_epochs=None,
      shuffle=True)
generator.train(input_fn=train_input_fn, steps=300)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/models/model.ckpt.
INFO:tensorflow:loss = 10.871534, step = 1
INFO:tensorflow:global_step/sec: 0.64247
INFO:tensorflow:global_step/sec: 0.620477
INFO:tensorflow:global_step/sec: 0.617585
INFO:tensorflow:global_step/sec: 0.623787
INFO:tensorflow:global_step/sec: 0.613455
INFO:tensorflow:global_step/sec: 0.624856
INFO:tensorflow:global_step/sec: 0.623827
INFO:tensorflow:global_step/sec: 0.625256
INFO:tensorflow:global_step/sec: 0.61646
INFO:tensorflow:global_step/sec: 0.627149
INFO:tensorflow:loss = 10.203493, step = 101 (160.402 sec)
INFO:tensorflow:global_step/sec: 0.631025
INFO:tensorflow:global_step/sec: 0.630481
INFO:tensorflow:global_step/sec: 0.603987
INFO:tensorflow:global_step/sec: 0.609417
INFO:tensorflow:global_step/sec: 0.615121
INFO:tensorflow:global_step/sec: 0.627453
INFO:tensorflow:global_step/sec: 0.631844
INFO:tensorflow:global_step/sec: 0.620623
INFO:tensorflow:global_step/se

<tensorflow.python.estimator.estimator.Estimator at 0x11978ec18>

In [7]:
maxlen = 40
next_x = X_train[0:60]
text = "".join([int_to_kword[word] for word in next_x.flatten()])
for i in range(maxlen):
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={'word': next_x},
      num_epochs=1,
      shuffle=False)
    predictions = generator.predict(input_fn=test_input_fn)
    predictions = list(predictions)
    word = int_to_kword[np.argmax(predictions[-1]['preds_probs'])]
    text = text +  word
    next_x = np.concatenate((next_x,[[kword_to_int[word]]]))
    next_x = next_x[1:]

INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring parameters from /tmp/models/model.ckpt-300
INFO:tensorflow:Restoring paramete

In [8]:
print(text)

 for single ops connected being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
 
		set_current_stateevent;
	struct trace_mmiotrace_map  tracing_map_elt_allocops,
				    struct event_trigger_data  "migration;
		}
	}
	wakelocks_gc_count  NULL;
	spin_lock_irqstart,
			        store_gcov_u64  trace_output_rawevent,
				      struct event_filter "r
	     child
		dpm_resume_end
		param.args[i] entry, tmp, ;
	rcu_report_dead Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
 test_nop_accept, TRACE_NOP_OPT_ACCEPT
		return t_probe_showouter_duration,
			 
void pm_qos_remove_request any registered driver indicates it needs a VT switch
 
void free_ftrace_func_mappercurrent_func  {
		char comm[TASK_COMM_LEN];

		trace_find_cmdlinenamed_list"Compressing and saving image data sechdrs;
	char  "network_latency",
};


static BLOCKING_NOTIFIER_HEAD The command that needs to be done
  ops;
	opsor required key missing  @task   ftrace_start_up is true if we want ftrace running  then we byp