In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import chi2
from tensorflow.examples.tutorials.mnist import input_data

  from ._conv import register_converters as _register_converters


In [2]:
def neural_network(x):
    """Build a one-hidden-layer feed-forward network on top of ``x``.

    The layer widths are taken from the module-level names ``n_hidden1``
    (hidden units) and ``n_output`` (output units) at call time.

    Args:
        x: Input tensor fed to the hidden dense layer.

    Returns:
        The tensor produced by the final dense layer. No activation is
        applied to it, so the values are unnormalised logits.
    """
    hidden = tf.layers.dense(inputs=x, units=n_hidden1, activation=tf.nn.relu)
    logits = tf.layers.dense(inputs=hidden, units=n_output)
    return logits

In [3]:
def model_fn(features, labels, mode):
    """Model function passed to ``tf.estimator.Estimator``.

    Args:
        features: Mapping of feature tensors; the network input is read from
            the key ``'x'``.
        labels: Class-id labels. They are cast to int32 and used as sparse
            class indices for the loss. Not used in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec``. In PREDICT mode it carries only the
        arg-max class predictions. Otherwise it carries the predictions, the
        mean sparse softmax cross-entropy loss and an ``'accuracy'`` metric,
        plus a gradient-descent ``train_op`` when ``mode`` is TRAIN.
    """
    logits = neural_network(features['x'])
    pred_y = tf.argmax(logits, axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_y)

    loss_fn = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
    accuracy_ = tf.metrics.accuracy(labels=labels, predictions=pred_y)

    # Only TRAIN mode needs an optimizer; evaluation just reads loss/metrics,
    # so no gradient ops are added to the evaluation graph.
    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # learning_rate is a module-level hyperparameter read at call time.
        optimizer_ = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        train_op = optimizer_.minimize(loss_fn, global_step=tf.train.get_global_step())

    est_spec = tf.estimator.EstimatorSpec(mode=mode, predictions=pred_y, loss=loss_fn,
                                          train_op=train_op, eval_metric_ops={'accuracy': accuracy_})

    return est_spec
    

In [5]:
# Load the train and test splits of the 20 Newsgroups corpus.
X_train_data = fetch_20newsgroups(subset="train")
X_test_data = fetch_20newsgroups(subset="test")

# Keep only the raw documents and their targets from each split.
X_train_ng, y_train_ng = X_train_data.data, X_train_data.target
X_test_ng, y_test_ng = X_test_data.data, X_test_data.target

# The dataset containers are no longer needed once the fields are extracted.
del X_train_data, X_test_data

# Token-count features stored as float32. The vocabulary is fitted on the
# training documents only and then re-used to transform the test documents.
vectorizer = CountVectorizer(dtype='float32')
TF_X_train = vectorizer.fit_transform(X_train_ng)
TF_X_test = vectorizer.transform(X_test_ng)

In [8]:
# Chi-squared statistic and p-value of every count feature against the
# training labels.
chi_2, pval = chi2(X=TF_X_train, y=y_train_ng)

In [9]:
# Reverse lookup: column index -> vocabulary term.
pairs = dict((index, word) for word, index in vectorizer.vocabulary_.items())

# Column indices of the 20000 largest chi-squared scores, highest first.
f_sel = np.argsort(chi_2)[::-1][:20000]

# Restrict both splits to the selected columns, then drop the full matrices.
X_train, X_test = TF_X_train[:, f_sel], TF_X_test[:, f_sel]
del TF_X_train, TF_X_test

In [13]:
# Network sizes: input feature count, output units, hidden units.
n_input = 20000
n_output = 20
n_hidden1 = 1000

# Optimisation settings: examples per step and gradient-descent step size.
batch_size = 5657
learning_rate = 0.2

# Convert the sparse matrices to dense arrays. `toarray()` returns a plain
# ndarray, whereas `todense()` returns the discouraged `numpy.matrix` type.
X_train = X_train.toarray()
X_test = X_test.toarray()

# The declared input width must match the number of selected features.
assert X_train.shape[1] == n_input, "X_train width does not match n_input"
assert X_test.shape[1] == n_input, "X_test width does not match n_input"

In [16]:
# Input function for training: shuffled batches, repeated indefinitely
# (num_epochs=None), with the features exposed under the key 'x'.
input_20_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': X_train},
    y=y_train_ng,
    batch_size=batch_size,
    num_epochs=None,
    shuffle=True,
)

# Estimator wrapping the custom model function, with the default config.
model = tf.estimator.Estimator(model_fn=model_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp2gmxcc1r', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f93618a59e8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [17]:
%%time
model.train(input_20_fn, steps = 1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp2gmxcc1r/model.ckpt.
INFO:tensorflow:loss = 3.256226, step = 0
INFO:tensorflow:global_step/sec: 0.628827
INFO:tensorflow:loss = 2.107969, step = 100 (159.027 sec)
INFO:tensorflow:global_step/sec: 0.628931
INFO:tensorflow:loss = 1.5700439, step = 200 (159.001 sec)
INFO:tensorflow:global_step/sec: 0.626786
INFO:tensorflow:loss = 0.5075001, step = 300 (159.544 sec)
INFO:tensorflow:Saving checkpoints for 378 into /tmp/tmp2gmxcc1r/model.ckpt.
INFO:tensorflow:global_step/sec: 0.622461
INFO:tensorflow:loss = 0.28585678, step = 400 (160.652 sec)
INFO:tensorflow:global_step/sec: 0.628415
INFO:tensorflow:loss = 0.1941176, step = 500 (159.131 sec)
INFO:tensorflow:global_step/sec: 0.628582
INFO:tensorflow:loss =

<tensorflow.python.estimator.estimator.Estimator at 0x7f9363a88908>

In [18]:
# Input function for evaluation: unshuffled batches over the test split.
input_test_20_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': X_test},
    y=y_test_ng,
    batch_size=batch_size,
    shuffle=False,
)

# Evaluate on the test split; the cell output is the returned metrics dict.
model.evaluate(input_fn=input_test_20_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-03-24-05:17:48
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp2gmxcc1r/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-03-24-05:17:50
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.78359, global_step = 1000, loss = 1.0074589


{'accuracy': 0.78359, 'global_step': 1000, 'loss': 1.0074589}