In [None]:
# %%bash

# pip install tensorflow==1.7
# pip install tensorflow-transform
# pip install tensorflow-hub

# Text Classification using TensorFlow and Google Cloud - Part 2

This [bigquery-public-data:hacker_news](https://cloud.google.com/bigquery/public-data/hacker-news) dataset contains all stories and comments from Hacker News since its launch in 2006.  Each story contains a story id, url, the title of the story, the author that made the post, when it was written, and the number of points the story received.

The objective is, given the title of the story, we want to build an ML model that can predict the source of this story.

## TF DNNClassifier with TF.Hub Sentence Embedding

This notebook illustrates how to build a TF premade estimator, namely DNNClassifier, where the input text is represented as a sentence embedding, using a [tf.hub text embedding module](https://www.tensorflow.org/hub/modules/text). The model uses the transformed data produced in part one. 
Note that the tf.hub text embedding module makes use of only the raw text feature (title). The overall steps are as follows:

1. Define the metadata
2. Define data input function
3. Create feature columns (use the tf.hub text embedding module)
4. Create the premade DNNClassifier estimator
5. Setup experiment
    * Hyper-parameters & RunConfig
    * Serving function (for exported model)
    * TrainSpec & EvalSpec
6. Run experiment
7. Evaluate the model
8. Use SavedModel for prediction
    



### Setting Global Parameters

In [1]:
import os

class Params:
    """Plain namespace holding the notebook-wide settings assigned below."""
    pass

# Set to run on GCP
Params.GCP_PROJECT_ID = 'ksalama-gcp-playground'
Params.REGION = 'europe-west1'
Params.BUCKET = 'ksalama-gcs-cloudml'

Params.PLATFORM = 'local' # local | GCP

# Data and model locations are local relative paths, or gs:// paths inside
# Params.BUCKET when PLATFORM is anything other than 'local'.
Params.DATA_DIR = 'data/news'  if Params.PLATFORM == 'local' else 'gs://{}/data/news'.format(Params.BUCKET)

Params.TRANSFORMED_DATA_DIR = os.path.join(Params.DATA_DIR, 'transformed')
Params.TRANSFORMED_TRAIN_DATA_FILE_PREFIX = os.path.join(Params.TRANSFORMED_DATA_DIR, 'train')
Params.TRANSFORMED_EVAL_DATA_FILE_PREFIX = os.path.join(Params.TRANSFORMED_DATA_DIR, 'eval')

Params.TEMP_DIR = os.path.join(Params.DATA_DIR, 'tmp')

Params.MODELS_DIR = 'models/news' if Params.PLATFORM == 'local' else 'gs://{}/models/news'.format(Params.BUCKET)

Params.TRANSFORM_ARTEFACTS_DIR = os.path.join(Params.MODELS_DIR,'transform')

# When False, the "Run experiment" cell skips training.
Params.TRAIN = True

# When False, the previous model directory is removed before training.
Params.RESUME_TRAINING = False

Params.EAGER = False

if Params.EAGER:
    # TensorFlow is only imported in the next cell, so import it here;
    # otherwise enabling eager mode on a fresh kernel raises NameError.
    import tensorflow as tf
    tf.enable_eager_execution()

### Importing libraries

In [2]:
import tensorflow as tf
from tensorflow import data


from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
from tensorflow_transform.beam.tft_beam_io import transform_fn_io
from tensorflow_transform.tf_metadata import metadata_io
from tensorflow_transform.tf_metadata import dataset_schema
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.saved import saved_transform_io

# Call form of print: identical output on Python 2, and valid on Python 3.
print(tf.__version__)

Instructions for updating:
Use the retry module or similar alternatives.
1.7.0


## 1. Define Metadata

In [3]:
# Columns of the raw data and the default used for each of them.
RAW_HEADER = 'key,title,source'.split(',')
RAW_DEFAULTS = [['NA'],['NA'],['NA']]
TARGET_FEATURE_NAME = 'source'
TARGET_LABELS = ['github', 'nytimes', 'techcrunch']
TEXT_FEATURE_NAME = 'title'
KEY_COLUMN = 'key'

VOCAB_SIZE = 20000
# Record counts of the two splits; used below to derive step counts and to
# size the single-batch evaluation.
TRAIN_SIZE = 73124
EVAL_SIZE = 23079

DELIMITERS = '.,!?() '

# Schema of the raw data: three scalar string columns.
raw_metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema({
    KEY_COLUMN: dataset_schema.ColumnSchema(
        tf.string, [], dataset_schema.FixedColumnRepresentation()),
    TEXT_FEATURE_NAME: dataset_schema.ColumnSchema(
        tf.string, [], dataset_schema.FixedColumnRepresentation()),
    TARGET_FEATURE_NAME: dataset_schema.ColumnSchema(
        tf.string, [], dataset_schema.FixedColumnRepresentation()),
}))


# Schema of the transformed data, read back from the transform artefacts
# directory (produced in part one, per the notebook introduction).
transformed_metadata = metadata_io.read_metadata(
    os.path.join(Params.TRANSFORM_ARTEFACTS_DIR,"transformed_metadata"))

raw_feature_spec = raw_metadata.schema.as_feature_spec()
transformed_feature_spec = transformed_metadata.schema.as_feature_spec()

# Call form of print: identical output on Python 2, and valid on Python 3.
print(transformed_feature_spec)

{u'source': FixedLenFeature(shape=[], dtype=tf.string, default_value=None), u'title': FixedLenFeature(shape=[], dtype=tf.string, default_value=None), u'weight': VarLenFeature(dtype=tf.float32), u'bow': VarLenFeature(dtype=tf.int64)}


## 2. Define Input Function

In [17]:
def parse_tf_example(tf_example):
    """Decode one serialized tf.Example into a (features, label) pair.

    Args:
        tf_example: serialized example, parsed with `transformed_feature_spec`.

    Returns:
        A tuple of the parsed feature dict (without the target entry) and the
        value stored under TARGET_FEATURE_NAME.
    """
    features = tf.parse_single_example(
        serialized=tf_example,
        features=transformed_feature_spec,
    )
    # Split the label off so that only model inputs remain in the dict.
    label = features.pop(TARGET_FEATURE_NAME)
    return features, label


def generate_tfrecords_input_fn(files_pattern,
                                mode=tf.estimator.ModeKeys.EVAL,
                                num_epochs=1,
                                batch_size=200):
    """Build an estimator input function reading transformed TFRecord files.

    Args:
        files_pattern: glob pattern of the TFRecord files to read.
        mode: a tf.estimator.ModeKeys value. Records go through a shuffle
            buffer only in TRAIN mode; in other modes they are just repeated.
        num_epochs: number of passes over the data.
        batch_size: number of examples per batch.

    Returns:
        A zero-argument function. In graph mode it returns a
        (features, target) pair of tensors; when Params.EAGER is set it
        returns the tf.data dataset itself.
    """

    def _input_fn():

        file_names = data.Dataset.list_files(files_pattern)

        if Params.EAGER:
            print(file_names)

        dataset = data.TFRecordDataset(file_names)

        if mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.apply(
                tf.contrib.data.shuffle_and_repeat(count=num_epochs,
                                                   buffer_size=batch_size*2)
            )
        else:
            # No shuffle buffer outside training: it does not change the
            # aggregated metrics and would only cost memory.
            dataset = dataset.repeat(num_epochs)

        dataset = dataset.apply(
            tf.contrib.data.map_and_batch(parse_tf_example,
                                          batch_size=batch_size,
                                          num_parallel_batches=2)
        )

        # The original assigned this to a misspelled name ("datset"), so the
        # prefetch was silently dropped. Elements are whole batches at this
        # point, so a buffer of 1 already keeps `batch_size` examples ready.
        dataset = dataset.prefetch(1)

        if Params.EAGER:
            return dataset

        iterator = dataset.make_one_shot_iterator()
        features, target = iterator.get_next()
        return features, target

    return _input_fn

## 3. Create feature columns

In [5]:
import tensorflow_hub as hub
# Call form of print: identical output on Python 2, and valid on Python 3.
print(hub.__version__)

0.1.0


In [6]:
def create_feature_columns(hparams):
    """Create the model's feature columns: one TF-Hub text embedding column.

    Args:
        hparams: hyper-parameters; `hparams.trainable_embedding` is passed as
            the `trainable` flag of the embedding column.

    Returns:
        A one-element list holding the embedding column for the "title"
        feature.
    """
    title_embedding_column = hub.text_embedding_column(
        "title", "https://tfhub.dev/google/universal-sentence-encoder/1",
        trainable=hparams.trainable_embedding)

    feature_columns = [title_embedding_column]

    # Call form of print: identical output on Python 2, and valid on Python 3.
    print("feature columns: \n {}".format(feature_columns))
    print("")

    return feature_columns
    

## 4. Create a model using a premade DNNClassifier

In [13]:
def create_estimator(hparams, run_config):
    """Assemble the premade DNNClassifier used throughout the notebook.

    Args:
        hparams: hyper-parameters supplying `learning_rate` and
            `hidden_units` (and whatever `create_feature_columns` reads).
        run_config: tf.estimator.RunConfig passed to the estimator as `config`.

    Returns:
        A tf.estimator.DNNClassifier with one class per entry of
        TARGET_LABELS, optimised with Adam.
    """
    # Keyword arguments are evaluated in order, so the feature columns are
    # still created (and printed) before the optimizer is constructed.
    return tf.estimator.DNNClassifier(
        feature_columns=create_feature_columns(hparams),
        optimizer=tf.train.AdamOptimizer(learning_rate=hparams.learning_rate),
        hidden_units=hparams.hidden_units,
        label_vocabulary=TARGET_LABELS,
        n_classes=len(TARGET_LABELS),
        config=run_config,
    )

## 5. Setup Experiment

### 5.1 HParams and RunConfig

In [8]:
NUM_EPOCHS = 10
BATCH_SIZE = 1000

# Explicit floor division: "/" only truncates on Python 2; on Python 3 it
# would make max_steps a float. Any trailing partial batch of an epoch is
# not counted towards the step budget.
TOTAL_STEPS = (TRAIN_SIZE // BATCH_SIZE) * NUM_EPOCHS
EVAL_EVERY_SEC = 60

hparams  = tf.contrib.training.HParams(
    num_epochs = NUM_EPOCHS,
    batch_size = BATCH_SIZE,
    trainable_embedding = False,
    learning_rate = 0.01,
    hidden_units=[128, 64],
    max_steps = TOTAL_STEPS,

)

MODEL_NAME = 'dnn_estimator_hub' 
model_dir = os.path.join(Params.MODELS_DIR, MODEL_NAME)

# Checkpoints are written every EVAL_EVERY_SEC seconds and only the most
# recent one is kept.
run_config = tf.estimator.RunConfig(
    tf_random_seed=19830610,
    log_step_count_steps=1000,
    save_checkpoints_secs=EVAL_EVERY_SEC,
    keep_checkpoint_max=1,
    model_dir=model_dir
)


# Single formatted strings: on Python 2, print("a", b) prints a tuple.
print(hparams)
print("")
print("Model Directory: {}".format(run_config.model_dir))
print("Dataset Size: {}".format(TRAIN_SIZE))
print("Batch Size: {}".format(BATCH_SIZE))
print("Steps per Epoch: {}".format(TRAIN_SIZE // BATCH_SIZE))
print("Total Steps: {}".format(TOTAL_STEPS))

[('batch_size', 1000), ('hidden_units', [128, 64]), ('learning_rate', 0.01), ('max_steps', 730), ('num_epochs', 10), ('trainable_embedding', False)]

('Model Directory:', 'models/news/dnn_estimator_hub')
('Dataset Size:', 73124)
('Batch Size:', 1000)
('Steps per Epoch:', 73)
('Total Steps:', 730)


### 5.2 Serving function

In [9]:
def generate_serving_input_fn():
    """Return the serving input receiver function for the exported model.

    The returned function declares a single string placeholder named
    'title' with a variable-length first dimension, and uses that same
    mapping both as the receiver tensors and as the model features.
    """

    def _serving_fn():
        title_placeholder = tf.placeholder(dtype=tf.string, shape=[None])
        serving_inputs = {'title': title_placeholder}

        # Features and receiver tensors are the very same dict object.
        return tf.estimator.export.ServingInputReceiver(
            features=serving_inputs,
            receiver_tensors=serving_inputs,
        )

    return _serving_fn

### 5.3 TrainSpec & EvalSpec

In [18]:
# Training spec: reads every file matching the transformed "train" prefix for
# hparams.num_epochs passes, and caps training at hparams.max_steps steps.
train_spec = tf.estimator.TrainSpec(
    input_fn = generate_tfrecords_input_fn(
        Params.TRANSFORMED_TRAIN_DATA_FILE_PREFIX+"*",
        mode = tf.estimator.ModeKeys.TRAIN,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.batch_size
    ),
    max_steps=hparams.max_steps,
    hooks=None
)

# Evaluation spec: a single pass over every file matching the transformed
# "eval" prefix, plus one exporter that keeps only the most recent export.
eval_spec = tf.estimator.EvalSpec(
    input_fn = generate_tfrecords_input_fn(
        Params.TRANSFORMED_EVAL_DATA_FILE_PREFIX+"*",
        mode=tf.estimator.ModeKeys.EVAL,
        num_epochs=1,
        batch_size=hparams.batch_size
    ),
    exporters=[tf.estimator.LatestExporter(
        name="estimate", # the name of the folder in which the model will be exported to under export
        serving_input_receiver_fn=generate_serving_input_fn(),
        exports_to_keep=1,
        as_text=False)],
    # No step limit is given; presumably evaluation then runs until the input
    # is exhausted - confirm against the EvalSpec documentation.
    steps=None,
    # Same interval as the checkpoint saving configured in run_config.
    throttle_secs=EVAL_EVERY_SEC
)

## 6. Run experiment

In [11]:
from datetime import datetime
import shutil

if Params.TRAIN:
    if not Params.RESUME_TRAINING:
        print("Removing previous training artefacts...")
        # tf.gfile works for local paths and gs:// paths alike.
        # shutil.rmtree(..., ignore_errors=True) silently did nothing for a
        # GCS model directory, so a "fresh" run would pick up stale checkpoints.
        if tf.gfile.Exists(model_dir):
            tf.gfile.DeleteRecursively(model_dir)
    else:
        print("Resuming training...")


    tf.logging.set_verbosity(tf.logging.INFO)

    time_start = datetime.utcnow()
    print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
    print(".......................................")

    estimator = create_estimator(hparams, run_config)

    # Runs training and evaluation as configured by train_spec and eval_spec.
    tf.estimator.train_and_evaluate(
        estimator=estimator,
        train_spec=train_spec,
        eval_spec=eval_spec
    )

    time_end = datetime.utcnow()
    print(".......................................")
    print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
    print("")
    time_elapsed = time_end - time_start
    print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))
else:
    # Call form of print: identical output on Python 2, and valid on Python 3.
    print("Training was skipped!")

Removing previous training artefacts...
Experiment started at 12:25:01
.......................................
INFO:tensorflow:Using /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules to cache modules.
feature columns: 
 [_ModuleEmbeddingColumn(key='title', module_spec=<tensorflow_hub.native_module._ModuleSpec object at 0x107bc3ad0>, trainable=False)]

INFO:tensorflow:Using config: {'_save_checkpoints_secs': 60, '_session_config': None, '_keep_checkpoint_max': 1, '_tf_random_seed': 19830610, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x10d0e2c50>, '_model_dir': 'models/news/dnn_estimator_hub', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 1000, '_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_evaluation_master': '', '_service': None, '_save_summary_steps': 100, '_num_ps_replicas': 0}
INFO:tensorflow:Running 

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_0/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_0/weights
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_1/bias:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_1/bias
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_1/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modul

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_7:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_7
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_8:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_8
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_9:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_9
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_lay

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_12:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_12
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_13:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_13
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_14:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_14
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/inp

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SNLI/Classifier/LinearLayer/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SNLI/Classifier/LinearLayer/weights
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SNLI/Classifier/tanh_layer_0/bias:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SNLI/Classifier/tanh_layer_0/bias
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SNLI/Classifier/tanh_layer_0/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SNLI/Classifier/tanh_layer_0/weights
INFO:t

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Encoder_en/DNN/ResidualHidden_3/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Encoder_en/DNN/ResidualHidden_3/weights
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_0/bias:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_0/bias
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_0/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf2

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_4:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_4
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_5:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_5
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_6:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_6
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_lay

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_1:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_1
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_10:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_10
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_11:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_11
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_2/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_2/weights
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SNLI/Classifier/LinearLayer/bias:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SNLI/Classifier/LinearLayer/bias
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SNLI/Classifier/LinearLayer/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with 

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Encoder_en/DNN/ResidualHidden_2/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Encoder_en/DNN/ResidualHidden_2/weights
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Encoder_en/DNN/ResidualHidden_3/projection:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Encoder_en/DNN/ResidualHidden_3/projection
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Encoder_en/DNN/ResidualHidden_3/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Encoder_en/DNN

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_2:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_2
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_3:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_3
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_4:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_4
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_lay

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-14-12:30:08
INFO:tensorflow:Saving dict for global step 730: accuracy = 0.81403005, average_loss = 0.4607304, global_step = 730, loss = 443.0499
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_0:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_0
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/Embeddings_en/sharded_1:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with Embeddings_en/sharded_1
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_la

INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_1/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_1/weights
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_2/bias:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modules/c6f5954ffa065cdb2f2e604e740e8838bf21a2d3/variables/variables with SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_2/bias
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/title_hub_module_embedding/module/SHARED_RANK_ANSWER/response_encoder_0/tanh_layer_2/weights:0 from checkpoint /var/folders/hp/gzm_7hs931v5kt53p6rywh5w00fqrl/T/tfhub_modul

## 7. Evaluate the model

In [20]:
# Only log errors from here on, to keep the evaluation output readable.
tf.logging.set_verbosity(tf.logging.ERROR)

# Build a new estimator with the same run_config (and hence the same
# model_dir); presumably it evaluates the latest checkpoint saved there -
# confirm against the Estimator.evaluate documentation.
estimator = create_estimator(hparams, run_config)

# Training split: one step with a batch of TRAIN_SIZE examples, i.e. the
# whole split in a single batch, assuming TRAIN_SIZE matches the actual
# number of records.
train_metrics = estimator.evaluate(
    input_fn = generate_tfrecords_input_fn(
        files_pattern= Params.TRANSFORMED_TRAIN_DATA_FILE_PREFIX+"*", 
        mode= tf.estimator.ModeKeys.EVAL,
        batch_size= TRAIN_SIZE), 
    steps=1
)


print("############################################################################################")
print("# Train Measures: {}".format(train_metrics))
print("############################################################################################")

# Evaluation split: same approach, with a single batch of EVAL_SIZE examples.
eval_metrics = estimator.evaluate(
    input_fn=generate_tfrecords_input_fn(
        files_pattern= Params.TRANSFORMED_EVAL_DATA_FILE_PREFIX+"*", 
        mode= tf.estimator.ModeKeys.EVAL,
        batch_size= EVAL_SIZE), 
    steps=1
)
print("")
print("############################################################################################")
print("# Eval Measures: {}".format(eval_metrics))
print("############################################################################################")


feature columns: 
 [_ModuleEmbeddingColumn(key='title', module_spec=<tensorflow_hub.native_module._ModuleSpec object at 0x105d4bd90>, trainable=False)]

############################################################################################
# Train Measures: {'average_loss': 0.29258886, 'accuracy': 0.8818309, 'global_step': 730, 'loss': 21395.268}
############################################################################################

############################################################################################
# Eval Measures: {'average_loss': 0.4607301, 'accuracy': 0.81403005, 'global_step': 730, 'loss': 10633.19}
############################################################################################


## 8. Use Saved Model for Predictions

In [21]:
import os

# Exports are written to <model_dir>/export/<exporter name>/; "estimate" is
# the name given to the LatestExporter in the EvalSpec.
export_dir = os.path.join(model_dir, "export", "estimate")

# Directory listings come back in arbitrary order and may contain entries
# other than finished exports, so taking element [0] is unreliable. Keep only
# the all-digit names (the recorded run exported to ".../1526301016") and use
# the largest one. tf.gfile is used so that gs:// model directories work too.
export_versions = [
    entry.rstrip("/") for entry in tf.gfile.ListDirectory(export_dir)
]
export_versions = [version for version in export_versions if version.isdigit()]
if not export_versions:
    raise IOError("No exported SavedModel found under {}".format(export_dir))

saved_model_dir = os.path.join(export_dir, max(export_versions, key=int))

print(saved_model_dir)
print("")

predictor_fn = tf.contrib.predictor.from_saved_model(
    export_dir = saved_model_dir,
    signature_def_key="predict"
)

# The input key matches the 'title' placeholder declared by the serving
# input function.
output = predictor_fn(
    {
        'title':[
            'Microsoft and Google are joining forces for a new AI framework',
            'A new version of Python is mind blowing',
            'EU is investigating new data privacy policies'
        ]

    }
)
print(output)

models/news/dnn_estimator_hub/export/estimate/1526301016

{u'probabilities': array([[0.01274039, 0.10928415, 0.87797546],
       [0.9858834 , 0.00392233, 0.01019428],
       [0.00130374, 0.8739597 , 0.12473648]], dtype=float32), u'class_ids': array([[2],
       [0],
       [1]]), u'classes': array([['techcrunch'],
       ['github'],
       ['nytimes']], dtype=object), u'logits': array([[-2.3613505 , -0.21217617,  1.8714911 ],
       [ 3.0474007 , -2.4794528 , -1.5243107 ],
       [-3.5699093 ,  2.9378843 ,  0.9910533 ]], dtype=float32)}
