In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
from sklearn.datasets import fetch_california_housing

# Record the TensorFlow build and interpreter version, then every library's
# name/version pair, so the environment behind the outputs is visible.
print(tf.__version__, sys.version_info, sep='\n')
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


1.14.0
sys.version_info(major=3, minor=6, micro=5, releaselevel='final', serial=0)
matplotlib 3.0.3
numpy 1.18.1
pandas 0.24.1
sklearn 0.21.2
tensorflow 1.14.0
tensorflow.python.keras.api._v1.keras 2.2.4-tf


In [2]:
# CSV locations, relative to the notebook's working directory.
train_file = '../tf2/data/train.csv'
eval_file = '../tf2/data/eval.csv'

# Load the training split first, then the evaluation split.
train_df, eval_df = (pd.read_csv(csv_path) for csv_path in (train_file, eval_file))

In [3]:
# Show the first row of each split (training first, evaluation second).
print(train_df.head(1), eval_df.head(1), sep='\n')

   survived   sex   age  n_siblings_spouses  parch  fare  class     deck  \
0         0  male  22.0                   1      0  7.25  Third  unknown   

   embark_town alone  
0  Southampton     n  
   survived   sex   age  n_siblings_spouses  parch  fare  class     deck  \
0         0  male  35.0                   0      0  8.05  Third  unknown   

   embark_town alone  
0  Southampton     y  


In [4]:
# Detach the 'survived' label column from each frame. `pop` removes the
# column in place, so the frames hold features only after this cell runs.
# NOTE: the name `y_trian` (sic) is kept because the training cell refers to it.
y_trian, y_eval = train_df.pop('survived'), eval_df.pop('survived')

In [5]:
# (rows, columns) of the feature frames once the label column is removed.
(train_df.shape, eval_df.shape)

((627, 9), (264, 9))

In [6]:
# Feature names grouped by how they are fed to the model.
categorical_columns = ['sex', 'parch', 'class', 'deck', 'embark_town', 'alone']
numeric_columns = ['age', 'n_siblings_spouses', 'fare']
feature_columns = []

# Categorical features: build a vocabulary from the distinct values found in
# the training frame and wrap the vocabulary column in an indicator column.
for col_name in categorical_columns:
    vocabulary = train_df[col_name].unique()
    print(col_name, vocabulary)
    vocab_column = tf.feature_column.categorical_column_with_vocabulary_list(col_name, vocabulary)
    feature_columns.append(tf.feature_column.indicator_column(vocab_column))

# Numeric features: passed through as float32 numeric columns.
feature_columns.extend(
    tf.feature_column.numeric_column(col_name, dtype=tf.float32)
    for col_name in numeric_columns
)

sex ['male' 'female']
parch [0 1 2 5 3 4]
class ['Third' 'First' 'Second']
deck ['unknown' 'C' 'G' 'A' 'B' 'D' 'F' 'E']
embark_town ['Southampton' 'Cherbourg' 'Queenstown' 'unknown']
alone ['n' 'y']


In [7]:
# Display the assembled list of feature column definitions.
feature_columns

[IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategorica

In [8]:
def df_to_dataset(feature, target, epochs=10, shuffle=True, batch_size=32):
    """Turn a feature table and its labels into batched input tensors.

    Args:
        feature: Column-keyed table of features; it is converted with
            `dict(feature)`, so anything `dict()` accepts works
            (the notebook passes a pandas DataFrame).
        target: Labels aligned row-for-row with `feature`.
        epochs: Number of times the data is repeated.
        shuffle: When true, shuffle with a buffer as large as `feature`.
        batch_size: Number of examples per batch.

    Returns:
        The `(features, labels)` pair produced by `get_next()` on a one-shot
        iterator over the dataset.
    """
    dataset = tf.data.Dataset.from_tensor_slices((dict(feature), target))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(feature))
    dataset = dataset.repeat(epochs).batch(batch_size)
    # `Dataset.make_one_shot_iterator()` is deprecated; the compat.v1 helper
    # builds the same iterator without the deprecation warning.
    return tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()
      

In [12]:
output_dir = 'customized_estimator'
# Make sure the checkpoint directory exists. The previous check was inverted
# (it called mkdir only when the directory was already there, which raises
# FileExistsError); exist_ok=True makes this safe to re-run.
os.makedirs(output_dir, exist_ok=True)
    
def model_fn(features, labels, mode, params):
    """Model function for a feed-forward classifier driven by `tf.estimator`.

    Args:
        features: Input features, mapped through `params['feature_columns']`.
        labels: Class labels; not read when `mode` is PREDICT.
        mode: One of the `tf.estimator.ModeKeys` values.
        params: Dict providing 'feature_columns', 'hidden_units' (one entry
            per hidden layer, giving its width) and 'n_classes'.

    Returns:
        A `tf.estimator.EstimatorSpec` for PREDICT, EVAL or TRAIN. Any other
        mode yields None.
    """
    # Input layer followed by a stack of ReLU dense layers.
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    for width in params['hidden_units']:
        net = tf.layers.dense(net, units=width, activation=tf.nn.relu)

    # Linear output layer; the predicted class is the arg-max over logits.
    logits = tf.layers.dense(net, params['n_classes'], activation=None)
    predicted_class = tf.argmax(logits, 1)

    # Prediction needs no labels, so return before any loss is built.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode,
            predictions={
                'class_ids': predicted_class[:, tf.newaxis],
                'probabilities': tf.nn.softmax(logits),
                'logits': logits,
            },
        )

    # Loss and accuracy metric are built for both EVAL and TRAIN.
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    accuracy_metric = tf.metrics.accuracy(
        labels=labels, predictions=predicted_class, name='acc_op'
    )

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'accuracy': accuracy_metric}
        )

    # Adam with default settings; the global step is advanced on every update.
    adam = tf.train.AdamOptimizer()
    train_op = adam.minimize(loss, global_step=tf.train.get_global_step())

    if mode != tf.estimator.ModeKeys.TRAIN:
        # Unrecognized mode: no spec is produced.
        return None
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    


    

In [13]:
# Wrap the custom model_fn in an Estimator; `params` is handed to model_fn
# and `output_dir` is used as the model directory.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=output_dir,
    params=dict(
        feature_columns=feature_columns,
        hidden_units=[100, 100],
        n_classes=2,
    ),
)

# Train on the training split, repeated for 100 epochs.
estimator.train(input_fn=lambda: df_to_dataset(train_df, y_trian, epochs=100))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'customized_estimator', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7c8a6e7358>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
Instructions for updating:
The old _FeatureColumn APIs are

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into customized_estimator/model.ckpt.
INFO:tensorflow:loss = 2.7851, step = 1
INFO:tensorflow:global_step/sec: 356.298
INFO:tensorflow:loss = 0.5850322, step = 101 (0.283 sec)
INFO:tensorflow:global_step/sec: 554.064
INFO:tensorflow:loss = 0.52686894, step = 201 (0.179 sec)
INFO:tensorflow:global_step/sec: 577.31
INFO:tensorflow:loss = 0.32157117, step = 301 (0.173 sec)
INFO:tensorflow:global_step/sec: 423.045
INFO:tensorflow:loss = 0.5117313, step = 401 (0.236 sec)
INFO:tensorflow:global_step/sec: 401.97
INFO:tensorflow:loss = 0.5294246, step = 501 (0.249 sec)
INFO:tensorflow:global_step/sec: 404.063
INFO:tensorflow:loss = 0.60109615, step = 601 (0.248 sec)
INFO:tensorflow:global_step/sec: 376.974
INFO:tensorflow:loss = 0.42818558, step = 701 (0

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x7f7c8a6e70f0>

In [14]:
# One pass over the evaluation split. Shuffling brings no benefit when
# evaluating, so it is switched off to keep the pass deterministic.
estimator.evaluate(input_fn=lambda: df_to_dataset(eval_df, y_eval, epochs=1, shuffle=False))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-01-30T17:02:20Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from customized_estimator/model.ckpt-1960
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2020-01-30-17:02:20
INFO:tensorflow:Saving dict for global step 1960: accuracy = 0.79924244, global_step = 1960, loss = 0.48430216
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1960: customized_estimator/model.ckpt-1960


{'accuracy': 0.79924244, 'global_step': 1960, 'loss': 0.48430216}