In [49]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

In [50]:
import tensorflow as tf
import pandas as pd

In [51]:
print('Using TensorFlow version %s' % (tf.__version__))

Using TensorFlow version 2.4.0


In [52]:
train_data_path = '../data/adult.data.csv'
test_data_path = '../data/adult.test.csv'

export_dir = 'export'

model_path = str('wide-deep-model.pkl')

In [53]:
CATEGORICAL_COLUMNS = ["workclass", "education", "marital_status", "occupation", "relationship", "race", "gender", "native_country"]
NUMERIC_COLUMNS = ["age", "education_num", "capital_gain", "capital_loss", "hours_per_week"]
CSV_COLUMNS = ["age", "workclass", "fnlwgt", "education", "education_num", "marital_status", "occupation", "relationship", "race", "gender", "capital_gain", "capital_loss", "hours_per_week", "native_country", "income_bracket"]

In [54]:
X_train = pd.read_csv(train_data_path, header=None, names=CSV_COLUMNS)
X_test = pd.read_csv(test_data_path, header=None, names=CSV_COLUMNS)

In [55]:
income_train = X_train["income_bracket"]
income_test = X_test["income_bracket"]

X_train['income_bracket'] = (X_train['income_bracket'] == ' >50K').astype(int)
X_test['income_bracket'] = (X_test['income_bracket'] == ' >50K').astype(int)

Y_train = X_train["income_bracket"]
Y_test = X_test["income_bracket"]

X_train = X_train.drop('fnlwgt', axis=1)
X_test = X_test.drop('fnlwgt', axis=1)

X_train = X_train.drop('income_bracket', axis=1)
X_test = X_test.drop('income_bracket', axis=1)

Y_train.head()

0    0
1    0
2    0
3    0
4    0
Name: income_bracket, dtype: int64

In [56]:
feature_columns = []
categorical_columns = []
numeric_columns = []
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = X_train[feature_name].unique()
    col = tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary)
    categorical_columns.append(col)
    feature_columns.append(col)

for feature_name in NUMERIC_COLUMNS:
    col = tf.feature_column.numeric_column(feature_name, dtype=tf.float32)
    numeric_columns.append(col)
    feature_columns.append(col)

print(feature_columns)

[VocabularyListCategoricalColumn(key='workclass', vocabulary_list=(' State-gov', ' Self-emp-not-inc', ' Private', ' Federal-gov', ' Local-gov', ' ?', ' Self-emp-inc', ' Without-pay', ' Never-worked'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='education', vocabulary_list=(' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th', ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th', ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th', ' Preschool', ' 12th'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='marital_status', vocabulary_list=(' Never-married', ' Married-civ-spouse', ' Divorced', ' Married-spouse-absent', ' Separated', ' Married-AF-spouse', ' Widowed'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='occupation', vocabulary_list=(' Adm-clerical', ' Exec-managerial', ' Handlers-cleaners', ' Prof-specialty', ' Other-service', ' Sal

In [57]:
def make_input_function(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if shuffle:
            ds = ds.shuffle(1000)

        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    return input_function


In [59]:
train_input_function = make_input_function(X_train, Y_train)
eval_input_function = make_input_function(X_test, Y_test, num_epochs = 1, shuffle=False)

In [60]:
deep_columns = []
for col in categorical_columns:
    deep_columns.append(tf.feature_column.indicator_column(col))

for col in numeric_columns:
    deep_columns.append(col)

hidden_units = [100, 75, 50, 25]

wide_deep = tf.estimator.DNNLinearCombinedClassifier(  
        model_dir = model_path,              
        linear_feature_columns=feature_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=hidden_units)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'wide-deep-model.pkl', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [61]:
input_fn = make_input_function(X_train, Y_train)
wide_deep.train(input_fn=input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from wide-deep-model.pkl/model.ckpt-208952
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 208952...
INFO:tensorflow:Saving checkpoints for 208952 into wide-deep-model.pkl/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 208952...
INFO:tensorflow:loss = 0.2863754, step = 208952
INFO:tensorflow:global_step/sec: 159.465
INFO:tensorflow:loss = 0.260288, step = 209052 (0.628 sec)
INFO:tensorflow:global_step/sec: 284.73
INFO:tensorflow:loss = 0.3113423, step = 209152 (0.352 sec)
INFO:tensorflow:global_step/sec: 291.165
INFO:tensorflow:loss = 0.39931726, step = 209252 (0.343 sec)
INFO:tensorflow:global_step/sec: 294.086
INFO:tensorflow:loss = 0.55965734, step = 209352 (

<tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifierV2 at 0x7f1c2beafa30>

In [62]:
eval_fn = make_input_function(X_test, Y_test, num_epochs=1, shuffle= False)
result = wide_deep.evaluate(input_fn = eval_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-01-05T03:17:19Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from wide-deep-model.pkl/model.ckpt-219132
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 2.45656s
INFO:tensorflow:Finished evaluation at 2021-01-05-03:17:21
INFO:tensorflow:Saving dict for global step 219132: accuracy = 0.8559405, accuracy_baseline = 0.7637916, auc = 0.91029245, auc_precision_recall = 0.7776968, average_loss = 0.3114694, global_step = 219132, label/mean = 0.23620838, loss = 0.31137538, precision = 0.730344, prediction/mean = 0.2421283, recall = 0.61846554
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 219132: wide-deep-model.pkl/model.ckpt-219132


In [63]:
print("Accuracy: {accuracy:0.3f}\n".format(**result))

Accuracy: 0.856



In [65]:
def prediction_input_function(features, batch_size = 256):
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)


In [68]:
income_type = ['<=50K', '>50K']

i = 5312
newX = X_test.iloc[i:i+1]
newY = income_test[i]
predictions = wide_deep.predict(input_fn = lambda: prediction_input_function(newX))

for pred_dict in predictions: 
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]   
        
    print('Prediction is "{}" ({:.1f}%), Real value: "{}""'.format(income_type[class_id], 100 * probability, newY))


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from wide-deep-model.pkl/model.ckpt-219132
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Prediction is "<=50K" (97.3%), Real value: " <=50K""


In [70]:
feature_spec = tf.feature_column.make_parse_example_spec(deep_columns)

# Build receiver function, and export.
serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
export_dir = wide_deep.export_saved_model('export', serving_input_receiver_fn)
print('export dir:', export_dir)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']
INFO:tensorflow:Signatures INCLUDED in export for Regress: ['regression']
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Restoring parameters from wide-deep-model.pkl/model.ckpt-219132
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: export/temp-1609816788/saved_model.pb
export dir: b'export/1609816788'


In [71]:
imported = tf.saved_model.load(export_dir)

In [72]:
def predict(i):
    inputs = X_test[i:i+1]
    example = tf.train.Example()
    for col in inputs:
        # if col in CATEGORICAL_COLUMNS:  
        #     cat_value = np.where(categorial_map[col] ==  inputs[col].tolist()[0])[0]
        #     example.features.feature[col].float_list.value.extend(cat_value)
        if col in CATEGORICAL_COLUMNS:  
            cat_value = inputs[col].tolist()[0][0].encode('utf8')
            example.features.feature[col].bytes_list.value.extend([cat_value])
        if col in NUMERIC_COLUMNS:
            example.features.feature[col].float_list.value.extend(inputs[col])
    return imported.signatures["predict"](examples=tf.constant([example.SerializeToString()]))
 

In [73]:
predictions = predict(334)
 
class_ids = predictions['class_ids'].numpy()[0][0]
probability = predictions['probabilities'].numpy()[0][class_ids]
print('Prediction is "{}" ({:.1f}%)'.format(income_type[class_ids], 100 * probability))


Prediction is ">50K" (93.1%)
