## Import libraries 

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Directory passed to the estimator below as both `model_dir` and
# `warm_start_from`.
model_path = "./chk"

## Define feature columns

In [3]:
# Bucket count shared by the hashed `occupation` column and by the crossed
# columns defined further down in this cell.
_HASH_BUCKET_SIZE = 1000

# --- Numeric (continuous) features ---
age = tf.feature_column.numeric_column(key='age')
education_num = tf.feature_column.numeric_column(key='education-num')
capital_gain = tf.feature_column.numeric_column(key='capital-gain')
capital_loss = tf.feature_column.numeric_column(key='capital-loss')
hours_per_week = tf.feature_column.numeric_column(key='hours-per-week')

# --- Categorical features with an explicit vocabulary ---
# The position of a value in its list is the id it is mapped to, so the
# order of every list below must not be changed.
gender = tf.feature_column.categorical_column_with_vocabulary_list(
    key='sex',
    vocabulary_list=['Male', 'Female'])

education = tf.feature_column.categorical_column_with_vocabulary_list(
    key='education',
    vocabulary_list=[
        'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
        'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
        '5th-6th', '10th', '1st-4th', 'Preschool', '12th'])

marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
    key='marital-status',
    vocabulary_list=[
        'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
        'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed'])

relationship = tf.feature_column.categorical_column_with_vocabulary_list(
    key='relationship',
    vocabulary_list=[
        'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
        'Other-relative'])

workclass = tf.feature_column.categorical_column_with_vocabulary_list(
    key='workclass',
    vocabulary_list=[
        'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',
        'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])

# --- Categorical feature without a vocabulary (hashing example) ---
occupation = tf.feature_column.categorical_column_with_hash_bucket(
    key='occupation',
    hash_bucket_size=_HASH_BUCKET_SIZE)

# Transformations.
# Bucketized view of `age`; used directly in the wide columns and as one
# component of the three-way feature cross below.
age_buckets = tf.feature_column.bucketized_column(
    source_column=age,
    boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

# Wide columns and deep columns.
# Single (un-crossed) categorical columns for the linear part of the model.
base_columns = [
    education,
    marital_status,
    relationship,
    workclass,
    occupation,
    age_buckets,
    gender,
]

# Feature crosses, each hashed into `_HASH_BUCKET_SIZE` buckets.
crossed_columns = [
    tf.feature_column.crossed_column(
        keys=['education', 'occupation'],
        hash_bucket_size=_HASH_BUCKET_SIZE),
    tf.feature_column.crossed_column(
        keys=[age_buckets, 'education', 'occupation'],
        hash_bucket_size=_HASH_BUCKET_SIZE),
]

# Everything that is passed as `linear_feature_columns`.
wide_columns = base_columns + crossed_columns

# Columns for the DNN part of the model, in three groups:
#   1. the numeric columns, used as-is;
#   2. indicator columns wrapping the vocabulary-based categorical columns;
#   3. an 8-dimensional embedding of the hashed `occupation` column
#      (to show an example of embedding).
deep_columns = (
    [age, education_num, capital_gain, capital_loss, hours_per_week]
    + [
        tf.feature_column.indicator_column(categorical)
        for categorical in (
            gender, workclass, education, marital_status, relationship)
    ]
    + [tf.feature_column.embedding_column(occupation, dimension=8)]
)


In [4]:
# Hidden-layer sizes handed to the classifiers below (`hidden_units` for the
# DNN classifier, `dnn_hidden_units` for the combined classifier).
hidden_units = [100, 75, 50, 25]

## Use DNN classifier

In [None]:
# DNN-only alternative built from the deep columns alone.
# NOTE: the "Deep & Wide" cell that follows rebinds `classifier`, so running
# both cells in order leaves the combined model in effect.
classifier = tf.estimator.DNNClassifier(
    hidden_units=hidden_units,
    feature_columns=deep_columns,
    n_classes=2,
)

## Use the Wide & Deep model

In [5]:
# Wide & Deep classifier: `wide_columns` feed the linear part and
# `deep_columns` feed the DNN part; checkpoints are written to `model_path`.
#
# Warm-starting is requested only when `model_path` already contains a
# checkpoint. Passing `warm_start_from=model_path` unconditionally makes the
# first `train()` call on a fresh directory fail, because there is no
# checkpoint to initialise from, which breaks Restart & Run All.
# When a checkpoint does exist the behaviour is the same as before.
classifier = tf.estimator.DNNLinearCombinedClassifier(
        model_dir=model_path,
        linear_feature_columns=wide_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=hidden_units,
        warm_start_from=(
            model_path if tf.train.latest_checkpoint(model_path) else None))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './chk', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f6c60f8ef98>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


## Set up training & evaluation pipeline

In [6]:
# How many times `train_input_fn` repeats the training dataset.
NUM_EPOCHS = 30
# Shuffle buffer size used by `train_input_fn`.
# NOTE(review): presumably the number of records in census.tfrecord, which
# would make the shuffle a full shuffle; confirm against the data file.
BUFFER_SIZE = 32561

def parse_func(s_example):
    """Parse one serialized `tf.Example` into a dict of scalar tensors.

    Args:
        s_example: a scalar string tensor holding one serialized example.

    Returns:
        A dict keyed by feature name. String features are parsed as
        `tf.string` scalars and numeric features as `tf.int64` scalars; the
        raw `'label'` string is still included.
    """
    # Every feature is a scalar; only the dtype differs.
    text = tf.FixedLenFeature((), tf.string)
    integer = tf.FixedLenFeature((), tf.int64)

    schema = {
        'workclass': text,
        'age': integer,
        'fnlwgt': integer,
        'education': text,
        'education-num': integer,
        'marital-status': text,
        'occupation': text,
        'relationship': text,
        'race': text,
        'sex': text,
        'capital-gain': integer,
        'capital-loss': integer,
        'hours-per-week': integer,
        'native-country': text,
        'label': text,
    }
    return tf.parse_single_example(s_example, features=schema)

def transform(features):
    """Clean the string features and split off a binary label.

    Args:
        features: dict of parsed tensors, including a string `'label'`.
            The dict is modified in place.

    Returns:
        A `(features, label)` pair. `features` no longer contains `'label'`
        and its string entries are whitespace-stripped. `label` is an int32
        tensor that is 1 when the stripped label equals '>50K' or '>50K.'
        and 0 otherwise.
    """
    string_keys = ('workclass', 'education', 'marital-status', 'occupation',
                   'relationship', 'race', 'sex', 'native-country', 'label')
    for name in string_keys:
        features[name] = tf.strings.strip(features[name])

    raw_label = features.pop('label')
    # The label is accepted both with and without a trailing period.
    is_high = tf.equal(raw_label, '>50K')
    is_high_dotted = tf.equal(raw_label, '>50K.')
    is_positive = tf.logical_or(is_high, is_high_dotted)
    return features, tf.cast(is_positive, tf.int32)


def eval_input_fn(batch_size):
    """Build the evaluation dataset from 'census_test.tfrecord'.

    Records are parsed, transformed into `(features, label)` pairs and
    batched; they are neither shuffled nor repeated.

    Args:
        batch_size: number of examples per batch.

    Returns:
        A `tf.data.Dataset` of batched `(features, label)` pairs.
    """
    records = tf.data.TFRecordDataset('census_test.tfrecord')
    examples = records.map(parse_func).map(transform)
    return examples.batch(batch_size)
def train_input_fn(batch_size):
    """Build the training dataset from 'census.tfrecord'.

    Serialized records are shuffled with a buffer of `BUFFER_SIZE`, repeated
    `NUM_EPOCHS` times, then parsed, transformed into `(features, label)`
    pairs and batched.

    Args:
        batch_size: number of examples per batch.

    Returns:
        A `tf.data.Dataset` of batched `(features, label)` pairs.
    """
    records = tf.data.TFRecordDataset('census.tfrecord')
    # Shuffling happens on the raw records, before the repeat.
    records = records.shuffle(BUFFER_SIZE).repeat(NUM_EPOCHS)
    examples = records.map(parse_func).map(transform)
    return examples.batch(batch_size)



## Train the model for 30 epochs

In [11]:
# Train until `train_input_fn` is exhausted, using batches of 256 examples.
# The input function is wrapped in a lambda because `train` expects a
# zero-argument callable.
r = classifier.train(
    input_fn=lambda: train_input_fn(batch_size=256),
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='./chk', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})
INFO:tensorflow:Warm-starting from: ('./chk',)
INFO:tensorflow:Warm-starting variable: dnn/input_from_feature_columns/input_layer/occupation_embedding/embedding_weights; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dnn/hiddenlayer_0/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dnn/hiddenlayer_0/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dnn/hiddenlayer_1/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dnn/hiddenlayer_1/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dnn/hiddenlayer_2/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dnn/hiddenlayer_2/bias; prev_var_name: Unchanged
INFO:

## Evaluate the model 

In [7]:
# Evaluate on the test set in batches of 256 and display the metrics dict.
eval_result = classifier.evaluate(
    input_fn=lambda: eval_input_fn(batch_size=256),
)
eval_result

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-12-08-23:11:56
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./chk/model.ckpt-7632
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-12-08-23:11:58
INFO:tensorflow:Saving dict for global step 7632: accuracy = 0.85559857, accuracy_baseline = 0.76377374, auc = 0.9103634, auc_precision_recall = 0.78067905, average_loss = 0.31023094, global_step = 7632, label/mean = 0.23622628, loss = 78.919846, precision = 0.7240036, prediction/mean = 0.24358582, recall = 0.62818515
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 7632: ./chk/model.ckpt-7632


{'accuracy': 0.85559857,
 'accuracy_baseline': 0.76377374,
 'auc': 0.9103634,
 'auc_precision_recall': 0.78067905,
 'average_loss': 0.31023094,
 'label/mean': 0.23622628,
 'loss': 78.919846,
 'precision': 0.7240036,
 'prediction/mean': 0.24358582,
 'recall': 0.62818515,
 'global_step': 7632}

## Save the model

In [None]:
# Parsing spec for serving requests: the scalar features used by the model's
# feature columns. Compared with the spec in `parse_func`, 'fnlwgt', 'race',
# 'native-country' and 'label' are left out.
# NOTE(review): the training pipeline strips whitespace from string features
# in `transform`; nothing equivalent is applied on this serving path, so
# clients presumably have to send already-stripped strings. Confirm.
feature_spec = {
    name: tf.FixedLenFeature((), dtype)
    for name, dtype in (
        ('workclass', tf.string),
        ('age', tf.int64),
        ('education', tf.string),
        ('education-num', tf.int64),
        ('marital-status', tf.string),
        ('occupation', tf.string),
        ('relationship', tf.string),
        ('sex', tf.string),
        ('capital-gain', tf.int64),
        ('capital-loss', tf.int64),
        ('hours-per-week', tf.int64),
    )
}

# Disabled alternative, kept for reference: derive the spec from the feature
# columns instead of spelling it out by hand.
#
#   my_feature_columns = [age,education_num,capital_gain,capital_loss,occupation,workclass,relationship,marital_status,
#                     education,hours_per_week]
#   feature_spec= tf.feature_column.make_parse_example_spec(my_feature_columns)

# Serving input receiver that parses serialized examples with `feature_spec`.
export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
    feature_spec)

# Export under ./saved_model and keep the returned value for the next cell.
export_dir = classifier.export_savedmodel(
    export_dir_base="./saved_model",
    serving_input_receiver_fn=export_input_fn,
)

### Show the path to the saved model

In [None]:
# Display the value returned by `classifier.export_savedmodel(...)` in the
# previous cell.
export_dir