## References

* [Intro to feature engineering with TensorFlow](https://www.youtube.com/watch?v=d12ra3b_M-0) (Josh Gordon, YouTube)

In [None]:
import tensorflow as tf

from problem import column_defaults

# Fix the graph-level random seed of the default graph so that randomised ops
# created in it are repeatable between runs.
# NOTE(review): Estimators build their own graph; confirm this seed actually
# reaches training (RunConfig's `tf_random_seed` may be what is needed).
tf.set_random_seed(0)

In [None]:
# Mapping of CSV column name -> default value; `decode` passes its values to
# `tf.decode_csv` as the record defaults and uses its keys as feature names.
# Presumably `categorical_names` marks 'Income' as a string-valued column --
# TODO confirm against `problem.column_defaults`.
defaults = column_defaults(categorical_names=['Income'])

In [None]:
def clean(line):
    """Tell whether a raw text line contains at least one comma-separated token.

    Used as a dataset filter predicate so that lines without any token
    (e.g. blank lines) are dropped before parsing.

    Args:
        line: Scalar string tensor holding one line of the input file.

    Returns:
        A scalar boolean tensor, true when splitting `line` on ',' yields a
        non-zero number of tokens.
    """
    tokens = tf.string_split([line], ',').values
    token_count = tf.size(tokens)
    return tf.not_equal(token_count, 0)

def decode(line):
    """Parse one CSV line into a dict of column name -> parsed tensor.

    The module-level `defaults` mapping supplies both the per-column record
    defaults for `tf.decode_csv` (its values) and the column names (its keys).

    Args:
        line: Scalar string tensor holding one CSV record.

    Returns:
        A dict keyed by the keys of `defaults`, in the same order, whose
        values are the tensors produced by `tf.decode_csv`.
    """
    record_defaults = list(defaults.values())
    fields = tf.decode_csv(line, record_defaults)
    return {name: field for name, field in zip(defaults.keys(), fields)}

def create_train_input_fn(path, batch_size=32, buffer_size=1000):
    """Build an input function that streams shuffled training batches.

    Args:
        path: File name(s) handed to `tf.data.TextLineDataset`.
        batch_size: Number of parsed records per batch.
        buffer_size: Size of the shuffle buffer.

    Returns:
        A zero-argument callable returning `(features, labels)`, where
        `features` is the dict produced by `decode` without the 'Income'
        entry and `labels` is a boolean tensor that is true where 'Income'
        equals ' >50K'.
    """
    def function():
        # Read raw lines, drop those without any token, then parse each one.
        lines = tf.data.TextLineDataset(path)
        records = lines.filter(clean).map(decode)
        # Shuffle within the buffer, repeat indefinitely, then form batches.
        shuffled = records.shuffle(buffer_size=buffer_size)
        batches = shuffled.repeat().batch(batch_size)
        features = batches.make_one_shot_iterator().get_next()
        # Remove the label column from the features; the compared literal
        # keeps its leading space.
        labels = tf.equal(features.pop('Income'), ' >50K')
        return features, labels
    return function

def create_test_input_fn(path, batch_size=32):
    """Build an input function that reads the evaluation data once, in order.

    Unlike the training input function, the data is neither shuffled nor
    repeated, so the returned tensors signal end-of-input after one pass.

    Args:
        path: File name(s) handed to `tf.data.TextLineDataset`.
        batch_size: Number of parsed records per batch.

    Returns:
        A zero-argument callable returning `(features, labels)`, where
        `features` is the dict produced by `decode` without the 'Income'
        entry and `labels` is a boolean tensor that is true where 'Income'
        equals ' >50K.'.
    """
    def function():
        dataset = (
            # Use the core `tf.data` API, exactly as the training input
            # function does; the `tf.contrib.data` alias is deprecated and
            # missing from later TensorFlow releases.
            tf.data.TextLineDataset(path)
                # Drop the first line of the file (presumably a non-data
                # header line -- confirm against the test file).
                .skip(1)
                .filter(clean)
                .map(decode)
                .batch(batch_size)
        )
        columns = dataset.make_one_shot_iterator().get_next()
        # The positive label here carries a trailing period, unlike the
        # training label (presumably how the test file spells it -- verify).
        income = tf.equal(columns.pop('Income'), ' >50K.')
        return columns, income
    return function

In [None]:
# Input functions for the estimator: training uses the default batch size (32)
# and shuffle buffer (1000); testing uses the default batch size (32).
train_input_fn = create_train_input_fn('data/train.csv')
test_input_fn = create_test_input_fn('data/test.csv')

In [None]:
# Only the numeric 'Age' column is fed to the model; every other parsed column
# is present in the features dict but has no feature column defined here.
feature_columns = [
    tf.feature_column.numeric_column('Age'),
]

# Binary classifier with three hidden layers (256, 128 and 64 units).
# Checkpoints and summaries go to `model_dir`.
estimator = tf.estimator.DNNClassifier(
    hidden_units=[256, 128, 64], feature_columns=feature_columns, 
    n_classes=2, model_dir='/tmp/model/deep')

# The training input repeats indefinitely, so `steps` is what bounds training.
estimator.train(input_fn=train_input_fn, steps=2000)

In [None]:
# Evaluate on the test input. No `steps` is given and the test dataset is not
# repeated, so evaluation should stop once the input is exhausted.
estimator.evaluate(input_fn=test_input_fn)