# Setup

- uses functions from [utils](https://github.com/IanQS/blogpostcode/blob/master/src/Tf_Exploration/exploration/utils.py)

# Custom Estimator

Do yourself a favor and read [Creating Custom Estimators](https://www.tensorflow.org/guide/custom_estimators) which work BEAUTIFULLY with the `tf.data.Datasets`. 

Yeah, it's not as "low level" but if all you care about is defining your custom #leet #complex model, the custom estimator handles all the cruft of getting it from experiment to production

But of course, I'll show you how to make one here ;) 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)

#tf.enable_eager_execution()
import numpy as np
import os
import datetime
import tqdm
import sys
import pprint

from Tf_Exploration.exploration.utils import FeatureProto, dataset_config

1.12.0


# Setup

1) Get the training and testing data

2) get the feature columns

In [3]:
# Get the training data

filename_list = []
evaluation_list = []
for root, dirs, files in os.walk('processed_data/'):
    for f in files:
        path = os.path.join(root, f)
        if "tfrecord" in f:
            if "train" in f:
                filename_list.append(path)
            elif "test" in f:
                evaluation_list.append(path)
            else:
                print('Unmatched: {}'.format(path))
        
print("Training Data")
pprint.pprint(filename_list)
print('*' * 10)
print("\nEvaluation Data")
pprint.pprint(evaluation_list)


# Instantiate our FeatureProto from Tutorial 1: https://github.com/IanQS/blogpostcode/blob/master/src/Tf_Exploration/exploration/Notebook1-data_exploration.ipynb
# to get the columns

feature_proto = FeatureProto(one_hot=False)
columns = feature_proto.get_feature_columns()

Training Data
['processed_data/covtype_train_1_2019-01-06_02:00:00.tfrecord',
 'processed_data/covtype_train_3_2019-01-06_02:00:00.tfrecord',
 'processed_data/covtype_train_0_2019-01-06_02:00:00.tfrecord',
 'processed_data/covtype_train_2_2019-01-06_02:00:00.tfrecord']
**********

Evaluation Data
['processed_data/covtype_test_2019-01-06_02:00:00.tfrecord']


# Canned Estimator

In [4]:
# Util functions
def wrap_training_data(tf_record_train_list, feature_proto, num_cpus):
    def input_fn_train(): # returns x, y (where y represents label's class index).
        return dataset_config(filenames=tf_record_train_list, batch_size=64, mapper=feature_proto.unpack, num_cpus=num_cpus, return_dataset=True)
    return input_fn_train

def wrap_testing_data(tf_record_test_list, feature_proto, num_cpus):
    def input_fn_eval(): # returns x, y (where y represents label's class index).
        return dataset_config(filenames=tf_record_test_list, batch_size=2048, mapper=feature_proto.unpack, num_cpus=num_cpus, return_dataset=True)
    return input_fn_eval


num_cpus = os.cpu_count()

input_fn_train = wrap_training_data(filename_list, feature_proto, num_cpus)
input_fn_eval = wrap_testing_data(evaluation_list, feature_proto, num_cpus)

def evaluate(estimator):
    # Fit model.
    fit = estimator.evaluate(input_fn=input_fn_eval)
    print(fit)
    estimator.train(input_fn=input_fn_train)
    fit = estimator.evaluate(input_fn=input_fn_eval)
    print(fit)

In [5]:
from tensorflow.estimator import DNNClassifier

In [6]:


estimator = DNNClassifier(
    feature_columns=columns,
    model_dir='save_dir/canned_fuk/',
    n_classes=8,
    hidden_units=[256, 16],
    optimizer=lambda: tf.train.AdamOptimizer(
        learning_rate=tf.train.exponential_decay(
            learning_rate=0.1,
            global_step=tf.train.get_global_step(),
            decay_steps=10000,
            decay_rate=0.96)
    )
)

evaluate(estimator)

{'accuracy': 0.32639432, 'average_loss': 190.72383, 'loss': 388818.97, 'global_step': 0}
{'accuracy': 0.48942798, 'average_loss': 1.205737, 'loss': 2458.0745, 'global_step': 5701}


# Baseline Estimator

In [7]:
from tensorflow.estimator import BaselineClassifier

classifier = BaselineClassifier(n_classes=8, model_dir='save_dir/baseline')

evaluate(classifier)

{'accuracy': 0.0, 'average_loss': 2.079439, 'loss': 4239.246, 'global_step': 0}
{'accuracy': 0.48942798, 'average_loss': 1.2047182, 'loss': 2455.9978, 'global_step': 5701}


# Custom Estimator

In [8]:
# Util functions
def wrap_training_data(tf_record_train_list, feature_proto, num_cpus):
    def input_fn_train(): # returns x, y (where y represents label's class index).
        return dataset_config(filenames=tf_record_train_list, batch_size=64, mapper=feature_proto.unpack, num_cpus=num_cpus)
    return input_fn_train

def wrap_testing_data(tf_record_test_list, feature_proto, num_cpus):
    def input_fn_eval(): # returns x, y (where y represents label's class index).
        return dataset_config(filenames=tf_record_test_list, batch_size=2048, mapper=feature_proto.unpack, num_cpus=num_cpus)
    return input_fn_eval


num_cpus = os.cpu_count()

input_fn_train = wrap_training_data(filename_list, feature_proto, num_cpus)
input_fn_eval = wrap_testing_data(evaluation_list, feature_proto, num_cpus)

def evaluate(estimator):
    # Fit model.
    fit = estimator.evaluate(input_fn=input_fn_eval)
    print(fit)
    estimator.train(input_fn=input_fn_train)
    fit = estimator.evaluate(input_fn=input_fn_eval)
    print(fit)

In [11]:
def model_definition(features, feature_columns, labels):
    """
    Implementation of your #leet model
    
    Params:
        input_layer: tf.feature_column.input_layer
    
    returns logits
    """
    
    # Define your network
    print(features)
    print(feature_columns)
    print(labels)
    input_layer = tf.feature_column.input_layer(features, feature_columns)
    layer_1 = tf.layers.Dense(256, activation=tf.nn.relu)(input_layer)
    layer_2 = tf.layers.Dense(16, activation=tf.nn.relu)(layer_1)
    logits = tf.layers.Dense(8)(layer_2)
    
    
    # Define your prediction, loss, accuracy, and train_op
    predictions = {'Class_ID': tf.argmax(input=logits, axis=1)}

    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))
    accuracy = tf.metrics.accuracy(labels, predictions['Class_ID'])

    optimizer = tf.train.AdamOptimizer(0.01)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return predictions, loss, accuracy, train_op
    

def model_wrapper(feature_proto, abstract_model):
    def custom_model(features, labels, mode):
        if mode == tf.estimator.ModeKeys.PREDICT:
            tf.logging.info("my_model_fn: PREDICT, {}".format(mode))
        elif mode == tf.estimator.ModeKeys.EVAL:
            tf.logging.info("my_model_fn: EVAL, {}".format(mode))
        elif mode == tf.estimator.ModeKeys.TRAIN:
            tf.logging.info("my_model_fn: TRAIN, {}".format(mode))
        
        predictions, loss, accuracy, train_op = model_definition(
            features, feature_proto.get_feature_columns(), labels
        )

        # Prediction
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)
        
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, 
                loss=loss, 
                eval_metric_ops={'custom_accuracy': accuracy}
            )
        
        # Track the accuracy while in training mode
        tf.summary.scalar('my_accuracy', accuracy[1])
        return tf.estimator.EstimatorSpec( mode, loss=loss, train_op=train_op)
    return custom_model


classifier = tf.estimator.Estimator(
    model_fn=model_wrapper(feature_proto, model_definition),
    model_dir='save_dir/custom/'
)

evaluate(classifier)


{'Aspect': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=float32>, 'Elevation': <tf.Tensor 'IteratorGetNext:1' shape=(?,) dtype=float32>, 'Hillshade_3pm': <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=float32>, 'Hillshade_9am': <tf.Tensor 'IteratorGetNext:3' shape=(?,) dtype=float32>, 'Hillshade_Noon': <tf.Tensor 'IteratorGetNext:4' shape=(?,) dtype=float32>, 'Horizontal_Distance_To_Fire_Points': <tf.Tensor 'IteratorGetNext:5' shape=(?,) dtype=float32>, 'Horizontal_Distance_To_Hydrology': <tf.Tensor 'IteratorGetNext:6' shape=(?,) dtype=float32>, 'Horizontal_Distance_To_Roadways': <tf.Tensor 'IteratorGetNext:7' shape=(?,) dtype=float32>, 'Slope': <tf.Tensor 'IteratorGetNext:8' shape=(?,) dtype=float32>, 'Soil_Type': <tf.Tensor 'IteratorGetNext:9' shape=(?, 40) dtype=float32>, 'Vertical_Distance_To_Hydrology': <tf.Tensor 'IteratorGetNext:10' shape=(?,) dtype=float32>, 'Wilderness_Area': <tf.Tensor 'IteratorGetNext:11' shape=(?,) dtype=float32>}
[_NumericColumn(key='Elevation', shape=

InvalidArgumentError: logits and labels must be broadcastable: logits_size=[2048,8] labels_size=[256,8]
	 [[node softmax_cross_entropy_with_logits (defined at <ipython-input-11-e7ef0127e24f>:24)  = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](dense_2/BiasAdd, softmax_cross_entropy_with_logits/Reshape_1)]]

Caused by op 'softmax_cross_entropy_with_logits', defined at:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 427, in run_forever
    self._run_once()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 1440, in _run_once
    handle._run()
  File "/usr/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3191, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-e7ef0127e24f>", line 67, in <module>
    evaluate(classifier)
  File "<ipython-input-8-a53461c50d8d>", line 20, in evaluate
    fit = estimator.evaluate(input_fn=input_fn_eval)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 478, in evaluate
    return _evaluate()
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 460, in _evaluate
    self._evaluate_build_graph(input_fn, hooks, checkpoint_path))
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1484, in _evaluate_build_graph
    self._call_model_fn_eval(input_fn, self.config))
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1520, in _call_model_fn_eval
    features, labels, model_fn_lib.ModeKeys.EVAL, config)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1195, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "<ipython-input-11-e7ef0127e24f>", line 42, in custom_model
    features, feature_proto.get_feature_columns(), labels
  File "<ipython-input-11-e7ef0127e24f>", line 24, in model_definition
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 1864, in softmax_cross_entropy_with_logits_v2
    precise_logits, labels, name=name)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 7210, in softmax_cross_entropy_with_logits
    name=name)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/itq/.virtualenvs/test/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): logits and labels must be broadcastable: logits_size=[2048,8] labels_size=[256,8]
	 [[node softmax_cross_entropy_with_logits (defined at <ipython-input-11-e7ef0127e24f>:24)  = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](dense_2/BiasAdd, softmax_cross_entropy_with_logits/Reshape_1)]]
