# Setup

- uses functions from [utils](https://github.com/IanQS/blogpostcode/blob/master/src/Tf_Exploration/exploration/utils.py)

# Custom Estimator

Do yourself a favor and read [Creating Custom Estimators](https://www.tensorflow.org/guide/custom_estimators). Custom estimators work BEAUTIFULLY with `tf.data.Dataset` input pipelines.

Yeah, it's not as "low level", but if all you care about is defining your custom #leet #complex model, the custom estimator handles all the cruft of getting it from experiment to production.

But of course, I'll show you how to make one here ;) 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)

#tf.enable_eager_execution()
import numpy as np
import os
import datetime
import tqdm
import sys
import pprint

from Tf_Exploration.exploration.utils import FeatureProto, dataset_config

1.12.0


# Setup

1) Get the training and testing data

2) Get the feature columns

In [3]:
# Collect the TFRecord files, split into training and evaluation lists by filename

filename_list = []
evaluation_list = []
for root, _dirs, files in os.walk('processed_data/'):
    for fname in files:
        if "tfrecord" not in fname:
            continue  # not a TFRecord file: ignore silently
        path = os.path.join(root, fname)
        if "train" in fname:
            filename_list.append(path)
        elif "test" in fname:
            evaluation_list.append(path)
        else:
            # A TFRecord that is neither train nor test: report it
            print('Unmatched: {}'.format(path))

print("Training Data")
pprint.pprint(filename_list)
print('*' * 10)
print("\nEvaluation Data")
pprint.pprint(evaluation_list)


# Instantiate our FeatureProto from Tutorial 1: https://github.com/IanQS/blogpostcode/blob/master/src/Tf_Exploration/exploration/Notebook1-data_exploration.ipynb
# to get the columns

feature_proto = FeatureProto(one_hot=False)
columns = feature_proto.get_feature_columns()

Training Data
['processed_data/covtype_train_1_2019-01-06_02:00:00.tfrecord',
 'processed_data/covtype_train_3_2019-01-06_02:00:00.tfrecord',
 'processed_data/covtype_train_0_2019-01-06_02:00:00.tfrecord',
 'processed_data/covtype_train_2_2019-01-06_02:00:00.tfrecord']
**********

Evaluation Data
['processed_data/covtype_test_2019-01-06_02:00:00.tfrecord']


# Canned Estimator

In [4]:
# Util functions
def wrap_training_data(tf_record_train_list, feature_proto, num_cpus, batch_size=64):
    """
    Build the zero-argument training input_fn that an Estimator expects.

    Params:
        tf_record_train_list: TFRecord file paths, forwarded to dataset_config as `filenames`
        feature_proto: object whose `unpack` method is used as the dataset mapper
        num_cpus: forwarded to dataset_config as `num_cpus`
        batch_size: forwarded to dataset_config as `batch_size`
            (defaults to 64, the value that used to be hard-coded)

    returns a callable taking no arguments that returns whatever dataset_config returns
    """
    def input_fn_train(): # returns x, y (where y represents label's class index).
        return dataset_config(
            filenames=tf_record_train_list,
            batch_size=batch_size,
            mapper=feature_proto.unpack,
            num_cpus=num_cpus,
        )
    return input_fn_train

def wrap_testing_data(tf_record_test_list, feature_proto, num_cpus, batch_size=2048):
    """
    Build the zero-argument evaluation input_fn that an Estimator expects.

    Params:
        tf_record_test_list: TFRecord file paths, forwarded to dataset_config as `filenames`
        feature_proto: object whose `unpack` method is used as the dataset mapper
        num_cpus: forwarded to dataset_config as `num_cpus`
        batch_size: forwarded to dataset_config as `batch_size`
            (defaults to 2048, the value that used to be hard-coded)

    returns a callable taking no arguments that returns whatever dataset_config returns
    """
    def input_fn_eval(): # returns x, y (where y represents label's class index).
        return dataset_config(
            filenames=tf_record_test_list,
            batch_size=batch_size,
            mapper=feature_proto.unpack,
            num_cpus=num_cpus,
        )
    return input_fn_eval


# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 1 so a concrete integer is always handed to the input functions.
num_cpus = os.cpu_count() or 1

# Zero-argument input functions shared by every estimator in this notebook
input_fn_train = wrap_training_data(filename_list, feature_proto, num_cpus)
input_fn_eval = wrap_testing_data(evaluation_list, feature_proto, num_cpus)

def evaluate(estimator):
    """
    Print the estimator's evaluation metrics before and after one training run.

    Uses the module-level `input_fn_eval` and `input_fn_train`.

    Params:
        estimator: object exposing `evaluate(input_fn=...)` and `train(input_fn=...)`

    returns None
    """
    # Metrics for the estimator as it currently stands
    metrics_before = estimator.evaluate(input_fn=input_fn_eval)
    print(metrics_before)

    estimator.train(input_fn=input_fn_train)

    # Metrics after training
    print(estimator.evaluate(input_fn=input_fn_eval))

In [5]:
from tensorflow.estimator import DNNClassifier

In [6]:


def _adam_with_decayed_lr():
    """Return an Adam optimizer whose learning rate starts at 0.1 and is decayed
    with tf.train.exponential_decay on the global step."""
    decayed_lr = tf.train.exponential_decay(
        learning_rate=0.1,
        global_step=tf.train.get_global_step(),
        decay_steps=10000,
        decay_rate=0.96)
    return tf.train.AdamOptimizer(learning_rate=decayed_lr)


# The optimizer is passed as a callable rather than as an instance
estimator = DNNClassifier(
    feature_columns=columns,
    model_dir='save_dir/canned_fuk/',
    n_classes=8,
    hidden_units=[256, 16],
    optimizer=_adam_with_decayed_lr,
)

evaluate(estimator)

{'accuracy': 0.48739704, 'average_loss': 223.28908, 'loss': 455208.06, 'global_step': 0}
{'accuracy': 0.48942798, 'average_loss': 1.211182, 'loss': 2469.175, 'global_step': 5701}


# Baseline Estimator

In [7]:
from tensorflow.estimator import BaselineClassifier

# Baseline to compare the other estimators against
classifier = BaselineClassifier(
    model_dir='save_dir/baseline',
    n_classes=8,
)

evaluate(classifier)

{'accuracy': 0.0, 'average_loss': 2.079439, 'loss': 4239.246, 'global_step': 0}
{'accuracy': 0.48942798, 'average_loss': 1.2046517, 'loss': 2455.862, 'global_step': 5701}


# Custom Estimator

In [5]:
def model_definition(features, feature_columns, labels):
    """
    Implementation of your #leet model

    Params:
        features: features from the input_fn, fed to tf.feature_column.input_layer
        feature_columns: feature columns used to build the input layer
        labels: class-index labels, or None when there are none (PREDICT mode)

    returns (predictions, loss, accuracy, train_op); loss, accuracy and
    train_op are None when labels is None
    """

    # Define your network
    input_layer = tf.feature_column.input_layer(features, feature_columns)
    layer_1 = tf.layers.Dense(256, activation=tf.nn.relu)(input_layer)
    layer_2 = tf.layers.Dense(16, activation=tf.nn.relu)(layer_1)
    logits = tf.layers.Dense(8)(layer_2)

    # Define your prediction
    predictions = {'Class_ID': tf.argmax(input=logits, axis=1)}

    # The Estimator calls the model_fn with labels=None in PREDICT mode. The
    # loss, metric and train_op all need labels, so building them here would
    # raise before the predictions could be returned.
    if labels is None:
        return predictions, None, None, None

    # Define your loss, accuracy, and train_op
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    accuracy = tf.metrics.accuracy(labels, predictions['Class_ID'])

    optimizer = tf.train.AdamOptimizer(0.01)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return predictions, loss, accuracy, train_op
    

def model_wrapper(feature_proto, abstract_model):
    """
    Turn a model-building function into a `model_fn` for tf.estimator.Estimator.

    Params:
        feature_proto: object whose `get_feature_columns()` supplies the feature columns
        abstract_model: callable `(features, feature_columns, labels)` returning
            `(predictions, loss, accuracy, train_op)`; `accuracy[1]` is logged
            as a summary scalar in TRAIN mode

    returns custom_model(features, labels, mode), which returns the
    tf.estimator.EstimatorSpec for the given mode
    """
    def custom_model(features, labels, mode):
        mode_names = {
            tf.estimator.ModeKeys.PREDICT: 'PREDICT',
            tf.estimator.ModeKeys.EVAL: 'EVAL',
            tf.estimator.ModeKeys.TRAIN: 'TRAIN',
        }
        if mode in mode_names:
            tf.logging.info("my_model_fn: {}, {}".format(mode_names[mode], mode))

        # Use the model that was handed to the wrapper, not a hard-coded global,
        # so the wrapper works for any model with the same signature.
        predictions, loss, accuracy, train_op = abstract_model(
            features, feature_proto.get_feature_columns(), labels
        )

        # Prediction
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode,
                loss=loss,
                eval_metric_ops={'custom_accuracy': accuracy}
            )

        # Track the accuracy while in training mode
        tf.summary.scalar('my_accuracy', accuracy[1])
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    return custom_model


# Build the model_fn first, then hand it to a generic Estimator
custom_model_fn = model_wrapper(feature_proto, model_definition)
classifier = tf.estimator.Estimator(model_fn=custom_model_fn, model_dir='save_dir/custom/')

evaluate(classifier)


{'custom_accuracy': 0.32595545, 'loss': 132.07243, 'global_step': 0}
{'custom_accuracy': 0.48942798, 'loss': 1.2051108, 'global_step': 5701}
