In [1]:
# Train the breakfast-cost model with a DNN regressor instead of a linear regressor.

In [2]:
import tensorflow as tf
import numpy as np
import shutil
print(tf.__version__)

1.10.0


In [3]:
# Column names, in file order, used to label the fields parsed from each CSV line.
CSV_COLUMNS = [
    'num_sandwitch',
    'num_salad',
    'num_coke',
    'total_cost',
]
# Column that is split off from the features and used as the regression label.
LABEL_COLUMN = 'total_cost'
# One integer default per CSV column; passed to tf.decode_csv as record_defaults.
DEFAULTS = [[0] for _ in CSV_COLUMNS]

def read_dataset(filename, mode, batch_size = 512):
    """Build an Estimator input function that reads CSV files.

    Args:
        filename: File path or glob pattern (e.g. ``data_file_*.csv``) of the
            CSV files to read.
        mode: A ``tf.estimator.ModeKeys`` value. TRAIN shuffles and repeats
            indefinitely; every other mode makes a single pass over the data.
            PREDICT yields features only (no label).
        batch_size: Number of rows per batch.

    Returns:
        A zero-argument function. When called it builds the input pipeline and
        returns the next-batch tensors: ``(features, label)`` for TRAIN/EVAL,
        or just ``features`` for PREDICT.
    """
    def _input_fn():
        def decode_csv(value_column):
            """Parse one CSV text line into a feature dict (and label)."""
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            # The label column is always removed so it never leaks into the features.
            label = features.pop(LABEL_COLUMN)
            # Explicit branch: the one-line conditional form
            # `return features, label if ... else features` parses as
            # `(features, (label if ... else features))` and would hand back
            # a (features, features) tuple in PREDICT mode.
            if mode == tf.estimator.ModeKeys.PREDICT:
                return features
            return features, label

        # Create list of file names that match the "glob" pattern.
        filenames_dataset = tf.data.Dataset.list_files(filename)
        # Read lines from the text files.
        textlines_dataset = filenames_dataset.flat_map(tf.data.TextLineDataset)
        # Parse text lines as comma-separated values (CSV).
        dataset = textlines_dataset.map(decode_csv)

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # repeat indefinitely; training is bounded by max_steps
            dataset = dataset.shuffle(buffer_size = 10 * batch_size)
        else:
            num_epochs = 1  # single pass, then end-of-input

        # Repeat for the chosen number of epochs, then group rows into batches.
        dataset = dataset.repeat(num_epochs).batch(batch_size)

        # Return the tensors that yield the next batch each time they are evaluated.
        return dataset.make_one_shot_iterator().get_next()
    return _input_fn

In [4]:
# One numeric feature column per raw model input, in a fixed order.
INPUT_COLUMNS = [
    tf.feature_column.numeric_column(feature_name)
    for feature_name in ('num_sandwitch', 'num_salad', 'num_coke')
]

def add_more_features(feats):
  """Hook for feature engineering on the list of feature columns.

  Currently a pass-through: the same object that was passed in is returned
  unchanged.
  """
  return feats

# Feature columns handed to the DNNRegressor. Currently identical to INPUT_COLUMNS
# because add_more_features returns its argument unchanged.
feature_cols = add_more_features(INPUT_COLUMNS)

In [5]:
# Serving Input Function
# Defines the expected shape of the JSON feed that the model
# will receive once deployed behind a REST API in production.
def serving_input_fn():
  """Describe the inputs the exported model accepts at serving time.

  Creates one int32 placeholder of shape [None] per feature and uses the same
  tensors both as the receiver tensors and as the features given to the model.

  Returns:
    A tf.estimator.export.ServingInputReceiver.
  """
  feature_names = ('num_sandwitch', 'num_salad', 'num_coke')
  receiver_tensors = {
    name: tf.placeholder(tf.int32, shape=[None]) for name in feature_names
  }
  # No transformation between the received tensors and the model features,
  # so the same dict serves as both arguments.
  return tf.estimator.export.ServingInputReceiver(receiver_tensors, receiver_tensors)

In [6]:
def train_and_evaluate(output_dir, num_train_steps):
    """Train and periodically evaluate a DNN regressor on the breakfast data.

    Args:
        output_dir: Passed to the estimator as its model_dir.
        num_train_steps: Upper bound on training steps (TrainSpec max_steps).

    Returns:
        The tf.estimator.DNNRegressor that was trained.
    """
    adam = tf.train.AdamOptimizer(learning_rate=0.1)
    regressor = tf.estimator.DNNRegressor(
        model_dir=output_dir,
        feature_columns=feature_cols,
        hidden_units=[32,16,8],
        optimizer=adam,
    )

    training_input = read_dataset(
        './data/breakfast_train*.csv', mode = tf.estimator.ModeKeys.TRAIN)
    validation_input = read_dataset(
        './data/breakfast_valid*.csv', mode = tf.estimator.ModeKeys.EVAL)

    # The training input repeats indefinitely, so max_steps is what stops training.
    train_spec = tf.estimator.TrainSpec(
        input_fn = training_input,
        max_steps = num_train_steps,
    )

    eval_spec = tf.estimator.EvalSpec(
        input_fn = validation_input,
        steps = None,            # no step limit on evaluation
        start_delay_secs = 1,    # start evaluating after this many seconds
        throttle_secs = 1,       # minimum seconds between evaluations
        # Exporter is built with the serving input function and attached to evaluation.
        exporters = tf.estimator.LatestExporter('exporter', serving_input_fn),
    )

    tf.estimator.train_and_evaluate(regressor, train_spec, eval_spec)

    return regressor

In [7]:
# Model directory: passed as model_dir when the estimator is rebuilt, and as
# output_dir (after being wiped) when training is re-run.
OUTDIR = './output_breakfast2'
#TensorBoard().start(OUTDIR)

In [12]:
# Rebuild the estimator on top of OUTDIR. The hyperparameters below mirror
# the ones used in train_and_evaluate.
estimator = tf.estimator.DNNRegressor(
    hidden_units=[32,16,8],
    feature_columns=feature_cols,
    optimizer=tf.train.AdamOptimizer(learning_rate=0.1),
    model_dir=OUTDIR,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './output_breakfast2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f663c770c18>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [15]:
def make_predict_input_fn(num_san, num_sal, num_cok):
    """Create a prediction input function for a single order.

    Args:
        num_san: Value for the 'num_sandwitch' feature.
        num_sal: Value for the 'num_salad' feature.
        num_cok: Value for the 'num_coke' feature.

    Returns:
        A zero-argument function that builds a one-row dataset, batches it
        with batch size 1 and returns the next-element feature tensors.
    """
    def predict_input_fn():
        """An input function for prediction"""
        single_order = {
            'num_sandwitch' : [num_san],
            'num_salad' : [num_sal],
            'num_coke' : [num_cok],
        }
        # One slice (one example), delivered as a batch of size 1.
        batched = tf.data.Dataset.from_tensor_slices(single_order).batch(1)
        return batched.make_one_shot_iterator().get_next()
    return predict_input_fn

In [None]:
# Interactive prediction: ask for the three item counts, then print the
# model's prediction for that order.
# int() raises ValueError if an entry is not a whole number.
num_san = int(input('Sandwitch: '))
num_sal = int(input('Salad: '))
num_cok = int(input('Coke: '))

# Only the first element is pulled from the predictions iterator; the input
# function supplies a single example.
predictions = estimator.predict(input_fn=make_predict_input_fn(num_san,num_sal,num_cok))
print(next(predictions))

Sandwitch: 10
Salad: 10


In [None]:
# Re-train from scratch.
# Run this cell only when a fresh model is wanted: it deletes OUTDIR first.
shutil.rmtree(path=OUTDIR, ignore_errors=True)  # a missing directory is not an error
estimator = train_and_evaluate(output_dir=OUTDIR, num_train_steps=80000)