In [1]:
# This notebook tests tf.estimator.LinearRegressor by predicting the total price of a breakfast.
# The cost of each item is shown below:
#      Sandwich Salad Coke
# Cost    60     50    25
# All items can be bought at the same time, but no more than 100 of each.

In [2]:
import tensorflow as tf
import numpy as np
import shutil
#import tensorboard.TensorBoard
# Show the TensorFlow version the notebook is running against (the recorded output is 1.10.0).
print(tf.__version__)

1.10.0


In [3]:
# Column order of every CSV row; the final column is the regression target.
CSV_COLUMNS = ['num_sandwitch', 'num_salad', 'num_coke', 'total_cost']
LABEL_COLUMN = CSV_COLUMNS[-1]
# One integer default per column, passed to tf.decode_csv as record_defaults.
DEFAULTS = [[0] for _ in CSV_COLUMNS]

def read_dataset(filename, mode, batch_size = 512):
    """Build an Estimator input function that streams rows from CSV files.

    Args:
        filename: File path or glob pattern (e.g. ``data_file_*.csv``). Every
            line of every matching file is parsed as a data row with the
            columns listed in ``CSV_COLUMNS``; no header line is skipped.
        mode: A ``tf.estimator.ModeKeys`` value. TRAIN shuffles and repeats the
            data indefinitely; every other mode makes a single pass.
        batch_size: Number of rows per batch.

    Returns:
        A zero-argument function. When called it returns the next-batch
        tensors: ``(features, label)``, or only the ``features`` dict when
        ``mode`` is PREDICT.
    """
    def _input_fn():
        # Evaluated here rather than in the outer function so that TensorFlow
        # is only touched when the input function is actually invoked.
        is_predict = mode == tf.estimator.ModeKeys.PREDICT

        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults= DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            # The label column is always parsed and removed from the features,
            # even for prediction, where it is then discarded.
            label = features.pop(LABEL_COLUMN)
            # An explicit branch is required: `return features, label if c else features`
            # parses as `(features, (label if c else features))` and would
            # return a (features, features) tuple in PREDICT mode.
            if is_predict:
                return features
            return features, label

        # Create list of file names that match "glob" pattern (i.e. data_file_*.csv)
        filenames_dataset = tf.data.Dataset.list_files(filename)
        # Read lines from text files
        textlines_dataset = filenames_dataset.flat_map(tf.data.TextLineDataset)
        # Parse text lines as comma-separated values (CSV)
        dataset = textlines_dataset.map(decode_csv)

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # repeat indefinitely; the caller bounds training by step count
            dataset = dataset.shuffle(buffer_size = 10 * batch_size)
        else:
            num_epochs = 1  # single pass, then end-of-input

        # Repeat for the chosen number of epochs, then group rows into batches
        dataset = dataset.repeat(num_epochs).batch(batch_size)

        # Return the tensors for the next batch from a one-shot iterator
        return dataset.make_one_shot_iterator().get_next()
    return _input_fn
    

In [4]:
# One numeric feature column per item count.
INPUT_COLUMNS = [
    tf.feature_column.numeric_column(item_count)
    for item_count in ('num_sandwitch', 'num_salad', 'num_coke')
]

def add_more_features(feats):
  """Hook for adding engineered feature columns; currently a pass-through.

  Args:
    feats: The collection of feature columns to extend.

  Returns:
    The very same ``feats`` object, unmodified.
  """
  return feats

# Feature columns handed to the estimator; identical to INPUT_COLUMNS while
# add_more_features is a pass-through.
feature_cols = add_more_features(INPUT_COLUMNS)

In [5]:
# Serving Input Function
# Defines the expected shape of the JSON feed that the model
# will receive once deployed behind a REST API in production.
def serving_input_fn():
  """Describe the inputs the exported model accepts at serving time.

  Returns:
    A ``tf.estimator.export.ServingInputReceiver`` whose features and
    receiver tensors are the same dict of int32 placeholders of shape
    ``[None]``, one per item count.
  """
  receiver_tensors = {
    item_count: tf.placeholder(tf.int32, shape=[None])
    for item_count in ('num_sandwitch', 'num_salad', 'num_coke')
  }
  # No transformation is applied between the incoming request format and the
  # model inputs, so the same placeholders serve as both.
  return tf.estimator.export.ServingInputReceiver(receiver_tensors, receiver_tensors)

In [31]:
def train_and_evaluate(output_dir, num_train_steps):
    """Train a LinearRegressor on the breakfast CSV files, evaluating along the way.

    Args:
        output_dir: Model directory passed to the estimator.
        num_train_steps: Value used as ``max_steps`` for training.

    Returns:
        The ``tf.estimator.LinearRegressor`` instance, after
        ``tf.estimator.train_and_evaluate`` has returned.
    """
    regressor = tf.estimator.LinearRegressor(
        feature_columns = feature_cols,
        model_dir = output_dir)

    # The training input repeats indefinitely, so max_steps is what ends training.
    train_input_fn = read_dataset('./data/breakfast_train*.csv', mode = tf.estimator.ModeKeys.TRAIN)
    training = tf.estimator.TrainSpec(
        input_fn = train_input_fn,
        max_steps = num_train_steps)

    # Exporter built from serving_input_fn; it is attached to the evaluation spec below.
    latest_exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)

    eval_input_fn = read_dataset('./data/breakfast_valid*.csv', mode = tf.estimator.ModeKeys.EVAL)
    evaluation = tf.estimator.EvalSpec(
        input_fn = eval_input_fn,
        steps = None,           # no step limit: the eval input makes one pass and then ends
        start_delay_secs = 1,   # start evaluating after N seconds
        throttle_secs = 2,      # minimum N seconds between evaluations
        exporters = latest_exporter)

    tf.estimator.train_and_evaluate(regressor, training, evaluation)

    return regressor

In [7]:
# Model directory handed to the estimator; checkpoints are written here.
OUTDIR = './output_breakfast'
#TensorBoard().start(OUTDIR)

In [33]:
# Run training and evaluation, using OUTDIR as the model directory.
# The cleanup below is disabled, so whatever already exists in OUTDIR is left
# in place between runs; uncomment it to start fresh each time.
#shutil.rmtree(OUTDIR, ignore_errors = True)
estimator = train_and_evaluate(OUTDIR, num_train_steps = 120000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './output_breakfast', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4b7a094c88>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.
INFO:tensorflow:Calling 

INFO:tensorflow:loss = 78065100.0, step = 86801 (0.949 sec)
INFO:tensorflow:global_step/sec: 102.952
INFO:tensorflow:loss = 86783570.0, step = 86901 (0.971 sec)
INFO:tensorflow:global_step/sec: 106.302
INFO:tensorflow:loss = 79901280.0, step = 87001 (0.941 sec)
INFO:tensorflow:global_step/sec: 103.767
INFO:tensorflow:loss = 81114290.0, step = 87101 (0.964 sec)
INFO:tensorflow:global_step/sec: 103.026
INFO:tensorflow:loss = 78979630.0, step = 87201 (0.971 sec)
INFO:tensorflow:global_step/sec: 105.22
INFO:tensorflow:loss = 84840720.0, step = 87301 (0.950 sec)
INFO:tensorflow:global_step/sec: 103.266
INFO:tensorflow:loss = 82291460.0, step = 87401 (0.968 sec)
INFO:tensorflow:global_step/sec: 105.061
INFO:tensorflow:loss = 79996140.0, step = 87501 (0.952 sec)
INFO:tensorflow:global_step/sec: 104.88
INFO:tensorflow:loss = 82912580.0, step = 87601 (0.953 sec)
INFO:tensorflow:global_step/sec: 105.372
INFO:tensorflow:loss = 85779420.0, step = 87701 (0.949 sec)
INFO:tensorflow:global_step/sec: 

INFO:tensorflow:global_step/sec: 104.774
INFO:tensorflow:loss = 75774320.0, step = 95001 (0.954 sec)
INFO:tensorflow:global_step/sec: 106.328
INFO:tensorflow:loss = 74497680.0, step = 95101 (0.940 sec)
INFO:tensorflow:global_step/sec: 104.899
INFO:tensorflow:loss = 75069410.0, step = 95201 (0.954 sec)
INFO:tensorflow:global_step/sec: 106.153
INFO:tensorflow:loss = 70693090.0, step = 95301 (0.942 sec)
INFO:tensorflow:global_step/sec: 105.18
INFO:tensorflow:loss = 69526100.0, step = 95401 (0.951 sec)
INFO:tensorflow:global_step/sec: 105.359
INFO:tensorflow:loss = 76321896.0, step = 95501 (0.949 sec)
INFO:tensorflow:global_step/sec: 105.194
INFO:tensorflow:loss = 76736220.0, step = 95601 (0.951 sec)
INFO:tensorflow:global_step/sec: 112.468
INFO:tensorflow:loss = 69455256.0, step = 95701 (0.889 sec)
INFO:tensorflow:global_step/sec: 99.1073
INFO:tensorflow:loss = 60549770.0, step = 95801 (1.009 sec)
INFO:tensorflow:global_step/sec: 104.647
INFO:tensorflow:loss = 77023890.0, step = 95901 (0.

INFO:tensorflow:global_step/sec: 105.028
INFO:tensorflow:loss = 64953320.0, step = 103101 (0.952 sec)
INFO:tensorflow:global_step/sec: 113.597
INFO:tensorflow:loss = 67711416.0, step = 103201 (0.880 sec)
INFO:tensorflow:global_step/sec: 99.0343
INFO:tensorflow:loss = 63989550.0, step = 103301 (1.010 sec)
INFO:tensorflow:global_step/sec: 105.418
INFO:tensorflow:loss = 68924100.0, step = 103401 (0.948 sec)
INFO:tensorflow:global_step/sec: 106.116
INFO:tensorflow:loss = 64000440.0, step = 103501 (0.942 sec)
INFO:tensorflow:global_step/sec: 99.505
INFO:tensorflow:loss = 62298490.0, step = 103601 (1.005 sec)
INFO:tensorflow:global_step/sec: 90.03
INFO:tensorflow:loss = 65521988.0, step = 103701 (1.111 sec)
INFO:tensorflow:global_step/sec: 94.3621
INFO:tensorflow:loss = 62798456.0, step = 103801 (1.059 sec)
INFO:tensorflow:global_step/sec: 97.927
INFO:tensorflow:loss = 61786590.0, step = 103901 (1.021 sec)
INFO:tensorflow:global_step/sec: 105.47
INFO:tensorflow:loss = 58627070.0, step = 1040

INFO:tensorflow:global_step/sec: 106.417
INFO:tensorflow:loss = 57145196.0, step = 111201 (0.940 sec)
INFO:tensorflow:global_step/sec: 105.765
INFO:tensorflow:loss = 58962196.0, step = 111301 (0.945 sec)
INFO:tensorflow:global_step/sec: 109.563
INFO:tensorflow:loss = 53539548.0, step = 111401 (0.913 sec)
INFO:tensorflow:global_step/sec: 100.319
INFO:tensorflow:loss = 53552770.0, step = 111501 (0.997 sec)
INFO:tensorflow:global_step/sec: 93.8767
INFO:tensorflow:loss = 54094260.0, step = 111601 (1.065 sec)
INFO:tensorflow:global_step/sec: 107.334
INFO:tensorflow:loss = 58410104.0, step = 111701 (0.932 sec)
INFO:tensorflow:global_step/sec: 104.842
INFO:tensorflow:loss = 53876150.0, step = 111801 (0.954 sec)
INFO:tensorflow:global_step/sec: 104.358
INFO:tensorflow:loss = 52969504.0, step = 111901 (0.958 sec)
INFO:tensorflow:global_step/sec: 104.232
INFO:tensorflow:loss = 54414590.0, step = 112001 (0.959 sec)
INFO:tensorflow:global_step/sec: 106.864
INFO:tensorflow:loss = 57694576.0, step =

INFO:tensorflow:global_step/sec: 104.993
INFO:tensorflow:loss = 47161000.0, step = 119301 (0.952 sec)
INFO:tensorflow:global_step/sec: 105.573
INFO:tensorflow:loss = 46496840.0, step = 119401 (0.947 sec)
INFO:tensorflow:global_step/sec: 103.938
INFO:tensorflow:loss = 48380656.0, step = 119501 (0.962 sec)
INFO:tensorflow:global_step/sec: 106.092
INFO:tensorflow:loss = 46483830.0, step = 119601 (0.943 sec)
INFO:tensorflow:global_step/sec: 104.849
INFO:tensorflow:loss = 47615736.0, step = 119701 (0.954 sec)
INFO:tensorflow:global_step/sec: 105.018
INFO:tensorflow:loss = 52835628.0, step = 119801 (0.952 sec)
INFO:tensorflow:global_step/sec: 97.817
INFO:tensorflow:loss = 48690804.0, step = 119901 (1.022 sec)
INFO:tensorflow:Saving checkpoints for 120000 into ./output_breakfast/model.ckpt.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-26-05:52:00
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters

In [1]:
def predict_input_fn():
    """Input function for prediction: a single order with one of each item.

    Returns:
        The next-element feature dict from a one-shot iterator over the
        dataset, batched with a batch size of 1.
    """
    single_order = {
        'num_sandwitch' : [1],
        'num_salad' : [1],
        'num_coke' : [1]
    }
    # Slice the in-memory features into a dataset and batch it.
    batched = tf.data.Dataset.from_tensor_slices(single_order).batch(1)
    return batched.make_one_shot_iterator().get_next()

In [2]:
# Predict for the hand-built order produced by predict_input_fn.
# NOTE: this cell needs `estimator` to already exist in the kernel. The
# NameError recorded for this cell shows it was executed when `estimator` was
# not defined; run a cell that assigns `estimator` first.
predictions = estimator.predict(input_fn=predict_input_fn)
print(list(predictions))

NameError: name 'estimator' is not defined

In [9]:
# Predict on the test CSV files, one row per batch, and print up to the first 20 results.
predictions = estimator.predict(input_fn=read_dataset('./data/breakfast_test*.csv', mode=tf.estimator.ModeKeys.PREDICT, batch_size=1))
# zip() stops as soon as either side is exhausted, so a test set with fewer
# than 20 rows ends the loop cleanly instead of raising StopIteration from
# next(). range() comes first so no extra prediction is consumed after the 20th.
for i, prediction in zip(range(20), predictions):
    print(prediction)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./output_breakfast/model.ckpt-40000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'predictions': array([5533.984], dtype=float32)}
{'predictions': array([3498.2986], dtype=float32)}
{'predictions': array([4799.309], dtype=float32)}
{'predictions': array([8631.371], dtype=float32)}
{'predictions': array([4545.279], dtype=float32)}
{'predictions': array([5836.6973], dtype=float32)}
{'predictions': array([7047.3623], dtype=float32)}
{'predictions': array([7040.2373], dtype=float32)}
{'predictions': array([10368.211], dtype=float32)}
{'predictions': array([6215.503], dtype=float32)}
{'predictions': array([3406.0999], dtype=float32)}
{'predictions': array([5516.6665], dtype=float32)}
{'predictions': array([5288.248], dtype=float32)}
{'predictions': array([7645.789], dtype=float32)}
{'predictions': array(

In [None]:
# to list Tensorboard instances
#TensorBoard().list()

In [8]:
# Re-create the estimator on the existing model directory so it can be used
# without retraining. The recorded prediction log shows parameters being
# restored from a checkpoint under OUTDIR when predict() runs.
estimator = tf.estimator.LinearRegressor(model_dir=OUTDIR, feature_columns=feature_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './output_breakfast', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4b806f9cf8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
