本节主要目的是学习自定义input_fn。其目的在于，在把数据提供给模型之前，可以进行更多的逻辑处理。

In [1]:
def my_input_fn():
    """Template for a custom input function.

    Preprocess the data here, then return:
      1) a mapping of feature columns to Tensors holding the corresponding
         feature data, and
      2) a Tensor containing the labels.

    NOTE: feature_cols and labels are placeholders; this skeleton does not
    define them.
    """
    return feature_cols, labels

pandas dataframes和numpy arrays需先转化为tensor才能作为输入数据

In [2]:
import tensorflow as tf

In [3]:
# For continuous data, create and populate a Tensor with tf.constant,
# passing the raw Python list of values directly.
feature_column_data = [1, 2.4, 0, 9.9, 3, 120]
feature_tensor = tf.constant(feature_column_data)

In [5]:
# For sparse, categorical data (data where the majority of values are 0), you'll instead want to
# populate a SparseTensor: tf.SparseTensor(indices, values, dense_shape)
# indices: the indices of the elements in your tensor that contain nonzero values. Takes a list of terms,
#   where each term is itself a list containing the index of a nonzero element.
# values: one value per entry of indices, given in the same order.
# dense_shape: the shape of the full (dense) tensor.
# Here: a 3x5 tensor whose only nonzero elements are [0, 1] = 6 and [2, 4] = 0.5.
sparse_tensor = tf.SparseTensor(indices = [[0, 1], [2, 4]],
                                values = [6, 0.5],
                                dense_shape = [3, 5])

To feed data to your model for training, you simply pass the input function you've created to your fit operation as the value of the input_fn parameter.<br>
Also note that the input_fn parameter receives a function object, not the return value of a function call.

In [6]:
# However, if you'd like to be able to parameterize your input function, there are other methods for doing so.
# NOTE: the three variants below assume my_input_fn accepts the data set as a
# parameter named data_set, and that classifier and training_set already exist.
import functools


# Option 1: a zero-argument wrapper that closes over the data set.
def my_input_function_training_set():
    """Call my_input_fn on training_set so it can be passed as input_fn."""
    return my_input_fn(training_set)


# Pass the function object itself (no parentheses), not the result of calling it.
classifier.fit(input_fn = my_input_function_training_set, steps = 2000)

# alternatively, you can use Python's functools.partial function to construct a new function object with all
# parameter values fixed
classifier.fit(input_fn = functools.partial(my_input_fn, data_set = training_set), steps = 2000)

# a third option is to wrap your input_fn invocation in a lambda and pass it to the input_fn parameter:
classifier.fit(input_fn=lambda: my_input_fn(training_set), steps = 2000)

NameError: name 'classifier' is not defined

## A neural network model for Boston House Values

In [14]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from urllib.request import urlopen

import os
import itertools
import pandas as pd
import tensorflow as tf

In [11]:
tf.logging.set_verbosity(tf.logging.INFO)  # set the log level; messages below this level are not logged

In [15]:
# Input feature names, the label name, and the full column list
# (all features followed by the label).
FEATURES = ["crim", "zn", "indus", "nox", "rm", "age", "dis", "tax", "ptratio"]
LABEL = "medv"
COLUMNS = FEATURES + [LABEL]

# Local file names for the three Boston housing CSV files.
BOSTON_TRAININGSET = "boston_train.csv"
BOSTON_TESTSET = "boston_test.csv"
BOSTON_PREDICTSET = "boston_predict.csv"

# Remote locations of the same three files.
BOSTON_TRAININGSET_URL = "http://download.tensorflow.org/data/boston_train.csv"
BOSTON_TESTSET_URL = "http://download.tensorflow.org/data/boston_test.csv"
BOSTON_PREDICTSET_URL = "http://download.tensorflow.org/data/boston_predict.csv"

In [25]:
def _download_if_missing(path, url):
    """Fetch *url* into the local file *path* unless that file already exists.

    The payload is written as raw bytes, so the local copy matches the remote
    file exactly instead of depending on the platform's default text encoding
    and newline translation.
    """
    if os.path.exists(path):
        return
    # Read the whole payload before opening the target file, and close the
    # HTTP response explicitly instead of leaving it to garbage collection.
    with urlopen(url) as response:
        payload = response.read()
    with open(path, "wb") as f:
        f.write(payload)


# Make sure all three data files are available locally.
for _path, _url in (
    (BOSTON_TRAININGSET, BOSTON_TRAININGSET_URL),
    (BOSTON_TESTSET, BOSTON_TESTSET_URL),
    (BOSTON_PREDICTSET, BOSTON_PREDICTSET_URL),
):
    _download_if_missing(_path, _url)

In [27]:
def _read_boston_csv(path):
    """Read one CSV file, skipping its first line and naming columns from COLUMNS."""
    return pd.read_csv(path, names = COLUMNS, skiprows = 1,
                       skipinitialspace = True)


training_set = _read_boston_csv(BOSTON_TRAININGSET)
test_set = _read_boston_csv(BOSTON_TESTSET)
prediction_set = _read_boston_csv(BOSTON_PREDICTSET)
# Bare expression so the notebook displays the prediction data.
prediction_set

Unnamed: 0,crim,zn,indus,nox,rm,age,dis,tax,ptratio,medv
0,0.03359,75.0,2.95,0.428,7.024,15.8,5.4011,252,18.3,
1,5.09017,0.0,18.1,0.713,6.297,91.8,2.3682,666,20.2,
2,0.1265,25.0,5.13,0.453,6.762,43.4,7.9809,284,19.7,
3,0.05515,33.0,2.18,0.472,7.236,41.1,4.022,222,18.4,
4,8.15174,0.0,18.1,0.7,5.39,98.9,1.7281,666,20.2,
5,0.24522,0.0,9.9,0.544,5.782,71.7,4.0317,304,18.4,


In [28]:
# One real-valued feature column per name in FEATURES.
feature_cols = list(map(tf.contrib.layers.real_valued_column, FEATURES))

In [29]:
# Build a DNN regressor over feature_cols with hidden_units [10, 10]
# (presumably two hidden layers of 10 units each); model_dir is ./boston_model/.
regressor = tf.contrib.learn.DNNRegressor(feature_columns = feature_cols,
                                         hidden_units = [10, 10],
                                         model_dir = "./boston_model/")

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1148f3400>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': None}


In [30]:
def input_fn(data_set):
    """Turn *data_set* into the (features, labels) pair the estimator consumes.

    Each column named in FEATURES becomes a constant Tensor keyed by its
    column name; the LABEL column becomes the labels Tensor.
    """
    tensors_by_feature = {}
    for name in FEATURES:
        tensors_by_feature[name] = tf.constant(data_set[name].values)
    label_tensor = tf.constant(data_set[LABEL].values)
    return tensors_by_feature, label_tensor

In [31]:
# Train for 5000 steps. The lambda defers the input_fn(training_set) call so
# that fit receives a callable rather than the call's return value.
regressor.fit(input_fn = lambda: input_fn(training_set), steps = 5000)

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into ./boston_model/model.ckpt.
INFO:tensorflow:loss = 229.514, step = 1
INFO:tensorflow:global_step/sec: 1020.35
INFO:tensorflow:loss = 86.5402, step = 101 (0.099 sec)
INFO:tensorflow:global_step/sec: 1051.77
INFO:tensorflow:loss = 78.5526, step = 201 (0.095 sec)
INFO:tensorflow:global_step/sec: 1066.35
INFO:tensorflow:loss = 72.7036, step = 301 (0.094 sec)
INFO:tensorflow:global_step/sec: 1000.15
INFO:tensorflow:loss = 69.2047, step = 401 (0.099 sec)
INFO:tensorflow:global_step/sec: 961.714
INFO:tensorflow:loss = 66.3272, step = 501 (0.107 sec)
INFO:tensorflow:global_step/sec: 

DNNRegressor(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._RegressionHead object at 0x114254ac8>, 'hidden_units': [10, 10], 'feature_columns': (_RealValuedColumn(column_name='crim', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='zn', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='indus', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='nox', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='rm', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='age', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='dis', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='tax', dimension=1, default_value=None, dtype=tf.float32,

In [32]:
# Run a single evaluation step over test_set and keep the results in ev.
ev = regressor.evaluate(input_fn = lambda: input_fn(test_set), steps = 1)

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Starting evaluation at 2017-06-12-09:38:08
INFO:tensorflow:Restoring parameters from ./boston_model/model.ckpt-5000
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-06-12-09:38:08
INFO:tensorflow:Saving dict for global step 5000: global_step = 5000, loss = 13.7506


In [36]:
# Pull the loss out of the evaluation results and print it in fixed-point form.
loss_score = ev["loss"]
print("Loss: {0:f}".format(loss_score))

Loss: 13.750644


In [39]:
y = regressor.predict(input_fn = lambda: input_fn(prediction_set))
# .predict() returns an iterator; convert to a list and print predictions
# NOTE(review): only the first 6 items are taken, presumably one per row of
# prediction_set -- confirm the row count if the prediction CSV changes.
predictions = list(itertools.islice(y, 6))
print("Predictions: {}".format(str(predictions)))

Instructions for updating:
Please switch to predict_scores, or set `outputs` argument.
INFO:tensorflow:Restoring parameters from ./boston_model/model.ckpt-5000
Predictions: [36.897224, 19.84107, 22.817165, 38.114002, 16.066898, 19.218407]


### 要知道如何读取数据（注意与quickstart中的做法进行区别），推荐用lambda
### 懂得contrib.learn的建模、训练、评估和预测