In [33]:
import itertools

import os
import urllib.request
import pandas as pd
import tensorflow as tf

In [34]:
# Emit INFO-level TensorFlow log messages (the "INFO:tensorflow:..." lines
# shown under the training, evaluation and prediction cells).
tf.logging.set_verbosity(tf.logging.INFO)

In [35]:
# Local paths where the three Boston housing CSV files are cached
BOSTON_TRAINING = "./BOSTON_data/boston_train.csv"
BOSTON_TEST = "./BOSTON_data/boston_test.csv"
BOSTON_PREDICT = "./BOSTON_data/boston_predict.csv"

# Remote locations the files are downloaded from when not cached locally
BOSTON_TRAINING_URL = "http://download.tensorflow.org/data/boston_train.csv"
BOSTON_TEST_URL = "http://download.tensorflow.org/data/boston_test.csv"
BOSTON_PREDICT_URL = "http://download.tensorflow.org/data/boston_predict.csv"

In [36]:
# Name of the column used as the regression target
LABEL = "medv"
# Names of the input feature columns, in CSV order
FEATURES = [
    "crim", "zn", "indus", "nox", "rm", "age", "dis", "tax", "ptratio",
]
# Full column list applied to the CSV files: every feature, then the label
COLUMNS = FEATURES + [LABEL]

In [37]:
# Download each dataset file unless a local copy already exists.
datasets = [{
    "file": BOSTON_TRAINING,
    "url": BOSTON_TRAINING_URL
}, {
    "file": BOSTON_TEST,
    "url": BOSTON_TEST_URL
}, {
    "file": BOSTON_PREDICT,
    "url": BOSTON_PREDICT_URL
}]

for dataset in datasets:
    target_dir = os.path.dirname(dataset["file"])
    # makedirs also creates missing parent directories, and exist_ok=True
    # removes the check-then-create race of exists() + mkdir(). A path with
    # no directory component yields "", which must not be passed on.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    if not os.path.exists(dataset["file"]):
        # Use the response as a context manager so the HTTP connection is
        # closed even if read() raises.
        with urllib.request.urlopen(dataset["url"]) as response:
            raw = response.read()
        with open(dataset["file"], "wb") as f:
            f.write(raw)

In [38]:
# Load the three CSV files with identical parsing options: the first row of
# each file is skipped and the column names come from COLUMNS instead.
training_set, test_set, prediction_set = (
    pd.read_csv(csv_path, skipinitialspace=True, skiprows=1, names=COLUMNS)
    for csv_path in (BOSTON_TRAINING, BOSTON_TEST, BOSTON_PREDICT)
)

In [39]:
# One numeric feature column per input feature name
feature_cols = [
    tf.feature_column.numeric_column(feature_name)
    for feature_name in FEATURES
]

In [40]:
# DNN regressor with two hidden layers of 10 units each.
# Checkpoints are written to (and restored from) model_dir, so re-running the
# notebook resumes from the last saved step rather than starting fresh.
regressor = tf.estimator.DNNRegressor(
    model_dir="./models/boston_model/",
    hidden_units=[10, 10],
    feature_columns=feature_cols,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_log_step_count_steps': 100, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_secs': 600, '_save_summary_steps': 100, '_tf_random_seed': 1, '_model_dir': './models/boston_model/'}


In [41]:
# Input function factory shared by training, evaluation and prediction
def get_input_fn(data_set, num_epochs=None, shuffle=True):
    """Build an Estimator input function from a pandas DataFrame.

    Args:
        data_set: DataFrame containing every column named in FEATURES plus
            the LABEL column.
        num_epochs: Forwarded unchanged to ``pandas_input_fn``.
        shuffle: Forwarded unchanged to ``pandas_input_fn``.

    Returns:
        The input function produced by ``tf.estimator.inputs.pandas_input_fn``.
    """
    # Rebuild from the raw .values arrays so features and labels share a
    # fresh default index regardless of the index carried by data_set.
    features = pd.DataFrame({name: data_set[name].values for name in FEATURES})
    labels = pd.Series(data_set[LABEL].values)
    return tf.estimator.inputs.pandas_input_fn(
        x=features,
        y=labels,
        num_epochs=num_epochs,
        shuffle=shuffle,
    )

In [42]:
# Run 5000 training steps on the training set, using the input function's
# default num_epochs and shuffle settings.
regressor.train(
    input_fn=get_input_fn(training_set),
    steps=5000,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ./models/boston_model/model.ckpt-5000
INFO:tensorflow:Saving checkpoints for 5001 into ./models/boston_model/model.ckpt.
INFO:tensorflow:step = 5001, loss = 6370.14
INFO:tensorflow:global_step/sec: 344.645
INFO:tensorflow:step = 5101, loss = 4409.19 (0.291 sec)
INFO:tensorflow:global_step/sec: 331.99
INFO:tensorflow:step = 5201, loss = 2791.37 (0.301 sec)
INFO:tensorflow:global_step/sec: 334.333
INFO:tensorflow:step = 5301, loss = 3718.72 (0.301 sec)
INFO:tensorflow:global_step/sec: 327.502
INFO:tensorflow:step = 5401, loss = 2339.64 (0.304 sec)
INFO:tensorflow:global_step/sec: 334.533
INFO:tensorflow:step = 5501, loss = 3954.76 (0.299 sec)
INFO:tensorflow:global_step/sec: 335.454
INFO:tensorflow:step = 5601, loss = 4198.36 (0.300 sec)
INFO:tensorflow:global_step/sec: 325.116
INFO:tensorflow:step = 5701, loss = 2285.23 (0.306 sec)
INFO:tensorflow:global_step/sec: 340.01
INFO:tensorflow:step = 5801, lo

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7f1ba80eb4a8>

In [43]:
# Evaluate on the test set: a single unshuffled pass over the data
ev = regressor.evaluate(
    input_fn=get_input_fn(test_set, num_epochs=1, shuffle=False),
)

INFO:tensorflow:Starting evaluation at 2017-09-10-15:32:40
INFO:tensorflow:Restoring parameters from ./models/boston_model/model.ckpt-10000
INFO:tensorflow:Finished evaluation at 2017-09-10-15:32:40
INFO:tensorflow:Saving dict for global step 10000: average_loss = 12.2415, global_step = 10000, loss = 1224.15


In [44]:
# Report the "loss" entry of the evaluation metrics dict
loss_score = ev["loss"]
print("Loss: {0:f}".format(loss_score))

Loss: 1224.151855


In [45]:
# Predict on the prediction set: a single unshuffled pass over the data
y = regressor.predict(
    input_fn=get_input_fn(prediction_set, num_epochs=1, shuffle=False),
)

# .predict() returns an iterator of dicts; take at most the first six and
# keep the first value of each "predictions" entry.
predictions = [
    result["predictions"][0] for result in itertools.islice(y, 6)
]
print("Predictions: {}".format(predictions))

INFO:tensorflow:Restoring parameters from ./models/boston_model/model.ckpt-10000
Predictions: [33.977001, 18.957952, 23.900221, 34.674038, 15.781827, 19.159594]
