# Boston Housing Data
We are going to model this dataset by predicting the median value of owner-occupied homes (in $1000s). I'm going to use a neural-network regressor.

## Data
samples: 506
features: real, positive
Total size = [506, 14]

We need to separate the label (the value to predict) from the features:
x: [506, 13]
y: [506, 1]

data = { [x, y] }

Then we need to split the data
80%/20%

Training set: 80%
training_set = { x_train, y_train }

Test set: 20%
test_set = { x_test, y_test }

## Implementation
1. Get the data
2. Preprocess the data
3. Setup model
4. Setup input pipelines
5. Train the model
6. Test the model
7. Make a prediction

In [1]:
# Dependencies
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import os
import collections
import time
import numpy as np
import requests
import csv

## New stuff
from sklearn import datasets
from sklearn import model_selection

In [2]:
# Configuration
# Lightweight record pairing a feature matrix (`data`) with its labels (`target`)
Dataset = collections.namedtuple('Dataset', 'data target')

# Emit INFO-level TensorFlow log messages
tf.logging.set_verbosity(tf.logging.INFO)

# Output locations: every run gets its own timestamped model directory
# (swap the "deep_" prefix for "linear_" when running a linear model)
model_dir_root = '/tmp/housing'
model_dir = os.path.join(model_dir_root, "deep_{}".format(int(time.time())))
data_file = 'boston_housing.csv'

# Hyperparameters
batch_size, num_steps = 50, 10000
lr, rs = 0.1, 0.001  # learning rate, L1 regularization strength

In [3]:
def get_data():
    """Load and return the Boston housing dataset shipped with scikit-learn.

    NOTE(review): ``load_boston`` is no longer available in recent
    scikit-learn releases -- confirm the installed version still provides it.
    """
    return datasets.load_boston()

In [4]:
def explore_data(data):
    """Print the description, feature names, first example and first target.

    ``data`` must expose ``DESCR``, ``feature_names``, ``data`` and ``target``
    attributes.
    """
    # One shared template: "<label>: <value>" followed by a blank line
    render = '{}: {}\n'.format

    print(render('Description', data.DESCR))
    print(render('Features', data.feature_names))
    print(render('Example 0', data.data[0]))
    print(render('Target 0', data.target[0]))

In [5]:
def split_dataset(data, test_size):
    """Split a loaded dataset into test and training ``Dataset`` objects.

    Args:
        data: Object exposing ``data`` (features) and ``target`` (labels).
        test_size: Passed to ``train_test_split`` as ``test_size``.

    Returns:
        A ``(test_set, train_set)`` tuple -- note the test set comes first.
    """
    features, labels = data.data, data.target

    # A fixed random_state keeps the split reproducible between runs
    pieces = model_selection.train_test_split(
        features, labels, test_size=test_size, random_state=42)
    train_features, test_features, train_labels, test_labels = pieces

    training = create_dataset(train_features, train_labels)
    held_out = create_dataset(test_features, test_labels)

    return held_out, training

In [6]:
def create_dataset(x, y):
    """Bundle features and regression targets into a ``Dataset``.

    Args:
        x: Array-like of feature rows.
        y: Array-like of target values, one per row of ``x``.

    Returns:
        A ``Dataset`` whose ``data`` is ``x`` as a NumPy array and whose
        ``target`` is ``y`` as a float32 NumPy array.
    """
    # The targets are continuous values for a regression problem, so they
    # must stay floating point: an integer cast would truncate e.g. 21.6 to
    # 21. float32 matches the label tensor dtype used by generate_input_fn.
    # (The ``np.int`` alias is also gone from NumPy >= 1.24.)
    target = np.array(y, dtype=np.float32)
    data = np.array(x)

    return Dataset(data=data, target=target)

In [7]:
def construct_dataset():
    """Load the raw data and return it as ``(test_set, training_set)``.

    The split is 80% training / 20% test. Call ``explore_data`` on the
    result of ``get_data()`` here to inspect the raw data before splitting.
    """
    held_out, training = split_dataset(get_data(), .2)
    return held_out, training

In [8]:
def log_data(dataset, example, all=False):
    """Print the features and label of one example from a dataset.

    Args:
        dataset: Indexable pair where ``dataset[0]`` holds the features and
            ``dataset[1]`` holds the labels.
        example: Index of the example to print.
        all: When truthy, print the whole dataset before the single example.
            (The name shadows the ``all`` builtin; it is kept so existing
            keyword callers continue to work.)
    """
    if all:
        # Dump the complete dataset first
        print(dataset)

    # Log one example and its answer
    print("X: {}".format(dataset[0][example]))
    print("Y: {}".format(dataset[1][example]))

In [9]:
def generate_input_fn(dataset, batch_size=batch_size):
    """Build an input function that yields shuffled batches from ``dataset``.

    Args:
        dataset: Indexable pair; ``dataset[0]`` is the features and
            ``dataset[1]`` the labels.
        batch_size: Number of examples per batch.

    Returns:
        A zero-argument callable returning ``({'features': batch}, labels)``.
    """
    def _input_fn():
        features = tf.constant(dataset[0])
        labels = tf.constant(dataset[1], dtype=tf.float32)

        # Queue sizing is expressed as multiples of the batch size
        queue_options = dict(
            batch_size=batch_size,
            capacity=8 * batch_size,
            min_after_dequeue=4 * batch_size,
            enqueue_many=True,
        )
        feature_batch, label_batch = tf.train.shuffle_batch(
            [features, labels], **queue_options)

        return {'features': feature_batch}, label_batch

    return _input_fn

In [None]:
def define_and_run_dnn_regressor(num_steps, logdir, lr=.1, batch_size=batch_size):
    """Train a DNN regressor, evaluate it, and print ten sample predictions.

    Relies on the module-level ``training_set`` and ``test_set`` datasets and
    on the module-level ``rs`` regularization strength; all three must be
    defined before this function is called.

    Args:
        num_steps: Number of training steps.
        logdir: Directory passed to the estimator as ``model_dir``.
        lr: Learning rate for the ProximalAdagrad optimizer.
        batch_size: Batch size for the training, evaluation and prediction
            input pipelines.
    """
    feature_columns = [tf.contrib.layers.real_valued_column('features', dimension=13)]

    estimator = tf.estimator.DNNRegressor(
        feature_columns=feature_columns,
        hidden_units=[10,10,10],
        model_dir=logdir,
        optimizer=tf.train.ProximalAdagradOptimizer(
            learning_rate=lr,
            l1_regularization_strength=rs
        ),
        activation_fn=tf.nn.relu)

    # Train the estimator
    print("Starting deep training")
    estimator.train(
        input_fn=generate_input_fn(
            training_set,
            batch_size=batch_size
        ),
        steps=num_steps)
    print("Finished deep training")
    print("evaluating DNN regressor accuracy")

    # Evaluate the regressor on the test set
    average_loss = estimator.evaluate(
        input_fn=generate_input_fn(
            test_set,
            batch_size=batch_size
        ),
        steps=10
    )['average_loss']

    print("DNN regressor average loss: {0}".format(average_loss))

    # Predict over the test set in its stored order (single pass, no
    # shuffling) so that prediction i really belongs to test example i.
    # A shuffled pipeline would pair each prediction with an arbitrary row.
    predictions = estimator.predict(
        input_fn=tf.estimator.inputs.numpy_input_fn(
            x={'features': test_set[0]},
            batch_size=batch_size,
            num_epochs=1,
            shuffle=False
        )
    )

    # Show the first ten predictions next to the matching test examples;
    # zip stops early if the test set has fewer than ten rows.
    print("DNN regressor prediction: ")
    for i, result in zip(range(10), predictions):
        prediction = result['predictions'][0]
        print("\nPrediction for example {0}: {1}".format(i, prediction))
        log_data(test_set, i)

In [None]:
# Main Program
# The datasets are bound at module level because
# define_and_run_dnn_regressor reads them as globals.
test_set, training_set = construct_dataset()

define_and_run_dnn_regressor(num_steps=num_steps, logdir=model_dir)

In [None]:
# To inspect the training logs, run in a shell: tensorboard --logdir=/tmp/housing