## Regression Problem (Housing dataset)

The task is to approximate the medain house value of each block from the values of the rest of the variables.

The Features:
    - longitude
    - latitude
    - housingmMeadianAge: continuous
    - totalRooms: continuous
    - totalBedrooms: continuous
    - population: continuous
    - households: conitnuous
    - meadianIncome: continuous
    - medianHouseValue: continuous

In [1]:
# Importing the dataset

import pandas as pd
dataset = pd.read_csv('housing.csv')

In [2]:
# Checking the head values of dataset 

dataset.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [3]:
# Getting the feature matrix and target vector

features = dataset.drop('median_house_value', axis=1)
targets = dataset['median_house_value']

In [4]:
# Spliting the dataset

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(features, targets, test_size=0.2, random_state=101)

In [5]:
# Scaling the features and targets

import numpy as np
from sklearn.preprocessing import MinMaxScaler
scalerX = MinMaxScaler()
scalerY = MinMaxScaler()

X_train_sc = pd.DataFrame(data=scalerX.fit_transform(X_train.iloc[:, :-1]), columns=X_train.columns[:-1], index=X_train.index)
X_test_sc = pd.DataFrame(data=scalerX.fit_transform(X_test.iloc[:, :-1]), columns=X_test.columns[:-1], index=X_test.index)

X_train = pd.concat((X_train_sc, X_train[X_train.columns[-1]]), axis=1)
X_test = pd.concat((X_test_sc, X_test[X_test.columns[-1]]), axis=1)

In [6]:
# Encoding the Categorical Data (if needed)

# X_train = pd.concat((X_train, pd.get_dummies(X_train['ocean_proximity'], prefix='ocean_proximity', dummy_na=True)), axis=1).drop('ocean_proximity', axis=1)
# X_train.head()

In [7]:
# Creating the feature columns

import tensorflow as tf
feature_cols = []

for cols in X_train.columns[:-1]:
        feature_cols.append(tf.feature_column.numeric_column(cols))
        
feature_cols.append(tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket(X_train.columns[-1], 
                                                                                                             hash_bucket_size=8),
                                                      dimension=len(np.unique(X_train.iloc[:,-1].values))))

In [8]:
# Creating the input function

input_fn = tf.estimator.inputs.pandas_input_fn(x=X_train, y=Y_train, batch_size=10, num_epochs=1000, shuffle=True)

In [17]:
# Create the estimator model (DNNRegressor)

model = tf.estimator.DNNRegressor(hidden_units=[9,9,9], feature_columns=feature_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\SILVER~1\\AppData\\Local\\Temp\\tmp9llpxfan', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000028768DF0208>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [18]:
# Training the model

model.train(input_fn=input_fn, steps=4000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\SILVER~1\AppData\Local\Temp\tmp9llpxfan\model.ckpt.
INFO:tensorflow:loss = 464870770000.0, step = 0
INFO:tensorflow:global_step/sec: 201.753
INFO:tensorflow:loss = 412676650000.0, step = 100 (0.496 sec)
INFO:tensorflow:global_step/sec: 267.487
INFO:tensorflow:loss = 427797050000.0, step = 200 (0.374 sec)
INFO:tensorflow:global_step/sec: 270.619
INFO:tensorflow:loss = 992228540000.0, step = 300 (0.370 sec)
INFO:tensorflow:global_step/sec: 267.38
INFO:tensorflow:loss = 866871400000.0, step = 400 (0.374 sec)
INFO:tensorflow:global_step/sec: 268.956
INFO:tensorflow:loss = 567555260000.0, step = 500 (0.372 sec)
INFO:tensorflow:global_step/sec: 271.483
INFO:tensorflow:loss = 1122715000000.0, step = 600 (0

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x2876e2aeeb8>

In [23]:
# Creating the evaluation function

eval_input_fn = tf.estimator.inputs.pandas_input_fn(x=X_test, y=Y_test, batch_size=10, num_epochs=1, shuffle=False)

In [24]:
# Getting the evaluation results

results = model.evaluate(eval_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-04-29T18:42:54Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\SILVER~1\AppData\Local\Temp\tmp9llpxfan\model.ckpt-4000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-04-29-18:42:55
INFO:tensorflow:Saving dict for global step 4000: average_loss = 55394628000.0, global_step = 4000, label/mean = 207858.81, loss = 553678000000.0, prediction/mean = 3222.702
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 4000: C:\Users\SILVER~1\AppData\Local\Temp\tmp9llpxfan\model.ckpt-4000


In [25]:
# Displaying the results of the evaluation

results

{'average_loss': 55394628000.0,
 'label/mean': 207858.81,
 'loss': 553678000000.0,
 'prediction/mean': 3222.702,
 'global_step': 4000}

In [20]:
# Making the prediction function

predict_input_fn = tf.estimator.inputs.pandas_input_fn(x=X_test, batch_size=10, num_epochs=1, shuffle=False)

In [21]:
# Getting the predictions

predictions_gen = model.predict(predict_input_fn)

In [22]:
# Displaying the Predictions

predictions = list(predictions_gen)
predictions

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\SILVER~1\AppData\Local\Temp\tmp9llpxfan\model.ckpt-4000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


[{'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=float32)},
 {'predictions': array([3222.7083], dtype=floa