In [None]:
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
%tensorflow_version 1.x
import tensorflow as tf
from tensorflow.python.data import Dataset

# Only surface TensorFlow log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)

# Keep DataFrame displays short: at most 10 rows, floats with one decimal.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", '{:.1f}'.format)

# Load the semicolon-separated heart dataset and put its rows in random order.
# NOTE(review): no random seed is set in the visible cells, so the row order
# (and any head/tail split taken from it) differs from run to run.
heart_dataframe = (
    pd.read_csv("8_heart-c.csv", sep=";")
    .pipe(lambda frame: frame.reindex(np.random.permutation(frame.index)))
)

def preprocess_features(heart_dataframe):
  """Returns a copy of the 25 input feature columns of `heart_dataframe`.

  Args:
    heart_dataframe: DataFrame containing columns "att1" through "att25".

  Returns:
    A new DataFrame holding only "att1".."att25", in that order. It is a copy,
    so modifying it does not touch `heart_dataframe`.
  """
  # Column names are "att1", "att2", ..., "att25".
  feature_names = ["att%d" % position for position in range(1, 26)]
  return heart_dataframe[feature_names].copy()

def preprocess_targets(heart_dataframe):
  """Returns a one-column DataFrame holding the "label" values.

  Args:
    heart_dataframe: DataFrame containing a "label" column.

  Returns:
    A new DataFrame whose only column is "label", indexed like the input.
  """
  return pd.DataFrame({"label": heart_dataframe["label"]})

# Training set: the first 243 rows of the shuffled frame.
training_examples = preprocess_features(heart_dataframe.iloc[:243])
training_targets = preprocess_targets(heart_dataframe.iloc[:243])

# Validation set: the last 61 rows of the shuffled frame.
# NOTE(review): the two slices are disjoint (and together cover every row)
# only if the CSV has exactly 304 rows -- TODO confirm against the data file.
validation_examples = preprocess_features(heart_dataframe.iloc[-61:])
validation_targets = preprocess_targets(heart_dataframe.iloc[-61:])


In [None]:
def construct_feature_columns(input_features):
  """Builds one numeric feature column per entry of `input_features`.

  Args:
    input_features: Iterable of feature names; each item is passed to
      `tf.feature_column.numeric_column`. Iterating a DataFrame yields its
      column names.

  Returns:
    A set containing the resulting feature columns.
  """
  return {tf.feature_column.numeric_column(feature_name)
          for feature_name in input_features}

In [None]:
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Builds a tf.data pipeline and returns the tensors for its next batch.

    Args:
      features: pandas DataFrame of features (anything `dict()` can turn into
        a name -> column mapping).
      targets: Labels, aligned row-for-row with `features`.
      batch_size: Number of examples per batch.
      shuffle: Whether to shuffle the examples.
      num_epochs: Passed to `Dataset.repeat`; None repeats indefinitely.

    Returns:
      Tuple `(features, labels)` taken from a one-shot iterator over the
      dataset.
    """
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset of individual (features, target) examples.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit

    # Shuffle individual examples BEFORE batching/repeating. Shuffling after
    # `batch().repeat()` only reorders whole, fixed batches (the same rows
    # always end up together) and, with num_epochs=None, fills the shuffle
    # buffer with repeated copies of those batches.
    if shuffle:
        ds = ds.shuffle(10000)

    # Configure batching/repeating.
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

In [None]:
def train_nn_regression_model(
    learning_rate,
    steps,
    batch_size,
    hidden_units,
    training_examples,
    training_targets,
    validation_examples,
    validation_targets):
  """Trains a `DNNRegressor` and reports RMSE after each of 10 periods.

  Training uses gradient descent with gradients clipped to a norm of 5.0.
  The training RMSE is printed after every period; the final training and
  validation RMSE are printed once training has finished.

  Args:
    learning_rate: Learning rate for the gradient descent optimizer.
    steps: Total number of training steps, split across the 10 periods.
    batch_size: Batch size used by the training input function.
    hidden_units: Passed to `DNNRegressor(hidden_units=...)`.
    training_examples: Training features; iterating it must yield the feature
      names used to build the numeric feature columns.
    training_targets: Training targets; its "label" entry is fed to the model.
    validation_examples: Validation features.
    validation_targets: Validation targets; its "label" entry is used.

  Returns:
    The trained `DNNRegressor`.
  """
  periods = 10
  # Use a whole number of steps per period (rounded up, so at least `steps`
  # steps run in total) instead of handing `train` the float that `/`
  # produces under Python 3.
  steps_per_period = int(math.ceil(steps / float(periods)))

  # Create a DNNRegressor object.
  my_optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
  my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)
  dnn_regressor = tf.estimator.DNNRegressor(
      feature_columns=construct_feature_columns(training_examples),
      hidden_units=hidden_units,
      optimizer=my_optimizer,
  )

  # Create input functions.
  training_input_fn = lambda: my_input_fn(training_examples,
                                          training_targets["label"],
                                          batch_size=batch_size)
  predict_training_input_fn = lambda: my_input_fn(training_examples,
                                                  training_targets["label"],
                                                  num_epochs=1,
                                                  shuffle=False)
  predict_validation_input_fn = lambda: my_input_fn(validation_examples,
                                                    validation_targets["label"],
                                                    num_epochs=1,
                                                    shuffle=False)

  def _predict(input_fn):
    # Run the current model over `input_fn` and collect the scalar predictions.
    return np.array([item['predictions'][0]
                     for item in dnn_regressor.predict(input_fn=input_fn)])

  # Train the model, but do so inside a loop so that we can periodically assess
  # loss metrics.
  print("Training model...")
  print("RMSE (on training data):")
  for period in range(periods):
    # Train the model, starting from the prior state.
    dnn_regressor.train(
        input_fn=training_input_fn,
        steps=steps_per_period
    )
    # Take a break and compute predictions.
    training_predictions = _predict(predict_training_input_fn)
    validation_predictions = _predict(predict_validation_input_fn)

    # Compute training and validation loss; arguments follow the library's
    # (y_true, y_pred) order.
    training_root_mean_squared_error = math.sqrt(
        metrics.mean_squared_error(training_targets, training_predictions))
    validation_root_mean_squared_error = math.sqrt(
        metrics.mean_squared_error(validation_targets, validation_predictions))
    # Print the current training loss.
    print("  period %02d : %0.2f" % (period, training_root_mean_squared_error))
  print("Model training finished.")

  print("Final RMSE (on training data):   %0.2f" % training_root_mean_squared_error)
  print("Final RMSE (on validation data): %0.2f" % validation_root_mean_squared_error)

  return dnn_regressor

In [None]:
# Train the regressor on the training split, reporting against the validation
# split.
dnn_regressor = train_nn_regression_model(
    # Data.
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets,
    # Hyperparameters.
    hidden_units=[2, 4],
    batch_size=20,
    steps=500,
    learning_rate=0.1)

Training model...
RMSE (on training data):
  period 00 : 0.53
  period 01 : 0.45
  period 02 : 0.44
  period 03 : 0.70
  period 04 : 0.42
  period 05 : 0.35
  period 06 : 0.75
  period 07 : 0.41
  period 08 : 0.77
  period 09 : 0.55
Model training finished.
Final RMSE (on training data):   0.55
Final RMSE (on validation data): 0.55


In [None]:
# Evaluate the trained regressor on the rows of "8_heart-c.csv".
# NOTE(review): this is the same file the training and validation rows were
# taken from, so the value printed below is not a held-out test score --
# confirm whether a separate test file was intended.
heart_data = pd.read_csv("8_heart-c.csv", sep=";")

test_examples = preprocess_features(heart_data)
test_targets = preprocess_targets(heart_data)


def predict_testing_input_fn():
  """Feeds every test row once, in file order."""
  return my_input_fn(test_examples,
                     test_targets["label"],
                     num_epochs=1,
                     shuffle=False)


# Collect the scalar prediction produced for each row.
test_predictions = np.array(
    [item['predictions'][0]
     for item in dnn_regressor.predict(input_fn=predict_testing_input_fn)])

root_mean_squared_error = math.sqrt(metrics.mean_squared_error(test_predictions, test_targets))

print("Final RMSE (on test data): ", root_mean_squared_error)

Final RMSE (on test data):  0.13177688001559304
