In [1]:
"""
    Gradient boosted decision tree
"""
import os
import tensorflow as tf
import numpy as np
import copy
os.environ['CUDA_VISIBLE_DEVICES'] = ""
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "1"

num_classes = 2  # Total classes, > 23000 and otherwise
num_features = 13  # 13 columns data features

max_steps = 2000
batch_size = 256
learning_rate = 1.0
l1_regular = 0.0
l2_regular = 0.1

# GBDT parameters
num_batches_per_layer = 1000
num_trees = 10
max_depth = 4



In [2]:
# Prepare data set
from tensorflow.keras.datasets import boston_housing
# (x: (404, 13) - (samples, features), y: (404,) - (samples) with price value
(x_train, y_train), (x_test, y_test) = boston_housing.load_data()


def to_binary_class(y):
    """
    2 classes for one have price grater than 23000 and otherwise
    :param y:
    :return:
    """
    for i, label in enumerate(y):
        if label > 23.0:
            y[i] = 1
        else:
            y[i] = 0
    return y

# train: (404, ) - test: (102,)
y_train_binary = to_binary_class(copy.deepcopy(y_train))
y_test_binary = to_binary_class(copy.deepcopy(y_test))

In [3]:
# GBDT Classifier
train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"x": x_train},
    y=y_train_binary,
    batch_size=batch_size,
    num_epochs=None,
    shuffle=True
)

test_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"x": x_test},
    y=y_test_binary,
    batch_size=batch_size,
    num_epochs=None,
    shuffle=False
)

test_train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"x": x_train},
    y=y_train_binary,
    batch_size=batch_size,
    num_epochs=None,
    shuffle=False
)

feature_columns = [tf.feature_column.numeric_column(key='x', shape=(num_features,))]
feature_columns

[NumericColumn(key='x', shape=(13,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [4]:
gbdt_classifier = tf.estimator.BoostedTreesClassifier(
    feature_columns=feature_columns,
    n_batches_per_layer=num_batches_per_layer,
    n_classes=num_classes,
    learning_rate=learning_rate,
    n_trees=num_trees,
    max_depth=max_depth,
    l1_regularization=l1_regular,
    l2_regularization=l2_regular,
)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\NGUYEN~1\\AppData\\Local\\Temp\\tmp5fzd8ezc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [5]:
gbdt_classifier.train(input_fn=train_input_fn, max_steps=max_steps)


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
'_Resource' object has no attribute 'name'
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
'_Resource' object has no attribute 'name'
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\NGUYEN~1\AppData\Local\Temp\tmp5fzd8ezc\model.ckpt.
'_Resource' object has no attribute 'name'
INFO:tensorflow:loss = 0.6931472, step = 0
INF

<tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesClassifier at 0x22b995bc898>

In [6]:
gbdt_classifier.evaluate(test_train_input_fn, steps=max_steps)

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Instructions for updating:
The value of AUC returned by this may race with the update so this is deprected. Please use tf.keras.metrics.AUC instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-11-06T11:39:53Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\NGUYEN~1\AppData\Local\Temp\tmp5fzd8ezc\model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [200/2000]
INFO:tensorflow:Evaluation [400/2000]
INFO:tensorflow:Evaluation [600/2000]
INFO:tensorflow:Evaluation [800/2000]
INFO:tensorflow:Evaluation [1000/2000]
INFO:tensorflow:Evaluation [1200/2000]
INFO:tensorflow:Evaluation [1400/2000]
INFO:tensorflow:Evaluation [1600/2000]
INFO:tensorflow:Evaluation [1800/2000]
INFO:tensorflow:Evaluation [2000/2000]
INFO:tensorflow:Inference T

{'accuracy': 0.87624806,
 'accuracy_baseline': 0.638625,
 'auc': 0.9161989,
 'auc_precision_recall': 0.9092197,
 'average_loss': 0.3771418,
 'label/mean': 0.361375,
 'loss': 0.3771418,
 'precision': 0.8809421,
 'prediction/mean': 0.36962026,
 'recall': 0.7603068,
 'global_step': 2000}

In [7]:
# GDBT regressor
gdbt_regressor = tf.estimator.BoostedTreesRegressor(
    feature_columns=feature_columns,
    n_batches_per_layer=num_batches_per_layer,
    learning_rate=learning_rate,
    n_trees=num_trees,
    max_depth=max_depth,
    l1_regularization=l1_regular,
    l2_regularization=l2_regular
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\NGUYEN~1\\AppData\\Local\\Temp\\tmpz56xy_yb', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [8]:
gdbt_regressor.train(train_input_fn, max_steps=max_steps)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
'_Resource' object has no attribute 'name'
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
'_Resource' object has no attribute 'name'
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\NGUYEN~1\AppData\Local\Temp\tmpz56xy_yb\model.ckpt.
'_Resource' object has no attribute 'name'
INFO:tensorflow:loss = 0.4140625, step = 0
INFO:tensorflow:loss = 0.390625, step = 0 (1.323 sec)
INFO:tensorflow:loss = 0.35546875, step = 0 (0.860 sec)
INFO:tensorflow:loss = 0.37109375, step = 0 (0.958 sec)
INFO:tensorflow:loss = 0.40234375, step = 0 (0.925 sec)
INFO:tensorflow:loss = 0.32421875, step = 0 (0.897 sec)
INFO:tensorflow:loss = 0.38671875, step = 0 (0.891 sec)
INFO:tensorflow:loss = 0.37890625, step = 0 (0.943 sec)
INFO:tensorflow:loss = 0.34765625, step = 0 (0.810 sec)
INFO:tensorflow:loss = 0.32421875, ste

<tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesRegressor at 0x22c287add30>

In [None]:
gdbt_regressor.evaluate(test_input_fn)