In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

!pip install -q tensorflow==2.0.0-alpha0 treegrad
import tensorflow as tf
import numpy as np

In [None]:
# Show the TensorFlow version actually loaded in this kernel.
tf.__version__

'2.0.0-alpha0'

In this example, we score the Iris dataset using the TreeGrad formulation, which expresses a decision tree as a three-layer neural network.

In [None]:
from sklearn.datasets import load_iris
from sklearn import tree
from sklearn.tree import _tree
import lightgbm as lgb
from sklearn.metrics import accuracy_score

def tree_to_code(tree, feature_names):
    """Print a fitted decision tree as nested if/else pseudo-code.

    Args:
        tree: fitted estimator exposing a ``tree_`` attribute with ``feature``,
            ``threshold``, ``children_left``, ``children_right`` and ``value``.
        feature_names: sequence of names indexed by the ids in ``tree_.feature``.

    Returns:
        None. The rendered text is written to stdout with ``print``.
    """
    fitted = tree.tree_
    leaf_marker = _tree.TREE_UNDEFINED

    # Resolve the display name of every node up front; leaves carry the
    # sentinel feature id and therefore get a placeholder name.
    names_by_node = []
    for feat_idx in fitted.feature:
        if feat_idx != leaf_marker:
            names_by_node.append(feature_names[feat_idx])
        else:
            names_by_node.append("undefined!")

    print("def tree({}):".format(", ".join(feature_names)))

    def emit(node_id, level):
        """Print the subtree rooted at ``node_id`` indented by ``level`` steps."""
        pad = "  " * level

        # Leaf: print its stored value and stop descending.
        if fitted.feature[node_id] == leaf_marker:
            print("{}return {}".format(pad, fitted.value[node_id]))
            return

        split_name = names_by_node[node_id]
        cut = fitted.threshold[node_id]
        print("{}if {} <= {}:".format(pad, split_name, cut))
        emit(fitted.children_left[node_id], level + 1)
        print("{}else:  # if {} > {}".format(pad, split_name, cut))
        emit(fitted.children_right[node_id], level + 1)

    # The root is node 0; start one level deep so the body sits under "def".
    emit(0, 1)
    
# Load the Iris measurements (X) and the class labels (y).
iris = load_iris()
X = iris.data
y = iris.target

# Four leaves means three split nodes, matching num_nodes = 3 used for the
# network built later in the notebook.
# random_state is pinned because scikit-learn shuffles the candidate features
# at each split, so equally good splits could otherwise be chosen differently
# from one run to the next.
clf = tree.DecisionTreeClassifier(max_leaf_nodes=4, random_state=0)
clf.fit(X, y)

# Print the learned rules as readable if/else pseudo-code.
tree_to_code(clf, iris.feature_names)

def tree(sepal length (cm), sepal width (cm), petal length (cm), petal width (cm)):
  if petal length (cm) <= 2.449999988079071:
    return [[50.  0.  0.]]
  else:  # if petal length (cm) > 2.449999988079071
    if petal width (cm) <= 1.75:
      if petal length (cm) <= 4.950000047683716:
        return [[ 0. 47.  1.]]
      else:  # if petal length (cm) > 4.950000047683716
        return [[0. 2. 4.]]
    else:  # if petal width (cm) > 1.75
      return [[ 0.  1. 45.]]


In [None]:
# Hand-set parameters for the three decision nodes and four leaves.
# A node computes coef * feature + inter, so it changes sign at
# feature = -inter / coef (0.8, about 1.75 and 5.0 for the values below).
coef = np.array([-1.25, -0.57, -0.20])
# assume inter is always 1 for convenience
inter = np.array([1, 1, 1])
# One row per leaf, one column per class.
leaf = np.array([
    [50, 0., 0.],
    [0., 47, 1],
    [0, 2, 4],
    [0, 1, 45],
])
param = [coef, inter, leaf]

# One row per leaf, six columns: the first three select a node's score,
# the last three select the negated score of the same node.
route_array = np.array([
    [1, 0, 0, 0, 0, 0],
    [0, 1, 1, 1, 0, 0],
    [0, 1, 0, 1, 0, 1],
    [0, 0, 0, 1, 1, 0],
])

# Written one row per node, then transposed to (features, nodes);
# each node reads exactly one input feature.
sparse_info = np.transpose(np.array([
    [0, 0, 0, 1],
    [0, 0, 0, 1],
    [0, 0, 1, 0],
]))

# Broadcast the per-node coefficient over the feature mask.
coef_sparse = np.multiply(sparse_info, coef)

In [None]:
# to build tree model, it is just a three layer neural network - lets see how we go...

class NodeLayer(tf.keras.layers.Layer):
  """Decision-node layer that outputs every node score and its negation.

  For an input of shape (..., features) the output has 2 * num_nodes columns:
  the first num_nodes are ``input @ kernel + bias`` and the remaining
  num_nodes are the same values with the opposite sign.

  Args:
    num_nodes: number of decision nodes (columns of the kernel).
    **kwargs: forwarded to ``tf.keras.layers.Layer`` (for example ``name``).
  """

  def __init__(self, num_nodes, **kwargs):
    super(NodeLayer, self).__init__(**kwargs)
    self.num_nodes = num_nodes

  def build(self, input_shape):
    """Create the dense kernel (features x nodes) and the per-node bias."""
    # add_weight replaces the deprecated add_variable alias.
    self.kernel = self.add_weight(name="kernel",
                                  shape=[int(input_shape[-1]),
                                         self.num_nodes])
    self.bias = self.add_weight(name="bias", shape=[self.num_nodes,])

  def call(self, input):
    """Return the node scores concatenated with their negations."""
    # Concatenating [w, -w] and [b, -b] yields both sides of every split
    # in a single matmul.
    signed_kernel = tf.concat([self.kernel, -self.kernel], 1)
    signed_bias = tf.concat([self.bias, -self.bias], 0)
    return tf.matmul(input, signed_kernel) + signed_bias

  
def gumbel_softmax(x, tau=0.01):
  """Temperature-scaled sigmoid of ``x``.

  Despite the name, no Gumbel noise is sampled here; the function is a
  sigmoid sharpened by the temperature ``tau``.

  Args:
    x: tensor of node scores.
    tau: temperature; smaller values push the output towards a hard 0/1 step.

  Returns:
    Tensor with the same shape as ``x`` and values in (0, 1).
  """
  # Clip the scaled scores so exp() below cannot overflow.
  x_temp = tf.clip_by_value(x/tau, -32, 32)
  # Use the scaled, clipped value: the original passed the raw ``x`` here,
  # which silently ignored both ``tau`` and the clipping.
  return 1/(1+tf.keras.backend.exp(-x_temp))

def activation1(x):
  """Return the log of the gated node scores, offset by the Keras epsilon."""
  backend = tf.keras.backend
  gate = gumbel_softmax(x)
  # epsilon keeps the logarithm finite when the gate is exactly zero.
  return backend.log(gate + backend.epsilon())

def activation2(x):
  """Return the element-wise exponential of ``x``."""
  backend = tf.keras.backend
  return backend.exp(x)

# route layer is Dense(num_nodes+1): one unit per leaf, weights frozen
# leaf layer is Dense(3): one output per class
num_nodes = 3
decision_tree = tf.keras.Sequential()
# node scores and their negations
decision_tree.add(NodeLayer(num_nodes))
# log of the gated scores
decision_tree.add(tf.keras.layers.Lambda(activation1))
# sum the log-gates along each leaf's path
decision_tree.add(
    tf.keras.layers.Dense(num_nodes+1, trainable=False, use_bias=False))
# back from log space
decision_tree.add(tf.keras.layers.Lambda(activation2))
# combine the leaves into per-class scores
decision_tree.add(tf.keras.layers.Dense(3, use_bias=False))

In [None]:
# Call the model once so that every layer is built and its variables exist;
# set_weights below needs them. No parameters have been set at this point, so
# iris_pred is not yet a meaningful prediction.
iris_pred = decision_tree(X)

In [None]:
# Inspect the layer stack; indices 0, 2 and 4 are the layers whose weights are set below.
decision_tree.layers

[<__main__.NodeLayer at 0x7fab59955518>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7fab59955780>,
 <tensorflow.python.keras.layers.core.Dense at 0x7fab599558d0>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7fab59955c18>,
 <tensorflow.python.keras.layers.core.Dense at 0x7fab59955d68>]

In [None]:
# set parameters for iris dataset
# layer 0 (NodeLayer): masked split coefficients and the node intercepts
decision_tree.layers[0].set_weights([coef_sparse, inter])
# layer 2 (routing Dense): route_array is (leaves, 2*nodes), so it is
# transposed to the (inputs, units) layout of a Dense kernel
decision_tree.layers[2].set_weights([route_array.T])
# layer 4 (leaf Dense): per-leaf class counts
decision_tree.layers[4].set_weights([leaf])

In [None]:
# prediction from neural network: index of the highest-scoring class per row
np.array(decision_tree(X)).argmax(axis=1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

If we want to build a sparse tree, we just need to alter the construction of the `NodeLayer` so that each node's coefficient is multiplied by a fixed, non-trainable sparsity mask.

In [None]:
# to build tree model, it is just a three layer neural network - lets see how we go...

class NodeLayer(tf.keras.layers.Layer):
  """Sparse decision-node layer: one coefficient per node times a fixed mask.

  The effective weight matrix is ``kernel * sparse``, where ``kernel`` holds
  one trainable coefficient per node and ``sparse`` is a non-trainable
  (features x nodes) matrix. As in the dense variant, the output has
  2 * num_nodes columns: the node scores followed by their negations.

  NOTE: this definition replaces the earlier dense ``NodeLayer`` in the
  notebook namespace.

  Args:
    num_nodes: number of decision nodes.
    **kwargs: forwarded to ``tf.keras.layers.Layer`` (for example ``name``).
  """

  def __init__(self, num_nodes, **kwargs):
    super(NodeLayer, self).__init__(**kwargs)
    self.num_nodes = num_nodes

  def build(self, input_shape):
    """Create the frozen mask, the per-node coefficients and the bias."""
    # add_weight replaces the deprecated add_variable alias.
    # The mask is never trained; it is expected to be filled in through
    # set_weights. NOTE(review): until then it holds whatever the default
    # initializer produced, so the layer is not yet sparse.
    self.sparse = self.add_weight(name="sparse",
                                  shape=[int(input_shape[-1]),
                                         self.num_nodes],
                                  trainable=False)
    self.kernel = self.add_weight(name="kernel", shape=[self.num_nodes,])
    self.bias = self.add_weight(name="bias", shape=[self.num_nodes,])

  def call(self, input):
    """Return the masked node scores concatenated with their negations."""
    # Broadcast the per-node coefficient over the (features x nodes) mask.
    coef = self.kernel * self.sparse
    signed_coef = tf.concat([coef, -coef], 1)
    signed_bias = tf.concat([self.bias, -self.bias], 0)
    return tf.matmul(input, signed_coef) + signed_bias

# route layer is Dense(num_nodes+1): one unit per leaf, weights frozen
# leaf layer is Dense(3): one output per class
num_nodes = 3
decision_tree = tf.keras.Sequential()
# masked node scores and their negations
decision_tree.add(NodeLayer(num_nodes))
# log of the gated scores
decision_tree.add(tf.keras.layers.Lambda(activation1))
# sum the log-gates along each leaf's path
decision_tree.add(
    tf.keras.layers.Dense(num_nodes+1, trainable=False, use_bias=False))
# back from log space
decision_tree.add(tf.keras.layers.Lambda(activation2))
# combine the leaves into per-class scores
decision_tree.add(tf.keras.layers.Dense(3, use_bias=False))

In [None]:
# Call the rebuilt model once so that its layers create their variables;
# the parameters themselves are assigned with set_weights afterwards.
iris_pred = decision_tree(X)

In [None]:
# set parameters for iris dataset
# layer 0 (sparse NodeLayer): values are passed as kernel, bias, mask.
# NOTE(review): this presumes set_weights lists trainable weights before the
# non-trainable mask - confirm against decision_tree.layers[0].weights.
decision_tree.layers[0].set_weights([coef, inter, sparse_info])
# layer 2 (routing Dense): transposed to the (inputs, units) kernel layout
decision_tree.layers[2].set_weights([route_array.T])
# layer 4 (leaf Dense): per-leaf class counts
decision_tree.layers[4].set_weights([leaf])

In [None]:
# prediction from the sparse neural network: index of the highest-scoring class per row
np.array(decision_tree(X)).argmax(axis=1)


array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])