In [1]:
import lightgbm as lgb
import numpy as np
from sklearn import tree
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.tree import _tree

def tree_to_code(tree, feature_names):
    """Print a fitted decision tree as nested if/else pseudo-code.

    Args:
        tree: fitted estimator exposing a ``tree_`` attribute with the
            ``feature``, ``threshold``, ``children_left``, ``children_right``
            and ``value`` arrays.
        feature_names: sequence of names, indexed by the entries of
            ``tree_.feature``.

    The output is written with ``print``; nothing is returned. Feature names
    are emitted verbatim, so the printed text is only valid Python when the
    names are valid identifiers.
    """
    fitted = tree.tree_
    undefined = _tree.TREE_UNDEFINED

    # Resolve every node's split feature up front; leaves get a placeholder.
    labels = []
    for idx in fitted.feature:
        labels.append("undefined!" if idx == undefined else feature_names[idx])

    print("def tree({}):".format(", ".join(feature_names)))

    def emit(node_id, level):
        pad = "  " * level

        # Leaf: print the stored value array and stop descending.
        if fitted.feature[node_id] == undefined:
            print("{}return {}".format(pad, fitted.value[node_id]))
            return

        label = labels[node_id]
        cut = fitted.threshold[node_id]
        print("{}if {} <= {}:".format(pad, label, cut))
        emit(fitted.children_left[node_id], level + 1)
        print("{}else:  # if {} > {}".format(pad, label, cut))
        emit(fitted.children_right[node_id], level + 1)

    # Root is node 0; start one level deep so the body sits under the "def" line.
    emit(0, 1)
    
# Load the iris bunch and split it into the feature matrix and class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Cap the tree at four leaves so it stays small enough to transcribe by hand.
# NOTE(review): no random_state is set; after a fresh run, confirm the printed
# splits still match the hard-coded values used further down.
clf = tree.DecisionTreeClassifier(max_leaf_nodes=4)
clf.fit(X, y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=4,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [2]:
# dot -Tpng tree.dot -o tree.png 
# tree.export_graphviz(clf, out_file='tree_draft.dot')  

In [3]:
# !dot -Tpng tree.dot -o tree.png 

In [4]:
# Accuracy on the same samples the tree was fitted on (no held-out split).
clf.score(X, y)

0.9733333333333334

In [5]:
# Feature names, passed to tree_to_code in the next cell to label the splits.
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [6]:
# Print the fitted tree as if/else rules so its splits can be transcribed by hand.
tree_to_code(clf, iris.feature_names)

# Note on boundaries: a split "x <= t" is encoded below as coef * x + 1 >= 0
# with coef = -1/t, so the reciprocals of the three thresholds are needed:
# 1/0.8  = 1.25
# 1/1.75 = 0.57 (rounded)
# 1/4.95 = 0.20 (rounded)

def tree(sepal length (cm), sepal width (cm), petal length (cm), petal width (cm)):
  if petal width (cm) <= 0.800000011920929:
    return [[50.  0.  0.]]
  else:  # if petal width (cm) > 0.800000011920929
    if petal width (cm) <= 1.75:
      if petal length (cm) <= 4.950000047683716:
        return [[ 0. 47.  1.]]
      else:  # if petal length (cm) > 4.950000047683716
        return [[0. 2. 4.]]
    else:  # if petal width (cm) > 1.75
      return [[ 0.  1. 45.]]


In [7]:
# Hand-built soft-tree parameters mirroring the sklearn tree printed above.
#
# Each split "x <= t" is encoded as coef * x + inter >= 0, with inter assumed
# to be 1 for convenience and coef = -1/t. The coefficients are derived from
# the thresholds rather than typed in as rounded reciprocals: a rounded value
# such as -0.20 would move the petal-length split from 4.95 to 5.0.
split_thresholds = np.array([0.8, 1.75, 4.95])  # node 0, node 1, node 2

param = [
    # coef: one slope per split node
    -1.0 / split_thresholds,
    # inter: one intercept per split node
    np.array([1, 1, 1]),
    # leaf: class counts per leaf (rows), copied from the printed tree
    np.array([[50, 0., 0.],
              [0., 47, 1],
              [0, 2, 4],
              [0, 1, 45]])
]

# Root-to-leaf paths. Rows are leaves (same order as `leaf`); columns are
# [node0 left, node1 left, node2 left, node0 right, node1 right, node2 right],
# where "left" means the "x <= t" branch. A 1 marks a decision on the path.
route_array = np.array([
    [1, 0, 0, 0, 0, 0],
    [0, 1, 1, 1, 0, 0],
    [0, 1, 0, 1, 0, 1],
    [0, 0, 0, 1, 1, 0]
])

# Feature mask, shape (4 features, 3 nodes) after the transpose: column j is
# one-hot on the feature that node j splits on (index 3 = petal width for
# nodes 0 and 1, index 2 = petal length for node 2).
sparse_info = np.array([
    [0, 0, 0, 1],
    [0, 0, 0, 1],
    [0, 0, 1, 0]
]).T

In [8]:
# hard code all things
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated with NumPy."""
    denominator = 1 + np.exp(-z)
    return 1. / denominator

# Tempered logistic gate. Despite the name, this is applied element-wise:
# there is no Gumbel noise and no normalisation over an axis.
def gumbel_softmax(x, tau=1.0, eps=np.finfo(float).eps):
    """Return 1 / (1 + exp(-x / tau)) element-wise, without overflowing.

    Args:
        x: scalar or array-like of logits.
        tau: temperature; smaller values push the output towards a hard 0/1 step.
        eps: unused; kept so existing calls that pass it keep working.

    Returns:
        Values in [0, 1] with the same shape as ``x``.
    """
    scaled = np.asarray(x) / tau
    # exp(-log(1 + exp(-s))) equals 1 / (1 + exp(-s)). logaddexp evaluates the
    # log term without computing exp(-s) directly, so a small tau (large |s|)
    # no longer triggers an overflow warning in exp.
    return np.exp(-np.logaddexp(0, -scaled))

In [9]:
# Unpack the per-node split coefficients, per-node intercepts, and per-leaf class counts.
coef, inter, leaf = param

In [10]:
# Broadcast the (3,) coefficients over the (4, 3) feature mask: column j keeps
# coef[j] only in the row of the feature that node j splits on.
coef_sparse = coef * sparse_info

In [11]:
# Reference accuracy of the sklearn tree, to compare with the hand-built version below.
clf.score(X, y)

0.9733333333333334

In [12]:
# Compact NumPy forward pass of the hand-built soft tree.

# Stack each node's "left" gate with its negated "right" counterpart.
signed_coef = np.hstack([coef_sparse, -coef_sparse])
signed_inter = np.hstack([inter, -inter])
decisions = np.dot(X, signed_coef) + signed_inter

# Sharp gates (tau=0.01), kept in log space so that a path probability
# becomes a sum of the log-gates selected by route_array.
decision_soft = np.log(gumbel_softmax(decisions, tau=0.01))
route_probas = np.exp(np.dot(decision_soft, route_array.T))

# Weight the leaf class counts by the probability of reaching each leaf.
proba = np.dot(route_probas, leaf)

accuracy_score(y, proba.argmax(axis=1))

0.9733333333333334

In [13]:
from tensorflow.keras.layers import Conv1D, Input, Lambda, GlobalMaxPooling1D, LocallyConnected1D, Dense, Concatenate
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import tensorflow.keras as keras
from tensorflow.keras.constraints import non_neg

In [14]:
def keras_gumbel_softmax(x, tau=1.0, eps=np.finfo(float).eps):
    """Keras-backend tempered logistic gate, applied element-wise.

    Computes 1 / (1 + exp(c)) where c is -(x + eps) / tau clipped to
    [-32, 32] before exponentiation.
    """
    neg_scaled = -(x + eps) / tau
    # Bound the exponent before exponentiating.
    bounded = K.clip(neg_scaled, -32, 32)
    return 1 / (1 + K.exp(bounded))

In [15]:
# Soft decision tree as a Keras graph:
# split nodes -> soft left/right gates -> fixed leaf routing -> class scores.
iris_input = Input(shape=(4,))
# Add a trailing channel axis so the four features form a length-4 sequence for Conv1D.
iris_axis = Lambda(lambda x: K.expand_dims(x, -1))(iris_input)
# Three filters (one per split node) with kernel size 4, i.e. one window over all
# four features; the summary below shows the output as (None, 1, 3) with 15 params.
iris_conv = Conv1D(3, (4), strides=4, padding='same', name='nodes')(iris_axis)
# Pooling over the single remaining position just flattens to (None, 3).
iris_pooled = GlobalMaxPooling1D()(iris_conv)

# Soft "left" gate per node, and the "right" gate from the negated node output.
# NOTE(review): tau is 0.1 here while the NumPy forward pass uses tau=0.01;
# confirm the softer gate is intended.
iris_left = Lambda(lambda x: keras_gumbel_softmax(x, 0.1))(iris_pooled)
iris_right = Lambda(lambda x: keras_gumbel_softmax(-x, 0.1))(iris_pooled)
iris_decisions = Concatenate(name='decision')([iris_left, iris_right])

# log -> linear -> exp: with 0/1 routing weights this multiplies the gate
# values that lie on each root-to-leaf path.
iris_log_decisions = Lambda(lambda x: K.log(x))(iris_decisions)
# Routing layer: a bias-free Dense layer that is not trained, i.e. a fixed
# adjacency matrix (as in a graph CNN). Its weights are assigned later via set_weights.
iris_route = Dense(4, use_bias=False, trainable=False, name='route')(iris_log_decisions)
iris_exp_route = Lambda(lambda x: K.exp(x))(iris_route)
# Output leaves: maps the four leaf-reach values to three class scores, with softmax on top.
iris_leaf = Dense(3, activation='softmax', name='leaf', use_bias=False) (iris_exp_route)

iris_model = Model(inputs=iris_input, outputs=iris_leaf)
iris_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
iris_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 4)]          0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 4, 1)         0           input_1[0][0]                    
__________________________________________________________________________________________________
nodes (Conv1D)                  (None, 1, 3)         15          lambda[0][0]                     
__________________________________________________________________________________________________
global_max_pooling1d (GlobalMax (None, 3)            0           nodes[0][0]                      
______________________________________________________________________________________________

In [16]:
# Insert a singleton input-channel axis: (4, 3) -> (4, 1, 3), the kernel shape
# expected by the 'nodes' Conv1D layer.
tf_coef = coef_sparse[:, np.newaxis, :]

# Copy the hand-built tree parameters into the matching named layers.
tree_weights = (
    ('nodes', [tf_coef, inter]),
    ('route', [route_array.T]),
    ('leaf', [leaf]),
)
for layer_name, layer_weights in tree_weights:
    iris_model.get_layer(layer_name).set_weights(layer_weights)

In [17]:
# Accuracy of the Keras model with only the hand-set tree weights, before any training.
accuracy_score(y, np.argmax(iris_model.predict(X), axis=1))

0.96

In [19]:
# One-hot encode the integer class labels for the categorical cross-entropy loss.
y_cat = keras.utils.to_categorical(y)

In [22]:
# Train for 10 epochs starting from the tree-derived weights; the 'route' layer
# was created with trainable=False, so it is left as set.
iris_model.fit(X, y_cat, epochs=10, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x1f0c24bb128>

In [23]:
# Accuracy on the same data after the 10 training epochs.
accuracy_score(y, np.argmax(iris_model.predict(X), axis=1))

0.96