In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import math
import os
import numpy as np

In [2]:
class NeuralDecisionTree(keras.Model):
    """Differentiable binary decision tree of fixed depth.

    A random subset of the input features is fed to one dense "decision"
    layer. The per-node values it produces are multiplied together level by
    level to obtain one weight per leaf, and the output is the leaf-weighted
    sum of the trainable per-leaf matrix ``pi``.

    Args:
        depth: Number of tree levels; the tree has ``2 ** depth`` leaves.
        num_features: Width of the feature vectors passed to ``call``.
        used_features_rate: Fraction of the features this tree uses;
            ``int(num_features * used_features_rate)`` columns are sampled
            without replacement.
        num_classes: Width of the output vector.
    """

    def __init__(self, depth, num_features, used_features_rate, num_classes):
        super().__init__()
        self.depth = depth
        self.num_leaves = 2 ** depth
        self.num_classes = num_classes

        # Rows of the identity matrix act as a column selector for the
        # randomly drawn features: shape [num_used_features, num_features].
        num_used_features = int(num_features * used_features_rate)
        chosen_columns = np.random.choice(
            np.arange(num_features), num_used_features, replace=False
        )
        self.used_features_mask = np.eye(num_features)[chosen_columns]

        # Trainable per-leaf output values: [num_leaves, num_classes].
        leaf_initializer = tf.random_normal_initializer()
        self.pi = tf.Variable(
            initial_value=leaf_initializer(
                shape=[self.num_leaves, self.num_classes]
            ),
            dtype="float32",
            trainable=True,
        )

        # One unit per node slot; call() reads slots 1 .. num_leaves - 1.
        # NOTE(review): a sigmoid activation was used here previously. ReLU is
        # not bounded to [0, 1], so the (d, 1 - d) pairs built in call() are
        # not probabilities and can be negative -- confirm this is intended.
        self.decision_fn = layers.Dense(
            units=self.num_leaves, activation="relu", name="decision"
        )

    def call(self, features):
        """Map ``[batch_size, num_features]`` inputs to ``[batch_size, num_classes]``."""
        n_rows = tf.shape(features)[0]

        # Keep only the sampled columns: [batch_size, num_used_features].
        selected = tf.matmul(features, self.used_features_mask, transpose_b=True)

        # Per-node value d and its complement 1 - d: [batch_size, num_leaves, 2].
        node_values = tf.expand_dims(self.decision_fn(selected), axis=2)
        branch_weights = layers.concatenate(
            [node_values, 1 - node_values], axis=2
        )

        # Walk the tree breadth-first. Level ``k`` owns the node slots
        # 2 ** k .. 2 ** (k + 1) - 1, and every path weight is split in two.
        mu = tf.ones([n_rows, 1, 1])
        for level in range(self.depth):
            first_node = 2 ** level
            last_node = 2 ** (level + 1)
            mu = tf.reshape(mu, [n_rows, -1, 1])  # [batch_size, 2 ** level, 1]
            mu = tf.tile(mu, (1, 1, 2))  # [batch_size, 2 ** level, 2]
            mu = mu * branch_weights[:, first_node:last_node, :]

        mu = tf.reshape(mu, [n_rows, self.num_leaves])  # [batch_size, num_leaves]

        # ``pi`` is used as-is (no softmax or ReLU is applied), so the output
        # is an unconstrained real vector per sample.
        return tf.matmul(mu, self.pi)  # [batch_size, num_classes]

In [3]:
# Number of nodes; it selects the dataset files (dataset_<N>_*.csv) and fixes
# the feature count, (N * N - N) // 2, used by the loaders below.
NUMBER_NODES = 7

def get_train_dataset(number_nodes=None, data_dir=os.path.join('..', 'datasets')):
    """Load the train and validation CSVs and return them merged as one set.

    The first ``(n * n - n) // 2`` columns of every row are the features.
    The column right after them is skipped and all remaining columns are the
    targets.
    NOTE(review): the previous comment said the optimal band is included at
    position 0 of the targets, which contradicts the ``+ 1`` offset in the
    slice -- confirm the intended CSV layout.

    Args:
        number_nodes: Node count ``n`` used for the file names and the feature
            count. Defaults to the notebook-level ``NUMBER_NODES``.
        data_dir: Directory holding ``dataset_<n>_train.csv`` and
            ``dataset_<n>_val.csv``.

    Returns:
        Tuple ``(x_train, y_train)`` of 2-D ``float32`` arrays with the
        training rows first, followed by the validation rows.
    """
    if number_nodes is None:
        number_nodes = NUMBER_NODES
    features_number = (number_nodes * number_nodes - number_nodes) // 2

    def load_split(split):
        # Slice whole columns at once instead of converting row by row; this
        # also keeps the 2-D shape when a file has a header but no rows.
        path = os.path.join(data_dir, f'dataset_{number_nodes}_{split}.csv')
        rows = pd.read_csv(path).to_numpy()
        features = rows[:, :features_number].astype('float32', order='C')
        targets = rows[:, features_number + 1:].astype('float32', order='C')
        return features, targets

    x_train, y_train = load_split('train')
    x_val, y_val = load_split('val')

    return np.concatenate((x_train, x_val)), np.concatenate((y_train, y_val))

def get_test_dataset(number_nodes=None, data_dir=os.path.join('..', 'datasets')):
    """Load the test CSV and split it into feature and target arrays.

    The first ``(n * n - n) // 2`` columns of every row are the features.
    The column right after them is skipped and all remaining columns are the
    targets.
    NOTE(review): the previous comment said the optimal band is included at
    position 0 of the targets, which contradicts the ``+ 1`` offset in the
    slice -- confirm the intended CSV layout.

    Features are returned as ``float32``, the same dtype the training loader
    uses. They were previously cast to ``int32``, which truncates any
    fractional feature value before it reaches the model.

    Args:
        number_nodes: Node count ``n`` used for the file name and the feature
            count. Defaults to the notebook-level ``NUMBER_NODES``.
        data_dir: Directory holding ``dataset_<n>_test.csv``.

    Returns:
        Tuple ``(x_test, y_test)`` of 2-D ``float32`` arrays.
    """
    if number_nodes is None:
        number_nodes = NUMBER_NODES
    features_number = (number_nodes * number_nodes - number_nodes) // 2

    path = os.path.join(data_dir, f'dataset_{number_nodes}_test.csv')
    rows = pd.read_csv(path).to_numpy()

    # Slice whole columns at once instead of converting row by row.
    x_test = rows[:, :features_number].astype('float32', order='C')
    y_test = rows[:, features_number + 1:].astype('float32', order='C')

    return x_test, y_test

In [4]:
# Training hyperparameters; run_experiment() reads them as notebook globals.
learning_rate = 0.01  # step size handed to the Adam optimizer
batch_size = 32  # mini-batch size (32 is also the Keras default for Model.fit)
# Earlier value kept for reference: num_epochs = 10
num_epochs = 500
# Unused leftover setting: hidden_units = [64, 64]

def loss_fn(targets, outputs):
    """Root-mean-square error between ``targets`` and ``outputs``.

    The mean is taken over every element of the batch, so the result is a
    single scalar tensor.
    """
    residual = targets - outputs
    mean_squared_error = tf.reduce_mean(residual ** 2)
    return tf.sqrt(mean_squared_error)

def run_experiment(model):
    """Compile ``model`` for regression and fit it on the merged training data.

    Uses the notebook-level ``learning_rate``, ``batch_size`` and
    ``num_epochs`` settings, the RMSE ``loss_fn`` and the arrays returned by
    ``get_train_dataset()``.

    Args:
        model: An uncompiled Keras model whose input and output widths match
            the dataset.
    """
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss_fn,
        # The targets are continuous, so report mean absolute error.
        # 'accuracy' is a classification metric and is meaningless here.
        metrics=['mae'],
    )

    x_train, y_train = get_train_dataset()

    # Pass the configured batch size explicitly so the notebook setting is
    # actually honoured (it was previously defined but never used).
    model.fit(x=x_train, y=y_train, batch_size=batch_size, epochs=num_epochs)

In [5]:
# Settings for the single-tree experiment. create_tree_model() reads depth,
# used_features_rate and num_classes as globals at call time.
num_trees = 10  # not read by create_tree_model()
depth = 10  # the tree gets 2 ** depth leaves
used_features_rate = 1.0  # fraction of input features sampled by the tree
num_classes = 7  # width of the model's output vector

def create_tree_model():
    """Build an uncompiled Keras model: input -> BatchNormalization -> one tree.

    The input width is derived from ``NUMBER_NODES`` with the same formula the
    dataset loaders use, so the model always matches the data (21 features for
    7 nodes). ``depth``, ``used_features_rate`` and ``num_classes`` are read
    from the notebook globals.

    Returns:
        A ``keras.Model`` mapping ``[batch_size, num_inputs]`` float32 inputs
        to ``[batch_size, num_classes]`` outputs.
    """
    num_inputs = (NUMBER_NODES * NUMBER_NODES - NUMBER_NODES) // 2
    inputs = tf.keras.Input(shape=(num_inputs,), dtype=tf.float32)
    features = layers.BatchNormalization()(inputs)
    num_features = features.shape[1]

    tree = NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)

    outputs = tree(features)
    return keras.Model(inputs=inputs, outputs=outputs)


# Build the single-tree model and train it with the shared experiment settings.
tree_model = create_tree_model()
run_experiment(tree_model)


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [6]:
# Predict on the test split with the trained tree. The targets `y` are loaded
# but not used by the checks in this cell.
x, y = get_test_dataset()
pred = tree_model.predict(x)

def count_repeats(output, num_nodes=None):
    """Count duplicated labels and above-range values in one prediction vector.

    Args:
        output: 1-D sequence of predicted values for a single sample.
        num_nodes: Number of distinct labels a valid prediction should
            contain. Defaults to the notebook-level ``NUMBER_NODES``.

    Returns:
        Tuple ``(repeated, greater)``. ``repeated`` is ``num_nodes`` minus the
        number of distinct values after rounding. ``greater`` is the number of
        raw (unrounded) values strictly above ``num_nodes - 1``.
    """
    if num_nodes is None:
        num_nodes = NUMBER_NODES
    values = np.asarray(output)

    # The largest valid label is num_nodes - 1 (6 for the 7-node dataset).
    greater = int(np.count_nonzero(values > num_nodes - 1))
    repeated = num_nodes - np.unique(np.round(values)).size
    return repeated, greater

# Sum the per-sample checks over every tree prediction.
count = 0  # total number of repeated (rounded) labels
greaterSix = 0  # total number of raw outputs above 6
for i in pred:
    c, g = count_repeats(i)
    count += c
    greaterSix += g
print(count)
print(greaterSix)

# Show the first raw prediction and then every prediction rounded.
print(pred[0] )
print(np.round(pred))

173
13
[ 1.1283594  5.6571155  6.1145854 -1.0802135  3.2431638  3.429164
  2.9971232]
[[ 1.  6.  6. -1.  3.  3.  3.]
 [ 2.  2.  4.  3.  6.  2.  3.]
 [ 2.  4.  5.  2.  2.  3.  4.]
 [ 4.  2.  2.  4.  5.  2.  3.]
 [ 2.  5.  3.  3.  3.  4.  3.]
 [ 3.  3.  2.  5.  3.  2.  3.]
 [ 2.  2.  4.  5.  3.  2.  3.]
 [ 2.  5.  6.  1.  2.  5.  3.]
 [ 1.  4.  5.  3.  3.  3.  3.]
 [ 1.  6.  5.  1.  2.  5.  3.]
 [ 1.  5.  0.  6.  3.  3.  5.]
 [ 4.  5.  0.  2.  5.  4.  2.]
 [ 3.  3.  3.  3.  4.  3.  2.]
 [ 3.  2.  5.  4.  4.  2.  3.]
 [ 3.  1.  4.  5.  3.  3.  3.]
 [ 4.  1.  4.  5.  5.  1.  2.]
 [ 2.  1.  6.  5.  1.  4.  3.]
 [ 3.  1.  3.  5.  5.  1.  2.]
 [ 3.  3.  2.  2.  6.  3.  2.]
 [ 3.  5.  5.  1.  1.  5.  2.]
 [ 2.  2.  4.  4.  4.  3.  2.]
 [ 4.  4.  2.  3.  3.  2.  3.]
 [ 3.  4.  3.  4.  1.  3.  3.]
 [ 3.  3.  3.  5.  3.  2.  3.]
 [ 2.  3.  4.  7.  1.  2.  3.]
 [ 2.  4.  3.  4.  3.  3.  3.]
 [ 3.  1.  3.  4.  5.  2.  3.]
 [ 2.  2.  4.  5.  4.  2.  3.]
 [ 3.  3.  2.  4.  5.  4.  2.]
 [ 2.  2.  5.  

In [7]:
class NeuralDecisionForest(keras.Model):
    """Ensemble of ``NeuralDecisionTree`` models whose outputs are averaged.

    Each tree samples its own random subset of the input features.

    Args:
        num_trees: Number of trees in the ensemble; must be at least 1.
        depth: Depth of every tree.
        num_features: Width of the feature vectors passed to ``call``.
        used_features_rate: Fraction of the features each tree samples.
        num_classes: Width of the output vector.

    Raises:
        ValueError: If ``num_trees`` is smaller than 1.
    """

    def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):
        super().__init__()
        if num_trees < 1:
            raise ValueError(f"num_trees must be at least 1, got {num_trees}")

        # Keep the output width on the instance so call() does not depend on a
        # notebook-level global of the same name.
        self.num_classes = num_classes

        # Each tree draws its own randomly selected input features.
        self.ensemble = [
            NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)
            for _ in range(num_trees)
        ]

    def call(self, inputs):
        """Return the mean of the tree outputs: ``[batch_size, num_classes]``."""
        # Start from a [batch_size, num_classes] matrix of zeros.
        batch_size = tf.shape(inputs)[0]
        outputs = tf.zeros([batch_size, self.num_classes])

        # Sum the outputs of the trees, then divide by the ensemble size.
        for tree in self.ensemble:
            outputs += tree(inputs)
        outputs /= len(self.ensemble)
        return outputs

In [8]:
# Settings for the forest experiment. These overwrite the globals of the same
# name set for the single-tree run, so calling create_tree_model() after this
# cell would build a depth-4 tree with a 0.5 feature rate.
# num_classes is not redefined here; the value set earlier is reused.
num_trees = 50  # number of trees in the ensemble
depth = 4  # each tree gets 2 ** depth leaves
used_features_rate = 0.5  # each tree samples half of the input features

def create_forest_model():
    """Build an uncompiled Keras model: input -> BatchNormalization -> forest.

    The input width is derived from ``NUMBER_NODES`` with the same formula the
    dataset loaders use, so the model always matches the data (21 features for
    7 nodes). ``num_trees``, ``depth``, ``used_features_rate`` and
    ``num_classes`` are read from the notebook globals.

    Returns:
        A ``keras.Model`` mapping ``[batch_size, num_inputs]`` float32 inputs
        to ``[batch_size, num_classes]`` outputs.
    """
    num_inputs = (NUMBER_NODES * NUMBER_NODES - NUMBER_NODES) // 2
    inputs = tf.keras.Input(shape=(num_inputs,), dtype=tf.float32)
    features = layers.BatchNormalization()(inputs)
    num_features = features.shape[1]

    # Named `forest` so it does not shadow the notebook-level `forest_model`.
    forest = NeuralDecisionForest(num_trees, depth, num_features, used_features_rate, num_classes)

    outputs = forest(features)
    return keras.Model(inputs=inputs, outputs=outputs)

# Build the forest model and train it with the shared experiment settings.
forest_model = create_forest_model()

run_experiment(forest_model)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [9]:
# Predict on the test split with the trained forest. The targets `y` are
# loaded but not used by the checks in this cell.
x, y = get_test_dataset()
pred = forest_model.predict(x)

def count_repeats(output):
    x = [x for x in output if x > 6]
    greaterSix = len(x)
    true_shape = NUMBER_NODES
    counts = np.unique(np.round(output))
    repeated = true_shape - counts.shape[0]
    return repeated, greaterSix

# Sum the per-sample checks over every forest prediction.
count = 0  # total number of repeated (rounded) labels
greaterSix = 0  # total number of raw outputs above 6
for i in pred:
    c, g = count_repeats(i)
    count += c
    greaterSix += g
print(count)
print(greaterSix)

# Show the first raw prediction and then every prediction rounded.
print(pred[0])
print(np.round(pred))

127
42
[0.8765546 4.7932796 4.8948913 2.401752  3.4139464 2.1362793 2.334311 ]
[[ 1.  5.  5.  2.  3.  2.  2.]
 [ 2.  0.  4.  2.  7.  2.  3.]
 [ 3.  4.  4.  1.  1.  5.  3.]
 [ 3.  3.  3.  5.  4.  0.  4.]
 [ 3. -0. -1.  6.  8.  1.  3.]
 [ 4.  3.  1.  6.  3.  1.  3.]
 [ 2.  2.  4.  4.  5.  3.  2.]
 [ 2.  5.  3.  2.  1.  6.  3.]
 [ 2.  5.  1.  5.  2.  2.  4.]
 [ 1.  4.  5. -1.  4.  4.  3.]
 [ 1.  4.  3.  2.  2.  4.  6.]
 [ 6.  3. -1.  0.  5.  6.  2.]
 [ 4.  4.  3.  2.  4.  1.  3.]
 [ 3.  0.  7.  7.  2. -0.  3.]
 [ 4.  3.  1.  6.  3.  2.  2.]
 [ 1.  4.  5.  5.  1.  1.  3.]
 [ 3.  0.  6.  4.  2.  3.  2.]
 [ 3.  2.  4.  4.  4. -0.  3.]
 [ 3.  5.  2.  4.  2.  1.  3.]
 [ 4.  6.  3. -0.  0.  5.  3.]
 [ 2.  4.  3.  4.  7. -1.  3.]
 [ 5.  5.  1.  2.  2.  2.  5.]
 [ 2.  4.  5.  4. -0.  1.  4.]
 [ 5.  2.  3.  1.  6.  2.  2.]
 [ 2.  3.  4.  5.  0.  4.  3.]
 [ 4.  6.  1. -2.  3.  5.  3.]
 [ 1.  3.  4.  5.  4.  0.  4.]
 [ 2.  2.  5.  6.  2.  1.  3.]
 [-2.  5.  6.  7.  1.  1.  3.]
 [ 1.  3.  7.  6.  1. 

Resultados sem batch normalization

DecisionTree - 208 reps e 8 maiores que 6
DecisionForest - 168 reps e 20 maiores que 6

Resultados com batch normalization

DecisionTree - 150 reps e 23 maiores que 6
DecisionForest - 117 reps e 36 maiores que 6