## Test notebook for running the FlowOCT implementation (BendersOCT)

In [1]:
import sys
sys.path.append("..")
import preprocessing.Datasets as DS
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from modelling.StrongTree.BendersOCTReplication import *

# Load the dataset. The upper-cased __name__ of the selected DS entry is used
# both for the printed banner and as the stem of the CSV file name.
problem = DS.hearts 
pname = problem.__name__.upper()
print(f'---{pname}---')
df = pd.read_csv(f'../../datasets/binary/{pname}_binary.csv')

# Column renaming is disabled for now so that the original feature names stay visible.
#df.columns = ['X_' + str(i) for i in range(len(df.columns) - 1)] + ['y']

---HEARTS---


### Testing implementation

In [2]:

# Experiment settings
depth = 2           # handed to Tree(...)
time_limit = 100    # handed to BendersOCT; the solver log reports TimeLimit set to 100
_lambda = 0         # handed to BendersOCT -- presumably the regularisation weight; TODO confirm
random_state = 42   # seed for the train/test split

# Hold out 20% of the rows for testing
data_train, data_test = train_test_split(df, random_state=random_state, test_size=0.20)

# Build the tree skeleton and the master problem by hand ('y' is passed as the label name)
tree = Tree(depth)
master = BendersOCT(data_train, 'y', tree, _lambda, time_limit, "classification")

# Order matters here: create the problem, sync the model, then solve with the callback
master.create_master_problem()
master.model.update()
master.model.optimize(mycallback)

# Read the solution ("X" attribute) of each variable family, in the order b, beta, p
b_value, beta_value, p_value = (
    master.model.getAttr("X", family) for family in (master.b, master.beta, master.p)
)

Set parameter Username
[24/Jun/2024 14:09:23] INFO - Set parameter Username
Academic license - for non-commercial use only - expires 2025-05-13
[24/Jun/2024 14:09:23] INFO - Academic license - for non-commercial use only - expires 2025-05-13
Set parameter LazyConstraints to value 1
[24/Jun/2024 14:09:23] INFO - Set parameter LazyConstraints to value 1
Set parameter Threads to value 8
[24/Jun/2024 14:09:23] INFO - Set parameter Threads to value 8
Set parameter TimeLimit to value 100


[24/Jun/2024 14:09:23] INFO - Set parameter TimeLimit to value 100
Gurobi Optimizer version 11.0.2 build v11.0.2rc0 (win64 - Windows 11.0 (22631.2))
[24/Jun/2024 14:09:23] INFO - Gurobi Optimizer version 11.0.2 build v11.0.2rc0 (win64 - Windows 11.0 (22631.2))

[24/Jun/2024 14:09:23] INFO - 
CPU model: AMD Ryzen 5 3500U with Radeon Vega Mobile Gfx, instruction set [SSE2|AVX|AVX2]
[24/Jun/2024 14:09:23] INFO - CPU model: AMD Ryzen 5 3500U with Radeon Vega Mobile Gfx, instruction set [SSE2|AVX|AVX2]
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
[24/Jun/2024 14:09:23] INFO - Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

[24/Jun/2024 14:09:23] INFO - 
Optimize a model with 14 rows, 647 columns and 422 nonzeros
[24/Jun/2024 14:09:23] INFO - Optimize a model with 14 rows, 647 columns and 422 nonzeros
Model fingerprint: 0x845cfc70
[24/Jun/2024 14:09:23] INFO - Model fingerprint: 0x845cfc70
Variable types: 256 continuous, 391 integer

### Extracting metrics and plotting the tree

In [13]:
""" Defining custom tree structure class 

    - store the tree structure
    - plotting tree
    - methods to evaluate the tree structure
 """
from modelling.StrongTree.utils import get_node_status
import matplotlib.pyplot as plt
from graphviz import Digraph

class TreeStructure:
    """Lightweight container for a decision tree.

    Nodes are stored by id in ``self.nodes`` and linked to their parent through
    ``Node.children``. The class reports simple size/depth metrics and can
    render the tree as indented text or as a graphviz ``Digraph``.
    """

    class Node:
        """A single branch or leaf of the tree."""

        def __init__(self, node_id, feature=None, value=None, is_leaf=False, depth=0):
            self.node_id = node_id
            self.depth = depth
            self.feature = feature  # set for branch nodes
            self.value = value  # set for leaf nodes
            self.is_leaf = is_leaf
            self.children = []  # child Node objects, in insertion order

        def __repr__(self):
            if self.is_leaf:
                return f"Leaf(Node {self.node_id}, Value: {self.value}, Children: {len(self.children)}, Depth: {self.depth})"
            # The branch form previously carried a stray ')' after the child count.
            return f"Branch(Node {self.node_id}, Feature: {self.feature}=0, Children: {len(self.children)}, Depth: {self.depth})"

    def __init__(self):
        self.nodes = {}  # node_id -> Node
        self.root = None

    def add_node(self, node_id, feature=None, value=None, is_leaf=False, parent_id=None, depth=0):
        """Create a node and attach it to its parent.

        Args:
            node_id: key under which the node is stored.
            feature: feature recorded on a branch node.
            value: value recorded on a leaf node.
            is_leaf: whether the node is a leaf.
            parent_id: id of an already added parent; ``None`` (the default)
                or ``0`` makes the new node the root.
            depth: depth recorded on the node.

        Raises:
            KeyError: if ``parent_id`` names a node that has not been added.
        """
        is_root = parent_id is None or parent_id == 0
        # Validate before registering so a failed call leaves no orphan entry behind.
        if not is_root and parent_id not in self.nodes:
            raise KeyError(f"parent node {parent_id!r} has not been added to the tree")

        new_node = self.Node(node_id, feature, value, is_leaf, depth)
        self.nodes[node_id] = new_node
        if is_root:
            self.root = new_node
        else:
            self.nodes[parent_id].children.append(new_node)

    def print_tree(self, node=None, level=0):
        """Print the subtree under ``node`` (the root by default), one node per line."""
        if node is None:
            node = self.root
        if node is None:
            # Nothing has been added yet; avoid dereferencing a missing root.
            print("(empty tree)")
            return
        indent = "   " * level
        print(indent + repr(node))
        for child in node.children:
            self.print_tree(child, level + 1)

    def __str__(self):
        return f"Decision Tree with {len(self.nodes)} nodes"

    def _leaves(self):
        """Return all stored leaf nodes."""
        return [node for node in self.nodes.values() if node.is_leaf]

    def num_nodes(self):
        """Number of stored nodes (branches and leaves)."""
        return len(self.nodes)

    def num_leaves(self):
        """Number of stored leaf nodes."""
        return len(self._leaves())

    def max_depth(self):
        """Largest depth recorded on any leaf; 0 when the tree has no leaves."""
        return max((leaf.depth for leaf in self._leaves()), default=0)

    def avg_depth(self):
        """Mean depth recorded on the leaves; 0.0 when the tree has no leaves."""
        leaves = self._leaves()
        if not leaves:
            return 0.0
        return sum(leaf.depth for leaf in leaves) / len(leaves)

    def features_used(self):
        """Number of distinct features recorded on branch nodes."""
        return len({node.feature for node in self.nodes.values() if not node.is_leaf})

    # Plot the tree structure using graphviz
    def plot_tree(self):
        """Build and return a graphviz ``Digraph`` of the tree (empty graph for an empty tree)."""
        def add_edges(dot, node):
            for position, child in enumerate(node.children):
                # The label is positional: the first child added gets "True".
                # Assumes the child for which `feature=0` holds is added first -- TODO confirm.
                dot.edge(str(node.node_id), str(child.node_id), label="True" if position == 0 else "False")
                add_edges(dot, child)

        def add_nodes(dot, node):
            if node.is_leaf:
                dot.node(str(node.node_id), label=f'Predict: {node.value}', shape='ellipse')
            else:
                dot.node(str(node.node_id), label=f'{node.feature}=0', shape='oval')
            for child in node.children:
                add_nodes(dot, child)

        dot = Digraph()
        if self.root is None:
            return dot
        # All nodes are declared before any edge, as in the original two-pass layout.
        add_nodes(dot, self.root)
        add_edges(dot, self.root)
        return dot

# Create the tree structure based on the Gurobi model
def make_tree(grb_model, b, beta, p):
    """Translate a solved model into a ``TreeStructure``.

    Walks ``grb_model.tree.Nodes`` followed by ``grb_model.tree.Leaves``, asks
    ``get_node_status`` for the role of each node and adds every node that is
    reported as branching or as a leaf. Pruned nodes, and nodes reported as
    neither, are left out.

    Args:
        grb_model: object exposing a ``tree`` attribute with ``Nodes``,
            ``Leaves``, ``get_parent`` and ``get_ancestors``.
        b, beta, p: solution values forwarded unchanged to ``get_node_status``.

    Returns:
        The populated ``TreeStructure``.
    """
    structure = TreeStructure()
    skeleton = grb_model.tree
    for node_id in skeleton.Nodes + skeleton.Leaves:
        status = get_node_status(grb_model, b, beta, p, node_id)
        is_pruned, is_branching, chosen_feature, is_leaf, prediction = status
        # NOTE(review): add_node treats parent id 0 as "root", so get_parent is
        # presumably 0 for the root node -- confirm against the Tree class.
        parent = int(skeleton.get_parent(node_id))
        level = len(skeleton.get_ancestors(node_id))

        if is_pruned or not (is_branching or is_leaf):
            continue

        if is_branching:
            payload = {"feature": chosen_feature, "is_leaf": False}
        else:
            payload = {"value": prediction, "is_leaf": True}
        structure.add_node(node_id=node_id, parent_id=parent, depth=level, **payload)
    return structure

In [14]:
# Rebuild the solved tree as a TreeStructure and show it as indented text
tree = make_tree(master, b_value, beta_value, p_value)
tree.print_tree()

# One value per line, in this order:
# max depth, average leaf depth, node count, leaf count, distinct features used
for metric in (tree.max_depth, tree.avg_depth, tree.num_nodes, tree.num_leaves, tree.features_used):
    print(metric())

Branch(Node 1, Feature: X_26_neg=0, Children: 2), Depth: 0)
   Branch(Node 2, Feature: X_0_42.0-=0, Children: 2), Depth: 1)
      Leaf(Node 4, Value: 0, Children: 0, Depth: 2)
      Leaf(Node 5, Value: 0, Children: 0, Depth: 2)
   Branch(Node 3, Feature: X_25=0, Children: 2), Depth: 1)
      Leaf(Node 6, Value: 0, Children: 0, Depth: 2)
      Leaf(Node 7, Value: 1, Children: 0, Depth: 2)
2
2.0
7
4
3


In [9]:
# Compute the accuracy on the held-out test split with the built-in get_acc helper
test_acc = get_acc(master, data_test, b_value, beta_value, p_value)
print('Test accuracy: ', test_acc)

Test accuracy:  0.9836065573770492


In [17]:
# Plot the tree so the metrics calculated above can be checked by eye.
# render() writes the graph under the name 'tree_plot' (the cell output shows
# 'tree_plot.pdf'); view=True additionally asks graphviz to open the result.
plot = tree.plot_tree()
plot.render('tree_plot', view=True)

'tree_plot.pdf'