## Test notebook for running the OCT (Optimal Classification Tree) implementation

In [1]:
import sys
# Make the parent directory importable so the project-local `preprocessing` package resolves
sys.path.append("..")
import preprocessing.Datasets as DS
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
# NOTE(review): set before importing `interpretableai` — presumably the Julia thread count
# is only read when the Julia runtime starts; confirm before reordering these lines
os.environ['JULIA_NUM_THREADS'] = '4' 
from interpretableai import iai



In [2]:
# Choose the dataset loader and announce which problem is being run
problem = DS.banknote
pname = problem.__name__.upper()
print(f'---{pname}---')

# Load the data, then hold out 20% of the rows for testing.
# The target is the 'y' column; the seed is fixed so the split is reproducible.
df = problem('../datasets/original/')
X_train, X_test, y_train, y_test = train_test_split(
    df.drop('y', axis=1),
    df['y'],
    test_size=0.2,
    random_state=42,
)

---BANKNOTE---


### Testing implementation

In [3]:
# Grid-search the tree depth (candidates 1 through 4) for a seeded
# OptimalTreeClassifier, fitting on the training split
grid = iai.GridSearch(
    iai.OptimalTreeClassifier(random_seed=42),
    max_depth=range(1, 5),
)
grid.fit(X_train, y_train)

# Keep a handle on the learner selected by the grid search for later inspection
learner = grid.get_learner()

In [4]:
# Predict labels for the held-out test set using the best parameters found by the grid search.
# The returned array is the cell's last expression, so it is displayed as the cell output.
grid.predict(X_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0], dtype=int64)

### Extracting metrics

In [5]:
# Generate plot of tree: write the fitted learner's tree to "tree.png" (path relative to the working directory)
# NOTE(review): `graphviz` is imported here but never referenced in this cell — presumably a
# fail-fast check that the package is installed; confirm it is needed and, if so, move it to the import cell
import graphviz
learner.write_png("tree.png")

In [26]:
# Structural metrics of the fitted tree.
# Node indices are 1-based (hence range(1, nodes + 1)). The node count and the
# per-node leaf flag are queried from the learner once and reused for every
# metric below, instead of being re-queried inside each comprehension.

# Number of nodes
nodes = learner.get_num_nodes()
print(nodes)

node_ids = range(1, nodes + 1)
is_leaf = {i: learner.is_leaf(i) for i in node_ids}
leaf_ids = [i for i in node_ids if is_leaf[i]]
split_ids = [i for i in node_ids if not is_leaf[i]]

# Number of leaf nodes (variable name kept as-is for compatibility with other cells)
leafes = len(leaf_ids)
print(leafes)

# Number of rules: every non-leaf node is counted as one rule
rules = nodes - leafes
print(rules)

# Max/Avg depth, measured over leaf nodes only
depths = [learner.get_depth(i) for i in leaf_ids]
max_depth = max(depths)
avg_depth = np.mean(depths)
print(max_depth, avg_depth)

# Imbalanced tree: true when the leaves do not all sit at the same depth.
# Comparing integer min/max is equivalent to the mean differing from the max,
# without relying on float equality.
is_imbalanced_tree = (min(depths) != max_depth)
print(is_imbalanced_tree)

# Number of distinct features used across all split (non-leaf) nodes
features = len({learner.get_split_feature(i) for i in split_ids})
print(features)

15
8
7
4 3.25
True
3
