# In [1]:
#
#  This "preamble" code gets executed automatically when the main Notebook is run:
#

import os
import sys
import time
import json
import pickle
import threading

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sklearn as skl 
from sklearn                 import metrics
from sklearn.preprocessing   import StandardScaler
from sklearn.tree            import DecisionTreeClassifier
from sklearn.tree            import DecisionTreeRegressor
from sklearn.pipeline        import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm             import SVC 


#import ipywidgets as widgets
#from IPython.display import display, HTML


# Startup banner.  The ASCII art is full of backslashes, so it must be a raw
# string: sequences such as "\ " and "\_" are unrecognized escapes, which emit
# a SyntaxWarning on recent Python versions.  The printed text is unchanged.
print(r"""
                \     _ \   _ \  |     |      _ \  
               _ \   |   | |   | |     |     |   | 
              ___ \  ___/  |   | |     |     |   | 
            _/    _\_|    \___/ _____|_____|\___/  
                                                   
           -  -  -  --  --  ---  --= --== ==*# ###>
      """)

def tree_to_data(decision_tree, feature_names=None, name_swap=None, y=None):
    """Convert a fitted scikit-learn decision tree into nested dictionaries.

    Args:
        decision_tree: Either a fitted estimator exposing ``tree_`` and
            ``criterion``, or a low-level ``sklearn.tree._tree.Tree``.
        feature_names: Optional sequence indexed by feature number.  When
            omitted, the numeric feature index is used in the rule text.
        name_swap: Unused; kept so existing callers keep working.
        y: Unused; kept so existing callers keep working.

    Returns:
        The root node as a dict.  Leaf nodes carry the keys ``id``,
        ``criterion``, ``impurity``, ``samples``, ``value`` and ``class``.
        Split nodes carry ``id``, ``rule``, a key named after the criterion
        holding the node impurity, ``samples``, and the nested ``left`` /
        ``right`` children.
    """
    leaf_marker = skl.tree._tree.TREE_LEAF

    # A raw Tree (and estimators without class labels) has no ``classes_``;
    # in that case leaves report the argmax index instead of a label.
    class_labels = getattr(decision_tree, "classes_", None)

    def node_to_data(tree, node_id, criterion):
        if tree.children_left[node_id] == leaf_marker:
            value = tree.value[node_id]
            if tree.n_outputs == 1:
                value = value[0, :]
            best = np.argmax(value)
            return {
                "id": node_id,
                "criterion": criterion,
                "impurity": tree.impurity[node_id],
                "samples": tree.n_node_samples[node_id],
                "value": list(value),
                "class": class_labels[best] if class_labels is not None else int(best),
            }

        feature = tree.feature[node_id]
        if feature_names is not None:
            feature = feature_names[feature]

        # str(): without feature_names the feature is a numpy integer, and
        # a membership test on it would raise TypeError.
        # NOTE(review): names containing "=" look like indicator columns
        # ("name=value"); they are rendered as "<name> = false" -- confirm.
        if "=" in str(feature):
            rule_type = "="
            rule_value = "false"
        else:
            rule_type = "<="
            rule_value = "%.4f" % tree.threshold[node_id]

        return {
            "id": node_id,
            "rule": "%s %s %s" % (feature, rule_type, rule_value),
            criterion: tree.impurity[node_id],
            "samples": tree.n_node_samples[node_id],
        }

    def recurse(tree, node_id, criterion):
        node = node_to_data(tree, node_id, criterion)
        left_child = tree.children_left[node_id]
        if left_child != leaf_marker:
            node["left"] = recurse(tree, left_child, criterion)
            node["right"] = recurse(tree, tree.children_right[node_id], criterion)
        return node

    # ``sklearn.tree.tree`` (and its bundled ``six``) no longer exists; the
    # Tree class lives in ``sklearn.tree._tree``, which this file already uses
    # for TREE_LEAF.
    if isinstance(decision_tree, skl.tree._tree.Tree):
        return recurse(decision_tree, 0, "impurity")

    criterion = decision_tree.criterion
    if not isinstance(criterion, str):
        # Non-string criterion objects are reported under a generic key.
        criterion = "impurity"
    return recurse(decision_tree.tree_, 0, criterion)

def tree_to_simple_str(decision_tree, feature_names=None, name_swap=None, y=None):
    """Convert a fitted decision tree into a minimal nested-dict form.

    Despite the name, the result is a dict, not a string.

    Args:
        decision_tree: Either a fitted estimator exposing ``tree_``, or a
            low-level ``sklearn.tree._tree.Tree``.
        feature_names: Optional sequence indexed by feature number.  When
            omitted, the numeric feature index is used in the rule text.
        name_swap: Unused; kept so existing callers keep working.
        y: Unused; kept so existing callers keep working.

    Returns:
        The root node as a dict.  Leaf nodes are ``{"policy": <label>}``;
        split nodes are ``{"rule": <text>, "left": ..., "right": ...}``.
    """
    leaf_marker = skl.tree._tree.TREE_LEAF

    # A raw Tree (and estimators without class labels) has no ``classes_``;
    # in that case leaves report the argmax index instead of a label.
    class_labels = getattr(decision_tree, "classes_", None)

    def node_to_data(tree, node_id):
        if tree.children_left[node_id] == leaf_marker:
            value = tree.value[node_id]
            if tree.n_outputs == 1:
                value = value[0, :]
            best = np.argmax(value)
            return {
                "policy": class_labels[best] if class_labels is not None else int(best),
            }

        feature = tree.feature[node_id]
        if feature_names is not None:
            feature = feature_names[feature]

        # str(): without feature_names the feature is a numpy integer, and
        # a membership test on it would raise TypeError.
        # NOTE(review): names containing "=" look like indicator columns
        # ("name=value"); they are rendered as "<name> = false" -- confirm.
        if "=" in str(feature):
            rule_type = "="
            rule_value = "false"
        else:
            rule_type = "<="
            rule_value = "%.4f" % tree.threshold[node_id]

        return {
            "rule": "%s %s %s" % (feature, rule_type, rule_value),
        }

    def recurse(tree, node_id):
        node = node_to_data(tree, node_id)
        left_child = tree.children_left[node_id]
        if left_child != leaf_marker:
            node["left"] = recurse(tree, left_child)
            node["right"] = recurse(tree, tree.children_right[node_id])
        return node

    # ``sklearn.tree.tree`` no longer exists; the Tree class lives in
    # ``sklearn.tree._tree``, which this file already uses for TREE_LEAF.
    if isinstance(decision_tree, skl.tree._tree.Tree):
        return recurse(decision_tree, 0)
    return recurse(decision_tree.tree_, 0)

    
def hide_traceback(exc_tuple=None, filename=None, tb_offset=None,
                   exception_only=False, running_compiled_code=False):
    """Show only the exception summary for the error currently being handled.

    Intended to be assigned to ``ipython.showtraceback``.  All parameters are
    accepted so the hook can be called like the method it replaces, but none
    of them is used: the exception is always taken from ``sys.exc_info()``.

    Returns:
        Whatever the shell's ``_showtraceback`` returns.
    """
    import sys

    shell = get_ipython()
    exc_type, exc_value = sys.exc_info()[:2]
    # Ask IPython for the exception-only rendering of the active exception.
    summary = shell.InteractiveTB.get_exception_only(exc_type, exc_value)
    return shell._showtraceback(exc_type, exc_value, summary)

def format_bytes(size):
    """Render a byte count as a short human-readable string.

    Uses 1024-based steps with the suffixes B, kB, MB, GB and TB and two
    decimal places, e.g. ``1536`` -> ``"1.50 kB"``.

    Args:
        size: Number of bytes (int or float).

    Returns:
        The formatted string.  Values beyond the TB range stay expressed in
        TB rather than raising.
    """
    unit = 2 ** 10  # 1024
    suffixes = ('', 'k', 'M', 'G', 'T')
    idx = 0
    # ``>=`` so that exactly 1024 rolls over to "1.00 kB".  The index bound
    # keeps very large values on the last suffix instead of running past the
    # table.
    while size >= unit and idx < len(suffixes) - 1:
        size = size / unit
        idx += 1
    return "%1.2f %sB" % (size, suffixes[idx])

def load_csv_data(data):
    """Load the CSV files named in *data* and store them as DataFrames.

    Args:
        data: Mapping holding ``'path'`` (directory of the CSV files) and the
            file-name entries ``'apollo.tracefile'``, ``'apollo.flushfile'``,
            ``'apollo.stepsfile'`` and ``'normal.stepsfile'``.

    Returns:
        The same mapping, with the loaded frames added under
        ``'apollo.trace'``, ``'apollo.flush'``, ``'apollo.steps'`` and
        ``'normal.steps'``.  A one-line size report is printed per file.
    """
    # (key the DataFrame is stored under, key holding the CSV file name),
    # listed in load order.
    sources = (
        ('apollo.trace', 'apollo.tracefile'),
        ('apollo.flush', 'apollo.flushfile'),
        ('apollo.steps', 'apollo.stepsfile'),
        ('normal.steps', 'normal.stepsfile'),
    )

    print("Data source:\n\t%s\n" % data['path'])
    print("Loading:")
    for frame_key, file_key in sources:
        csv_name = data[file_key]
        frame = pd.read_csv(data['path'] + '/' + csv_name)
        data[frame_key] = frame
        # Deep memory usage of the columns only (index excluded).
        n_bytes = frame.memory_usage(index=False, deep=True).sum()
        print("       data[%s]   (%s)   %s" % (frame_key, format_bytes(n_bytes), csv_name))
    print("")
    return data

#
# To suppress traceback output on errors, add this to main:
#
#ipython = get_ipython()
#ipython.showtraceback = hide_traceback
#