<a href="https://colab.research.google.com/github/DarkEol/AutoML/blob/main/AutoML-Ontology/Code/AutoH2O_DecisionTree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Script for building a decision tree and an ontology for AutoML-H2O from meta-features

In [None]:
!pip install owlready2

In [None]:
import pandas as pd
import graphviz
import pickle
from sklearn import tree
from sklearn.tree import export_text
from owlready2 import *

In [None]:
#building a decision tree from meta-features

info = pd.read_csv('features-h2o-40-semi.csv', delimiter=';')

#target is last attribute
y = info[info.columns[info.columns.size-1]]
X = info[info.columns.delete(info.columns.size-1)]

clf = tree.DecisionTreeClassifier()
clf = clf.fit(X, y)

print("save decision tree into file")
file = open('decisionTree.txt','wb')
pickle.dump(clf, file)
file.close()
print("file saved")

r = export_text(clf, feature_names=list(info.columns.values[:len(info.columns.values)-1]))
print(r)

In [None]:
#traverse the tree and add the path to leaf to ontology

#function that takes as input path from root to leaf-algorithm in decision tree and adds it into ontology
def extendOntology(list_of_nodes):
  algo = list_of_nodes.pop()
  info_to_add = onto.H2OAlgorithm
  for node in list_of_nodes:
    info_to_add = info_to_add & onto.suitableFor.some(dict_features[node])
  dict_algos[algo].is_a.append(info_to_add)

#traversing the decision tree. clf - decision tree created from meta-features, node_id - current node (initially root), list_of_nodes - path (initially empty)
def travelTree(clf, node_id, list_of_nodes):
  left_id = clf.tree_.children_left[node_id]
  right_id = clf.tree_.children_right[node_id]
  feature = info.columns[clf.tree_.feature[node_id]]

  if left_id==-1 and right_id==-1:
    list2 = list_of_nodes.copy()
    list2.append(clf.classes_[clf.tree_.value[node_id].argmax()])
    extendOntology(list2)

  if left_id!=-1:
    list2 = list_of_nodes.copy()
    list2.append("No"+feature)
    travelTree(clf, left_id, list2)
  if right_id!=-1:
    list2 = list_of_nodes.copy()
    list2.append(feature)
    travelTree(clf, right_id, list2)

onto = get_ontology("ClassOntologyEmpty.owl").load()

#list of algorithms supported by H2O
dict_algos = {
'DeepLearning' : onto.Deep_Learning,
'DRF' : onto.Distributed_Random_Forest,
'GBM' : onto.Gradient_Boosting_Machine,
'GLM' : onto.Generalized_Linear_Model,
'StackedEnsemble' : onto.Stacked_Ensembles,
'XGBoost' : onto.XGBoost }

#list of considered features of dataset
dict_features = {
    'BinaryClass' : onto.BinaryClass,
    'NoBinaryClass' : onto.NoBinaryClass,
    'StringClass' : onto.StringClass,
    'NoStringClass' : onto.NoStringClass,
    'UnaryAttributes' : onto.UnaryAttributes,
    'NoUnaryAttributes' : onto.NoUnaryAttributes,
    'ManyFeatures' : onto.ManyFeatures,
    'NoManyFeatures' : onto.FewFeatures,
    'FewFeatures' : onto.FewFeatures,
    'NoFewFeatures' : onto.ManyFeatures,
    'ManyInstances' : onto.ManyInstances,
    'FewInstances' : onto.FewInstances,
    'NoManyInstances' : onto.FewInstances,
    'NoFewInstances' : onto.ManyInstances,
    'BinaryAttributes' : onto.BinaryAttributes,
    'NoBinaryAttributes' : onto.NoBinaryAttributes,
    'NumericAttributes' : onto.NumericAttributes,
    'NoNumericAttributes' : onto.NoNumericAttributes,
    'NominalAttributes' : onto.NominalAttributes,
    'NoNominalAttributes' : onto.NoNominalAttributes,
    'StringAttributes' : onto.StringAttributes,
    'NoStringAttributes' : onto.NoStringAttributes,
    'MissingValues' : onto.MissingValues,
    'NoMissingValues' : onto.NoMissingValues,
    'NumericClass' : onto.NumericClass,
    'NoNumericClass' : onto.NoNumericClass
    }

travelTree(clf, 0, list())

onto.save(file = "ontoUpdated.owl")