In [None]:
import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

import pandas as pd
import numpy as np
import pickle
import math

In [None]:
# Fixed random seed; passed as random_state to train_test_split so the
# train/test partition is the same on every run.
seed = 99

# Build/Load Model

In [None]:
# Directory holding batch1.dat ... batch10.dat.
# NOTE(review): absolute, machine-specific path kept from the original;
# consider making it configurable.
DATA_DIR = "/home/admin2/ML_NIC/datasets/gas+sensor+array+drift+dataset"

# Fields are separated by " <digits>:" (space, index, colon). Written as a raw
# string so that "\d" reaches the regex engine instead of being parsed as an
# (invalid) Python string escape, which warns on recent Python versions.
FIELD_SEP = r' \d+:'


def load_batch(batch_number):
    """Read ``batch<batch_number>.dat`` from DATA_DIR into a header-less DataFrame.

    A regular-expression separator is only supported by pandas' Python parser,
    so the engine is requested explicitly instead of relying on the
    warn-and-fall-back behaviour of the default C engine.
    """
    return pd.read_csv(f"{DATA_DIR}/batch{batch_number}.dat",
                       header=None, sep=FIELD_SEP, engine='python')


# The individual batchN names are kept because later cells refer to them.
(batch1, batch2, batch3, batch4, batch5,
 batch6, batch7, batch8, batch9, batch10) = (load_batch(n) for n in range(1, 11))

In [None]:
# Stack the ten batches row-wise into a single frame; the per-batch row
# indices are discarded and replaced by one continuous index.
all_batches = [
    batch1, batch2, batch3, batch4, batch5,
    batch6, batch7, batch8, batch9, batch10,
]
dataset = pd.concat(all_batches, axis=0, ignore_index=True)

In [None]:
# Column 0 is used as the target; every other column is a model input.
label_column = 0
labels = dataset[label_column]
features = dataset.drop(columns=[label_column])

In [None]:
# Hold out 20% of the rows for testing. The split is stratified on the labels
# and seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=seed,
)

In [None]:
# Scaler that maps each feature into [0, 1]; with clip=True, transformed
# values falling outside that range are clipped to it.
minmax = MinMaxScaler(clip=True, feature_range=(0, 1))

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# scaling to the test rows (the tuple is evaluated left to right, so the fit
# happens before the test transform).
X_train, X_test = minmax.fit_transform(X_train), minmax.transform(X_test)

In [None]:
# Restore the classifier object stored in model.pkl (path is relative to the
# notebook's working directory).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a model.pkl that comes from a trusted source.
with open("model.pkl", mode='rb') as model_file:
    clf = pickle.load(model_file)

# Micro-C Model

In [None]:
# Pull the flat, array-based description of the tree out of the classifier.
fitted_tree = clf.tree_
n_nodes = fitted_tree.node_count
children_left = fitted_tree.children_left
children_right = fitted_tree.children_right
feature = fitted_tree.feature
threshold = fitted_tree.threshold
values = fitted_tree.value

# Per-node depth and leaf flag, filled in by walking the tree from the root
# with an explicit stack of (node id, depth) pairs.
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
pending = [(0, 0)]  # the root is node 0 at depth 0
while pending:
    # Each node is pushed once and popped once, so it is visited exactly once.
    node_id, depth = pending.pop()
    node_depth[node_id] = depth

    left_child = children_left[node_id]
    right_child = children_right[node_id]
    if left_child == right_child:
        # Identical child ids mean there is no split here: this is a leaf.
        is_leaves[node_id] = True
        continue

    # Split node: schedule both children one level deeper.
    pending.extend([(left_child, depth + 1), (right_child, depth + 1)])

# Human-readable dump of the tree, indented by one tab per depth level.
print(f"The binary tree structure has {n_nodes} nodes and has the following tree structure:\n")
for i in range(n_nodes):
    indent = int(node_depth[i]) * "\t"
    if is_leaves[i]:
        print(f"{indent}node={i} is a leaf node with value={values[i]}.")
        continue
    print(
        f"{indent}node={i} is a split node with value={values[i]}: "
        f"go to node {children_left[i]} if X[:, {feature[i]}] <= {threshold[i]} "
        f"else to node {children_right[i]}."
    )

In [None]:
# Decide number of bits required for the path storage
num_features = clf.n_features_in_
classes = clf.classes_
num_leaves = int(np.sum(is_leaves))

# One bit per leaf, packed into 32-bit words.
num_result_words = math.ceil(num_leaves/32.0)

# Initial single-word mask with the low num_leaves bits set. It is only used
# by the code generator when num_result_words == 1 (num_leaves <= 32), where
# num_leaves % 33 equals num_leaves.
# NOTE(review): for num_leaves > 32 this value wraps and is not meaningful.
result_init = hex((2**(num_leaves % 33))-1)

# How many per-feature results (each num_result_words wide) fit in 32 words.
features_per_result_core = 32 // num_result_words

# The reduction below divides the count by features_per_result_core at every
# level. A fan-in of 1 never shrinks the count (the original loop would spin
# forever, growing the list), and a fan-in of 0 divides by zero. Fail early
# with an explanation instead.
if features_per_result_core < 1 or (features_per_result_core == 1 and num_features > 1):
    raise ValueError(
        f"Tree has {num_leaves} leaves ({num_result_words} result words); "
        f"only {features_per_result_core} feature result(s) fit per result core, "
        f"so {num_features} features cannot be reduced to a single core."
    )

# num_result_cores[k] is the number of cores at reduction level k: each level
# is the ceiling of the previous count divided by features_per_result_core,
# repeated until a single core remains.
num_result_cores = [math.ceil(num_features / features_per_result_core)]
while num_result_cores[-1] > 1:
    num_result_cores.append(math.ceil(num_result_cores[-1] / features_per_result_core))

In [None]:
# Number format handed to num_translate when thresholds are converted:
# overall width and the count of fractional bits.
total_bits = 16
dec_bits = 13


def num_translate(number, total_bits, dec_bits):
    """Return ``number`` scaled by ``2**dec_bits`` and rounded to an integer.

    ``total_bits`` is accepted for call compatibility but is not used: the
    result is neither range-checked nor saturated. Rounding is done with the
    built-in ``round``.
    """
    scale = 2 ** dec_bits
    scaled = number * scale
    return round(scaled)

In [None]:
# Print a C array initialiser with one entry per leaf, in increasing node-id
# order: the class whose entry in the leaf's value row is largest.
leaf_ids = [node for node in range(is_leaves.shape[0]) if is_leaves[node]]
leaf_entries = "".join(f"{classes[np.argmax(values[node][0])]}, " for node in leaf_ids)
print("__lmem uint32_t path_class[] = {" + leaf_entries + "};")

In [None]:
# Depth-first search through nodes
def depth_first(root, left, right, prediction, feature, threshold, output, feature_id, num_result_words, feat_file):
    """Walk the subtree at ``root`` and emit one bit-clearing check per leaf for one feature.

    While descending, every split on ``feature_id`` appends a comparison to
    ``output`` (``feature <= T`` towards the left child, ``feature > T``
    towards the right child); splits on other features pass ``output`` through
    unchanged. At each leaf whose accumulated condition is non-empty, a
    statement is emitted that clears the leaf's bit in ``result_gpr`` when the
    condition does not hold. A leaf's bit number is the count of leaves with a
    smaller node id.

    Args:
        root: id of the node to process.
        left, right: per-node child ids; a node whose two entries are equal
            is treated as a leaf.
        prediction: not used; kept so existing calls keep working.
        feature: per-node index of the feature tested at that node.
        threshold: per-node split threshold; converted with the module-level
            ``num_translate(threshold, total_bits, dec_bits)``.
        output: condition text accumulated so far; pass ``""`` at the top.
        feature_id: the feature whose comparisons are collected.
        num_result_words: when greater than 1, ``result_gpr`` is addressed as
            an array of 32-bit words, with the lowest leaf numbers in the
            last word.
        feat_file: when truthy, C-style statements are written to it with
            ``write``; otherwise Python statements are printed to stdout.

    Returns:
        None. All results are emitted as text.
    """
    emit_c = bool(feat_file)
    # Conjunction placed in front of every comparison in the target language.
    joiner = "&& " if emit_c else "and "

    if left[root] == right[root]:  # leaf node is base case
        if output == '':
            # No split on this feature along the path: nothing to check.
            return

        # Count the leaves with a smaller node id. Leaf status is derived from
        # left/right (same test as above) so the function does not depend on a
        # leaf mask living in the notebook's global namespace.
        path_id = np.sum(np.asarray(left)[:root] == np.asarray(right)[:root])

        # Drop exactly the one leading conjunction. (str.lstrip would strip a
        # *set of characters*, which only works by luck of the next letter.)
        condition = output[len(joiner):] if output.startswith(joiner) else output
        condition = condition.rstrip(" ")

        if num_result_words > 1:
            # 32 leaves per word; word 0 holds the highest-numbered leaves.
            word_index = num_result_words - path_id // 32 - 1
            target = f"result_gpr[{word_index}]"
            bit = path_id % 32
        else:
            target = "result_gpr"
            bit = path_id

        if emit_c:
            # NOTE(review): for bit 31 the emitted C expression `1 << 31`
            # shifts into the sign bit of a signed int; confirm the target
            # compiler treats it as intended.
            feat_file.write(f"        if (!({condition}))\n            {target} &= ~(1 << {bit});\n")
            feat_file.write("\n")
        else:
            print(f"    if (not({condition})):\n        {target} &= ~(1 << {bit})")
            print("")
        return

    # Split node: extend the condition only when it tests the requested feature.
    if feature[root] == feature_id:
        net_threshold = num_translate(threshold[root], total_bits, dec_bits)
        left_output = output + f"{joiner}feature <= {net_threshold} "
        right_output = output + f"{joiner}feature > {net_threshold} "
    else:
        left_output = output
        right_output = output

    depth_first(left[root], left, right, prediction, feature, threshold,
                left_output, feature_id, num_result_words, feat_file)
    depth_first(right[root], left, right, prediction, feature, threshold,
                right_output, feature_id, num_result_words, feat_file)

        

In [None]:
# Print one Python function per input feature. Each function starts with every
# result bit set, runs the checks produced by depth_first for that feature,
# and returns the remaining bits.
multi_word = num_result_words > 1
for feat_id in range(num_features):
    print(f"def feature{feat_id+1}(feature):")

    if multi_word:
        # One all-ones 32-bit word per result word.
        init_words = ", ".join(["0xffffffff"] * num_result_words)
        print("    result_gpr = [" + init_words + "]")
    else:
        print(f"    result_gpr = {result_init}")

    depth_first(0, children_left, children_right, values, feature, threshold, "", feat_id, num_result_words, None)
    print("    return result_gpr")
    # Two blank lines between generated functions.
    print("\n")