## Parameters

In [1]:
xgb_params = {
    'objective': 'multi:softmax', 
    'num_class': 2, 
    'n_estimators': 7, 
    'max_depth': 7, 
    'eta': 0.8
}

treelut_params = {
    'w_feature': 4, 
    'w_tree': 2,
    'bits_features': 32,
    'pipeline': [0, 0, 0], 
    'dir_path': './OutputFiles/Susy/',
    'style': 'mux',
    'argmax': True,
    'quantized': True}

## MNIST Data Loading

In [2]:
import pandas as pd
from LoadDataset import LoadDataset
from sklearn.model_selection import train_test_split

susy = pd.read_csv('./Data/Susy/SUSY_int.csv')
data = susy.drop(columns=['target'])
target = susy['target'].astype('int')

X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

In [3]:
data_un = data.astype('uint32')
X_train, X_test, y_train, y_test = train_test_split(data_un, target, test_size=0.2, random_state=42)


# Get max and min of each column of X_test
max_values = X_test.max().astype('uint32')
min_values = X_test.min().astype('uint32')

# To np arrays
max_values = max_values.to_numpy()
min_values = min_values.to_numpy()

print("Max values of each column in X_test:")
print(max_values)
print("Min values of each column in X_test:")
print(min_values)

Max values of each column in X_test:
[   1000000   18299086 4294967295 4294967293   26110743 4294967295
 4294967295   19206709 4294967290   22562485 4294967295   16918142
   16166820    6731210   13025709   16989283   11336282    1591660]
Min values of each column in X_test:
[     0 255157     11      0 428586      6      3    720      0      0
      4 268373   1041   2048      0  66475   4469      9]


In [4]:
# from torchvision import datasets
# import numpy as np


# data_train = datasets.MNIST(root="./Data", train=True, download=True)
# data_test = datasets.MNIST(root="./Data", train=False, download=True)

# X_train = np.array(data_train.data.numpy()).reshape(data_train.data.shape[0], -1)
# y_train = np.array(data_train.targets.numpy())

# X_test = np.array(data_test.data.numpy()).reshape(data_test.data.shape[0], -1)
# y_test = np.array(data_test.targets.numpy())

## Data Quantization

In [5]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np


scaler = MinMaxScaler()
w_feature = treelut_params['w_feature']
X_train_min_max = np.round(scaler.fit_transform(X_train)*(2**w_feature-1))
X_test_min_max = np.clip(np.round(scaler.transform(X_test)*(2**w_feature-1)), 0, 2**w_feature-1)

# Get the first 4 bits of each feature
w_feature = treelut_params['w_feature']
X_train_quantized = np.array(X_train, dtype=np.int32) & w_feature - 1
X_test_quantized = np.array(X_test, dtype=np.int32) & w_feature - 1 





## XGBoost Model Training

In [6]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

xgb_clf = XGBClassifier(**xgb_params)
xgb_clf.fit(X_train_quantized, y_train)
y_pred_xgb = xgb_clf.predict(X_test_quantized)
print(f"XGB Model Accuracy: {accuracy_score(y_pred_xgb, y_test):.3f}")

XGB Model Accuracy: 0.541


In [7]:
xgb_clf_not_quantized = XGBClassifier(**xgb_params)
xgb_clf_not_quantized.fit(X_train, y_train)
y_pred_xgb_not_quantized = xgb_clf_not_quantized.predict(X_test)
print(f"XGB Model Accuracy (not quantized): {accuracy_score(y_pred_xgb_not_quantized, y_test):.3f}")

XGB Model Accuracy (not quantized): 0.797


In [8]:
xgb_clf_min_max = XGBClassifier(**xgb_params)
xgb_clf_min_max.fit(X_train_min_max, y_train)
y_pred_xgb_min_max = xgb_clf_min_max.predict(X_test_min_max)
print(f"XGB Model Accuracy (min-max scaled): {accuracy_score(y_pred_xgb_min_max, y_test):.3f}")

XGB Model Accuracy (min-max scaled): 0.772


## TreeLUT Model Generation

In [9]:
from treelut import TreeLUTClassifier

# 2025-05-27 13:40:50
# Specifying the parameters for 
trelut_X_test = X_test if treelut_params['quantized'] else X_test_quantized


treelut_clf = TreeLUTClassifier(xgb_model=xgb_clf_min_max, **treelut_params, min=min_values, max=max_values)
treelut_clf.convert()
y_pred_treelut = treelut_clf.predict(X_test_min_max)
print(f"TreeLUT Model Accuracy: {accuracy_score(y_pred_treelut, y_test):.3f}")

treelut_clf.verilog()
treelut_clf.testbench(trelut_X_test, y_test)

TreeLUT Model Accuracy: 0.750
Info: Class 0 Tree 0 has 92 leaves and 91 nodes. Path max: 2.
Info: Class 1 Tree 0 has 92 leaves and 91 nodes. Path max: 2.
Info: Class 0 Tree 1 has 99 leaves and 98 nodes. Path max: 2.
Info: Class 1 Tree 1 has 99 leaves and 98 nodes. Path max: 2.
Info: Class 0 Tree 2 has 100 leaves and 99 nodes. Path max: 2.
Info: Class 1 Tree 2 has 100 leaves and 99 nodes. Path max: 2.
Info: Class 0 Tree 3 has 112 leaves and 111 nodes. Path max: 2.
Info: Class 1 Tree 3 has 112 leaves and 111 nodes. Path max: 2.
Info: Class 0 Tree 4 has 106 leaves and 105 nodes. Path max: 2.
Info: Class 1 Tree 4 has 106 leaves and 105 nodes. Path max: 2.
Info: Class 0 Tree 5 has 110 leaves and 109 nodes. Path max: 2.
Info: Class 1 Tree 5 has 110 leaves and 109 nodes. Path max: 2.
Info: Class 0 Tree 6 has 120 leaves and 119 nodes. Path max: 3.
Info: Class 1 Tree 6 has 120 leaves and 119 nodes. Path max: 3.
Info: Creating adder tree for class 0...
Info: Creating adder tree for class 1...
In