In [1]:
import numpy as np
import pandas as pd
import openpyxl
from sklearn.preprocessing import LabelEncoder

from utils import * # Dataloader
from SATreeCraft import * # Tree solver framework
from SATreeClassifier import * 

Numerical Tests

In [2]:
# Load the wine dataset through the numerical data loader.
# The first cell already runs `from utils import *`, so the loader class is in
# scope and the wildcard import is not repeated in this cell.
file_path_to_test = 'Datasets/wine/wine.data'
delimiter = ','
label_position = 0  # presumably the column index of the class label - confirm against the loader

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Show every array the loader produced together with its shape.
print("Features:", data_loader.features, data_loader.features.shape)
print("Labels:", data_loader.labels, data_loader.labels.shape)
print("True Labels for Points:", data_loader.true_labels_for_points, data_loader.true_labels_for_points.shape)
print("Dataset:\n", data_loader.dataset,data_loader.dataset.shape)

# Bind the loader outputs to the notebook-level names consumed by the solver
# cells that follow.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12'] (13,)
Labels: [0 1 2] (3,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] (178,)
Dataset:
 [[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]] (178, 13)


Min-Height Problem (100% Training Accuracy)

In [3]:
# Build the solver on the wine data without an objective or depth argument;
# after solve() the depth that was found is read from `min_depth`.
# NOTE: `dataset`, `features`, `labels` and `true_labels_for_points` are
# notebook-level names that the categorical section further down rebinds, so
# this cell must run before that section.
min_height_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
)

min_height_numerical_problem.solve()
# Presumably exports the CNF encoding; the destination is not visible here - confirm.
min_height_numerical_problem.export_cnf()

print("Final Model: ", min_height_numerical_problem.model)
print("min depth found: ", min_height_numerical_problem.min_depth)

no solution at depth 1
no solution at depth 2
Final Model:  [{'type': 'branching', 'children': [1, 2], 'feature': '10', 'threshold': 0.915}, {'type': 'branching', 'children': [3, 4], 'feature': '6', 'threshold': 1.585}, {'type': 'branching', 'children': [5, 6], 'feature': '12', 'threshold': 716.0}, {'type': 'branching', 'children': [7, 8], 'feature': '9', 'threshold': 3.8449999999999998}, {'type': 'branching', 'children': [9, 10], 'feature': '12', 'threshold': 627.5}, {'type': 'branching', 'children': [11, 12], 'feature': '6', 'threshold': 0.565}, {'type': 'branching', 'children': [13, 14], 'feature': '9', 'threshold': 3.46}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 2}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 0}, {'type': 'leaf', 'label': 2}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 0}]
min depth found:  3


In [4]:
# Same wine inputs, but with the 'max_accuracy' objective at a fixed depth of 2;
# after solve() the objective value is read from `min_cost`.
# NOTE(review): the keyword is spelled `classifciation_objective` here and the
# recorded run accepted it - confirm against SATreeCraft's signature before
# "correcting" the spelling.
max_accuracy_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    classifciation_objective='max_accuracy',
    fixed_depth=2,
)

max_accuracy_numerical_problem.solve()
# Presumably exports the CNF encoding; the destination is not visible here - confirm.
max_accuracy_numerical_problem.export_cnf()

print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)

Final Model:  [{'type': 'branching', 'children': [1, 2], 'feature': '6', 'threshold': 1.4}, {'type': 'branching', 'children': [3, 4], 'feature': '9', 'threshold': 3.82}, {'type': 'branching', 'children': [5, 6], 'feature': '12', 'threshold': 755.0}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 2}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 0}]
Min cost found:  6


In [6]:
# Print the literal assignment of the min-height solution. The recorded output
# lists it under the headings "A Variables" and "S Variables" and is long.
min_height_numerical_problem.display_solution()


Solution of Literals
A Variables:
0 0 0 0 0 0 0 0 0 0 1 0 0
0 0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 1
0 0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 1 0 0 0


S Variables:
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
1 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 1 0 0 0
0 0 0 0 0 0 0
0 0 0 1 0 0 0
0 0 0 1 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 1 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 1 0 0 0
1 0 0 0 0 0 0
0 0 0 0 0 0 0
1 0 0 0 0 0 0
1 0 0 0 0 0 0
1 0 1 0 0 0 0
1 0 0 0 0 0 0
1 0 0 0 0 0 0
0 0 0 0 0 0 0
1 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 0
0 0 0 0 0 0 

In [5]:
# Print the literal assignment of the fixed-depth max-accuracy solution. The
# recorded output lists it under the headings "A Variables" and "S Variables".
max_accuracy_numerical_problem.display_solution()


Solution of Literals
A Variables:
0 0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 1


S Variables:
0 0 0
0 0 0
0 0 0
0 0 0
0 0 1
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 1 0
0 0 0
0 1 0
0 1 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 1 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 1
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
1 1 1
1 1 1
0 0 1
0 1 1
0 0 1
0 1 1
0 0 1
0 0 1
0 0 1
1 1 1
1 1 1
1 1 0
0 1 1
0 1 1
0 1 0
0 1 0
0 1 1
0 0 1
1 1 1
0 1 1
0 1 1
0 1 1
0 0 1
0 1 1
0 0 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
1 1 1
0 1 1
0 1 1
0 1 1
0 1 0
1 1 1
0 1 1
0 0 1
0 1 1
0 1 1
1 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
1 1 1
0 1 1
0 1 1
0 0 1
0 1 1
0 1 1
0 1 1
0 1 1
0 0 1
0 1 1
0 1 1
0 1 1
1 0 1
1 0 1
1 0 1
1 0 1
1 0 1
1 0 1
1 0 1
1 0 1
1 0 1
1 0 1
1 0 1
1 0 0
1 0 1
1 0 1
1 0 0
1 

Model Classifier Tests - Numerical

In [None]:
# Classifier - sklearn integration

# Wrap the tree produced by the min-height solver in the classifier.
model = SATreeClassifier(min_height_numerical_problem.model)

# Test case 0 - using rows 5..159 of the training set, expecting 100% accuracy.
training_rows = slice(5, 160)
X_test = dataset[training_rows]
y_test = true_labels_for_points[training_rows]

# Accuracy score
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Classification report (heading and report on separate lines)
report = model.get_classification_report(X_test, y_test)
print("Classification Report:", report, sep="\n")

# Confusion matrix (heading and matrix on separate lines)
conf_matrix = model.get_confusion_matrix(X_test, y_test)
print("Confusion Matrix:", conf_matrix, sep="\n")

Categorical Datasets Test

In [None]:
# Promoter gene sequences dataset, loaded through the categorical-aware loader.
file_path = 'Datasets/molecular+biology+promoter+gene+sequences/promoters.data'
label_index = 0
categorical_feature_index = 2
numerical_indices = None  # no numerical columns are passed for this dataset

data_loader = TreeDataLoaderWithCategorical(
    file_path=file_path,
    label_index=label_index,
    numerical_indices=numerical_indices,
    categorical_feature_index=categorical_feature_index,
)

# Print every loader output followed by its shape, in a fixed order.
for heading, attribute in (
    ("Features:", "features"),
    ("Categorical Features:", "features_categorical"),
    ("Numerical Features:", "features_numerical"),
    ("Labels:", "labels"),
    ("True Labels for Points:", "true_labels_for_points"),
    ("Dataset:\n", "dataset"),
):
    loaded = getattr(data_loader, attribute)
    print(heading, loaded, loaded.shape)

# Rebind the notebook-level names to the promoter data. From here on
# `dataset`, `features`, `labels` and `true_labels_for_points` no longer refer
# to the wine data loaded earlier.
features = data_loader.features
features_categorical = data_loader.features_categorical
features_numerical = data_loader.features_numerical
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

In [None]:
# Build the solver on the promoter data without an objective or depth argument,
# passing the categorical and numerical feature lists alongside the usual
# inputs; after solve() the depth that was found is read from `min_depth`.
min_height_categorical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    features_categorical=features_categorical,
    features_numerical=features_numerical,
)

min_height_categorical_problem.solve()
# Presumably exports the CNF encoding; the destination is not visible here - confirm.
min_height_categorical_problem.export_cnf()

print("Final Model: ", min_height_categorical_problem.model)
print("min depth found: ", min_height_categorical_problem.min_depth)

In [None]:
# Promoter data with the 'max_accuracy' objective at a fixed depth of 2; after
# solve() the objective value is read from `min_cost`.
# NOTE(review): the keyword is spelled `classifciation_objective` here - confirm
# against SATreeCraft's signature before "correcting" the spelling.
max_accuracy_categorical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    features_categorical=features_categorical,
    features_numerical=features_numerical,
    classifciation_objective='max_accuracy',
    fixed_depth=2,
)

max_accuracy_categorical_problem.solve()
# Presumably exports the CNF encoding; the destination is not visible here - confirm.
max_accuracy_categorical_problem.export_cnf()

print("Final Model: ", max_accuracy_categorical_problem.model)
print("Min cost found: ", max_accuracy_categorical_problem.min_cost)

Tree Classifier Test Metrics - Sklearn

In [None]:
# Classifier - sklearn integration

# Wrap the tree produced by the categorical min-height solver in the classifier.
model = SATreeClassifier(min_height_categorical_problem.model)

# Test case 0 - using the full training set, expecting 100% accuracy.
X_test, y_test = dataset, true_labels_for_points

# Accuracy score
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Classification report (heading and report on separate lines)
report = model.get_classification_report(X_test, y_test)
print("Classification Report:", report, sep="\n")

# Confusion matrix (heading and matrix on separate lines)
conf_matrix = model.get_confusion_matrix(X_test, y_test)
print("Confusion Matrix:", conf_matrix, sep="\n")