In [None]:
import numpy as np
import pandas as pd
import openpyxl
from sklearn.preprocessing import LabelEncoder

from utils import * # Dataloader
from SATreeCraft import * # Tree solver framework
from SATreeClassifier import * 

Numerical Tests

In [None]:
# NOTE(review): this wildcard import repeats the one in the first cell; it is
# kept here so the cell behaves exactly as before.
from utils import *

# Wine data file: comma-separated, with the class label in column 0.
file_path_to_test = 'Datasets/wine/wine.data'
delimiter = ','
label_position = 0

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader outputs to notebook-level names first; the solver and
# classifier cells further down read these names.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show every loader output next to its .shape.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Min-Height Problem 100% Training Accuracy

In [None]:
# SATreeCraft with no objective or depth arguments. The section heading
# describes this run as the min-height problem with 100% training accuracy.
min_height_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
)
min_height_numerical_problem.solve()
# NOTE(review): export_cnf() presumably writes the CNF encoding out —
# confirm the destination in SATreeCraft.
min_height_numerical_problem.export_cnf()

# Report the solved model and the depth recorded by the solver.
print("Final Model: ", min_height_numerical_problem.model)
print("min depth found: ", min_height_numerical_problem.min_depth)

In [None]:
# Same numerical data, but with the 'max_accuracy' objective at a fixed depth of 2.
# NOTE(review): the keyword is spelled 'classifciation_objective'; presumably
# that matches SATreeCraft's signature — confirm before renaming it.
max_accuracy_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    classifciation_objective='max_accuracy',
    fixed_depth=2,
)
max_accuracy_numerical_problem.solve()
# NOTE(review): export_cnf() presumably writes the CNF encoding out —
# confirm the destination in SATreeCraft.
max_accuracy_numerical_problem.export_cnf()

# Report the solved model and the cost recorded by the solver.
print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)

Model Classifier Tests - Numerical

In [None]:
# Classifier - sklearn Integration

# Wrap the tree produced by the min-height numerical solve.
model = SATreeClassifier(min_height_numerical_problem.model)

# Test case 0 - using training data, expecting 100%.
# NOTE(review): only the slice [5:160] is evaluated here, not the whole
# dataset — confirm that this subset is intentional.
y_test = true_labels_for_points[5:160]
X_test = dataset[5:160]

# Accuracy score
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Classification report (header and report on separate lines)
report = model.get_classification_report(X_test, y_test)
print("Classification Report:", report, sep="\n")

# Confusion matrix (header and matrix on separate lines)
conf_matrix = model.get_confusion_matrix(X_test, y_test)
print("Confusion Matrix:", conf_matrix, sep="\n")

Categorical Datasets Test

In [None]:
# Promoter data set: label in column 0, categorical feature at index 2,
# and no numerical indices supplied.
file_path = 'Datasets/molecular+biology+promoter+gene+sequences/promoters.data'
label_index = 0
categorical_feature_index = 2
numerical_indices = None

data_loader = TreeDataLoaderWithCategorical(
    file_path=file_path,
    label_index=label_index,
    numerical_indices=numerical_indices,
    categorical_feature_index=categorical_feature_index,
)

# Bind the loader outputs to notebook-level names first. This replaces the
# values the numerical section stored under the same names; the categorical
# solver and classifier cells below read them.
features = data_loader.features
features_categorical = data_loader.features_categorical
features_numerical = data_loader.features_numerical
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show every loader output next to its .shape.
print("Features:", features, features.shape)
print("Categorical Features:", features_categorical, features_categorical.shape)
print("Numerical Features:", features_numerical, features_numerical.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

In [None]:
# SATreeCraft on the categorical data with no objective or depth arguments;
# the categorical and numerical feature lists are passed through explicitly.
min_height_categorical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    features_categorical=features_categorical,
    features_numerical=features_numerical,
)
min_height_categorical_problem.solve()
# NOTE(review): export_cnf() presumably writes the CNF encoding out —
# confirm the destination in SATreeCraft.
min_height_categorical_problem.export_cnf()

# Report the solved model and the depth recorded by the solver.
print("Final Model: ", min_height_categorical_problem.model)
print("min depth found: ", min_height_categorical_problem.min_depth)

In [None]:
# Categorical data with the 'max_accuracy' objective at a fixed depth of 2.
# NOTE(review): the keyword is spelled 'classifciation_objective'; presumably
# that matches SATreeCraft's signature — confirm before renaming it.
max_accuracy_categorical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    features_categorical=features_categorical,
    features_numerical=features_numerical,
    classifciation_objective='max_accuracy',
    fixed_depth=2,
)
max_accuracy_categorical_problem.solve()
# NOTE(review): export_cnf() presumably writes the CNF encoding out —
# confirm the destination in SATreeCraft.
max_accuracy_categorical_problem.export_cnf()

# Report the solved model and the cost recorded by the solver.
print("Final Model: ", max_accuracy_categorical_problem.model)
print("Min cost found: ", max_accuracy_categorical_problem.min_cost)

Tree Classifier Tests Metrics - Sklearn

In [None]:
# Classifier - sklearn Integration

# Wrap the tree produced by the min-height categorical solve.
model = SATreeClassifier(min_height_categorical_problem.model)

# Test case 0 - using the training set, expecting 100%.
y_test = true_labels_for_points
X_test = dataset

# Accuracy score
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Classification report (header and report on separate lines)
report = model.get_classification_report(X_test, y_test)
print("Classification Report:", report, sep="\n")

# Confusion matrix (header and matrix on separate lines)
conf_matrix = model.get_confusion_matrix(X_test, y_test)
print("Confusion Matrix:", conf_matrix, sep="\n")