In [1]:
import numpy as np
import pandas as pd
import openpyxl
from sklearn.preprocessing import LabelEncoder

from utils import * # Dataloader and K-fold mechanism 
from SATreeCraft import * # Tree solver framework
from SATreeClassifier import * 

Numerical Tests

In [None]:
from utils import *

# Wine data: comma-separated file, label_position=0
# (presumably the label is the first column — confirm against the loader).
file_path_to_test = 'Datasets/wine/wine.data'
delimiter = ','
label_position = 0

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to the names the following cells rely on.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Echo each array together with its shape as a quick sanity check.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Min-Height Problem 100% Training Accuracy

In [None]:
# No classification_objective is passed, so the solver runs with its default
# objective (the section heading calls this the min-height problem).
min_height_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
)
min_height_numerical_problem.solve()

# Optional CNF dump (disabled):
# min_height_numerical_problem.export_cnf_min_height("dimacs/testPerfectHeight.cnf")

print("Final Model: ", min_height_numerical_problem.model)
print("min depth found: ", min_height_numerical_problem.min_depth)

In [None]:
# Fixed-depth problem with the 'max_accuracy' objective on the numerical data.
# Further options that were left disabled in this cell:
#   min_support=5, min_margin=10, tree_structure='Oblivious'
max_accuracy_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    classification_objective='max_accuracy',
    fixed_depth=2,
)

max_accuracy_numerical_problem.solve()
max_accuracy_numerical_problem.export_cnf()

print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)

In [None]:
# Show the sat_solution attribute of the solved max-accuracy problem as the cell output.
max_accuracy_numerical_problem.sat_solution

In [None]:
# Render the solution found for the min-height numerical problem.
min_height_numerical_problem.display_solution()

In [None]:
# Render the solution found for the fixed-depth max-accuracy numerical problem.
max_accuracy_numerical_problem.display_solution()

Model Classifier Tests - Numerical

In [None]:
# Classifier - sklearn integration

# Wrap the tree found by the max-accuracy numerical problem in the classifier.
model = SATreeClassifier(max_accuracy_numerical_problem.model)

# Evaluate on rows 5..159 of the data the problem was built from, i.e. a slice
# of the training data rather than a held-out split.
# NOTE(review): the original comment expected 100% accuracy here; a fixed-depth
# 'max_accuracy' tree is not obviously guaranteed to fit perfectly — confirm.
X_test = dataset[5:160]
y_test = true_labels_for_points[5:160]

# To get the accuracy score
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# To get the classification report
report = model.get_classification_report(X_test, y_test)
print("Classification Report:")
print(report)

# To get the confusion matrix
conf_matrix = model.get_confusion_matrix(X_test, y_test)
print("Confusion Matrix:")
print(conf_matrix)

K-Fold Tests - Numerical Dataset with Minimum Support

In [None]:
# k-fold run on the numerical dataset with a minimum-support level of 0.
# (complete_tree=False was left disabled in this cell.)
k = 5
depth = 2
minimum_support = 0

accuracies, mean_score = k_fold_tester(
    k=k,
    depth=depth,
    dataset=dataset,
    true_labels_for_points=true_labels_for_points,
    labels=labels,
    features=features,
    min_support_level=minimum_support,
)

print(accuracies)
print(mean_score)

Minimum Margin Testing - Numerical Dataset

In [None]:
# k-fold run on the numerical dataset with a minimum-margin level of 4.
# (complete_tree=False was left disabled in this cell.)
k = 5
depth = 2
minimum_margin = 4

accuracies, mean_score = k_fold_tester(
    k=k,
    depth=depth,
    dataset=dataset,
    true_labels_for_points=true_labels_for_points,
    labels=labels,
    features=features,
    min_margin_level=minimum_margin,
)

print(accuracies)
print(mean_score)

Categorical Datasets Test

In [None]:
# Promoter gene-sequence data set, loaded through the categorical-aware loader.
file_path = 'Datasets/molecular+biology+promoter+gene+sequences/promoters.data'
label_index = 0
categorical_feature_index = 2
numerical_indices = None

data_loader = TreeDataLoaderWithCategorical(
    file_path=file_path,
    label_index=label_index,
    numerical_indices=numerical_indices,
    categorical_feature_index=categorical_feature_index,
)

# Echo every array the loader exposes, each followed by its shape.
print("Features:", data_loader.features, data_loader.features.shape)
print("Categorical Features:", data_loader.features_categorical, data_loader.features_categorical.shape)
print("Numerical Features:", data_loader.features_numerical, data_loader.features_numerical.shape)
print("Labels:", data_loader.labels, data_loader.labels.shape)
print("True Labels for Points:", data_loader.true_labels_for_points, data_loader.true_labels_for_points.shape)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)

# Rebind the working names used by the cells below to the categorical data.
dataset = data_loader.dataset
true_labels_for_points = data_loader.true_labels_for_points
labels = data_loader.labels
features = data_loader.features
features_categorical = data_loader.features_categorical
features_numerical = data_loader.features_numerical

In [None]:
# Same construction as the numerical min-height cell, with the categorical and
# numerical feature lists passed in addition; no objective is specified.
min_height_categorical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    features_categorical=features_categorical,
    features_numerical=features_numerical,
)

min_height_categorical_problem.solve()
min_height_categorical_problem.export_cnf()

print("Final Model: ", min_height_categorical_problem.model)
print("min depth found: ", min_height_categorical_problem.min_depth)

In [None]:
# Fixed-depth 'max_accuracy' problem on the categorical data.
# Options that were left disabled in this cell:
#   min_support=2, tree_structure='Oblivious'
max_accuracy_categorical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    features_categorical=features_categorical,
    features_numerical=features_numerical,
    classification_objective='max_accuracy',
    fixed_depth=2,
)

max_accuracy_categorical_problem.solve()
# Write the CNF encoding to an explicit file this time.
max_accuracy_categorical_problem.export_cnf("dimacs/testThisFile.cnf")

print("Final Model: ", max_accuracy_categorical_problem.model)
print("Min cost found: ", max_accuracy_categorical_problem.min_cost)

Tree Classifier Tests Metrics - Sklearn

In [None]:
# Classifier - sklearn integration

# Wrap the tree found by the max-accuracy categorical problem in the classifier.
model = SATreeClassifier(max_accuracy_categorical_problem.model)

# Evaluate on the full dataset the problem was built from (training data, not
# a held-out split).
# NOTE(review): the original comment expected 100% accuracy here; a fixed-depth
# 'max_accuracy' tree is not obviously guaranteed to fit perfectly — confirm.
X_test = dataset
y_test = true_labels_for_points

# To get the accuracy score
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# To get the classification report
report = model.get_classification_report(X_test, y_test)
print("Classification Report:")
print(report)

# To get the confusion matrix
conf_matrix = model.get_confusion_matrix(X_test, y_test)
print("Confusion Matrix:")
print(conf_matrix)

K-Fold Module test

In [None]:
# Baseline k-fold run on the categorical dataset.
# (complete_tree=False was left disabled in this cell.)
k = 5
depth = 2
minimum_support = 0

# minimum_support used to be assigned but never handed to the tester; it is now
# passed explicitly, by the same keyword the numerical k-fold cell uses, so the
# baseline support level is visible in the call instead of relying on the
# function's default.
accuracies, mean_score = k_fold_tester(
    k, depth, dataset, true_labels_for_points, labels, features,
    features_categorical, features_numerical,
    min_support_level=minimum_support,
)

print(accuracies)
print(mean_score)

Adding Additional Constraints - Minimum Support

In [None]:
# k-fold run on the categorical dataset with a minimum-support level of 2.
k = 5
depth = 2 
minimum_support = 2
# NOTE(review): the two trailing positional arguments are presumably
# complete_tree=True and min_support_level=minimum_support — confirm against
# the k_fold_tester signature, then switch to keywords.
accuracies, mean_score = k_fold_tester(k, depth, dataset, true_labels_for_points, labels, features, features_categorical, features_numerical, True, minimum_support)

print(accuracies)
print(mean_score)

Custom Toy Sample

In [None]:
# Hand-made toy problem: six 2-D points, two per class, three classes.
features = np.array(['0', '1'])
labels = np.array([0, 1, 2])
dataset = np.array([
    [1, 1], [1, 2],
    [7, 7], [7, 8],
    [15, 5], [15, 7],
])  # Dataset X
true_labels_for_points = np.array([0, 0, 1, 1, 2, 2])

print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

# Fixed-depth 'max_accuracy' problem on the toy data. This rebinds the
# max_accuracy_numerical_problem name used earlier in the notebook.
# Options that were left disabled in this cell:
#   min_support=5, min_margin=10, tree_structure='Oblivious'
max_accuracy_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    classification_objective='max_accuracy',
    fixed_depth=2,
)

max_accuracy_numerical_problem.solve()
max_accuracy_numerical_problem.export_cnf()

print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)



# CLUSTERING

In [1]:
import numpy as np
import pandas as pd
import openpyxl
from sklearn.preprocessing import LabelEncoder

from utils import * # Dataloader and K-fold mechanism 
from SATreeCraft import * # Tree solver framework
from SATreeClassifier import * 

In [None]:
# Toy clustering instance: six 2-D points, no labels.
features = np.array(['0', '1'])
dataset = np.array([[1, 1], [1, 2], [7, 7], [7, 8], [15, 5], [15, 6]])  # Dataset X
epsilon = 0
k_clusters = 4
depth = 2

# Pairwise constraints given as index pairs into `dataset`
# (presumably ML = must-link and CL = cannot-link — confirm against SATreeCraft).
# ML_pairs is left empty and is not passed to the solver in this cell.
# Alternative that was tried: ML_pairs = np.array([[4, 5], [0, 1], [2, 3]])
ML_pairs = np.array([])
CL_pairs = np.array([[2, 3], [3, 1]])

# The original statement was a chained assignment that also rebound
# max_accuracy_numerical_problem to this clustering problem; that stray alias
# is dropped so a classification name never refers to a clustering instance.
clustering_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    k_clusters=k_clusters,
    CL_pairs=CL_pairs,
    epsilon=epsilon,
    fixed_depth=depth,
)


In [None]:
import os

# Location of the Loandra solver. Set the LOANDRA_PATH environment variable to
# run this on another machine; the fallback is the path originally hard-coded
# in this cell.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')

# Solve the clustering problem through Loandra, then show what it produced.
clustering_problem.solve_loandra(loandra_path)
print(clustering_problem.cluster_assignments)
print(clustering_problem.cluster_diameters)

# Tests for results 

Numerical Datasets

In [8]:
import numpy as np
import pandas as pd
import openpyxl
from sklearn.preprocessing import LabelEncoder
from utils import * # Dataloader and K-fold mechanism 
from SATreeCraft import * # Tree solver framework
from SATreeClassifier import * 

In [9]:
# Banknote-authentication data: comma-separated, label_position=-1
# (presumably the last column — confirm against the loader).
file_path_to_test = 'Datasets/data_banknote_authentication.txt'
delimiter = ','
label_position = -1

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to the names the timing cell below relies on.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Echo each array together with its shape.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Features: ['0' '1' '2' '3'] (4,)
Labels: [0 1] (2,)
True Labels for Points: [0 0 0 ... 1 1 1] (1372,)
Dataset:
 [[  3.6216    8.6661   -2.8073   -0.44699]
 [  4.5459    8.1674   -2.4586   -1.4621 ]
 [  3.866    -2.6383    1.9242    0.10645]
 ...
 [ -3.7503  -13.4586   17.5932   -2.7771 ]
 [ -3.5637   -8.3827   12.393    -1.2823 ]
 [ -2.5419   -0.65804   2.6842    1.1952 ]] (1372, 4)


In [10]:
import os
import time

# Location of the Loandra solver. Set the LOANDRA_PATH environment variable to
# run this on another machine; the fallback is the path originally hard-coded
# in this cell.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
k = 5
depths = [2, 3, 4]

# One k-fold run per tree depth, timed individually.
for depth in depths:
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()

    accuracies, mean_score = k_fold_tester_loandra(loandra_path=loandra_path,
                                                   k=k, depth=depth, dataset=dataset,
                                                   true_labels_for_points=true_labels_for_points,
                                                   labels=labels, features=features,
                                                   min_support_level=0,
                                                   complete_tree=False)

    end_time = time.perf_counter()
    duration = end_time - start_time

    # Report the results together with the time taken.
    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 38.46 seconds
Accuracies: [0.91636364 0.92       0.9379562  0.91240876 0.89781022]
Mean Score: 0.9169077637690777

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 125.89 seconds
Accuracies: [0.98181818 0.94545455 0.97445255 0.97080292 0.98175182]
Mean Score: 0.97085600530856

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 4, Time Taken: 193.79 seconds
Accuracies: [0.99272727 0.99272727 0.98905109 0.97810219 0.98175182]
Mean Score: 0.9868719309887192



In [11]:
# Breast Cancer Coimbra data: comma-separated, label_position=-1
# (presumably the last column — confirm against the loader).
file_path_to_test = 'Datasets/breast+cancer+coimbra/dataR2.csv'
delimiter = ','
label_position = -1

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to the names the timing cell below relies on.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Echo each array together with its shape.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8'] (9,)
Labels: [0 1] (2,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1] (116,)
Dataset:
 [[ 48.          23.5         70.         ...   9.7024       7.99585
  417.114     ]
 [ 83.          20.69049454  92.         ...   5.429285     4.06405
  468.786     ]
 [ 82.          23.12467037  91.         ...  22.43204      9.27715
  554.697     ]
 ...
 [ 65.          32.05        97.         ...  22.54        10.33
  314.05      ]
 [ 72.          25.59        82.         ...  33.75         3.27
  392.46      ]
 [ 86.          27.18       138.         ...  14.11         4.35
   90.09      ]] (116, 9)


In [12]:
import os
import time

# Location of the Loandra solver. Set the LOANDRA_PATH environment variable to
# run this on another machine; the fallback is the path originally hard-coded
# in this cell.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
k = 5
depths = [2, 3, 4]

# One k-fold run per tree depth, timed individually.
for depth in depths:
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()

    accuracies, mean_score = k_fold_tester_loandra(loandra_path=loandra_path,
                                                   k=k, depth=depth, dataset=dataset,
                                                   true_labels_for_points=true_labels_for_points,
                                                   labels=labels, features=features,
                                                   min_support_level=0,
                                                   complete_tree=False)

    end_time = time.perf_counter()
    duration = end_time - start_time

    # Report the results together with the time taken.
    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 14.33 seconds
Accuracies: [0.625      0.65217391 0.39130435 0.82608696 0.69565217]
Mean Score: 0.6380434782608695

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 2824.99 seconds
Accuracies: [0.79166667 0.7826087  0.60869565 0.69565217 0.52173913]
Mean Score: 0.680072463768116

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 4, Time Taken: 15071.69 seconds
Accuracies: [0.58333333 0.65217391 0.73913043 0.65217391 0.69565217]
Mean Score: 0.6644927536231884



In [4]:
# Cryotherapy data, read from an Excel workbook; label_position=-1
# (presumably the last column — confirm against the loader).
# NOTE(review): a ',' delimiter is passed although the file is .xlsx;
# presumably the loader ignores it for spreadsheets — confirm.
file_path_to_test = 'Datasets/Cryotherapy.xlsx'
delimiter = ','
label_position = -1

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to the names the timing cell below relies on.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Echo each array together with its shape.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Features: ['0' '1' '2' '3' '4' '5'] (6,)
Labels: [0 1] (2,)
True Labels for Points: [0 1 0 0 0 1 0 0 1 0 0 1 1 1 0 1 0 0 1 1 1 1 1 0 1 0 1 0 0 1 1 1 0 0 0 1 0
 0 1 0 0 1 1 1 0 1 0 0 1 1 1 1 1 0 1 0 1 0 1 1 1 0 0 0 0 1 0 0 1 0 0 1 1 1
 0 1 0 0 1 1 1 1 1 0 1 0 1 0 1 1] (90,)
Dataset:
 [[1.000e+00 3.500e+01 1.200e+01 5.000e+00 1.000e+00 1.000e+02]
 [1.000e+00 2.900e+01 7.000e+00 5.000e+00 1.000e+00 9.600e+01]
 [1.000e+00 5.000e+01 8.000e+00 1.000e+00 3.000e+00 1.320e+02]
 [1.000e+00 3.200e+01 1.175e+01 7.000e+00 3.000e+00 7.500e+02]
 [1.000e+00 6.700e+01 9.250e+00 1.000e+00 1.000e+00 4.200e+01]
 [1.000e+00 4.100e+01 8.000e+00 2.000e+00 2.000e+00 2.000e+01]
 [1.000e+00 3.600e+01 1.100e+01 2.000e+00 1.000e+00 8.000e+00]
 [1.000e+00 5.900e+01 3.500e+00 3.000e+00 3.000e+00 2.000e+01]
 [1.000e+00 2.000e+01 4.500e+00 1.200e+01 1.000e+00 6.000e+00]
 [2.000e+00 3.400e+01 1.125e+01 3.000e+00 3.000e+00 1.500e+02]
 [2.000e+00 2.100e+01 1.075e+01 5.000e+00 1.000e+00 3.500e+01]
 [2.000e+00 1.500e+01 6

In [5]:
import os
import time

# Location of the Loandra solver. Set the LOANDRA_PATH environment variable to
# run this on another machine; the fallback is the path originally hard-coded
# in this cell.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
k = 5
depths = [2, 3, 4]

# One k-fold run per tree depth, timed individually.
for depth in depths:
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()

    accuracies, mean_score = k_fold_tester_loandra(loandra_path=loandra_path,
                                                   k=k, depth=depth, dataset=dataset,
                                                   true_labels_for_points=true_labels_for_points,
                                                   labels=labels, features=features,
                                                   min_support_level=0,
                                                   complete_tree=False)

    end_time = time.perf_counter()
    duration = end_time - start_time

    # Report the results together with the time taken.
    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 1.76 seconds
Accuracies: [0.94444444 0.83333333 0.88888889 1.         0.83333333]
Mean Score: 0.9

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 3.22 seconds
Accuracies: [0.72222222 0.94444444 0.94444444 0.94444444 0.77777778]
Mean Score: 0.8666666666666666

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 4, Time Taken: 2.95 seconds
Accuracies: [0.83333333 0.83333333 0.94444444 0.88888889 0.83333333]
Mean Score: 0.8666666666666666



In [15]:
# Immunotherapy data, read from an Excel workbook; label_position=-1
# (presumably the last column — confirm against the loader).
# NOTE(review): a ',' delimiter is passed although the file is .xlsx;
# presumably the loader ignores it for spreadsheets — confirm.
file_path_to_test = 'Datasets/Immunotherapy.xlsx'
delimiter = ','
label_position = -1

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to the names the timing cell below relies on.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Echo each array together with its shape.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Features: ['0' '1' '2' '3' '4' '5' '6'] (7,)
Labels: [0 1] (2,)
True Labels for Points: [1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 0 0
 1 1 0 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1
 1 1 1 1 1 0 1 1 0 1 1 1 1 0 0 1] (90,)
Dataset:
 [[  1.    22.     2.25  14.     3.    51.    50.  ]
 [  1.    15.     3.     2.     3.   900.    70.  ]
 [  1.    16.    10.5    2.     1.   100.    25.  ]
 [  1.    27.     4.5    9.     3.    80.    30.  ]
 [  1.    20.     8.     6.     1.    45.     8.  ]
 [  1.    15.     5.     3.     3.    84.     7.  ]
 [  1.    35.     9.75   2.     2.     8.     6.  ]
 [  2.    28.     7.5    4.     1.     9.     2.  ]
 [  2.    19.     6.     2.     1.   225.     8.  ]
 [  2.    32.    12.     6.     3.    35.     5.  ]
 [  2.    33.     6.25   2.     1.    30.     3.  ]
 [  2.    17.     5.75  12.     3.    25.     7.  ]
 [  2.    15.     1.75   1.     2.    49.     7.  ]
 [  2.    15.     5.5   12.     1.  

In [16]:
import os
import time

# Location of the Loandra solver. Set the LOANDRA_PATH environment variable to
# run this on another machine; the fallback is the path originally hard-coded
# in this cell.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
k = 5
depths = [2, 3, 4]

# One k-fold run per tree depth, timed individually.
for depth in depths:
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()

    accuracies, mean_score = k_fold_tester_loandra(loandra_path=loandra_path,
                                                   k=k, depth=depth, dataset=dataset,
                                                   true_labels_for_points=true_labels_for_points,
                                                   labels=labels, features=features,
                                                   min_support_level=0,
                                                   complete_tree=False)

    end_time = time.perf_counter()
    duration = end_time - start_time

    # Report the results together with the time taken.
    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 1.43 seconds
Accuracies: [0.72222222 0.83333333 0.83333333 0.83333333 0.88888889]
Mean Score: 0.8222222222222222

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 9.30 seconds
Accuracies: [0.72222222 0.66666667 0.66666667 0.83333333 1.        ]
Mean Score: 0.7777777777777778

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 4, Time Taken: 3.68 seconds
Accuracies: [0.61111111 0.77777778 0.77777778 0.77777778 0.72222222]
Mean Score: 0.7333333333333333



In [17]:
# Ionosphere data: comma-separated, label_position=-1
# (presumably the last column — confirm against the loader).
file_path_to_test = 'Datasets/ionosphere/ionosphere.data'
label_position = -1
delimiter = ','

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to the names the timing cell below relies on.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Echo each array together with its shape.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14' '15'
 '16' '17' '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29'
 '30' '31' '32' '33'] (34,)
Labels: [0 1] (2,)
True Labels for Points: [1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 1 0 1 0 1 0 1 0 1 0 1 0 1 0
 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] (351,)
Dataset:
 [[ 1.       0.       0.99539 ... -0.54487  0.18

In [18]:
import os
import time

# Location of the Loandra solver. Set the LOANDRA_PATH environment variable to
# run this on another machine; the fallback is the path originally hard-coded
# in this cell.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
k = 5
depths = [2, 3, 4]

# One k-fold run per tree depth, timed individually.
# NOTE(review): the recorded output of this cell ends in a KeyboardInterrupt
# after depth 2, so the deeper runs were not completed for this dataset.
for depth in depths:
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()

    accuracies, mean_score = k_fold_tester_loandra(loandra_path=loandra_path,
                                                   k=k, depth=depth, dataset=dataset,
                                                   true_labels_for_points=true_labels_for_points,
                                                   labels=labels, features=features,
                                                   min_support_level=0,
                                                   complete_tree=False)

    end_time = time.perf_counter()
    duration = end_time - start_time

    # Report the results together with the time taken.
    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 44.23 seconds
Accuracies: [0.87323944 0.9        0.9        0.92857143 0.91428571]
Mean Score: 0.9032193158953723



KeyboardInterrupt: 

In [19]:
# Iris data: comma-separated, label_position=-1
# (presumably the last column — confirm against the loader).
file_path_to_test = 'Datasets/iris/iris.data'
label_position = -1
delimiter = ','

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to the names the timing cell below relies on.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Echo each array together with its shape.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Features: ['0' '1' '2' '3'] (4,)
Labels: [0 1 2] (3,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2] (150,)
Dataset:
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 

In [20]:
import os
import time

# Location of the Loandra solver. Set the LOANDRA_PATH environment variable to
# run this on another machine; the fallback is the path originally hard-coded
# in this cell.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
k = 5
depths = [2, 3, 4]

# One k-fold run per tree depth, timed individually.
for depth in depths:
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()

    accuracies, mean_score = k_fold_tester_loandra(loandra_path=loandra_path,
                                                   k=k, depth=depth, dataset=dataset,
                                                   true_labels_for_points=true_labels_for_points,
                                                   labels=labels, features=features,
                                                   min_support_level=0,
                                                   complete_tree=False)

    end_time = time.perf_counter()
    duration = end_time - start_time

    # Report the results together with the time taken.
    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 2.37 seconds
Accuracies: [0.96666667 0.86666667 1.         0.9        0.9       ]
Mean Score: 0.9266666666666667

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 2.08 seconds
Accuracies: [0.96666667 0.96666667 0.93333333 0.93333333 0.96666667]
Mean Score: 0.9533333333333334

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 4, Time Taken: 3.92 seconds
Accuracies: [1.         0.96666667 0.86666667 0.93333333 0.93333333]
Mean Score: 0.9400000000000001



In [21]:
# User Knowledge Modeling data, read from an Excel workbook; label_position=-1
# (presumably the last column — confirm against the loader).
# NOTE(review): a ',' delimiter is passed although the file is .xls;
# presumably the loader ignores it for spreadsheets — confirm.
file_path_to_test = 'Datasets/Data_User_Modeling_Dataset_Hamdi_Tolga_KAHRAMAN.xls'
delimiter = ','
label_position = -1

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to the names the timing cell below relies on.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Echo each array together with its shape.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Features: ['0' '1' '2' '3' '4'] (5,)
Labels: [0 1 2 3] (4,)
True Labels for Points: [3 0 1 2 1 2 2 3 1 0 0 1 2 0 2 0 3 1 1 2 1 2 2 1 1 0 3 1 0 0 2 1 1 0 0 1 1
 1 3 2 1 0 2 2 3 2 2 0 0 2 1 2 1 0 2 1 2 2 0 1 1 2 2 1 2 3 3 2 1 2 2 0 3 2
 0 3 1 1 3 1 1 2 2 1 1 3 0 1 1 2 2 2 3 1 0 1 1 0 2 1 1 0 2 1 1 2 2 2 2 0 1
 1 0 0 2 2 3 0 2 0 1 1 1 2 0 0 0 1 1 2 1 2 3 0 2 2 1 1 2 1 3 2 2 1 2 0 1 3
 1 0 0 1 3 1 2 2 2 0 2 2 1 2 0 0 3 0 2 0 1 0 1 2 0 1 2 1 1 0 2 0 1 0 0 1 1
 1 0 2 1 0 2 0 2 0 2 1 3 1 2 3 3 2 3 1 0 1 2 2 3 0 2 2 0 2 2 1 1 0 2 1 1 2
 2 2 1 0 2 1 0 0 2 1 1 0 2 1 1 0 2 2 1 0 2 1 2 0 2 1 0 2 2 2 1 0 2 0 2 0] (258,)
Dataset:
 [[0.   0.   0.   0.   0.  ]
 [0.08 0.08 0.1  0.24 0.9 ]
 [0.06 0.06 0.05 0.25 0.33]
 ...
 [0.54 0.82 0.71 0.29 0.77]
 [0.5  0.75 0.81 0.61 0.26]
 [0.66 0.9  0.76 0.87 0.74]] (258, 5)


In [22]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]

# Run one k-fold evaluation per tree depth and report its wall-clock cost.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        min_support_level=0,
        complete_tree=False,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 4.02 seconds
Accuracies: [0.76923077 0.90384615 0.84615385 0.8627451  0.90196078]
Mean Score: 0.856787330316742

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 10.71 seconds
Accuracies: [0.90384615 0.92307692 0.90384615 0.92156863 0.90196078]
Mean Score: 0.9108597285067873

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 4, Time Taken: 32.35 seconds
Accuracies: [0.88461538 0.90384615 0.94230769 0.96078431 0.94117647]
Mean Score: 0.9265460030165913



In [23]:
# Load the vertebral column dataset.
# label_position=-1 presumably selects the last column as the class label -- confirm against the loader.
file_path_to_test = 'Datasets/vertebral+column/verbex.data'
delimiter = ','
label_position = -1
data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Publish the loader's arrays under the names the solver / k-fold cells read.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show every array next to its shape as a quick sanity check.
print("Features:", data_loader.features, data_loader.features.shape)
print("Labels:", data_loader.labels, data_loader.labels.shape)
print("True Labels for Points:", data_loader.true_labels_for_points, data_loader.true_labels_for_points.shape)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)

Features: ['0' '1' '2' '3' '4' '5'] (6,)
Labels: [0 1] (2,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1] (310,)
Dataset:
 [[ 63.0278175   22.55258597  39.60911701  40.47523153  98.67291675
   -0.25439999]
 [ 39.05695098  10.06099147  25.01537822  28.99595951 114.4054254
    4.56425864]
 [ 68.83202098  22.21848205  50.09219357  46.61353893 105.9851355
   -3.53031731]
 ...
 [ 61.44659663  2

In [24]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]

# Run one k-fold evaluation per tree depth and report its wall-clock cost.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        min_support_level=0,
        complete_tree=False,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 12.61 seconds
Accuracies: [0.77419355 0.82258065 0.75806452 0.82258065 0.82258065]
Mean Score: 0.8

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 3392.52 seconds
Accuracies: [0.77419355 0.80645161 0.79032258 0.85483871 0.79032258]
Mean Score: 0.803225806451613



KeyboardInterrupt: 

In [31]:
# Load the wine dataset.
# label_position=0 presumably selects the first column as the class label -- confirm against the loader.
file_path_to_test = 'Datasets/wine/wine.data'
delimiter = ','
label_position = 0
data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Publish the loader's arrays under the names the solver / k-fold cells read.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show every array next to its shape as a quick sanity check.
print("Features:", data_loader.features, data_loader.features.shape)
print("Labels:", data_loader.labels, data_loader.labels.shape)
print("True Labels for Points:", data_loader.true_labels_for_points, data_loader.true_labels_for_points.shape)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)

Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12'] (13,)
Labels: [0 1 2] (3,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] (178,)
Dataset:
 [[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]] (178, 13)


In [32]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]

# Run one k-fold evaluation per tree depth and report its wall-clock cost.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        min_support_level=0,
        complete_tree=False,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 4.31 seconds
Accuracies: [0.91666667 0.94444444 0.83333333 0.8        0.94285714]
Mean Score: 0.8874603174603175

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 6.05 seconds
Accuracies: [0.86111111 0.94444444 0.97222222 0.91428571 0.97142857]
Mean Score: 0.9326984126984126

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 4, Time Taken: 6.75 seconds
Accuracies: [0.91666667 0.80555556 0.94444444 0.91428571 0.88571429]
Mean Score: 0.8933333333333333



In [33]:
# Load the MONK's-2 training split (space-delimited).
# Point file_path_to_test at your own copy of the data file if it lives elsewhere.
file_path_to_test = 'Datasets/monk+s+problems/monks-2.train'
delimiter = ' '
label_position = 0
data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
    custom_exclude=[-1],  # presumably drops the trailing column from the features -- confirm against the loader
)

# Publish the loader's arrays under the names the solver / k-fold cells read.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show every array next to its shape as a quick sanity check.
print("Features:", data_loader.features, data_loader.features.shape)
print("Labels:", data_loader.labels, data_loader.labels.shape)
print("True Labels for Points:", data_loader.true_labels_for_points, data_loader.true_labels_for_points.shape)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)

Features: ['0' '1' '2' '3' '4' '5'] (6,)
Labels: [0 1] (2,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 1 0
 1 0 0 0 0 0 0 1 1 0 1 0 1 0 1 0 1 0 0 1 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0
 0 0 1 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 0
 0 0 0 0 0 1 1 0 1 0 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0 1 0 1 1 1 0 1 1 0 0 0 1
 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0] (169,)
Dataset:
 [[1. 1. 1. 1. 2. 2.]
 [1. 1. 1. 1. 4. 1.]
 [1. 1. 1. 2. 1. 1.]
 ...
 [3. 3. 2. 3. 1. 1.]
 [3. 3. 2. 3. 2. 1.]
 [3. 3. 2. 3. 4. 2.]] (169, 6)


In [34]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]

# Run one k-fold evaluation per tree depth and report its wall-clock cost.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        min_support_level=0,
        complete_tree=False,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 2.53 seconds
Accuracies: [0.58823529 0.5        0.55882353 0.55882353 0.57575758]
Mean Score: 0.5563279857397505

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 3, Time Taken: 37.50 seconds
Accuracies: [0.5        0.47058824 0.70588235 0.73529412 0.57575758]
Mean Score: 0.5975044563279858

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 4, Time Taken: 1544.88 seconds
Accuracies: [0.55882353 0.61764706 0.67647059 0.64705882 0.60606061]
Mean Score: 0.6212121212121212



Max Accuracy Problem - Numerical

In [None]:
# Location of the local Loandra solver, passed to the solve_loandra() calls in the cells below.
loandra_path = "/Users/harisrasul/Desktop/loandra"

In [None]:
# Set up a depth-3 tree problem with the 'max_accuracy' objective on the currently loaded dataset.
max_accuracy_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    classification_objective='max_accuracy',
    fixed_depth=3,
    # Optional settings, currently disabled:
    #   min_support=5
    #   min_margin=10
    #   tree_structure='Oblivious'
)

# Solve with the Loandra solver, then report the resulting model and its cost.
max_accuracy_numerical_problem.solve_loandra(loandra_path=loandra_path)
print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)

Min Height Problem - Numerical

In [None]:
# Set up the problem with SATreeCraft's default objective on the currently loaded dataset.
min_height_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
)

# Solve with the Loandra solver, then report the model and the depth it found.
min_height_numerical_problem.solve_loandra(loandra_path)
print("Final Model: ", min_height_numerical_problem.model)
print("min depth found: ", min_height_numerical_problem.min_depth)

K-fold tests accuracies

In [None]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]
# Minimum support level forwarded to the k-fold tester and echoed in the report line.
# It must be defined here: the report line formats `minimum_support`, and without this
# assignment the cell raises NameError only after the first (expensive) k-fold run.
minimum_support = 0

# Run one k-fold evaluation per tree depth and report its wall-clock cost.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        min_support_level=minimum_support,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Depth: {depth}, Min Support: {minimum_support}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")


Categorical Datasets

In [6]:
from utils import *

# Load the credit approval dataset, which mixes numerical and categorical columns.
file_path = 'Datasets/credit+approval/crx.data'
label_index = -1
categorical_feature_index = None
# Columns passed to the loader as numerical; the loader output lists the rest as categorical.
numerical_indices = np.array([1, 2, 7, 10, 13, 14])

data_loader = TreeDataLoaderWithCategorical(
    file_path=file_path,
    label_index=label_index,
    numerical_indices=numerical_indices,
    categorical_feature_index=categorical_feature_index,
)

# Publish the loader's arrays under the names the solver / k-fold cells read.
features = data_loader.features
features_categorical = data_loader.features_categorical
features_numerical = data_loader.features_numerical
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show every array next to its shape as a quick sanity check.
print("Features:", data_loader.features, data_loader.features.shape)
print("Categorical Features:", data_loader.features_categorical, data_loader.features_categorical.shape)
print("Numerical Features:", data_loader.features_numerical, data_loader.features_numerical.shape)
print("Labels:", data_loader.labels, data_loader.labels.shape)
print("True Labels for Points:", data_loader.true_labels_for_points, data_loader.true_labels_for_points.shape)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)

Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14'] (15,)
Categorical Features: ['0' '3' '4' '5' '6' '8' '9' '11' '12'] (9,)
Numerical Features: ['1' '2' '7' '10' '13' '14'] (6,)
Labels: [0 1] (2,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 

In [7]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]

# Run one k-fold evaluation per tree depth (minimum support fixed at 0) and time it.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        features_categorical=features_categorical,
        features_numerical=features_numerical,
        min_support_level=0,
        complete_tree=False,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")

Iteration complete
Iteration complete
Iteration complete
Iteration complete
Iteration complete
Oblivious Depth: 2, Time Taken: 82.28 seconds
Accuracies: [0.82442748 0.88549618 0.85496183 0.83076923 0.87692308]
Mean Score: 0.8545155607751027



KeyboardInterrupt: 

In [2]:
# Promoter gene sequences dataset: purely categorical, no numerical columns.
file_path = 'Datasets/molecular+biology+promoter+gene+sequences/promoters.data'
label_index = 0
# Presumably the column holding the sequence string that the loader expands
# into one categorical feature per position -- confirm against the loader.
categorical_feature_index = 2
numerical_indices = None

data_loader = TreeDataLoaderWithCategorical(
    file_path=file_path,
    label_index=label_index,
    numerical_indices=numerical_indices,
    categorical_feature_index=categorical_feature_index,
)

# Publish the loader's arrays under the names the solver / k-fold cells read.
features = data_loader.features
features_categorical = data_loader.features_categorical
features_numerical = data_loader.features_numerical
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show every array next to its shape as a quick sanity check.
print("Features:", data_loader.features, data_loader.features.shape)
print("Categorical Features:", data_loader.features_categorical, data_loader.features_categorical.shape)
print("Numerical Features:", data_loader.features_numerical, data_loader.features_numerical.shape)
print("Labels:", data_loader.labels, data_loader.labels.shape)
print("True Labels for Points:", data_loader.true_labels_for_points, data_loader.true_labels_for_points.shape)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)

Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14' '15'
 '16' '17' '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29'
 '30' '31' '32' '33' '34' '35' '36' '37' '38' '39' '40' '41' '42' '43'
 '44' '45' '46' '47' '48' '49' '50' '51' '52' '53' '54' '55' '56'] (57,)
Categorical Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14' '15'
 '16' '17' '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29'
 '30' '31' '32' '33' '34' '35' '36' '37' '38' '39' '40' '41' '42' '43'
 '44' '45' '46' '47' '48' '49' '50' '51' '52' '53' '54' '55' '56'] (57,)
Numerical Features: [] (106, 0)
Labels: [0 1] (2,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] (106,)
Dataset:
 [['t' 'a' 'c' ... 'c' 'g' 't']
 ['t' 'g' 'c' ... 'c' 'a' 'a']
 ['g' 't' 'a' ... 'g' 'c' 

In [3]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]

# Run one k-fold evaluation per tree depth (minimum support fixed at 0) and time it.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        features_categorical=features_categorical,
        features_numerical=features_numerical,
        min_support_level=0,
        complete_tree=False,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")

  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete
Oblivious Depth: 2, Time Taken: 190.17 seconds
Accuracies: [0.86363636 0.71428571 0.71428571 0.76190476 0.80952381]
Mean Score: 0.7727272727272727



  if str(j) in features_numerical:


KeyboardInterrupt: 

In [4]:
# HIV-1 protease cleavage dataset (746 set): purely categorical, no numerical columns.
file_path = 'Datasets/hiv+1+protease+cleavage/746Data.txt'
label_index = 1
# Presumably the column (index 0) holding the sequence string that the loader
# expands into one categorical feature per position -- confirm against the loader.
categorical_feature_index = 0
numerical_indices = None

data_loader = TreeDataLoaderWithCategorical(
    file_path=file_path,
    label_index=label_index,
    numerical_indices=numerical_indices,
    categorical_feature_index=categorical_feature_index,
)

# Inspect what the loader produced (full, un-subsampled data).
print("Features:", data_loader.features)
print("Categorical Features:", data_loader.features_categorical)
print("Numerical Features:", data_loader.features_numerical)
print("Labels:", data_loader.labels)
print("True Labels for Points:", data_loader.true_labels_for_points)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)

# Publish the loader's arrays under the names the solver / k-fold cells read.
features = data_loader.features
features_categorical = data_loader.features_categorical
features_numerical = data_loader.features_numerical
labels = data_loader.labels

# Keep every fourth sample (rows 3, 7, 11, ...); rows and labels are sliced
# identically so they stay aligned.
dataset = data_loader.dataset[3::4]
true_labels_for_points = data_loader.true_labels_for_points[3::4]

Features: ['0' '1' '2' '3' '4' '5' '6' '7']
Categorical Features: ['0' '1' '2' '3' '4' '5' '6' '7']
Numerical Features: []
Labels: [0 1]
True Labels for Points: [0 0 0 0 0 0 0 1 0 0 0 1 1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 1 1 0 1 0
 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1
 1 0 0 0 0 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1
 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 1 0 1 0 0 0 0 0 1
 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0
 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1
 1 1 1 1 1 1 1

In [5]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]

# Run one k-fold evaluation per tree depth (minimum support fixed at 0) and time it.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        features_categorical=features_categorical,
        features_numerical=features_numerical,
        min_support_level=0,
        complete_tree=False,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")

Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete
Oblivious Depth: 2, Time Taken: 12.72 seconds
Accuracies: [0.76315789 0.81081081 0.89189189 0.7027027  0.59459459]
Mean Score: 0.7526315789473685



  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete
Oblivious Depth: 3, Time Taken: 8.86 seconds
Accuracies: [0.63157895 0.67567568 0.75675676 0.78378378 0.75675676]
Mean Score: 0.7209103840682788



  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete
Oblivious Depth: 4, Time Taken: 5.66 seconds
Accuracies: [0.63157895 0.72972973 0.7027027  0.54054054 0.59459459]
Mean Score: 0.6398293029871978



In [6]:
# Soybean (large) dataset: purely categorical, no numerical columns.
file_path = 'Datasets/soybean+large/soybean-large.data'
label_index = 0
# No single string column to expand here, so no categorical feature index is given.
categorical_feature_index = None
numerical_indices = None

data_loader = TreeDataLoaderWithCategorical(
    file_path=file_path,
    label_index=label_index,
    numerical_indices=numerical_indices,
    categorical_feature_index=categorical_feature_index,
)

# Publish the loader's arrays under the names the solver / k-fold cells read.
features = data_loader.features
features_categorical = data_loader.features_categorical
features_numerical = data_loader.features_numerical
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show every array next to its shape as a quick sanity check.
print("Features:", data_loader.features, data_loader.features.shape)
print("Categorical Features:", data_loader.features_categorical, data_loader.features_categorical.shape)
print("Numerical Features:", data_loader.features_numerical, data_loader.features_numerical.shape)
print("Labels:", data_loader.labels, data_loader.labels.shape)
print("True Labels for Points:", data_loader.true_labels_for_points, data_loader.true_labels_for_points.shape)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)

Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14' '15'
 '16' '17' '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29'
 '30' '31' '32' '33' '34'] (35,)
Categorical Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14' '15'
 '16' '17' '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29'
 '30' '31' '32' '33' '34'] (35,)
Numerical Features: [] (0,)
Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14] (15,)
True Labels for Points: [ 0  0  0  0  0  0  0  0  0  0  1  1  1  1  1  1  1  1  1  1  2  2  2  2
  2  2  2  2  2  2  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  4  4
  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  5  5  5  5  5  5
  5  5  5  5  6  6  6  6  6  6  6  6  6  6  7  7  7  7  7  7  7  7  7  7
  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7
  7  7  7  7  7  7  8  8  8  8  8  8  8  8  8  8  9  9  9  9  9  9  9  9
  9  9 10 10 10 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11 11 

In [7]:
import time

# Location of the Loandra solver handed to the k-fold tester.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2, 3, 4]

# Run one k-fold evaluation per tree depth (minimum support fixed at 0) and time it.
for depth in depths:
    start_time = time.time()
    accuracies, mean_score = k_fold_tester_loandra(
        loandra_path=loandra_path,
        k=k,
        depth=depth,
        dataset=dataset,
        true_labels_for_points=true_labels_for_points,
        labels=labels,
        features=features,
        features_categorical=features_categorical,
        features_numerical=features_numerical,
        min_support_level=0,
        complete_tree=False,
    )
    end_time = time.time()

    # Elapsed seconds for the whole k-fold run at this depth.
    duration = end_time - start_time

    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")

  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete
Oblivious Depth: 2, Time Taken: 68.50 seconds
Accuracies: [0.37037037 0.47169811 0.26415094 0.56603774 0.39622642]
Mean Score: 0.413696715583508



  if str(j) in features_numerical:


KeyboardInterrupt: 

In [8]:
# HIV-1 protease cleavage dataset (746 set), used in full this time (no subsampling).
file_path = 'Datasets/hiv+1+protease+cleavage/746Data.txt'
label_index = 1
# Presumably the column (index 0) holding the sequence string that the loader
# expands into one categorical feature per position -- confirm against the loader.
categorical_feature_index = 0
numerical_indices = None

data_loader = TreeDataLoaderWithCategorical(
    file_path=file_path,
    label_index=label_index,
    numerical_indices=numerical_indices,
    categorical_feature_index=categorical_feature_index,
)

# Publish the loader's arrays under the names the solver / k-fold cells read.
features = data_loader.features
features_categorical = data_loader.features_categorical
features_numerical = data_loader.features_numerical
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Inspect what the loader produced.
print("Features:", data_loader.features)
print("Categorical Features:", data_loader.features_categorical)
print("Numerical Features:", data_loader.features_numerical)
print("Labels:", data_loader.labels)
print("True Labels for Points:", data_loader.true_labels_for_points)
print("Dataset:\n", data_loader.dataset, data_loader.dataset.shape)


Features: ['0' '1' '2' '3' '4' '5' '6' '7']
Categorical Features: ['0' '1' '2' '3' '4' '5' '6' '7']
Numerical Features: []
Labels: [0 1]
True Labels for Points: [0 0 0 0 0 0 0 1 0 0 0 1 1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 1 1 0 1 0
 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1
 1 0 0 0 0 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1
 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 1 0 1 0 0 0 0 0 1
 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0
 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1
 1 1 1 1 1 1 1

In [9]:
import time

# NOTE(review): machine-specific absolute path handed to the tester as
# `loandra_path`; it must be adjusted when running on another machine.
loandra_path = '/Users/harisrasul/Desktop/loandra'
k = 5
depths = [2,3,4]

# Run one k-fold evaluation per depth and report how long each one took.
# Only the depth varies; the minimum support level is fixed at 0.
for depth in depths:
    # perf_counter() is a monotonic clock, so the measured duration of these
    # long solver runs cannot be distorted by system clock adjustments
    # (time.time() is wall-clock time and may jump).
    start_time = time.perf_counter()
    # Execute the k-fold tester for this depth
    accuracies, mean_score = k_fold_tester_loandra(loandra_path=loandra_path,
                                                    k=k, depth=depth, dataset=dataset, 
                                                    true_labels_for_points=true_labels_for_points, 
                                                    labels=labels, features=features, features_categorical=features_categorical, features_numerical=features_numerical, 
                                                    min_support_level=0, complete_tree= False)
    end_time = time.perf_counter()
    # Elapsed seconds for this depth
    duration = end_time - start_time
    # NOTE(review): the label says "Oblivious", but no tree_structure argument
    # is passed in the call above — confirm the label matches the run.
    print(f"Oblivious Depth: {depth}, Time Taken: {duration:.2f} seconds")
    print(f"Accuracies: {accuracies}")
    print(f"Mean Score: {mean_score}\n")

Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete


  if str(j) in features_numerical:


Iteration complete
Oblivious Depth: 2, Time Taken: 638.60 seconds
Accuracies: [0.84       0.77852349 0.85234899 0.86577181 0.82550336]
Mean Score: 0.8324295302013424



  if str(j) in features_numerical:


KeyboardInterrupt: 

Max Accuracy Categorical

In [None]:
# Path handed to solve_loandra() below.
loandra_path = '/Users/harisrasul/Desktop/loandra'

# Max-accuracy objective at a fixed depth of 2 on the loaded dataset.
# Optional settings currently left disabled: min_support=2, tree_structure='Oblivious'.
max_accuracy_categorical_problem = SATreeCraft(
    classification_objective='max_accuracy',
    fixed_depth=2,
    dataset=dataset,
    features=features,
    features_categorical=features_categorical,
    features_numerical=features_numerical,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
)

# Solve, then report the resulting model and its cost.
max_accuracy_categorical_problem.solve_loandra(loandra_path)
print("Final Model: ", max_accuracy_categorical_problem.model)
print("Min cost found: ", max_accuracy_categorical_problem.min_cost)

Min Height Categorical

In [None]:
# Build the problem with no objective or depth arguments (constructor defaults apply).
min_height_categorical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    features_categorical=features_categorical,
    features_numerical=features_numerical,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
)

# Solve first, then export the CNF and report the model and depth found.
min_height_categorical_problem.solve()
min_height_categorical_problem.export_cnf()
print("Final Model: ", min_height_categorical_problem.model)
print("min depth found: ", min_height_categorical_problem.min_depth)

K-fold Categorical Test

In [None]:
# k-fold evaluation settings for the categorical dataset.
k = 5
depth = 2
# NOTE(review): the two names below are assigned but never passed to
# k_fold_tester in this cell — confirm whether they are still needed.
loandra_path = '/Users/harisrasul/Desktop/loandra'
minimum_support = 0

# Arguments are passed positionally; the optional complete_tree=False stays disabled.
accuracies, mean_score = k_fold_tester(
    k,
    depth,
    dataset,
    true_labels_for_points,
    labels,
    features,
    features_categorical,
    features_numerical,
)

# Per-fold accuracies on the first line, mean score on the second.
print(accuracies, mean_score, sep="\n")