# DataLoader

In [1]:
import os

import numpy as np
import pandas as pd
import openpyxl
from sklearn.preprocessing import LabelEncoder

from utils import * # Dataloader and K-fold mechanism 
from SATreeCraft import * # Tree solver framework
from SATreeClassifier import * 

In [2]:
# Load the wine dataset through the project's data loader.
# `utils` is already star-imported in the first cell, so it is not imported
# a second time here (all imports live in the top cell).
file_path_to_test = 'Datasets/wine/wine.data'
delimiter = ','
label_position = 0  # passed to the loader as the position of the label column

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Bind the loader's arrays to short names; the classification and k-fold
# cells further down read these four globals.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show each array together with its shape as a quick sanity check.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

Features: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12'] (13,)
Labels: [0 1 2] (3,)
True Labels for Points: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] (178,)
Dataset:
 [[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]] (178, 13)


# Classification

In [3]:
# Build a classification problem with the solver's default settings (no
# objective or depth is passed) and solve it. The solved instance exposes the
# resulting tree as `model` and the depth it found as `min_depth`.
min_height_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
)
min_height_numerical_problem.solve()

# Optional CNF export, left disabled:
# min_height_numerical_problem.export_cnf_min_height("dimacs/testPerfectHeight.cnf")

print("Final Model: ", min_height_numerical_problem.model)
print("min depth found: ", min_height_numerical_problem.min_depth)

no solution at depth 1
no solution at depth 2
Final Model:  [{'type': 'branching', 'children': [1, 2], 'feature': '10', 'threshold': 0.915}, {'type': 'branching', 'children': [3, 4], 'feature': '6', 'threshold': 1.585}, {'type': 'branching', 'children': [5, 6], 'feature': '12', 'threshold': 716.0}, {'type': 'branching', 'children': [7, 8], 'feature': '9', 'threshold': 3.8449999999999998}, {'type': 'branching', 'children': [9, 10], 'feature': '12', 'threshold': 627.5}, {'type': 'branching', 'children': [11, 12], 'feature': '6', 'threshold': 0.565}, {'type': 'branching', 'children': [13, 14], 'feature': '9', 'threshold': 3.46}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 2}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 0}, {'type': 'leaf', 'label': 2}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 0}]
min depth found:  3


In [5]:
# Fixed-depth variant: ask for the 'max_accuracy' objective on a depth-2 tree
# with the 'Oblivious' tree structure, then solve and export the encoding.
# `min_support` / `min_margin` are left at the solver's defaults; uncomment the
# lines below to experiment with them.
max_accuracy_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    classification_objective='max_accuracy',
    fixed_depth=2,
    # min_support= 5
    # min_margin  = 10
    tree_structure='Oblivious',
)
max_accuracy_numerical_problem.solve()
max_accuracy_numerical_problem.export_cnf()

print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)

Final Model:  [{'type': 'branching', 'children': [1, 2], 'feature': '0', 'threshold': 12.78}, {'type': 'branching', 'children': [3, 4], 'feature': '6', 'threshold': 0.94}, {'type': 'branching', 'children': [5, 6], 'feature': '6', 'threshold': 1.5750000000000002}, {'type': 'leaf', 'label': 2}, {'type': 'leaf', 'label': 1}, {'type': 'leaf', 'label': 2}, {'type': 'leaf', 'label': 0}]
Min cost found:  11


# LOANDRA

In [None]:
# Re-solve the max-accuracy problem with the external LOANDRA solver.
# The solver location is machine-specific: set the LOANDRA_PATH environment
# variable to point at your own checkout. The original hard-coded path is kept
# as the fallback so existing setups keep working unchanged.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
max_accuracy_numerical_problem.solve_loandra(loandra_path)

print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)

# SKLEARN Integration

In [None]:
# Cross-validation settings handed positionally to k_fold_tester.
k = 5      # presumably the number of folds
depth = 2  # tree depth used for every fold
# NOTE(review): minimum_support is assigned but never passed to k_fold_tester.
minimum_support = 0

# k_fold_tester returns a pair: the per-fold accuracies and their mean.
accuracies, mean_score = k_fold_tester(
    k,
    depth,
    dataset,
    true_labels_for_points,
    labels,
    features,
    # complete_tree= False
)

# One value per line, same output as two separate print calls.
print(accuracies, mean_score, sep="\n")

# Clustering

In [None]:
# Small hand-made 2-feature dataset for the clustering demo.
# NOTE: this rebinds the globals `features` and `dataset`, replacing the wine
# arrays loaded in the DataLoader cell. Re-run that cell before re-running any
# classification or k-fold cell.
features = np.array(['0', '1'])
dataset = np.array([[1, 1], [1, 2], [7, 7], [7, 8], [15, 5], [15, 6]])  # Dataset X
epsilon = 1
k_clusters = 3
depth = 2

# Alternative constraint set, kept for experimentation:
# # ML_pairs = np.array([[4,5],[0,1],[2,3]])
# CL_pairs = np.array([])

# Pairwise constraints given as row indices into `dataset`
# (presumably must-link / cannot-link pairs; confirm against SATreeCraft).
ML_pairs = np.array([])  # defined but not passed below (argument is commented out)
CL_pairs = np.array([[2, 3], [3, 1]])

# Bind the new problem to `clustering_problem` only. The original chained
# assignment also rebound `max_accuracy_numerical_problem`, which silently
# replaced the classification problem created earlier in the notebook.
clustering_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    k_clusters=k_clusters,
    # ML_pairs=ML_pairs,
    CL_pairs=CL_pairs,
    epsilon=epsilon,
    fixed_depth=depth,
)

# Solver location is machine-specific: LOANDRA_PATH overrides it, and the
# original hard-coded path remains the fallback.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
clustering_problem.solve_loandra(loandra_path)

print(clustering_problem.cluster_assignments)
print(clustering_problem.cluster_diameters)

In [None]:
# Second clustering demo: six points on a line, split into two clusters.
# NOTE: like the previous clustering cell, this rebinds the globals `features`
# and `dataset`.
features = np.array(['0'])
dataset = np.array([[0], [2], [3], [9], [11], [19]])  # Dataset X
epsilon = 0
k_clusters = 2
depth = 3

# Pairwise constraints given as row indices into `dataset`
# (presumably must-link / cannot-link pairs; confirm against SATreeCraft).
ML_pairs = np.array([[0, 3]])
CL_pairs = np.array([])

# Fixed typo: the value was 'max_diamete' (missing the trailing 'r').
# NOTE(review): confirm 'max_diameter' against the objective names SATreeCraft
# accepts; an unrecognised name may be rejected or may silently select a
# different objective.
clustering_obj = 'max_diameter'

clustering_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    k_clusters=k_clusters,
    ML_pairs=ML_pairs,
    CL_pairs=CL_pairs,
    epsilon=epsilon,
    fixed_depth=depth,
    clustering_objective=clustering_obj,
    smart_pairs=True,
)
clustering_problem.solve()

# Alternative: solve with the external LOANDRA solver instead.
#loandra_path = '/Users/harisrasul/Desktop/loandra'
#clustering_problem.solve_loandra(loandra_path)

print(clustering_problem.cluster_assignments)
print(clustering_problem.cluster_diameters)


In [None]:
# Display the cost recorded on the most recently built clustering problem.
clustering_problem.min_cost

In [None]:
# Display the solver result stored on the problem
# (presumably the raw satisfying assignment; confirm in SATreeCraft).
clustering_problem.sat_solution