# DataLoader

In [1]:
import os

import numpy as np
import pandas as pd
import openpyxl
from sklearn.preprocessing import LabelEncoder

from utils import * # Dataloader and K-fold mechanism 
from SATreeCraft import * # Tree solver framework
from SATreeClassifier import * 

In [None]:
from utils import *

# Input file and parsing options handed to the loader.
# NOTE(review): label_position is presumably the column index of the class
# label within each row — confirm against TreeDataLoaderBinaryNumerical.
file_path_to_test = 'Datasets/wine/wine.data'
delimiter = ','
label_position = 0

data_loader = TreeDataLoaderBinaryNumerical(
    file_path=file_path_to_test,
    delimiter=delimiter,
    label_position=label_position,
)

# Pull the loader's four arrays into notebook-level names; the classification
# cells further down read these names directly.
features = data_loader.features
labels = data_loader.labels
true_labels_for_points = data_loader.true_labels_for_points
dataset = data_loader.dataset

# Show each array together with its shape as a quick sanity check.
print("Features:", features, features.shape)
print("Labels:", labels, labels.shape)
print("True Labels for Points:", true_labels_for_points, true_labels_for_points.shape)
print("Dataset:\n", dataset, dataset.shape)

# Classification

In [None]:
# Build a SATreeCraft problem from the loaded data without choosing an
# objective or a fixed depth, solve it, and report the `model` and `min_depth`
# attributes it exposes afterwards.
min_height_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
)
min_height_numerical_problem.solve()
# Optional CNF dump, left disabled:
# min_height_numerical_problem.export_cnf_min_height("dimacs/testPerfectHeight.cnf")
print("Final Model: ", min_height_numerical_problem.model)
print("min depth found: ", min_height_numerical_problem.min_depth)

In [None]:
# Same data as the previous problem, but with the 'max_accuracy' objective on
# a depth-2 tree using the 'Oblivious' structure. After solving, the problem
# is exported as CNF and the `model` / `min_cost` attributes are printed.
max_accuracy_numerical_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    labels=labels,
    true_labels_for_points=true_labels_for_points,
    classification_objective='max_accuracy',
    fixed_depth=2,
    # Optional arguments left disabled: min_support=5, min_margin=10
    tree_structure='Oblivious',
)
max_accuracy_numerical_problem.solve()
max_accuracy_numerical_problem.export_cnf()
print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)

# LOANDRA

In [None]:
# Re-solve the max-accuracy problem through `solve_loandra`, which takes the
# location of a LOANDRA installation (presumably the solver's checkout
# directory — confirm against SATreeCraft.solve_loandra).
#
# That location is machine-specific, so it can be overridden with the
# LOANDRA_PATH environment variable; when the variable is unset the original
# hard-coded path is used, keeping the previous behaviour.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
max_accuracy_numerical_problem.solve_loandra(loandra_path)
print("Final Model: ", max_accuracy_numerical_problem.model)
print("Min cost found: ", max_accuracy_numerical_problem.min_cost)

# SKLEARN Integration

In [None]:
# k-fold evaluation settings: number of folds and the tree depth handed to
# k_fold_tester.
k = 5
depth = 2
# NOTE(review): minimum_support is defined here but never passed to
# k_fold_tester below — confirm whether it was meant to be an argument.
minimum_support = 0

accuracies, mean_score = k_fold_tester(
    k,
    depth,
    dataset,
    true_labels_for_points,
    labels,
    features,
    # complete_tree= False
)

# One value per line: first the `accuracies` result, then `mean_score`.
print(accuracies, mean_score, sep="\n")

# Clustering

In [None]:
# Toy clustering example: six 2-D points with features named '0' and '1'.
# NOTE: this rebinds `features` and `dataset`, replacing the arrays produced by
# the data-loading cell; re-run that cell before returning to classification.
features = np.array(['0', '1'])
dataset = np.array([[1, 1], [1, 2], [7, 7], [7, 8], [15, 5], [15, 6]])  # Dataset X
epsilon = 1
k_clusters = 3
depth = 2

# Pairwise constraints given as pairs of row indices into `dataset`
# (presumably must-link / cannot-link — confirm against SATreeCraft).
# Alternative configuration kept for reference:
#   ML_pairs = np.array([[4,5],[0,1],[2,3]])
#   CL_pairs = np.array([])
ML_pairs = np.array([])
CL_pairs = np.array([[2, 3], [3, 1]])

# The original cell chained this assignment through
# `max_accuracy_numerical_problem` as well, which silently replaced the
# classification problem built earlier with this clustering problem. Only
# `clustering_problem` is bound now.
clustering_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    k_clusters=k_clusters,
    # ML_pairs=ML_pairs,  # left disabled, as in the original cell
    CL_pairs=CL_pairs,
    epsilon=epsilon,
    fixed_depth=depth,
)

# The LOANDRA location is machine-specific: allow an override through the
# LOANDRA_PATH environment variable, falling back to the original path.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
clustering_problem.solve_loandra(loandra_path)
print(clustering_problem.cluster_assignments)
print(clustering_problem.cluster_diameters)

In [18]:
# Same six-point toy dataset, now split into two clusters with a depth-3 tree
# and the 'max_diameter' clustering objective.
# NOTE: this rebinds `features` and `dataset` at notebook level.
features = np.array(['0', '1'])
dataset = np.array([[1, 1], [1, 2], [7, 7], [7, 8], [15, 5], [15, 6]])  # Dataset X
epsilon = 1
k_clusters = 2
depth = 3
clustering_obj = 'max_diameter'

# Pairwise constraints given as pairs of row indices into `dataset`
# (presumably must-link / cannot-link — confirm against SATreeCraft).
# Alternative configuration kept for reference:
#   ML_pairs = np.array([[4,5],[0,1],[2,3]])
#   CL_pairs = np.array([])
ML_pairs = np.array([])
CL_pairs = np.array([[2, 3]])

clustering_problem = SATreeCraft(
    dataset=dataset,
    features=features,
    k_clusters=k_clusters,
    ML_pairs=ML_pairs,
    CL_pairs=CL_pairs,
    epsilon=epsilon,
    fixed_depth=depth,
    clustering_objective=clustering_obj,
)

# `clustering_problem.solve()` is the alternative entry point, left disabled
# here in favour of `solve_loandra`.
# The LOANDRA location is machine-specific: allow an override through the
# LOANDRA_PATH environment variable, falling back to the original path.
loandra_path = os.environ.get('LOANDRA_PATH', '/Users/harisrasul/Desktop/loandra')
clustering_problem.solve_loandra(loandra_path)
print(clustering_problem.cluster_assignments)
print(clustering_problem.cluster_diameters)

{0: [3, 4, 5], 1: [0, 1, 2]}
{0: 8.54400374531753, 1: 8.48528137423857}
