In [2]:
import torch
import pandas
import numpy as np

In [3]:
import pandas
#dataset taken from https://www.kaggle.com/yashsawarn/wifi-stretgth-for-rooms
    
    
def read_dataset(csv_name = 'sao-paulo-properties-april-2019.csv'):
    """
    Reads a csv dataset 
    returns it as a pytorch tensor
    """
    data_frame = pandas.read_csv(csv_name)
        
    #do data preprocessing and return torch matrix with targets in the last column
    """
    Creates a new column with categories using the "price" column
      1) 900000 < y , categoría 4
      2) 580000 < y < 900000, categoría 3
      3) 400000 < y < 580000, categoría 2
      4) y < 400000, categoría 1    
    """
    data_frame.loc[ data_frame['Price'] <  400000, 'Class'] = 1
    data_frame.loc[(data_frame['Price'] >= 400000) & (data_frame['Price'] < 580000), 'Class'] = 2
    data_frame.loc[(data_frame['Price'] >= 580000) & (data_frame['Price'] < 900000), 'Class'] = 3
    data_frame.loc[ data_frame['Price'] >= 900000, 'Class'] = 4
    data_frame['Class'] = data_frame['Class'].astype('int')
    
    #Remove all unnecesary attributes and leave only the ones that we need
    data_frame = data_frame.loc[:, ['Rooms', 'Size', 'Toilets', 'Parking', 'Class']]
    
    return data_frame

In [4]:
class Node_CART:    
    def __init__(self, num_classes = 4, ref_CART = None, current_depth = 0):
        """
        Create the node attributes
        param num_classes: K number of classes to classify
        param ref_cart: reference to the tree containing the node
        param current_depth: current depth of the node in the tree
        """
        self.ref_CART = ref_CART
        self.threshold_value = 0
        self.feature_num = 0
        self.node_right = None
        self.node_left = None
        self.data_torch_partition = None
        self.gini = 0
        self.dominant_class = None
        self.accuracy_dominant_class = None        
        self.num_classes = num_classes
        self.current_depth = current_depth
    
    def to_xml(self, current_str = ""):
        """
        Recursive function to write the node content to an xml formatted string
        param current_str : the xml content so far in the whole tree
        return the string with the node content
        """
        str_node = "<node><thresh>" + str(self.threshold_value) + "</thresh>" + "<feature>" + str(self.feature_num) + "</feature><depth>" + str(self.current_depth)+ "</depth>" 
        str_node += "<gini>" + str(self.gini) + "</gini>"
        if(self.node_right != None):
            str_left = self.node_right.to_xml(current_str)
            str_node += str_left
        if(self.node_left != None):
            str_right = self.node_left.to_xml(current_str)
            str_node += str_right
            
        if(self.is_leaf()):
            str_node += "<dominant_class>" + str(self.dominant_class) + "</dominant_class><acc_dominant_class>"  + str(self.accuracy_dominant_class) + "</acc_dominant_class>"
        str_node += "</node>"
        return str_node
    
    def is_leaf(self):
        """
        Checks whether the node is a leaf
        """
        return (self.node_left == None and self.node_right == None)
    
    def create_with_children(self, data_torch, current_depth, list_selected_features = [], min_gini = 0.000001):
        """
        Creates a node by selecting the best feature and threshold, and if needed, creating its children
        param data_torch: dataset with the current partition to deal with in the node
        param current_depth: depth counter for the node
        param list_selected_features: list of selected features so far for the CART building process
        param min_gini: hyperparmeter selected by the user defining the minimum tolerated gini coefficient for a  node
        return the list of selected features so far
        """        
        #update depth of children
        depth_children = current_depth + 1
        if(depth_children <= self.ref_CART.get_max_depth()):
            num_observations = data_torch.shape[0]            
            #careful with max depth
            #if no threshold and feature were selected, select it using a greedy approach            
            (threshold_value, feature_num, gini) = self.select_best_feature_and_thresh(data_torch, list_features_selected = list_selected_features)
            list_selected_features += [feature_num]
            #store important data in attributes
            self.threshold_value = threshold_value
            self.feature_num = feature_num
            self.data_torch_partition = data_torch
            self.gini = gini            
            num_features = data_torch.shape[1]
            #data_torch_left = torch.zeros(1, num_features)
            #data_torch_right = torch.zeros(1, num_features)
            #create the right and left node data if the current gini is still high            
            if(self.gini > min_gini):
                print(feature_num)
                data_torch_left = data_torch[data_torch[feature_num] < threshold_value]
                data_torch_right = data_torch[data_torch[feature_num] >= threshold_value]
                #if the new partitions have more than min_observations, make them
                if(data_torch_left.shape[0] >= self.ref_CART.get_min_observations() and data_torch_right.shape[0] >= self.ref_CART.get_min_observations()):
                    #add data to the right and left children
                    self.node_right = Node_CART(num_classes = self.num_classes, ref_CART = self.ref_CART, current_depth = depth_children)
                    self.node_left = Node_CART(num_classes = self.num_classes, ref_CART = self.ref_CART, current_depth = depth_children)
                    list_selected_features = self.node_right.create_with_children(data_torch_right, depth_children, list_selected_features = list_selected_features)            
                    self.node_left.create_with_children( data_torch_left, depth_children, list_selected_features = list_selected_features)
        #if is leaf, fill the         
        if(self.is_leaf()):
            print("Is leaf /n", data_torch)
            labels_data = data_torch[:, -1]
            self.dominant_class = torch.mode(labels_data).values.item()
            num_obs_label = labels_data[labels_data == self.dominant_class].shape[0]
            self.accuracy_dominant_class = num_obs_label / labels_data.shape[0]           
            
        return list_selected_features
    
    
    def select_best_feature_and_thresh(self, data_torch, list_features_selected = [], num_classes = 4):
        """
        ONLY USE  2 FORS
        Selects the best feature and threshold that minimizes the gini coefficient
        param data_torch: dataset partition to analyze
        param list_features_selected list of features selected so far, thus must be ignored 
        param num_classes: number of K classes to discriminate from 
        return min_thresh, min_feature, min_gini found for the dataset partition when 
        selecting the found feature and threshold
        """       
        
        #TODO
        #Tenemos que reccorer todos los features y para cada feature todos los values posibles y sacar los ginis
        #return selected cut
        min_thresh = 0
        min_feature = ''
        min_gini = 1
    
        for feature_name in data_torch.iloc[: , :4]:
            current_feature_index = data_torch.columns.get_loc(feature_name)
            for tresh in data_torch[feature_name].unique():
                print(feature_name, current_feature_index, tresh)
                data_torch_left = data_torch[data_torch[feature_name] < tresh]
                data_torch_right = data_torch[data_torch[feature_name] >= tresh]
                #print(data_torch_left, data_torch_right)
                
                gini_left = self.calculate_gini(data_torch_left, feature_name)
                gini_right = self.calculate_gini(data_torch_right, feature_name)
                gini_ponderado = ((data_torch_left.shape[0] / data_torch.shape[0]) * gini_left) + ((data_torch_right.shape[0] / data_torch.shape[0]) * gini_right)
                
                if gini_ponderado < min_gini:
                    min_gini = gini_ponderado
                    min_feature = feature_name
                    min_thresh = tresh
                
        print('Min tresh :',  min_thresh,'Min gini :', min_gini,'Min feature :', min_feature)
        return (min_thresh, min_feature, min_gini)   
        
    
    def calculate_gini(self, data_partition_torch, col = '', num_classes = 4):
        """
        Calculates the gini coefficient for a given partition with the given number of classes
        param data_partition_torch: current dataset partition as a tensor
        param num_classes: K number of classes to discriminate from
        returns the calculated gini coefficient
        """        
        classQty = data_partition_torch.groupby([data_partition_torch[[col]].columns.item()]).size().array
        print(classQty)
        totalQty = data_partition_torch.shape[0]
        gini = 1 - sum( (classQty / totalQty) **2)
        
        return gini
   
    
    def evaluate_node(self, input_torch): 
        """
        Evaluates an input observation within the node. 
        If is not a leaf node, send it to the corresponding node
        return predicted label
        """
        feature_val_input = input_torch[self.feature_num]
        if(self.is_leaf()):
            return self.dominant_class
        else:
            if(feature_val_input < self.threshold_value):
                return self.node_left.evaluate_node(input_torch)
            else:
                return self.node_right.evaluate_node(input_torch)
        

class CART:
    def __init__(self, dataset_torch, max_CART_depth = 4, min_observations = 2):
        """
        CART has only one root node
        """
        #min observations per node
        self.min_observations = min_observations
        self.root = Node_CART(num_classes = 4, ref_CART = self, current_depth = 0)
        self.max_CART_depth = max_CART_depth
        self.list_selected_features = []
        
    def get_root(self):
        """
        Gets tree root
        """
        return self.root
    
    def get_min_observations(self):
        """
        return min observations per node
        """
        return self.min_observations
    
    def get_max_depth(self):
        """
        Gets the selected max depth of the tree
        """
        return self.max_CART_depth
    
    def build_CART(self, data_torch):
        """
        Build CART from root
        """
        self.list_selected_features = self.root.create_with_children(data_torch, current_depth = 0)
    
    def to_xml(self, xml_file_name):
        """
        write Xml file with tree content
        """
        str_nodes = self.root.to_xml()
        file = open(xml_file_name,"w+") 
        file.write(str_nodes)
        file.close()
        return str_nodes
    
    
    def evaluate_input(self, input_torch):
        """
        Evaluate a specific input in the tree and get the predicted class
        """
        return self.root.evaluate_node(input_torch)
        
    
def train_CART(dataset_torch, name_xml = "", max_CART_depth = 3, min_obs_per_leaf = 2): 
    """
    Train CART model
    """
    tree = CART(dataset_torch = dataset_torch, max_CART_depth = max_CART_depth, min_observations =  min_obs_per_leaf)
    tree.build_CART(dataset_torch)
    if(not name_xml == ""):
        tree.to_xml(name_xml)
    return tree

def test_CART(tree, testset_torch):
    """
    Test a previously built CART
    """
    #TODO, use tree.evaluate_input(current_observation) for this
    return accuracy

        

#tree = train_CART(dataset_torch, name_xml = "CART_example.xml")
#acc = test_CART(tree, dataset_torch)
'''
CART_1 =  CART(dataset_torch)
CART_1.to_xml("arbolito_vacio.xml")
nodo_A = Node_CART(num_classes = 2, current_depth = 1)
CART_1.root.node_left = nodo_A
CART_1.to_xml("arbolito_peque.xml")
'''


dataset_torch = read_dataset()
print(dataset_torch)

CART_DATA = CART(dataset_torch)
CART_DATA.build_CART(dataset_torch)
CART_DATA.to_xml("xmlData.xml")
    
        

      Rooms  Size  Toilets  Parking  Class
0         4   343        7        5      4
1         4   343        6        5      4
2         4   420        6        4      4
3         4   278        7        4      4
4         4   278        5        5      4
...     ...   ...      ...      ...    ...
4889      2    47        1        1      1
4890      2    57        1        1      1
4891      2    56        1        1      1
4892      2    48        1        1      1
4893      2    50        1        1      1

[4894 rows x 5 columns]
Rooms 0 4
<PandasArray>
[436, 2199, 1934]
Length: 3, dtype: int64
<PandasArray>
[317, 7, 1]
Length: 3, dtype: int64
Rooms 0 3
<PandasArray>
[436, 2199]
Length: 2, dtype: int64
<PandasArray>
[1934, 317, 7, 1]
Length: 4, dtype: int64
Rooms 0 6
<PandasArray>
[436, 2199, 1934, 317, 7]
Length: 5, dtype: int64
<PandasArray>
[1]
Length: 1, dtype: int64
Rooms 0 2
<PandasArray>
[436]
Length: 1, dtype: int64
<PandasArray>
[2199, 1934, 317, 7, 1]
Length: 5, dtype: i

<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  3,  2,  4,  4,  4,  2,  3,  2,  2,  5]
Length: 201, dtype: int64
<PandasArray>
[ 1,  1,  1,  1,  1,  2,  5,  1,  3,  1,  1,  1,  1,  2,  2, 16,  1,  1,  2,
  1,  3,  4,  2,  1,  5,  1, 10,  1,  1,  1,  2,  2,  1,  1,  6,  1,  1,  3,
  1,  4,  1,  3,  1,  1,  1,  1,  1,  1,  1,  3,  3,  2,  1,  1,  1,  3,  1,
  1,  1,  2,  1,  2,  1,  2,  1,  1,  1]
Length: 67, dtype: int64
Size 1 226
<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  1,  1,  2,  1, 12,  5,  1,  2,  1,  4]
Length: 188, dtype: int64
<PandasArray>
[ 4,  3,  1,  3,  2,  4,  4,  4,  2,  3,  2,  2,  5,  1,  1,  1,  1,  1,  2,
  5,  1,  3,  1,  1,  1,  1,  2,  2, 16,  1,  1,  2,  1,  3,  4,  2,  1,  5,
  1, 10,  1,  1,  1,  2,  2,  1,  1,  6,  1,  1,  3,  1,  4,  1,  3,  1,  1,
  1,  1,  1,  1,  1,  3,  3,  2,  1,  1,  1,  3,  1,  1,  1,  2,  1,  2,  1,
  2,  1,  1,  1]
Length: 80, dtype: int64
Size 1 201
<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21

<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  3,  4,  2,  1,  5,  1, 10,  1,  1,  1]
Length: 231, dtype: int64
<PandasArray>
[2, 2, 1, 1, 6, 1, 1, 3, 1, 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 3, 2, 1, 1, 1, 3,
 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1]
Length: 37, dtype: int64
Size 1 221
<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  3,  2,  9,  2,  4,  1,  1,  2,  1, 12]
Length: 183, dtype: int64
<PandasArray>
[ 5,  1,  2,  1,  4,  4,  3,  1,  3,  2,  4,  4,  4,  2,  3,  2,  2,  5,  1,
  1,  1,  1,  1,  2,  5,  1,  3,  1,  1,  1,  1,  2,  2, 16,  1,  1,  2,  1,
  3,  4,  2,  1,  5,  1, 10,  1,  1,  1,  2,  2,  1,  1,  6,  1,  1,  3,  1,
  4,  1,  3,  1,  1,  1,  1,  1,  1,  1,  3,  3,  2,  1,  1,  1,  3,  1,  1,
  1,  2,  1,  2,  1,  2,  1,  1,  1]
Length: 85, dtype: int64
Size 1 214
<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  3,  1,  1,  3,  4,  3,  2,  9,  2,  4]
Length: 178, dtype: int64
<PandasArray>
[ 1,  1,  2,  1, 12,  5,  1,  2,  1,  4,  4,  3

<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
 26,  1,  9,  7,  5, 14, 10,  5,  7,  4]
Length: 120, dtype: int64
<PandasArray>
[14,  5, 10,  2,  5,  8, 13,  2,  7,  3,
 ...
  1,  1,  2,  1,  2,  1,  2,  1,  1,  1]
Length: 148, dtype: int64
Size 1 223
<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  9,  2,  4,  1,  1,  2,  1, 12,  5,  1]
Length: 185, dtype: int64
<PandasArray>
[ 2,  1,  4,  4,  3,  1,  3,  2,  4,  4,  4,  2,  3,  2,  2,  5,  1,  1,  1,
  1,  1,  2,  5,  1,  3,  1,  1,  1,  1,  2,  2, 16,  1,  1,  2,  1,  3,  4,
  2,  1,  5,  1, 10,  1,  1,  1,  2,  2,  1,  1,  6,  1,  1,  3,  1,  4,  1,
  3,  1,  1,  1,  1,  1,  1,  1,  3,  3,  2,  1,  1,  1,  3,  1,  1,  1,  2,
  1,  2,  1,  2,  1,  1,  1]
Length: 83, dtype: int64
Size 1 191
<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
 10,  8,  2,  4,  7,  7,  6,  1,  6,  7]
Length: 160, dtype: int64
<PandasArray>
[ 2,  3,  5,  1,  3,  2,  3, 14,  3,  1,
 ...
  1,  1,  2,  1,  2,  1,  2,  1,  

<PandasArray>
[ 7,  4, 14,  5, 10,  2,  5,  8, 13,  2,
 ...
  1,  1,  2,  1,  2,  1,  2,  1,  1,  1]
Length: 150, dtype: int64
Size 1 157
<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  5,  7,  4, 14,  5, 10,  2,  5,  8, 13]
Length: 127, dtype: int64
<PandasArray>
[ 2,  7,  3, 23,  2,  5,  7,  5,  8,  6,
 ...
  1,  1,  2,  1,  2,  1,  2,  1,  1,  1]
Length: 141, dtype: int64
Size 1 91
<PandasArray>
[ 12,  10,   4,   8,  15,  23,  19,  12,  21,  21,  30,  15,  25,  13,  20,
  83,  29,  62, 100,  73, 181,  70,  67,  63,  72, 116,  89,  61,  95,  82,
 143,  56,  96, 104, 112, 145,  63,  90,  88,  41, 167,  31,  55,  37,  68,
  68,  46,  31,  60,  22,  62,  27,  39,  48,  22,  40,  24,  23,  33,  17,
  51]
Length: 61, dtype: int64
<PandasArray>
[22, 34, 26, 23, 44, 36, 25, 36,  8, 55,
 ...
  1,  1,  2,  1,  2,  1,  2,  1,  1,  1]
Length: 207, dtype: int64
Size 1 125
<PandasArray>
[ 12,  10,   4,   8,  15,  23,  19,  12,  21,  21,  30,  15,  25,  13,  20,
  83,  29,  62, 100, 

<PandasArray>
[ 12,  10,   4,   8,  15,  23,  19,  12,  21,  21,  30,  15,  25,  13,  20,
  83,  29,  62, 100,  73, 181,  70,  67,  63,  72, 116,  89,  61,  95,  82,
 143,  56,  96, 104, 112, 145,  63,  90,  88,  41, 167,  31,  55,  37,  68,
  68,  46,  31,  60,  22,  62,  27,  39,  48,  22,  40,  24,  23,  33,  17,
  51,  22,  34,  26]
Length: 64, dtype: int64
<PandasArray>
[23, 44, 36, 25, 36,  8, 55, 11,  9, 25,
 ...
  1,  1,  2,  1,  2,  1,  2,  1,  1,  1]
Length: 204, dtype: int64
Size 1 126
<PandasArray>
[ 12,  10,   4,   8,  15,  23,  19,  12,  21,  21,  30,  15,  25,  13,  20,
  83,  29,  62, 100,  73, 181,  70,  67,  63,  72, 116,  89,  61,  95,  82,
 143,  56,  96, 104, 112, 145,  63,  90,  88,  41, 167,  31,  55,  37,  68,
  68,  46,  31,  60,  22,  62,  27,  39,  48,  22,  40,  24,  23,  33,  17,
  51,  22,  34,  26,  23,  44,  36,  25,  36,   8,  55,  11,   9,  25,  19,
  26,  17,  19,  17,  11,  37,   7,  14,  13,   6,  19,   9,  12,  19,  10,
  30,   8,  16,   6,  17,  3

<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  2,  1, 12,  5,  1,  2,  1,  4,  4,  3]
Length: 190, dtype: int64
<PandasArray>
[ 1,  3,  2,  4,  4,  4,  2,  3,  2,  2,  5,  1,  1,  1,  1,  1,  2,  5,  1,
  3,  1,  1,  1,  1,  2,  2, 16,  1,  1,  2,  1,  3,  4,  2,  1,  5,  1, 10,
  1,  1,  1,  2,  2,  1,  1,  6,  1,  1,  3,  1,  4,  1,  3,  1,  1,  1,  1,
  1,  1,  1,  3,  3,  2,  1,  1,  1,  3,  1,  1,  1,  2,  1,  2,  1,  2,  1,
  1,  1]
Length: 78, dtype: int64
Size 1 97
<PandasArray>
[ 12,  10,   4,   8,  15,  23,  19,  12,  21,  21,  30,  15,  25,  13,  20,
  83,  29,  62, 100,  73, 181,  70,  67,  63,  72, 116,  89,  61,  95,  82,
 143,  56,  96, 104, 112, 145,  63,  90,  88,  41, 167,  31,  55,  37,  68,
  68,  46,  31,  60,  22,  62,  27,  39,  48,  22,  40,  24,  23,  33,  17,
  51,  22,  34,  26,  23,  44,  36]
Length: 67, dtype: int64
<PandasArray>
[25, 36,  8, 55, 11,  9, 25, 19, 26, 17,
 ...
  1,  1,  2,  1,  2,  1,  2,  1,  1,  1]
Length: 201, dtype: int64
S

<PandasArray>
[ 95,  82, 143,  56,  96, 104, 112, 145,  63,  90,
 ...
   1,   1,   2,   1,   2,   1,   2,   1,   1,   1]
Length: 240, dtype: int64
Size 1 248
<PandasArray>
[12, 10,  4,  8, 15, 23, 19, 12, 21, 21,
 ...
  2,  3,  2,  2,  5,  1,  1,  1,  1,  1]
Length: 206, dtype: int64
<PandasArray>
[ 2,  5,  1,  3,  1,  1,  1,  1,  2,  2, 16,  1,  1,  2,  1,  3,  4,  2,  1,
  5,  1, 10,  1,  1,  1,  2,  2,  1,  1,  6,  1,  1,  3,  1,  4,  1,  3,  1,
  1,  1,  1,  1,  1,  1,  3,  3,  2,  1,  1,  1,  3,  1,  1,  1,  2,  1,  2,
  1,  2,  1,  1,  1]
Length: 62, dtype: int64
Size 1 50
<PandasArray>
[12, 10, 4, 8, 15, 23, 19, 12, 21, 21, 30, 15, 25, 13, 20, 83, 29, 62, 100,
 73]
Length: 20, dtype: int64
<PandasArray>
[181,  70,  67,  63,  72, 116,  89,  61,  95,  82,
 ...
   1,   1,   2,   1,   2,   1,   2,   1,   1,   1]
Length: 248, dtype: int64
Size 1 61
<PandasArray>
[ 12,  10,   4,   8,  15,  23,  19,  12,  21,  21,  30,  15,  25,  13,  20,
  83,  29,  62, 100,  73, 181,  70,  67,  63,  

Length: 40, dtype: int64
Size 1 377
<PandasArray>
[1, 1, 2, 1, 3, 1, 9, 3, 2, 7,
 ...
 1, 1, 1, 1, 3, 1, 2, 1, 1, 1]
Length: 236, dtype: int64
<PandasArray>
[3, 1, 1, 1, 1, 1, 2, 1, 1, 1]
Length: 10, dtype: int64
Size 1 275
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,
 ...
  1,  1,  2,  2, 16,  1,  1,  2,  1,  3]
Length: 204, dtype: int64
<PandasArray>
[ 4,  2,  1,  4,  1, 10,  1,  1,  1,  1,  1,  1,  1,  5,  1,  1,  3,  1,  2,
  1,  3,  1,  1,  1,  1,  1,  3,  1,  2,  1,  1,  1,  3,  1,  1,  1,  1,  1,
  2,  1,  1,  1]
Length: 42, dtype: int64
Size 1 415
<PandasArray>
[1, 1, 2, 1, 3, 1, 9, 3, 2, 7,
 ...
 1, 2, 1, 1, 1, 3, 1, 1, 1, 1]
Length: 241, dtype: int64
<PandasArray>
[1, 2, 1, 1, 1]
Length: 5, dtype: int64
Size 1 387
<PandasArray>
[1, 1, 2, 1, 3, 1, 9, 3, 2, 7,
 ...
 1, 1, 3, 1, 2, 1, 1, 1, 3, 1]
Length: 238, dtype: int64
<PandasArray>
[1, 1, 1, 1, 2, 1, 1, 1]
Length: 8, dtype: int64
Size 1 400
<PandasArray>
[1, 1, 2, 1, 3, 1, 9, 3, 2, 7,
 ...
 3, 1, 2, 1, 1, 1, 3, 1, 

<PandasArray>
[1, 1, 2, 1, 3, 1, 9, 3, 2, 7,
 ...
 1, 4, 4, 2, 1, 3, 2, 2, 4, 1]
Length: 179, dtype: int64
<PandasArray>
[ 2,  3,  2,  1,  5,  1,  1,  1,  1,  2,  5,  1,  3,  1,  1,  1,  1,  2,  2,
 16,  1,  1,  2,  1,  3,  4,  2,  1,  4,  1, 10,  1,  1,  1,  1,  1,  1,  1,
  5,  1,  1,  3,  1,  2,  1,  3,  1,  1,  1,  1,  1,  3,  1,  2,  1,  1,  1,
  3,  1,  1,  1,  1,  1,  2,  1,  1,  1]
Length: 67, dtype: int64
Size 1 185
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,
 ...
  1,  1,  2,  5,  1, 20,  9,  8,  2,  4]
Length: 139, dtype: int64
<PandasArray>
[7, 7, 5, 1, 6, 6, 1, 2, 5, 1,
 ...
 3, 1, 1, 1, 1, 1, 2, 1, 1, 1]
Length: 107, dtype: int64
Size 1 181
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,
 ...
  1,  7,  4,  1,  1,  1,  2,  5,  1, 20]
Length: 135, dtype: int64
<PandasArray>
[9, 8, 2, 4, 7, 7, 5, 1, 6, 6,
 ...
 3, 1, 1, 1, 1, 1, 2, 1, 1, 1]
Length: 111, dtype: int64
Size 1 294
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,
 ...
  3,  4,  2,  1,  4, 

<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,  4,  7,  5,  5,  8, 16,  6,  5, 11,
 20, 14, 19, 13, 22, 11, 47,  9, 15, 16, 25, 34, 25, 16, 24, 15, 31, 21, 22,
 30, 12, 21, 11, 11, 23, 11, 22, 16, 29, 17, 14, 32, 23, 17, 14,  6]
Length: 55, dtype: int64
<PandasArray>
[31,  5,  8, 21, 15, 16, 13, 13, 11,  9,
 ...
  3,  1,  1,  1,  1,  1,  2,  1,  1,  1]
Length: 191, dtype: int64
Size 1 160
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,
 ...
  9,  5, 10,  2,  5,  7, 12,  1,  6,  3]
Length: 115, dtype: int64
<PandasArray>
[20,  2,  5,  6,  3,  7,  4,  3,  2, 12,
 ...
  3,  1,  1,  1,  1,  1,  2,  1,  1,  1]
Length: 131, dtype: int64
Size 1 208
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,
 ...
  2,  5,  1,  2,  1,  2, 11,  2,  3,  4]
Length: 156, dtype: int64
<PandasArray>
[ 3,  2,  8,  2,  3,  1,  1,  2,  1, 10,  5,  1,  2,  1,  4,  4,  2,  1,  3,
  2,  2,  4,  1,  2,  3,  2,  1,  5,  1,  1,  1,  1,  2,  5,  1,  3,  1,  1,
  1,  1,  2,  2, 16,  1,  1,  2,  1,  3,

<PandasArray>
[ 9,  6,  4, 14,  5,  9, 14, 10, 17,  7,
 ...
  3,  1,  1,  1,  1,  1,  2,  1,  1,  1]
Length: 179, dtype: int64
Size 1 99
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,  4,  7,  5,  5,  8, 16,  6,  5, 11,
 20, 14, 19, 13, 22, 11, 47,  9, 15, 16, 25, 34, 25, 16, 24, 15, 31, 21, 22,
 30, 12, 21, 11, 11, 23, 11, 22, 16, 29, 17, 14, 32, 23, 17, 14]
Length: 54, dtype: int64
<PandasArray>
[ 6, 31,  5,  8, 21, 15, 16, 13, 13, 11,
 ...
  3,  1,  1,  1,  1,  1,  2,  1,  1,  1]
Length: 192, dtype: int64
Size 1 304
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,
 ...
 10,  1,  1,  1,  1,  1,  1,  1,  5,  1]
Length: 219, dtype: int64
<PandasArray>
[1, 3, 1, 2, 1, 3, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 2, 1, 1,
 1]
Length: 27, dtype: int64
Size 1 115
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,  4,  7,  5,  5,  8, 16,  6,  5, 11,
 20, 14, 19, 13, 22, 11, 47,  9, 15, 16, 25, 34, 25, 16, 24, 15, 31, 21, 22,
 30, 12, 21, 11, 11, 23, 11, 22, 16, 29

<PandasArray>
[ 1,  3,  2,  2,  4,  1,  2,  3,  2,  1,  5,  1,  1,  1,  1,  2,  5,  1,  3,
  1,  1,  1,  1,  2,  2, 16,  1,  1,  2,  1,  3,  4,  2,  1,  4,  1, 10,  1,
  1,  1,  1,  1,  1,  1,  5,  1,  1,  3,  1,  2,  1,  3,  1,  1,  1,  1,  1,
  3,  1,  2,  1,  1,  1,  3,  1,  1,  1,  1,  1,  2,  1,  1,  1]
Length: 73, dtype: int64
Size 1 97
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,  4,  7,  5,  5,  8, 16,  6,  5, 11,
 20, 14, 19, 13, 22, 11, 47,  9, 15, 16, 25, 34, 25, 16, 24, 15, 31, 21, 22,
 30, 12, 21, 11, 11, 23, 11, 22, 16, 29, 17, 14, 32, 23]
Length: 52, dtype: int64
<PandasArray>
[17, 14,  6, 31,  5,  8, 21, 15, 16, 13,
 ...
  3,  1,  1,  1,  1,  1,  2,  1,  1,  1]
Length: 194, dtype: int64
Size 1 106
<PandasArray>
[ 1,  1,  2,  1,  3,  1,  9,  3,  2,  7,  4,  7,  5,  5,  8, 16,  6,  5, 11,
 20, 14, 19, 13, 22, 11, 47,  9, 15, 16, 25, 34, 25, 16, 24, 15, 31, 21, 22,
 30, 12, 21, 11, 11, 23, 11, 22, 16, 29, 17, 14, 32, 23, 17, 14,  6, 31,  5,
  8, 21, 15, 16]
Lengt

Length: 1, dtype: int64
Toilets 2 6
<PandasArray>
[39, 886, 348, 289, 170]
Length: 5, dtype: int64
<PandasArray>
[28, 7]
Length: 2, dtype: int64
Toilets 2 5
<PandasArray>
[39, 886, 348, 289]
Length: 4, dtype: int64
<PandasArray>
[170, 28, 7]
Length: 3, dtype: int64
Toilets 2 4
<PandasArray>
[39, 886, 348]
Length: 3, dtype: int64
<PandasArray>
[289, 170, 28, 7]
Length: 4, dtype: int64
Toilets 2 3
<PandasArray>
[39, 886]
Length: 2, dtype: int64
<PandasArray>
[348, 289, 170, 28, 7]
Length: 5, dtype: int64
Toilets 2 2
<PandasArray>
[39]
Length: 1, dtype: int64
<PandasArray>
[886, 348, 289, 170, 28, 7]
Length: 6, dtype: int64
Toilets 2 1
<PandasArray>
[]
Length: 0, dtype: int64
<PandasArray>
[39, 886, 348, 289, 170, 28, 7]
Length: 7, dtype: int64
Parking 3 5
<PandasArray>
[1365, 239, 136]
Length: 3, dtype: int64
<PandasArray>
[23, 3, 1]
Length: 3, dtype: int64
Parking 3 4
<PandasArray>
[1365, 239]
Length: 2, dtype: int64
<PandasArray>
[136, 23, 3, 1]
Length: 4, dtype: int64
Parking 3 7
<Pan

<PandasArray>
[ 1,  1,  1,  1,  1,  3,  1,  1,  1,  1,
 ...
  4,  2,  1,  4,  1, 10,  1,  1,  1,  1]
Length: 142, dtype: int64
<PandasArray>
[1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2,
 1, 1, 1]
Length: 29, dtype: int64
Size 1 267
<PandasArray>
[ 1,  1,  1,  1,  1,  3,  1,  1,  1,  1,
 ...
  1,  4,  1,  2,  1,  1,  1,  2,  2, 14]
Length: 127, dtype: int64
<PandasArray>
[ 1,  1,  2,  1,  3,  4,  2,  1,  4,  1, 10,  1,  1,  1,  1,  1,  1,  3,  1,
  1,  3,  1,  1,  1,  3,  1,  1,  1,  1,  1,  3,  1,  1,  1,  1,  3,  1,  1,
  1,  1,  2,  1,  1,  1]
Length: 44, dtype: int64
Size 1 295
<PandasArray>
[ 1,  1,  1,  1,  1,  3,  1,  1,  1,  1,
 ...
  3,  4,  2,  1,  4,  1, 10,  1,  1,  1]
Length: 141, dtype: int64
<PandasArray>
[1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1,
 2, 1, 1, 1]
Length: 30, dtype: int64
Size 1 298
<PandasArray>
[ 1,  1,  1,  1,  1,  3,  1,  1,  1,  1,
 ...
  2,  1,  4,  1, 10,  1,  1,  1,  1,  1]
Length: 

<PandasArray>
[3, 2, 3, 2, 5, 2, 5, 4, 2, 2,
 ...
 1, 3, 1, 1, 1, 1, 2, 1, 1, 1]
Length: 126, dtype: int64
Size 1 223
<PandasArray>
[1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 2, 1, 5, 4,
 1, 1, 1, 2, 6, 7, 9, 1, 2, 2, 4, 1, 2, 4, 5, 5, 2, 1, 3, 3, 2, 3, 2, 5, 2, 5,
 4, 2, 2, 4, 3, 4, 2, 1, 4, 1, 6, 4, 1, 1, 5, 1, 7, 4, 4, 2, 2, 4, 7, 2, 1, 5,
 4, 1, 1, 1, 2, 5, 2, 3, 4, 3, 2, 5, 2, 2, 1, 1, 1, 1, 8, 5, 1]
Length: 99, dtype: int64
<PandasArray>
[ 2,  1,  4,  3,  2,  1,  3,  2,  2,  4,  1,  2,  3,  2,  5,  1,  1,  1,  1,
  4,  1,  2,  1,  1,  1,  2,  2, 14,  1,  1,  2,  1,  3,  4,  2,  1,  4,  1,
 10,  1,  1,  1,  1,  1,  1,  3,  1,  1,  3,  1,  1,  1,  3,  1,  1,  1,  1,
  1,  3,  1,  1,  1,  1,  3,  1,  1,  1,  1,  2,  1,  1,  1]
Length: 72, dtype: int64
Size 1 191
<PandasArray>
[1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 2, 1, 5, 4,
 1, 1, 1, 2, 6, 7, 9, 1, 2, 2, 4, 1, 2, 4, 5, 5, 2, 1, 3, 3, 2, 3, 2, 5, 2, 5,
 4, 2, 2, 4, 3, 4, 2, 1, 4, 

<PandasArray>
[1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 2, 1, 5, 4,
 1, 1, 1, 2, 6, 7, 9, 1, 2, 2, 4, 1, 2, 4, 5, 5, 2, 1, 3, 3, 2, 3, 2, 5, 2, 5,
 4, 2, 2, 4, 3, 4, 2, 1, 4, 1, 6, 4, 1, 1, 5]
Length: 67, dtype: int64
<PandasArray>
[1, 7, 4, 4, 2, 2, 4, 7, 2, 1,
 ...
 1, 3, 1, 1, 1, 1, 2, 1, 1, 1]
Length: 104, dtype: int64
Size 1 128
<PandasArray>
[1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 2, 1, 5, 4,
 1, 1, 1, 2, 6]
Length: 31, dtype: int64
<PandasArray>
[7, 9, 1, 2, 2, 4, 1, 2, 4, 5,
 ...
 1, 3, 1, 1, 1, 1, 2, 1, 1, 1]
Length: 140, dtype: int64
Size 1 188
<PandasArray>
[1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 2, 1, 5, 4,
 1, 1, 1, 2, 6, 7, 9, 1, 2, 2, 4, 1, 2, 4, 5, 5, 2, 1, 3, 3, 2, 3, 2, 5, 2, 5,
 4, 2, 2, 4, 3, 4, 2, 1, 4, 1, 6, 4, 1, 1, 5, 1, 7, 4, 4, 2, 2, 4, 7, 2]
Length: 76, dtype: int64
<PandasArray>
[ 1,  5,  4,  1,  1,  1,  2,  5,  2,  3,  4,  3,  2,  5,  2,  2,  1,  1,  1,
  1,  8,  5,  1,  2,  1,  

<PandasArray>
[239]
Length: 1, dtype: int64
<PandasArray>
[136, 23, 3, 1]
Length: 4, dtype: int64
Parking 3 7
<PandasArray>
[239, 136, 23, 3]
Length: 4, dtype: int64
<PandasArray>
[1]
Length: 1, dtype: int64
Parking 3 3
<PandasArray>
[]
Length: 0, dtype: int64
<PandasArray>
[239, 136, 23, 3, 1]
Length: 5, dtype: int64
Parking 3 6
<PandasArray>
[239, 136, 23]
Length: 3, dtype: int64
<PandasArray>
[3, 1]
Length: 2, dtype: int64
Min tresh : 4 Min gini : 0.11497726093459086 Min feature : Parking
Parking
Rooms 0 4
<PandasArray>
[3, 39]
Length: 2, dtype: int64
<PandasArray>
[117, 3, 1]
Length: 3, dtype: int64
Rooms 0 3
<PandasArray>
[3]
Length: 1, dtype: int64
<PandasArray>
[39, 117, 3, 1]
Length: 4, dtype: int64
Rooms 0 6
<PandasArray>
[3, 39, 117, 3]
Length: 4, dtype: int64
<PandasArray>
[1]
Length: 1, dtype: int64
Rooms 0 5
<PandasArray>
[3, 39, 117]
Length: 3, dtype: int64
<PandasArray>
[3, 1]
Length: 2, dtype: int64
Rooms 0 2
<PandasArray>
[]
Length: 0, dtype: int64
<PandasArray>
[3, 39

<PandasArray>
[ 2,  2,  2,  1,  1,  1,  6,  5,  2,  1,  2,  2,  1,  3,  1,  2,  1,  1,  1,
  2,  2,  2,  1,  1,  4,  1,  2,  1,  1,  1,  1,  2, 14,  1,  1,  1,  2,  4,
  2,  1,  2,  1,  9,  1,  1,  1,  1,  1,  2,  1,  1,  2,  1,  1,  1,  3,  1,
  1,  1,  1,  3,  1,  1,  1,  3,  1,  1,  1,  1,  1,  1,  1,  1]
Length: 73, dtype: int64
Size 1 238
<PandasArray>
[1, 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1,
 1, 1, 6, 5, 2, 1, 2, 2, 1, 3, 1, 2, 1, 1, 1, 2]
Length: 42, dtype: int64
<PandasArray>
[ 2,  2,  1,  1,  4,  1,  2,  1,  1,  1,  1,  2, 14,  1,  1,  1,  2,  4,  2,
  1,  2,  1,  9,  1,  1,  1,  1,  1,  2,  1,  1,  2,  1,  1,  1,  3,  1,  1,
  1,  1,  3,  1,  1,  1,  3,  1,  1,  1,  1,  1,  1,  1,  1]
Length: 53, dtype: int64
Size 1 236
<PandasArray>
[1, 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1,
 1, 1, 6, 5, 2, 1, 2, 2, 1, 3, 1, 2, 1, 1]
Length: 40, dtype: int64
<PandasArray>
[ 1,  2,  2,  2,  1,  1,  4,  1,  2,  1,  1,  1

<PandasArray>
[5, 6]
Length: 2, dtype: int64
<PandasArray>
[13, 43, 67, 23, 6]
Length: 5, dtype: int64
Toilets 2 1
<PandasArray>
[]
Length: 0, dtype: int64
<PandasArray>
[5, 6, 13, 43, 67, 23, 6]
Length: 7, dtype: int64
Toilets 2 2
<PandasArray>
[5]
Length: 1, dtype: int64
<PandasArray>
[6, 13, 43, 67, 23, 6]
Length: 6, dtype: int64
Parking 3 5
<PandasArray>
[136]
Length: 1, dtype: int64
<PandasArray>
[23, 3, 1]
Length: 3, dtype: int64
Parking 3 4
<PandasArray>
[]
Length: 0, dtype: int64
<PandasArray>
[136, 23, 3, 1]
Length: 4, dtype: int64
Parking 3 7
<PandasArray>
[136, 23, 3]
Length: 3, dtype: int64
<PandasArray>
[1]
Length: 1, dtype: int64
Parking 3 6
<PandasArray>
[136, 23]
Length: 2, dtype: int64
<PandasArray>
[3, 1]
Length: 2, dtype: int64
Min tresh : 5 Min gini : 0.04317200636219041 Min feature : Parking
Parking
Is leaf /n       Rooms  Size  Toilets  Parking  Class
0         4   343        7        5      4
1         4   343        6        5      4
4         4   278        5  

TypeError: '(slice(None, None, None), -1)' is an invalid key