In [3]:
import numpy as np

In [165]:

class tree():
    """A minimal binary regression tree; every node is itself a ``tree``.

    A node stores the training samples that reached it. ``fit`` searches all
    (feature, threshold) pairs for the split ``x[:, feature] < threshold`` that
    minimises the sample-weighted mean squared error of the two sides, then
    builds the children recursively. A terminal node predicts the mean of the
    targets it stores.

    Stopping rules:
      * a node only tries to split while ``i_depth <= max_depth`` (so leaves
        can sit at depth ``max_depth + 1``);
      * a child is split further only if it holds more than
        ``minimum_sample_leaf`` samples, otherwise it is created as a leaf;
      * a node with no split that puts at least one sample on each side
        becomes a leaf.
    """

    def __init__(
        self,
        max_depth: float,
        i_depth: float,
        minimum_sample_leaf: float,
        y_val: np.ndarray,
        x_val: np.ndarray,
        is_terminal: bool,
    ):
        """Create a node.

        Args:
            max_depth: Deepest level at which a node may still split.
            i_depth: Depth of this node (the caller passes 0 for the root).
            minimum_sample_leaf: A child with this many samples or fewer is
                not split any further.
            y_val: 1-D array of targets for the samples in this node.
            x_val: 2-D array (samples x features) aligned with ``y_val``.
            is_terminal: Whether this node is a leaf.
        """
        self.max_depth = max_depth
        self.minimum_sample_leaf = minimum_sample_leaf
        self.i_depth = i_depth
        self.is_terminal = is_terminal

        self.y_val = y_val
        self.x_val = x_val
        # Split chosen by fit(); both stay None on leaves.
        self.best_feature = None
        self.best_feature_value = None

        # Children created by fit(); both stay None on leaves.
        self.l_tree = None
        self.r_tree = None

    def obj_fun(self, l_values, r_values):
        """Return the sample-weighted mean squared error of a candidate split.

        Each side's MSE is measured around that side's own mean and weighted
        by its sample count, which equals the total squared error of both
        sides divided by the total number of samples.

        Args:
            l_values: Targets that fall on the left side.
            r_values: Targets that fall on the right side.

        Returns:
            The weighted MSE as a float; 0.0 when both sides are empty.
        """
        l_values = np.asarray(l_values, dtype=float)
        r_values = np.asarray(r_values, dtype=float)

        total = len(l_values) + len(r_values)
        if total == 0:
            return 0.0

        # An empty side contributes no error; skipping it also avoids the
        # RuntimeWarning np.mean raises on an empty array.
        l_sse = np.sum((l_values - l_values.mean()) ** 2) if len(l_values) else 0.0
        r_sse = np.sum((r_values - r_values.mean()) ** 2) if len(r_values) else 0.0

        return float((l_sse + r_sse) / total)

    def _make_child(self, x_child, y_child):
        """Build the child node for one side of the split, fitting it if allowed."""
        can_split = len(y_child) > self.minimum_sample_leaf
        child = tree(
            max_depth=self.max_depth,
            i_depth=self.i_depth + 1,
            minimum_sample_leaf=self.minimum_sample_leaf,
            y_val=y_child,
            x_val=x_child,
            is_terminal=not can_split,
        )
        if can_split:
            child.fit()
        return child

    def fit(self, ):
        """Find the best split for this node and grow both subtrees.

        Marks the node terminal instead when it is deeper than ``max_depth``
        or when no threshold separates the samples into two non-empty groups.
        Calling ``fit`` again rebuilds the subtree from scratch.
        """
        # Start clean so a repeated fit() never reuses a stale split.
        self.best_feature = None
        self.best_feature_value = None
        self.l_tree = None
        self.r_tree = None

        # Check depth first: no point searching for a split we may not use.
        if self.i_depth > self.max_depth:
            self.is_terminal = True
            return

        n_samples = len(self.y_val)
        best_score = None
        for feature in range(self.x_val.shape[1]):
            column = self.x_val[:, feature]
            # np.unique yields sorted thresholds, so ties resolve deterministically.
            for threshold in np.unique(column):
                left_ind = column < threshold
                n_left = np.count_nonzero(left_ind)
                # A split with an empty side would create a leaf with no data.
                if n_left == 0 or n_left == n_samples:
                    continue

                score = self.obj_fun(self.y_val[left_ind], self.y_val[~left_ind])
                if best_score is None or score < best_score:
                    self.best_feature = feature
                    self.best_feature_value = threshold
                    best_score = score

        if best_score is None:
            # Nothing separates the samples (e.g. every feature is constant).
            self.is_terminal = True
            return

        # Assign the samples to the left/right children using the best split found.
        left_ind = self.x_val[:, self.best_feature] < self.best_feature_value
        self.l_tree = self._make_child(self.x_val[left_ind], self.y_val[left_ind])
        self.r_tree = self._make_child(self.x_val[~left_ind], self.y_val[~left_ind])

    def i_pred(self, x_data):
        """Predict the target for a single sample.

        Args:
            x_data: 1-D sequence with one value per training feature.

        Returns:
            The mean target of the leaf the sample lands in.

        Raises:
            Exception: If ``x_data`` does not have one value per training feature.
            RuntimeError: If the node is not a leaf and has no children yet.
        """
        n_features = self.x_val.shape[1]
        if len(x_data) != n_features:
            raise Exception(f'입력된 자료의 차원이 {len(x_data)} 입니다. 학습된 자료의 차원 {n_features}과 일치시켜야 합니다.') 

        if self.is_terminal:
            return np.mean(self.y_val)

        if self.l_tree is None or self.r_tree is None:
            raise RuntimeError('tree is not fitted; call fit() before predicting')

        # Same rule as in fit(): strictly smaller values go left.
        if x_data[self.best_feature] < self.best_feature_value:
            return self.l_tree.i_pred(x_data)
        return self.r_tree.i_pred(x_data)

    def prediction(self, x_arr):
        """Predict every row of ``x_arr`` and return the results as a list."""
        return [self.i_pred(row) for row in np.asarray(x_arr)]

    def get_tree_structure(self):
        """Return the fitted tree as nested dicts.

        Each dict holds the node's split feature, split value, terminal flag
        and depth; non-leaf nodes also carry ``'l_tree'`` and ``'r_tree'``.
        """
        def get_info_dic(i_tree):
            result = {
                'best_feature': i_tree.best_feature,
                'best_feature_value': i_tree.best_feature_value,
                'terminal': i_tree.is_terminal,
                'depth': i_tree.i_depth,
            }

            if i_tree.l_tree is not None:
                result['l_tree'] = get_info_dic(i_tree.l_tree)

            if i_tree.r_tree is not None:
                result['r_tree'] = get_info_dic(i_tree.r_tree)

            return result

        return get_info_dic(self)


In [166]:
# Toy data: 7 samples x 3 features holding the integers 1..21 row by row,
# with targets 1..7 (one per row).
x = np.arange(1, 22).reshape(7, 3)
y = np.arange(1, 8)

# Root node (depth 0) of the regression tree, then grow it on the toy data.
model = tree(
    max_depth=5,
    i_depth=0,
    minimum_sample_leaf=2,
    x_val=x,
    y_val=y,
    is_terminal=False,
)
model.fit()

In [167]:
# Display the fitted tree as nested dicts: split feature, split value,
# terminal flag and depth for every node.
model.get_tree_structure()

{'best_feature': 0,
 'best_feature_value': 10,
 'terminal': False,
 'depth': 0,
 'l_tree': {'best_feature': 0,
  'best_feature_value': 4,
  'terminal': False,
  'depth': 1,
  'l_tree': {'best_feature': None,
   'best_feature_value': None,
   'terminal': True,
   'depth': 2},
  'r_tree': {'best_feature': None,
   'best_feature_value': None,
   'terminal': True,
   'depth': 2}},
 'r_tree': {'best_feature': 0,
  'best_feature_value': 16,
  'terminal': False,
  'depth': 1,
  'l_tree': {'best_feature': None,
   'best_feature_value': None,
   'terminal': True,
   'depth': 2},
  'r_tree': {'best_feature': None,
   'best_feature_value': None,
   'terminal': True,
   'depth': 2}}}

In [173]:
# Training rows, for reference:
# [1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21]

# Predict a single sample that is identical to the fourth training row.
model.prediction(np.array([[10,11,12]]))

[4.5]