In [7]:
import numpy as np
from sklearn.metrics import r2_score

In [9]:
class DecisionTree:
    def __init__(self, size):
        self.size = size
        self.root = None
        
    def fit(self, X, y, level = 0):
        if len(X) == 0:
            return None
        
        M = X.shape[1]
        
        best_t = None
        best_i = None
        best_c1 = None
        best_c2 = None
        best_X1, best_y1, best_X2, best_y2 = None, None, None, None
        error = None
        best_error = np.inf
        
        for i in range(M):
            for t in X[:, i]:
                X1 = X[X[:, i] <= t]
                y1 = y[X[:, i] <= t]
                
                X2 = X[X[:, i] > t]
                y2 = y[X[:, i] > t]
                
                c1 = np.mean(y1)
                c2 = np.mean(y2)
                
                
                error = np.sum((y1 - c1) ** 2) + np.sum((y2 - c2) ** 2)
                if error < best_error:
                    best_error = error
                    best_i = i
                    best_t = t
                    best_c1 = c1
                    best_c2 = c2
                    best_X1, best_y1, best_X2, best_y2 = X1, y1, X2, y2
            
        left, right = None, None
        if level < self.size:
            left = self.fit(best_X1, best_y1, level = level + 1)
            right = self.fit(best_X2, best_y2, level = level + 1)
                
        node = {"feature" : best_i , "t" : best_t, "c1": best_c1, "c2": best_c2, "left" : left, "right" : right}
            
        if level == 0:
            self.root = node
        
        return node
                
               
    def predict(self, X):
        y_pred = []
        
        for x in X:
            node = self.root
            y = None
            while node is not None:
                if x[node["feature"]] <= node["t"]:
                    y = node["c1"]
                    node = node["left"]
                else:
                    y = node["c2"]
                    node = node["right"]
            y_pred.append(y)
        
        y_pred = np.array(y_pred)
        return y_pred
                    
                    
    
    def score(self, X, y_true):
        y_pred = self.predict(X)
        return r2_score(y_true, y_pred)

In [10]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=100, n_features=5, noise=1, random_state = 42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42)

In [11]:
reg = DecisionTree(size = 3)
reg.fit(X_train, y_train)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


{'feature': 1,
 't': 0.3436182895684614,
 'c1': -60.11328625383147,
 'c2': 107.81404507626624,
 'left': {'feature': 0,
  't': -0.16128571166600914,
  'c1': -126.28643508220091,
  'c2': 0.30567571989718034,
  'left': {'feature': 1,
   't': -0.718444221252436,
   'c1': -220.28734566291834,
   'c2': -88.68607084991396,
   'left': {'feature': 1,
    't': -0.9080240755212109,
    'c1': -300.1468653536479,
    'c2': -180.35758581755357,
    'left': None,
    'right': None},
   'right': {'feature': 4,
    't': -1.6064463202575725,
    'c1': -182.00601274038084,
    'c2': -65.35608537729725,
    'left': None,
    'right': None}},
  'right': {'feature': 1,
   't': -1.245738778711988,
   'c1': -128.93177075878958,
   'c2': 27.5135591890944,
   'left': {'feature': 2,
    't': -1.2208436499710222,
    'c1': -256.3655171644758,
    'c2': -86.45385529022751,
    'left': None,
    'right': None},
   'right': {'feature': 3,
    't': -0.18565897666381712,
    'c1': -55.19135940272465,
    'c2': 65.6850

In [12]:
print(reg.score(X_train, y_train))
print(reg.score(X_test, y_test))

0.8637606855998137
0.5715166714193223
