In [40]:
import numpy as np
import pandas as pd
# NOTE(review): the `california_housing` submodule is a legacy import path that newer
# scikit-learn releases no longer expose; drop this line once nothing else relies on it.
from sklearn.datasets import california_housing
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor


In [15]:
# Load the California housing data through scikit-learn's public loader rather than
# the legacy `california_housing` submodule path.
housing_dataset = fetch_california_housing()

# Build the feature frame with its column names in a single step.
X = pd.DataFrame(housing_dataset.data, columns=housing_dataset.feature_names)
y = housing_dataset.target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2018)

In [16]:
# NOTE(review): this estimator is created but never fitted or used in the cells
# visible here -- presumably a leftover baseline; confirm before removing.
lr = LinearRegression()

In [59]:
class myModel:
    """Small boosted-tree regressor: each tree is fitted to the residuals left by its predecessors.

    The model starts from the mean of the training targets and then fits
    ``n_estimators`` decision trees one after another, each on whatever error
    the ensemble built so far still makes on the training data. Training
    happens in the constructor.

    Attributes:
        C: Mean of the training targets; the starting point of every prediction.
        trees: Fitted ``DecisionTreeRegressor`` instances, in fitting order.
        learning_rate: Factor applied to every tree's output.
    """

    def __init__(self, X, y, n_estimators=10, max_depth=4, learning_rate=1.0,
                 random_state=None):
        """Fit the ensemble on ``X`` / ``y``.

        Args:
            X: Training features, passed unchanged to each tree's ``fit`` / ``predict``.
            y: Training targets (any 1-D sequence convertible to a float array).
            n_estimators: Number of trees to fit; ``0`` yields a mean-only model.
            max_depth: Depth limit handed to every tree (default 4, as before).
            learning_rate: Shrinkage applied to each tree's contribution. The
                default ``1.0`` reproduces the unshrunk behaviour.
            random_state: Forwarded to every tree so repeated fits can be made
                reproducible; ``None`` keeps the estimator's default behaviour.

        Raises:
            ValueError: If ``y`` is empty.
        """
        targets = np.asarray(y, dtype=float)
        if targets.size == 0:
            # Fail with a clear message instead of an opaque division by zero.
            raise ValueError("y must contain at least one target value")

        self.learning_rate = learning_rate
        self.C = targets.mean()
        self.trees = []

        # What the current ensemble still gets wrong on the training set.
        residuals = targets - self.C
        for _ in range(n_estimators):
            tree_model = DecisionTreeRegressor(max_depth=max_depth, random_state=random_state)
            tree_model.fit(X, residuals)
            self.trees.append(tree_model)
            # Remove the part of the error this tree explains before fitting the next one.
            residuals = residuals - learning_rate * tree_model.predict(X)

    def predict(self, x):
        """Return predictions for the rows of ``x`` as a list with one value per row.

        Each prediction is the training-target mean plus the (scaled) sum of
        all tree outputs for that row.
        """
        boost = np.zeros(len(x), dtype=float)
        for tree in self.trees:
            boost += self.learning_rate * tree.predict(x)
        # Callers receive a plain list, exactly as before.
        return list(boost + self.C)
        

In [60]:
# Train the boosted ensemble on the training split using ten trees.
my_model = myModel(X=X_train, y=y_train, n_estimators=10)

In [61]:
# Predict targets for the held-out rows.
y_pred = my_model.predict(x=X_test)

In [62]:
# Coefficient of determination of the predictions on the held-out split.
r2_score(y_true=y_test, y_pred=y_pred)

0.77913420947889755