In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the Boston housing data as a (features, target) pair.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on an older scikit-learn — confirm the pinned version.
X, y = load_boston(return_X_y=True)

In [3]:
# shuffle=False keeps the original row order: the first 75% of the rows are
# used for training and the last 25% for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=False,
)

In [4]:
def write_answer(task_num, ans, verbose=True):
    """Save an answer to ``boosting_task{task_num}.txt`` in the working directory.

    Parameters
    ----------
    task_num : int or str
        Task identifier interpolated into the output file name.
    ans : object
        The answer. It is converted with ``str`` before being written, so
        numbers can be passed directly as well as pre-formatted strings.
    verbose : bool, default=True
        When true, the answer is also printed.
    """
    # Convert once so the printed text and the file contents always agree, and
    # so a non-string answer no longer makes ``f.write`` raise TypeError.
    text = str(ans)
    if verbose:
        print(text)
    with open(f"boosting_task{task_num}.txt", 'w') as f:
        f.write(text)

In [5]:
class SimpleBoosting:
    """Minimal boosting regressor built from repeated fits of a base estimator.

    Stage ``i`` fits a fresh base estimator to the residual between ``y`` and
    the current ensemble prediction, and is then added to the ensemble with
    the decaying weight ``0.9 / (1 + i)``.

    Parameters
    ----------
    estimator : callable
        Factory (typically an estimator class) called as
        ``estimator(**estimator_params)``. The object it returns must provide
        ``fit(X, y)`` and ``predict(X)``.
    N : int, default=50
        Number of boosting stages.
    estimator_params : dict or None, default=None
        Keyword arguments passed to every base estimator. ``None`` keeps the
        previously hard-coded settings ``max_depth=5, random_state=42``.
    """

    def __init__(self, estimator, N=50, estimator_params=None):
        self.estimator = estimator
        self.N = N
        if estimator_params is None:
            estimator_params = {"max_depth": 5, "random_state": 42}
        # Copy so that later changes to the caller's dict do not leak in.
        self.estimator_params = dict(estimator_params)

    def fit(self, X, y):
        """Fit ``N`` base estimators sequentially on the running residual.

        Populates ``self.estimators`` (fitted base models) and ``self.coefs``
        (their weights), discarding any previous fit.

        Returns
        -------
        self
        """
        self.estimators = []
        self.coefs = []
        # Weighted sum of the stages fitted so far, evaluated on X. Keeping it
        # as a running total means each base model predicts on X once, instead
        # of re-evaluating every earlier model at every stage.
        ensemble_pred = None
        for i in range(self.N):
            model = self.estimator(**self.estimator_params)
            # The first stage sees the raw target; later stages see the residual.
            target = y if ensemble_pred is None else y - ensemble_pred
            model.fit(X, target)
            coef = 0.9 / (1.0 + i)
            self.estimators.append(model)
            self.coefs.append(coef)
            step = coef * model.predict(X)
            ensemble_pred = step if ensemble_pred is None else ensemble_pred + step
        return self

    def predict(self, X):
        """Return the weighted sum of all base-estimator predictions for ``X``.

        ``self.coefs[i]`` is a scalar and ``self.estimators[i].predict(X)`` is
        expected to have shape ``(X.shape[0],)``; the per-stage terms are
        stacked and summed over the stage axis. An ensemble with no stages
        predicts zeros.
        """
        if not self.estimators:
            return np.zeros(np.shape(X)[0])
        weighted = [coef * model.predict(X)
                    for coef, model in zip(self.coefs, self.estimators)]
        return np.sum(weighted, axis=0)

In [6]:
# Boosted ensemble of decision-tree regressors with the default number of stages.
boost = SimpleBoosting(estimator=DecisionTreeRegressor)

In [7]:
# Train on the first 75% of the rows; the trailing semicolon keeps the cell output empty.
boost.fit(X=X_train, y=y_train);

In [8]:
# RMSE of the boosted ensemble on the held-out rows, saved as the answer to task 3.
boost_test_pred = boost.predict(X_test)
mse = mean_squared_error(y_test, boost_test_pred)
write_answer(3, str(mse ** 0.5))

4.812550945781193


In [10]:
from sklearn.linear_model import LinearRegression

In [11]:
# Baseline: ordinary least squares on the same split, scored by RMSE (task 5).
lr = LinearRegression().fit(X_train, y_train)
lr_test_pred = lr.predict(X_test)
mse = mean_squared_error(y_test, lr_test_pred)
write_answer(5, str(mse ** 0.5))

8.25497975354908
