In [1]:
import pandas as pd
import numpy as np
from sklearn import datasets

In [2]:
data = datasets.load_boston()
X = data.data
y = data.target

In [36]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.25)

In [47]:
def loss(y, z):
    return (y - z) ** 2
def loss_der_modified(y, z):
    return -1*(y - z)
def antigrad(y, z):
    return y - z

In [48]:
def gbm_predict(X):
    return [sum([coeff * algo.predict([x])[0] for algo, coeff in zip(trees, coeffs)]) for x in X]

In [54]:
from sklearn.tree import DecisionTreeRegressor
trees = []
coeffs = []
current_y = y_train
for i in np.arange(50):
    current_coeff = 0.9
    coeffs.append(current_coeff)
    current_tree = DecisionTreeRegressor(max_depth=5, random_state=42)
    current_tree.fit(X_train, current_y)
    trees.append(current_tree)
    current_y = antigrad(y_train, gbm_predict(X_train))

In [55]:
def write_answer(answer, num):
    with open("answer" + str(num) + ".txt", "w") as fw:
        fw.write(str(answer))

In [56]:
from sklearn.metrics import mean_squared_error
RMSE_1 = np.sqrt(mean_squared_error(y_test, gbm_predict(X_test)))
print(RMSE_1)

5.455565103009402


In [57]:
write_answer(RMSE_1, 2)

In [58]:
trees = []
coeffs = []
current_y = y_train
for i in np.arange(50):
    current_coeff = 0.9/(1 + i)
    coeffs.append(current_coeff)
    current_tree = DecisionTreeRegressor(max_depth=5, random_state=42)
    current_tree.fit(X_train, current_y)
    trees.append(current_tree)
    current_y = antigrad(y_train, gbm_predict(X_train))

In [59]:
RMSE_2 = np.sqrt(mean_squared_error(y_test, gbm_predict(X_test)))
print(RMSE_2)
write_answer(RMSE_2, 3)

4.812550945781193


In [85]:
from sklearn.ensemble import GradientBoostingRegressor
iters = np.arange(10, 3000, 100)
depths = np.arange(2, 51, 3)
depths_results = []
iters_results = []

In [88]:
for it in iters:
    model = GradientBoostingRegressor(n_estimators=it, learning_rate=0.01)
    model.fit(X_train, y_train)
    iters_results.append(np.sqrt(mean_squared_error(y_test, model.predict(X_test))))

In [89]:
print(iters_results)

[5.7431636871353415, 5.311915763824368, 5.20228805103377, 5.318979737839098, 5.202139243594489, 5.19061885174173, 5.179134338893995, 5.28575621254104, 5.191076765267676, 5.19645550910136, 5.278209218713519, 4.802105159008978, 4.774005637888815, 5.232948176525001, 5.402685549057301, 5.204880932521157, 5.196204114940838, 5.171260373432692, 5.179130952728364, 5.226394157362567, 5.280767009770814, 5.310465051048612, 5.155923398869424, 5.1607392200539675, 4.838789199621357, 5.460621675161659, 5.45205159888912, 5.178639742839731, 4.824368042773698, 5.442620131795749, 10.827402095640828, 7.4127267011334546, 6.192092321554503, 5.727562952418692, 5.390889430264894, 5.013941198329257, 4.86404701151674, 4.7596196406953215, 4.674442307969077, 4.645703982902233, 4.608463271516643, 4.557580813569979, 4.503156169091718, 4.498330298734472, 4.473446935652927, 4.473826171731815, 4.419882472979262, 4.436328723051601, 4.436123304447348, 4.4263479730258615, 4.395270007261006, 4.398286036827399, 4.370427239

In [77]:
for depth in depths:
    model = GradientBoostingRegressor(max_depth=depth)
    model.fit(X_train, y_train)
    depths_results.append(np.sqrt(mean_squared_error(y_test, model.predict(X_test))))

In [78]:
print(depths_results)

[5.018147158357345, 4.914942950195539, 5.52279376565664, 5.760550180894194, 5.594264144132336, 5.4839408120488295, 5.801492560894895, 5.595203285512259, 5.8243258260841015, 5.631476171889868, 5.735529442805163, 5.6659651070462065, 5.601194096066907, 5.775731995272557, 5.840392934406474, 5.892506192496567, 5.81040002047448]


In [82]:
write_answer(str(2) + " " + str(3), 4)

In [90]:
from sklearn.linear_model import LinearRegression

In [91]:
model = LinearRegression()
model.fit(X_train, y_train)
RMSE_4 = np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
print(RMSE_4)

8.254979753549085


In [92]:
write_answer(RMSE_4, 5)

In [3]:
arr = np.zeros(15)

In [4]:
def set(arr, L, R, val):
    for i in range(L, R):
        arr[i] = val
    return arr
def get_sum(arr, L, R):
    sum = 0
    for i in range(L, R):
        sum = sum + arr[L]
    return sum
def add(arr, L, R, val):
    for i in range(L, R):
        arr[i] = arr[i] + val
    return arr

In [11]:
arr = np.zeros(15)
arr = set(arr, 1, 15, -2)
print(arr)
arr = set(arr, 2, 4, 3)
arr = set(arr, 6, 8, 1)
arr = add(arr, 4, 10, get_sum(arr, 4, 10))
arr = add(arr, 2, 9, get_sum(arr, 5, 13))
arr = add(arr, 3, 14, -get_sum(arr, 4, 10))
arr = add(arr, 10, 11, -get_sum(arr, 5, 13))
print(str(get_sum(arr, 0, 15)) + " ")
print(str(get_sum(arr, 1, 5)) + " ")
print(str(get_sum(arr, 4, 10)) + " ")
print(str(get_sum(arr, 6, 14)) + " ")

[ 0. -2. -2. -2. -2. -2. -2. -2. -2. -2. -2. -2. -2. -2. -2.]
0.0 
-8.0 
3780.0 
5064.0 
