In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Допиливаем класс

In [3]:
class MyGradientRegressor:
    def __init__(self, n_estimators=300, max_depth=3, lr=0.1, patience=10, validation_split=0.2, metric=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.lr = lr
        self.patience = patience
        self.validation_split = validation_split
        self.metric = metric if metric else mean_squared_error
        self.estimators = []
        self.best_estimators = []
        self.best_iteration = 0

    def fit(self, X, y):
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=self.validation_split, random_state=42)

        self.estimators = []
        predictions = np.zeros_like(y_train)
        best_score = float('inf')
        patience_counter = 0

        for i in range(self.n_estimators):
            new_model = DecisionTreeRegressor(max_depth=self.max_depth)
            new_target = -2 * (predictions - y_train)
            new_model.fit(X_train, new_target)
            predictions += self.lr * new_model.predict(X_train)

            val_predictions = self.predict(X_val, training=True)
            score = self.metric(y_val, val_predictions)

            if score < best_score:
                best_score = score
                self.best_estimators = self.estimators.copy()
                self.best_iteration = i
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= self.patience:
                break

            self.estimators.append(new_model)

    def predict(self, X, training=False):
        curr_pred = np.zeros(len(X))
        estimators = self.estimators if training else self.best_estimators
        for est in estimators:
            curr_pred += self.lr * est.predict(X)
        return curr_pred


# Загружаем датасет

In [5]:
file_path = "house_price_regression_dataset [zQQfCL].csv"
df = pd.read_csv(file_path)


In [6]:
df.head(10)

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Year_Built,Lot_Size,Garage_Size,Neighborhood_Quality,House_Price
0,1360,2,1,1981,0.599637,0,5,262382.9
1,4272,3,3,2016,4.753014,1,6,985260.9
2,3592,1,2,2016,3.634823,0,9,777977.4
3,966,1,2,1977,2.730667,1,8,229698.9
4,4926,2,1,1993,4.699073,0,8,1041741.0
5,3944,5,3,1990,2.47593,2,8,879797.0
6,3671,1,2,2012,4.91196,0,1,814427.9
7,3419,1,1,1972,2.805281,1,1,703413.1
8,630,3,3,1997,1.014286,1,8,173875.0
9,2185,4,2,1981,3.941604,2,5,504176.5


# Выделяем целевую переменную и признаки

In [8]:
X = df.drop(columns=["House_Price"])
y = df["House_Price"]


In [9]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Инициализируем модельку и обучаем ее с базовым зкритерием остановки

In [24]:
model = MyGradientRegressor(n_estimators=300, max_depth=3, lr=0.1, patience=15)
model.fit(X_scaled, y)

In [25]:
preds = model.predict(X_scaled)
mse = mean_squared_error(y, preds)
mae = mean_absolute_error(y, preds)
print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"Итерация остановки: {model.best_iteration + 1}")

MSE: 73260962.77540122
MAE: 6437.496249723201
Итерация остановки: 124


# Проверим, что остановка работает на крайнем случае = 3

In [39]:
model = MyGradientRegressor(n_estimators=300, max_depth=3, lr=0.1, patience=3)
model.fit(X_scaled, y)

In [40]:
preds = model.predict(X_scaled)
mse = mean_squared_error(y, preds)
mae = mean_absolute_error(y, preds)
print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"Итерация остановки: {model.best_iteration + 1}")

MSE: 92297964.57362384
MAE: 7442.164911348376
Итерация остановки: 80
