In [9]:
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [3]:
# Загрузка данных
data_class = load_breast_cancer()
X_class, y_class = data_class.data, data_class.target

# Разделение данных на тренировочную и тестовую выборки
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)

# Бейзлайн модель градиентного бустинга для классификации
gb_clf = GradientBoostingClassifier(random_state=42)
gb_clf.fit(X_train_class, y_train_class)
y_pred_class = gb_clf.predict(X_test_class)

# Оценка
accuracy_class = accuracy_score(y_test_class, y_pred_class)
precision_class = precision_score(y_test_class, y_pred_class)
recall_class = recall_score(y_test_class, y_pred_class)
f1_class = f1_score(y_test_class, y_pred_class)

print(f"Baseline Gradient Boosting Classification Metrics:\nAccuracy: {accuracy_class}\nPrecision: {precision_class}\nRecall: {recall_class}\nF1 Score: {f1_class}")

Baseline Gradient Boosting Classification Metrics:
Accuracy: 0.956140350877193
Precision: 0.9583333333333334
Recall: 0.971830985915493
F1 Score: 0.965034965034965


In [10]:
# Загрузка данных
data_reg = fetch_california_housing()
X_reg, y_reg = data_reg.data, data_reg.target

# Разделение данных на тренировочную и тестовую выборки
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

# Бейзлайн модель градиентного бустинга для регрессии
gb_reg = GradientBoostingRegressor(random_state=42)
gb_reg.fit(X_train_reg, y_train_reg)
y_pred_reg = gb_reg.predict(X_test_reg)

# Оценка
mae_reg = mean_absolute_error(y_test_reg, y_pred_reg)
mse_reg = mean_squared_error(y_test_reg, y_pred_reg)
r2_reg = r2_score(y_test_reg, y_pred_reg)

print(f"Baseline Gradient Boosting Regression Metrics:\nMAE: {mae_reg}\nMSE: {mse_reg}\nR-squared: {r2_reg}")

Baseline Gradient Boosting Regression Metrics:
MAE: 0.37164256904255966
MSE: 0.2939973248643864
R-squared: 0.7756446042829697


In [4]:
from sklearn.model_selection import GridSearchCV

In [5]:
# Параметры для поиска
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.1, 0.01, 0.001],
    'max_depth': [3, 5, 7]
}

grid_search = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid,
    cv=3,
    scoring='f1',
    n_jobs=-1
)

grid_search.fit(X_train_class, y_train_class)

# Лучший классификатор
best_gb_clf = grid_search.best_estimator_

# Оценка
y_pred_class_improved = best_gb_clf.predict(X_test_class)
accuracy_class_improved = accuracy_score(y_test_class, y_pred_class_improved)
precision_class_improved = precision_score(y_test_class, y_pred_class_improved)
recall_class_improved = recall_score(y_test_class, y_pred_class_improved)
f1_class_improved = f1_score(y_test_class, y_pred_class_improved)

print(f"Improved Gradient Boosting Classification Metrics:\nAccuracy: {accuracy_class_improved}\nPrecision: {precision_class_improved}\nRecall: {recall_class_improved}\nF1 Score: {f1_class_improved}")

Improved Gradient Boosting Classification Metrics:
Accuracy: 0.956140350877193
Precision: 0.9583333333333334
Recall: 0.971830985915493
F1 Score: 0.965034965034965


In [11]:
# Параметры для поиска
param_grid_reg = {
    'n_estimators': [100, 200],
    'learning_rate': [0.1, 0.01],
    'max_depth': [3, 5]
}

grid_search_reg = GridSearchCV(
    GradientBoostingRegressor(random_state=42),
    param_grid_reg,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1
)

grid_search_reg.fit(X_train_reg, y_train_reg)

# Лучший регрессор
best_gb_reg = grid_search_reg.best_estimator_

# Оценка
y_pred_reg_improved = best_gb_reg.predict(X_test_reg)
mae_reg_improved = mean_absolute_error(y_test_reg, y_pred_reg_improved)
mse_reg_improved = mean_squared_error(y_test_reg, y_pred_reg_improved)
r2_reg_improved = r2_score(y_test_reg, y_pred_reg_improved)

print(f"Improved Gradient Boosting Regression Metrics:\nMAE: {mae_reg_improved}\nMSE: {mse_reg_improved}\nR-squared: {r2_reg_improved}")

Improved Gradient Boosting Regression Metrics:
MAE: 0.3140484519884909
MSE: 0.22388069595607893
R-squared: 0.8291520436187658


In [18]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from scipy.special import expit, logit


In [19]:
class CustomGradientBoostingClassifier:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []

    def fit(self, X, y):
        self.models = []
        # Инициализация с логитов 0.5
        y_prob = np.full(y.shape, 0.5)
        y_pred_logit = logit(y_prob)
        
        for _ in range(self.n_estimators):
            # Вычисление отрицательных градиентов (разность между реальными классами и вероятностями текущей модели)
            residuals = y - expit(y_pred_logit)
            # Обучение нового дерева на этих остатках
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            # Обновление логитов
            y_pred_logit += self.learning_rate * tree.predict(X)
            self.models.append(tree)

    def predict(self, X):
        y_pred_logit = np.full(X.shape[0], 0.5)
        for tree in self.models:
            y_pred_logit += self.learning_rate * tree.predict(X)
        y_prob = expit(y_pred_logit)
        # Преобразование вероятностей в бинарные предсказания
        return (y_prob > 0.5).astype(int)

# Загрузка данных
data_class = load_breast_cancer()
X_class, y_class = data_class.data, data_class.target

# Разделение данных на тренировочную и тестовую выборки
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)

# Обучение и оценка кастомного градиентного бустинга
custom_gb_clf = CustomGradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
custom_gb_clf.fit(X_train_class, y_train_class)
y_pred_class_custom = custom_gb_clf.predict(X_test_class)

# Оценка
accuracy_class_custom = accuracy_score(y_test_class, y_pred_class_custom)
precision_class_custom = precision_score(y_test_class, y_pred_class_custom)
recall_class_custom = recall_score(y_test_class, y_pred_class_custom)
f1_class_custom = f1_score(y_test_class, y_pred_class_custom)

print(f"Custom Gradient Boosting Classification Metrics:\nAccuracy: {accuracy_class_custom}\nPrecision: {precision_class_custom}\nRecall: {recall_class_custom}\nF1 Score: {f1_class_custom}")

Custom Gradient Boosting Classification Metrics:
Accuracy: 0.956140350877193
Precision: 0.9583333333333334
Recall: 0.971830985915493
F1 Score: 0.965034965034965


In [14]:
class CustomGradientBoostingRegressor:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []

    def fit(self, X, y):
        self.models = []
        y_pred = np.zeros(y.shape)
        for _ in range(self.n_estimators):
            residuals = y - y_pred
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            y_pred += self.learning_rate * tree.predict(X)
            self.models.append(tree)

    def predict(self, X):
        y_pred = np.zeros(X.shape[0])
        for tree in self.models:
            y_pred += self.learning_rate * tree.predict(X)
        return y_pred

# Обучение и оценка кастомного градиентного бустинга
custom_gb_reg = CustomGradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
custom_gb_reg.fit(X_train_reg, y_train_reg)
y_pred_reg_custom = custom_gb_reg.predict(X_test_reg)

mae_reg_custom = mean_absolute_error(y_test_reg, y_pred_reg_custom)
mse_reg_custom = mean_squared_error(y_test_reg, y_pred_reg_custom)
r2_reg_custom = r2_score(y_test_reg, y_pred_reg_custom)

print(f"Custom Gradient Boosting Regression Metrics:\nMAE: {mae_reg_custom}\nMSE: {mse_reg_custom}\nR-squared: {r2_reg_custom}")

Custom Gradient Boosting Regression Metrics:
MAE: 0.37162030240959865
MSE: 0.2939346830965822
R-squared: 0.7756924075023037
