In [3]:
import numpy as np

In [4]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
X, y = make_regression(n_samples=50_000, n_features=100, random_state=10, noise=12, n_informative=70)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=0.8, random_state=10)
X_train, X_vals, y_train, y_vals = train_test_split(X_train, y_train, train_size=0.9, random_state=10)

# XGBRegressor

In [6]:
import xgboost as xgb

In [31]:
params = {
    'n_estimators' : 1000,            # Количество моделей в ансамбле
    'learning_rate' : 0.05,          # Шаг обучения
    'max_depth' : 2,                 # Максимальная глубина дерева
    'gamma' : 1,                     # Минимальное снижение функции потерь, которое требуется, чтобы разбить узел дерева.
    'reg_lambda': 1e-05,             # L2-регуляризация
    'reg_alpha' : 0.5,              # L1-регуляризация
    'early_stopping_rounds' : 10,    # Количество раундов без улучшений, после которых обучение будет остановлено
    'subsample' : 0.8,               # Процент подвыборки для обучения одного дерева
    'random_state' : 10,             # Состояние рандома для subsample
    'eval_metric' : 'rmse',          # Метрика используемая на валидационном наборе
    'booster': 'gbtree',             # Модель по умолчанию(Дерево)
    'n_jobs' : -1,                   # Количество ядер процессора
    'device' : 'cpu'                 # Процессор или видеокарта
    
}

In [32]:
gb = xgb.XGBRegressor(**params)
gb

In [33]:
gb.fit(X_train, y_train, 
       eval_set=[(X_vals, y_vals)],
       verbose=False
      )

In [34]:
y_pred = gb.predict(X_test)

In [35]:
from sklearn.metrics import r2_score, mean_squared_error
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))

0.8408323248096379
39431.59383584018


In [12]:
gbm = xgb.XGBRegressor()
gbm.fit(X_train, y_train)
gbm.predict(X_test)

print(r2_score(y_test, gbm.predict(X_test)))
print(mean_squared_error(y_test, gbm.predict(X_test)))

0.8000508875897014
49534.63182125473


# LinearRegression

In [13]:
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression(n_jobs=-1)

In [14]:
lin_reg.fit(X_train, y_train)

In [15]:
lin_reg.predict(X_test)

array([-436.12562536, -593.71634072,   95.92330337, ...,  337.16068425,
       -663.40718125,  -81.70712282])

In [16]:
print(r2_score(y_test, lin_reg.predict(X_test)))
print(mean_squared_error(y_test, lin_reg.predict(X_test)))

0.9994201882725723
143.6403497748139


# GridSearchCV

In [36]:
from sklearn.model_selection import GridSearchCV

In [37]:
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 5, 7],
    'min_child_weight': [1, 5, 10],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9],
    'reg_alpha': [0, 0.1, 1],
    'reg_lambda': [0, 0.1, 1],
}
xgb_model = xgb.XGBRegressor(n_jobs=-1)

cv = GridSearchCV(xgb_model, param_grid, cv=3, scoring='r2', verbose=2)
cv.fit(X_train, y_train)
cv.best_params_

Fitting 3 folds for each of 6561 candidates, totalling 19683 fits
[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, reg_alpha=0, reg_lambda=0, subsample=0.7; total time=   0.7s
[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, reg_alpha=0, reg_lambda=0, subsample=0.7; total time=   0.8s
[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, reg_alpha=0, reg_lambda=0, subsample=0.7; total time=   0.7s
[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, reg_alpha=0, reg_lambda=0, subsample=0.8; total time=   0.7s
[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, reg_alpha=0, reg_lambda=0, subsample=0.8; total time=   0.7s
[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, reg_alpha=0, reg_

KeyboardInterrupt: 