In [1]:
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as MSE
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set()

In [2]:
# Load the Boston housing data as a feature matrix X and target vector y.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release — confirm the pinned version.
X, y = load_boston(return_X_y=True)

In [3]:
# Shared 5-fold splitter used by every cross-validation call below; shuffling
# with a fixed random_state keeps the fold assignment repeatable across runs.
kfold = KFold(n_splits=5, shuffle=True, random_state=2)

In [4]:
def regression_model(model):
    """Return the mean cross-validated RMSE of `model` on the notebook's X and y.

    The model is scored with `cross_val_score` using the shared `kfold`
    splitter (5 folds as configured in this notebook) and the
    'neg_mean_squared_error' scorer. Each fold's negated MSE is converted to
    an RMSE, and the per-fold RMSE values are then averaged.

    Parameters
    ----------
    model : estimator
        Any regressor accepted by `cross_val_score`.

    Returns
    -------
    float
        Mean of the per-fold RMSE values, rounded to 2 decimal places.
    """
    # The scorer reports negated MSE (higher is better), one value per fold.
    neg_mse_per_fold = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=kfold)

    # Flip the sign back to a positive MSE, then take the root fold by fold.
    rmse_per_fold = (-neg_mse_per_fold)**0.5

    # Average across folds and round for display.
    mean_rmse = rmse_per_fold.mean()
    return round(mean_rmse, 2)

In [5]:
# Baseline: XGBoost with the linear booster (gblinear).
regression_model(XGBRegressor(booster='gblinear', objective='reg:squarederror'))

6.07

In [6]:
# Plain linear regression with default settings, for comparison with gblinear.
regression_model(LinearRegression())

4.78

In [7]:
# Lasso with its default settings.
regression_model(Lasso())

5.25

In [8]:
# Ridge with its default settings.
regression_model(Ridge())

4.79

In [9]:
# XGBoost with the tree booster (gbtree), scored the same way as the linear models above.
regression_model(XGBRegressor(booster='gbtree', objective='reg:squarederror'))

3.08

In [10]:
def grid_search(params, reg=XGBRegressor(booster='gblinear', objective='reg:squarederror')):
    """Grid-search `params` for `reg` on the notebook's X and y and print the result.

    Parameters
    ----------
    params : dict
        Parameter grid handed to `GridSearchCV`.
    reg : estimator, optional
        Regressor to tune. Defaults to an XGBRegressor with the gblinear
        booster; that default instance is created once, when this function
        is defined, and is shared by every call that omits `reg`.

    Returns
    -------
    None
        The best parameter combination and its score are printed.

    Notes
    -----
    The search is fitted on the full X and y with the shared `kfold`
    splitter. The value printed as "Training score" is the square root of the
    negated `best_score_`, which is GridSearchCV's cross-validated score
    rather than an error measured on the training data. Because the root is
    taken after the folds are averaged, it is not computed the same way as
    the mean of per-fold RMSE values returned by `regression_model`.
    """
    # Configure the search with the same scorer and folds used elsewhere in the notebook.
    searcher = GridSearchCV(reg, params, scoring='neg_mean_squared_error', cv=kfold)

    # Run the search on the full dataset.
    searcher.fit(X, y)

    # Report the winning parameter combination.
    print("Best params:", searcher.best_params_)

    # best_score_ is a negated MSE; negate it and take the root to report an RMSE.
    best_rmse = np.sqrt(-searcher.best_score_)
    print("Training score: {:.2f}".format(best_rmse))

In [11]:
# Search reg_alpha values for the default gblinear regressor.
grid_search(params={'reg_alpha':[0.01, 0.1, 0.5, 1, 2, 4]})

Best params: {'reg_alpha': 0.01}
Training score: 6.12


In [12]:
# Search the same value range for reg_lambda instead of reg_alpha.
grid_search(params={'reg_lambda':[0.01, 0.1, 0.5, 1, 2, 4]})

Best params: {'reg_lambda': 0.01}
Training score: 5.95


In [13]:
# Finer reg_lambda grid restricted to small values (0.01 to 0.2).
grid_search(params={'reg_lambda':[0.01, 0.05, 0.15, 0.2]})

Best params: {'reg_lambda': 0.01}
Training score: 5.95


In [14]:
# Single-candidate grid: scores feature_selector='shuffle' with the default gblinear regressor.
grid_search(params={'feature_selector':['shuffle']})

Best params: {'feature_selector': 'shuffle'}
Training score: 6.19


In [15]:
# Compare the random, greedy and thrifty feature selectors with the updater fixed to
# 'coord_descent' (per the XGBoost docs, greedy and thrifty require this updater).
grid_search(params={'feature_selector':['random', 'greedy', 'thrifty'], 'updater':['coord_descent'] })

Best params: {'feature_selector': 'greedy', 'updater': 'coord_descent'}
Training score: 5.67


In [16]:
# Additionally vary top_k for the greedy and thrifty selectors (updater still coord_descent).
grid_search(params={'feature_selector':['greedy', 'thrifty'], 'updater':['coord_descent'], 'top_k':[2, 4, 6, 8, 10, 12]})

Best params: {'feature_selector': 'greedy', 'top_k': 12, 'updater': 'coord_descent'}
Training score: 5.67


In [24]:
# Single-candidate grid: greedy selector + coord_descent with learning_rate set to 0.3.
grid_search(params={'feature_selector':['greedy'], 'updater':['coord_descent'], 
                    'learning_rate':[0.3]})

Best params: {'feature_selector': 'greedy', 'learning_rate': 0.3, 'updater': 'coord_descent'}
Training score: 5.55


In [17]:
# XGBoost with the DART booster and no other DART-specific settings.
regression_model(XGBRegressor(booster='dart', objective='reg:squarederror'))

3.08

In [26]:
# DART with sample_type='weighted' and normalize_type='forest'.
regression_model(XGBRegressor(booster='dart', objective='reg:squarederror', sample_type='weighted', 
                             normalize_type='forest'))

3.08

In [43]:
# DART with sample_type='weighted' and a small rate_drop of 0.001.
regression_model(XGBRegressor(booster='dart', objective='reg:squarederror', sample_type='weighted', 
                             rate_drop=0.001))

3.07

In [41]:
# DART with sample_type='weighted' and one_drop set to 1.
regression_model(XGBRegressor(booster='dart', objective='reg:squarederror', sample_type='weighted', 
                             one_drop=1))

3.09