In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split
from sklearn.cross_validation import KFold
random_num = np.random.randint(10000)

boston = load_boston()
features = boston.feature_names
x_data = boston.data
y_data = boston.target
print('features:', features)
print('x_data.shape: \t', x_data.shape)
print('y_data.shape: \t', y_data.shape)

features: ['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']
x_data.shape: 	 (506, 13)
y_data.shape: 	 (506,)




In [2]:
x_train, x_val, y_train, y_val = train_test_split(x_data, y_data, test_size=0.2, random_state=random_num, shuffle=True)
print('x_train.shape: \t', x_train.shape)
print('x_val.shape: \t', x_val.shape)
print('y_train.shape: \t', y_train.shape)
print('y_val.shape: \t', y_val.shape)

x_train.shape: 	 (404, 13)
x_val.shape: 	 (102, 13)
y_train.shape: 	 (404,)
y_val.shape: 	 (102,)


In [3]:
xg_val = xgb.DMatrix(x_val, label=y_val)

In [4]:
xg_train_0 = xgb.DMatrix(x_train, label=y_train)
watchlist = [(xg_train_0, 'train'), (xg_val, 'val')]
params = {'objective': 'reg:linear', 'verbose': False}
model_0 = xgb.train(params, xg_train_0, 30)

mse_train = mse(model_0.predict(xg_train_0), y_train)
mse_val = mse(model_0.predict(xg_val), y_val)
print('mse_train: \t%.4f, mse_val: \t%.4f' % (mse_train, mse_val))

mse_train: 	0.1323, mse_val: 	6.2348


In [5]:
kf = KFold(len(x_train), n_folds=2)
for train_index, val_index in kf:
    x_train_kf, y_train_kf = x_train[val_index], y_train[val_index]
    xg_train_kf = xgb.DMatrix(x_train_kf, label=y_train_kf)
    model_kf = xgb.train(params, xg_train_kf, 30)

    mse_train = mse(model_kf.predict(xg_train_kf), y_train_kf)
    mse_val = mse(model_kf.predict(xg_val), y_val)
    print('mse_train: \t%.4f, mse_val: \t%.4f' % (mse_train, mse_val))
#     del xg_train_kf
#     del model_kf

mse_train: 	0.0752, mse_val: 	14.8576
mse_train: 	0.0720, mse_val: 	9.5229
