In [1]:
import numpy as np
import mltools as ml
import xgboost as xgb

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
np.random.seed(9)

In [4]:
X_train = np.genfromtxt('data/X_train.txt', delimiter=None)
Y_train = np.genfromtxt('data/Y_train.txt', delimiter=None)
X_test = np.genfromtxt('data/X_test.txt', delimiter=None)

#### Rescaling Data

In [5]:
X_train_rescaled, parameters = ml.rescale(X_train)
X_test_rescaled, _ = ml.rescale(X_test, parameters)

#### Split data into Train and Validate

In [6]:
X_split_train, X_split_validation, Y_split_train, Y_split_validation = \
                                                train_test_split(X_train_rescaled, Y_train, test_size = 0.3)

#### Applying XGBoost

In [20]:
xlf = xgb.XGBRegressor(max_depth=10, 
                        learning_rate=0.2, 
                        n_estimators=200, 
                        silent=True, 
                        objective='reg:linear', 
                        nthread=10, 
                        gamma=0, 
                        min_child_weight=1, 
                        max_delta_step=0, 
                        subsample=0.9, 
                        colsample_bytree=0.9, 
                        colsample_bylevel=1, 
                        reg_alpha=0, 
                        reg_lambda=1, 
                        scale_pos_weight=1, 
                        seed=100, 
                        eval_metric='auc',
                        missing=None)

In [21]:
xlf.fit(X_split_train, Y_split_train, eval_metric='auc', verbose = True, \
        eval_set = [(X_split_train, Y_split_train)], early_stopping_rounds=500)

# xlf.fit(X_train_rescaled, Y_train, eval_metric='auc', verbose = True)

[0]	validation_0-auc:0.716964
Will train until validation_0-auc hasn't improved in 500 rounds.
[1]	validation_0-auc:0.742643
[2]	validation_0-auc:0.754946
[3]	validation_0-auc:0.764116
[4]	validation_0-auc:0.772241
[5]	validation_0-auc:0.778537
[6]	validation_0-auc:0.784494
[7]	validation_0-auc:0.788894
[8]	validation_0-auc:0.793753
[9]	validation_0-auc:0.799157
[10]	validation_0-auc:0.803685
[11]	validation_0-auc:0.806783
[12]	validation_0-auc:0.812339
[13]	validation_0-auc:0.815779
[14]	validation_0-auc:0.817672
[15]	validation_0-auc:0.819832
[16]	validation_0-auc:0.822112
[17]	validation_0-auc:0.826373
[18]	validation_0-auc:0.828829
[19]	validation_0-auc:0.830331
[20]	validation_0-auc:0.833182
[21]	validation_0-auc:0.835648
[22]	validation_0-auc:0.83833
[23]	validation_0-auc:0.840296
[24]	validation_0-auc:0.841811
[25]	validation_0-auc:0.843295
[26]	validation_0-auc:0.844775
[27]	validation_0-auc:0.847354
[28]	validation_0-auc:0.848083
[29]	validation_0-auc:0.849966
[30]	validation_

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.9, eval_metric='auc', gamma=0, learning_rate=0.2,
       max_delta_step=0, max_depth=10, min_child_weight=1, missing=None,
       n_estimators=200, n_jobs=1, nthread=10, objective='reg:linear',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=100, silent=True, subsample=0.9)

In [22]:
predictions = xlf.predict(X_test_rescaled)

In [26]:
predictions

array([ 0.8320092 ,  0.39496756,  0.03182447, ...,  0.30192417,
        0.07910615,  0.13096502], dtype=float32)

#### Saving txt file

In [24]:
Y_test = np.vstack((np.arange(X_test.shape[0]), predictions)).T

In [25]:
np.savetxt('Y_submit_XGBoost_v3.txt', Y_test, '%d, %.2f', header='ID,Prob1', comments='', delimiter=',')