In [1]:
import numpy as np
import mltools as ml
import xgboost as xgb

In [9]:
from sklearn.model_selection import train_test_split

In [2]:
np.random.seed(9)

In [5]:
X_train = np.genfromtxt('data/X_train.txt', delimiter=None)
Y_train = np.genfromtxt('data/Y_train.txt', delimiter=None)
X_test = np.genfromtxt('data/X_test.txt', delimiter=None)

#### Rescaling Data

In [6]:
X_train_rescaled, parameters = ml.rescale(X_train)
X_test_rescaled, _ = ml.rescale(X_test, parameters)

#### Split data into Train and Validate

In [56]:
X_split_train, X_split_validation, Y_split_train, Y_split_validation = \
                                                train_test_split(X_train_rescaled, Y_train, test_size = 0.2)

#### Applying XGBoost

In [60]:
xlf = xgb.XGBRegressor(max_depth=10, 
                        learning_rate=0.01, 
                        n_estimators=100, 
                        silent=True, 
                        objective='reg:linear', 
                        nthread=-1, 
                        gamma=0,
                        min_child_weight=1, 
                        max_delta_step=0, 
                        subsample=0.85, 
                        colsample_bytree=0.7, 
                        colsample_bylevel=1, 
                        reg_alpha=0, 
                        reg_lambda=1, 
                        scale_pos_weight=1, 
                        seed=1440, 
                       eval_metric='auc',
                        missing=None)

In [61]:
xlf.fit(X_split_train, Y_split_train, eval_metric='auc', verbose = True, \
        eval_set = [(X_split_train, Y_split_train)], early_stopping_rounds=100)

# xlf.fit(X_train_rescaled, Y_train, eval_metric='auc', verbose = True)

[0]	validation_0-auc:0.715523
Will train until validation_0-auc hasn't improved in 100 rounds.
[1]	validation_0-auc:0.724326
[2]	validation_0-auc:0.731422
[3]	validation_0-auc:0.735532
[4]	validation_0-auc:0.740283
[5]	validation_0-auc:0.740702
[6]	validation_0-auc:0.740428
[7]	validation_0-auc:0.740708
[8]	validation_0-auc:0.74544
[9]	validation_0-auc:0.74802
[10]	validation_0-auc:0.750112
[11]	validation_0-auc:0.749755
[12]	validation_0-auc:0.751417
[13]	validation_0-auc:0.753102
[14]	validation_0-auc:0.75285
[15]	validation_0-auc:0.754348
[16]	validation_0-auc:0.756382
[17]	validation_0-auc:0.757836
[18]	validation_0-auc:0.758949
[19]	validation_0-auc:0.759672
[20]	validation_0-auc:0.76053
[21]	validation_0-auc:0.760342
[22]	validation_0-auc:0.76082
[23]	validation_0-auc:0.761058
[24]	validation_0-auc:0.761608
[25]	validation_0-auc:0.761656
[26]	validation_0-auc:0.762229
[27]	validation_0-auc:0.764047
[28]	validation_0-auc:0.764618
[29]	validation_0-auc:0.765853
[30]	validation_0-au

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.7, eval_metric='auc', gamma=0,
       learning_rate=0.01, max_delta_step=0, max_depth=10,
       min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
       nthread=-1, objective='reg:linear', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, seed=1440, silent=True,
       subsample=0.85)

In [45]:
predictions = xlf.predict(X_test_rescaled)

In [29]:
predictions #1

array([ 0.71816027,  0.40702298,  0.3570112 , ...,  0.3848815 ,
        0.26137674,  0.29080856], dtype=float32)

In [33]:
predictions #2

array([ 0.88731736,  0.51474732,  0.11791123, ...,  0.27728382,
        0.06862013,  0.12416136], dtype=float32)

In [40]:
predictions #3

array([ 0.69462633,  0.49171945,  0.32191864, ...,  0.33926171,
        0.2801075 ,  0.28359342], dtype=float32)

In [46]:
predictions #4

array([ 0.52289128,  0.49507147,  0.46891677, ...,  0.47719181,
        0.46705362,  0.46660593], dtype=float32)

#### Saving txt file

In [34]:
Yte = np.vstack((np.arange(X_test.shape[0]), predictions)).T

In [35]:
np.savetxt('Y_submit_XGBoost_v1.txt', Yte, '%d, %.2f', header='ID,Prob1', comments='', delimiter=',')