In [1]:
import numpy as np
import mltools as ml
import xgboost as xgb

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
np.random.seed(9)

In [4]:
X_train = np.genfromtxt('data/X_train.txt', delimiter=None)
Y_train = np.genfromtxt('data/Y_train.txt', delimiter=None)
X_test = np.genfromtxt('data/X_test.txt', delimiter=None)

#### Rescaling Data

In [5]:
X_train_rescaled, parameters = ml.rescale(X_train)
X_test_rescaled, _ = ml.rescale(X_test, parameters)

#### Split data into Train and Validate

In [6]:
X_split_train, X_split_validation, Y_split_train, Y_split_validation = \
                                                train_test_split(X_train_rescaled, Y_train, test_size = 0.2)

#### Applying XGBoost

In [15]:
xlf = xgb.XGBRegressor(max_depth=12, 
                        learning_rate=0.05, 
                        n_estimators=2000, 
                        silent=True, 
                        objective='binary:logistic', 
                        nthread=6, 
                        min_child_weight=6, 
                        max_delta_step=0, 
                       subssample=0.8,
                       colsample_bytree=0.8,
                        colsample_bylevel=1,
                        scale_pos_weight=1, 
                        seed=1440, 
                        eval_metric='auc',
                        missing=None)

In [None]:
xlf.fit(X_split_train, Y_split_train, eval_metric='auc', verbose = True, \
        eval_set = [(X_split_validation, Y_split_validation)], early_stopping_rounds=50)

# xlf.fit(X_train_rescaled, Y_train, verbose=True)

[0]	validation_0-auc:0.692325
Will train until validation_0-auc hasn't improved in 50 rounds.
[1]	validation_0-auc:0.705501
[2]	validation_0-auc:0.711415
[3]	validation_0-auc:0.715192
[4]	validation_0-auc:0.718691
[5]	validation_0-auc:0.730594
[6]	validation_0-auc:0.732754
[7]	validation_0-auc:0.733368
[8]	validation_0-auc:0.73372
[9]	validation_0-auc:0.733863
[10]	validation_0-auc:0.735174
[11]	validation_0-auc:0.736072
[12]	validation_0-auc:0.737182
[13]	validation_0-auc:0.737081
[14]	validation_0-auc:0.737951
[15]	validation_0-auc:0.738844
[16]	validation_0-auc:0.742076
[17]	validation_0-auc:0.74224
[18]	validation_0-auc:0.742517
[19]	validation_0-auc:0.742332
[20]	validation_0-auc:0.742537
[21]	validation_0-auc:0.74266
[22]	validation_0-auc:0.742819
[23]	validation_0-auc:0.74307
[24]	validation_0-auc:0.743893
[25]	validation_0-auc:0.744542
[26]	validation_0-auc:0.744986
[27]	validation_0-auc:0.745342
[28]	validation_0-auc:0.745321
[29]	validation_0-auc:0.745455
[30]	validation_0-au

[259]	validation_0-auc:0.780333
[260]	validation_0-auc:0.780385
[261]	validation_0-auc:0.780394
[262]	validation_0-auc:0.780531
[263]	validation_0-auc:0.780606
[264]	validation_0-auc:0.780707
[265]	validation_0-auc:0.780814
[266]	validation_0-auc:0.780871
[267]	validation_0-auc:0.780875
[268]	validation_0-auc:0.780859
[269]	validation_0-auc:0.780871
[270]	validation_0-auc:0.780944
[271]	validation_0-auc:0.780998
[272]	validation_0-auc:0.781074
[273]	validation_0-auc:0.78107
[274]	validation_0-auc:0.781059
[275]	validation_0-auc:0.781141
[276]	validation_0-auc:0.781335
[277]	validation_0-auc:0.781558
[278]	validation_0-auc:0.781577
[279]	validation_0-auc:0.781618
[280]	validation_0-auc:0.781745
[281]	validation_0-auc:0.781762
[282]	validation_0-auc:0.781763
[283]	validation_0-auc:0.781751
[284]	validation_0-auc:0.781746
[285]	validation_0-auc:0.781787
[286]	validation_0-auc:0.78196
[287]	validation_0-auc:0.781985
[288]	validation_0-auc:0.782128
[289]	validation_0-auc:0.782209
[290]	vali

In [57]:
predictions = xlf.predict(X_test_rescaled)

In [58]:
predictions #1

array([ 0.82907212,  0.63176537,  0.31591514, ...,  0.27784756,
        0.09122714,  0.12314682], dtype=float32)

#### Saving txt file

In [59]:
Yte = np.vstack((np.arange(X_test.shape[0]), predictions)).T

In [60]:
np.savetxt('Y_submit_XGBoost_v5.txt', Yte, '%d, %.2f', header='ID,Prob1', comments='', delimiter=',')