In [1]:
from numpy import loadtxt 
from xgboost import XGBClassifier 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score 
from sklearn.model_selection import StratifiedKFold 
from sklearn.model_selection import cross_val_score 
import warnings

In [2]:
dataset = loadtxt('pima.txt', delimiter=",")

In [3]:
dataset

array([[  6.   , 148.   ,  72.   , ...,   0.627,  50.   ,   1.   ],
       [  1.   ,  85.   ,  66.   , ...,   0.351,  31.   ,   0.   ],
       [  8.   , 183.   ,  64.   , ...,   0.672,  32.   ,   1.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,   0.245,  30.   ,   0.   ],
       [  1.   , 126.   ,  60.   , ...,   0.349,  47.   ,   1.   ],
       [  1.   ,  93.   ,  70.   , ...,   0.315,  23.   ,   0.   ]])

In [4]:
X = dataset[:,0:8] 
y = dataset[:,8]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) 

In [6]:
model = XGBClassifier() 
model.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [7]:
y_pred = model.predict(X_test) 
predictions = [round(value) for value in y_pred] 

accuracy = accuracy_score(y_test, predictions) 
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 74.89%


### XGBoost-Pima-KFold

In [8]:
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [9]:
kfold = StratifiedKFold(n_splits=10, random_state=7) 
results = cross_val_score(model, X, y, cv=kfold)

In [10]:
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Accuracy: 76.95% (5.88%)


### XGBoost-Pima-EarlyStopping

In [11]:
eval_set = [(X_test, y_test)] 
model.fit(X_train, y_train, eval_metric="error", eval_set=eval_set, verbose=True)

[0]	validation_0-error:0.281385
[1]	validation_0-error:0.25974
[2]	validation_0-error:0.264069
[3]	validation_0-error:0.264069
[4]	validation_0-error:0.268398
[5]	validation_0-error:0.264069
[6]	validation_0-error:0.268398
[7]	validation_0-error:0.25974
[8]	validation_0-error:0.255411
[9]	validation_0-error:0.255411
[10]	validation_0-error:0.25974
[11]	validation_0-error:0.255411
[12]	validation_0-error:0.25974
[13]	validation_0-error:0.25974
[14]	validation_0-error:0.251082
[15]	validation_0-error:0.251082
[16]	validation_0-error:0.251082
[17]	validation_0-error:0.251082
[18]	validation_0-error:0.246753
[19]	validation_0-error:0.255411
[20]	validation_0-error:0.25974
[21]	validation_0-error:0.255411
[22]	validation_0-error:0.255411
[23]	validation_0-error:0.255411
[24]	validation_0-error:0.255411
[25]	validation_0-error:0.255411
[26]	validation_0-error:0.242424
[27]	validation_0-error:0.242424
[28]	validation_0-error:0.242424
[29]	validation_0-error:0.242424
[30]	validation_0-error:0.

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

> Take note that the parameter has been added. The parameter is early_stopping_rounds=36

In [12]:
eval_set = [(X_test, y_test)] 
model.fit(X_train, y_train, eval_metric="error",early_stopping_rounds=36, eval_set=eval_set, verbose=True)

[0]	validation_0-error:0.281385
Will train until validation_0-error hasn't improved in 36 rounds.
[1]	validation_0-error:0.25974
[2]	validation_0-error:0.264069
[3]	validation_0-error:0.264069
[4]	validation_0-error:0.268398
[5]	validation_0-error:0.264069
[6]	validation_0-error:0.268398
[7]	validation_0-error:0.25974
[8]	validation_0-error:0.255411
[9]	validation_0-error:0.255411
[10]	validation_0-error:0.25974
[11]	validation_0-error:0.255411
[12]	validation_0-error:0.25974
[13]	validation_0-error:0.25974
[14]	validation_0-error:0.251082
[15]	validation_0-error:0.251082
[16]	validation_0-error:0.251082
[17]	validation_0-error:0.251082
[18]	validation_0-error:0.246753
[19]	validation_0-error:0.255411
[20]	validation_0-error:0.25974
[21]	validation_0-error:0.255411
[22]	validation_0-error:0.255411
[23]	validation_0-error:0.255411
[24]	validation_0-error:0.255411
[25]	validation_0-error:0.255411
[26]	validation_0-error:0.242424
[27]	validation_0-error:0.242424
[28]	validation_0-error:0.

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [13]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [14]:
y_pred = model.predict(X_test) 
predictions = [round(value) for value in y_pred] 

accuracy = accuracy_score(y_test, predictions) 
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 77.49%
