In [5]:
pip install xgboost

Collecting xgboost
  Downloading xgboost-2.1.3-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.3-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
    --------------------------------------- 1.8/124.9 MB 9.1 MB/s eta 0:00:14
   - -------------------------------------- 3.1/124.9 MB 8.4 MB/s eta 0:00:15
   - -------------------------------------- 4.5/124.9 MB 7.3 MB/s eta 0:00:17
   - -------------------------------------- 5.5/124.9 MB 6.8 MB/s eta 0:00:18
   -- ------------------------------------- 6.6/124.9 MB 6.3 MB/s eta 0:00:19
   -- ------------------------------------- 7.9/124.9 MB 6.3 MB/s eta 0:00:19
   -- ------------------------------------- 8.9/124.9 MB 6.2 MB/s eta 0:00:19
   --- ------------------------------------ 10.0/124.9 MB 6.0 MB/s eta 0:00:20
   --- ------------------------------------ 11.0/124.9 MB 5.9 MB/s eta 0:00:20
   --- ------------------------------------ 12.1/124.9 MB 5.9 MB/s eta 0:00:2

### Importing Libraries

In [6]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import xgboost as xgb

### Import/Read Data

In [4]:
# Load the scikit-learn breast-cancer dataset and assemble one DataFrame:
# a column per feature plus a 'target' column holding the class label.
cancer_data = datasets.load_breast_cancer()
cancer_data_df = pd.DataFrame(
    cancer_data.data, columns=cancer_data.feature_names
).assign(target=cancer_data.target)

In [7]:
### Data splitting

# Features are every column except the label; the label column becomes y.
X = cancer_data_df.drop(columns='target')
y = cancer_data_df.loc[:, 'target']

In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=12,
)

In [9]:
# Wrap the training features and labels in a DMatrix, the input type passed to xgb.train.
train_x_y = xgb.DMatrix(X_train, label=y_train)

In [12]:
# Booster settings for binary classification.
# 'eval_metric' is the key XGBoost reads for the evaluation metric; the earlier
# 'eval' key was not recognised by the library and was silently dropped
# (it only produced a "Parameters: { "eval" } are not used." warning).
parameters = {
    'max_depth': 10,
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'learning_rate': 0.05,
}

In [16]:
# Train a booster through XGBoost's native API.
# num_boost_round=10 is the library default, written out so the number of
# boosting rounds is visible to the reader.
# NOTE(review): 10 rounds at learning_rate 0.05 is few — consider tuning.
xgb_classifier = xgb.train(parameters, train_x_y, num_boost_round=10)

Parameters: { "eval" } are not used.



In [17]:
# The held-out features also need to be wrapped in a DMatrix before prediction.
dtest = xgb.DMatrix(data=X_test)

In [18]:
# Score the held-out rows with the trained booster.
y_pred = xgb_classifier.predict(data=dtest)

In [19]:
# Inspect the raw booster outputs: with the 'binary:logistic' objective these
# are scores between 0 and 1 (probability of class 1), not hard labels yet.
y_pred

array([0.43578044, 0.77845126, 0.77845126, 0.77845126, 0.77845126,
       0.5044193 , 0.74613315, 0.74130255, 0.74613315, 0.3863775 ,
       0.77845126, 0.3863775 , 0.3863775 , 0.3863775 , 0.48119792,
       0.7477808 , 0.77845126, 0.3863775 , 0.77845126, 0.77845126,
       0.7690649 , 0.5409438 , 0.77845126, 0.77845126, 0.3863775 ,
       0.3863775 , 0.7312173 , 0.3863775 , 0.7228209 , 0.3863775 ,
       0.77845126, 0.3863775 , 0.3863775 , 0.77845126, 0.3863775 ,
       0.77845126, 0.77845126, 0.3863775 , 0.3863775 , 0.650189  ,
       0.71652484, 0.77845126, 0.77845126, 0.74129635, 0.77845126,
       0.3863775 , 0.49261305, 0.77845126, 0.77845126, 0.77845126,
       0.77845126, 0.77845126, 0.73473984, 0.3863775 , 0.3863775 ,
       0.77845126, 0.75222945, 0.77845126, 0.3863775 , 0.5297318 ,
       0.7623166 , 0.3863775 , 0.7301239 , 0.7169196 , 0.66552263,
       0.77845126, 0.77845126, 0.77845126, 0.3863775 , 0.77845126,
       0.77845126, 0.77845126, 0.44704652, 0.3863775 , 0.38637

In [20]:
# Convert predicted probabilities into hard class labels with a 0.5 cut-off.
# The comparison is vectorised over the whole array instead of looping element
# by element in Python; casting back to the original dtype keeps the result a
# float array of 0./1. values, exactly as the loop produced.
y_pred = (y_pred >= 0.5).astype(y_pred.dtype)

In [21]:
# Display the predictions again after thresholding: every entry is now 0. or 1.
y_pred

array([0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 1.,
       0., 1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 1.,
       0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1.,
       1., 1., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1.,
       0., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 1.,
       1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       0., 1., 1., 1., 0., 1., 0., 0., 1., 1., 1., 1.], dtype=float32)

In [22]:
# Fraction of held-out rows the XGBoost model labelled correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9122807017543859

In [23]:
# Confusion matrix for the XGBoost model: rows are true classes, columns are predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[39,  9],
       [ 1, 65]])

In [50]:
# Two scikit-learn gradient-boosting configurations to compare:
#   gb_model_low  - smaller learning rate (0.3) with many trees (500)
#   gb_model_high - larger learning rate (1) with few trees (5)
# random_state is fixed (same seed as the train/test split) so repeated fits
# build the same trees; without it scikit-learn permutes features at each
# split with an unseeded generator and tied splits can resolve differently.
gb_model_low = GradientBoostingClassifier(
    learning_rate=0.3, n_estimators=500, random_state=12
)
gb_model_high = GradientBoostingClassifier(
    learning_rate=1, n_estimators=5, random_state=12
)

In [51]:
# Fit both gradient-boosting models on the same training split.
gb_model_low.fit(X=X_train, y=y_train)
gb_model_high.fit(X=X_train, y=y_train)

In [53]:
# Predict class labels on the held-out set with each fitted model.
y_pred_low, y_pred_high = [
    fitted_model.predict(X_test)
    for fitted_model in (gb_model_low, gb_model_high)
]

In [54]:
# Held-out accuracy of the learning_rate=0.3 / 500-tree model.
accuracy_score(y_true=y_test, y_pred=y_pred_low)

0.956140350877193

In [55]:
# Held-out accuracy of the learning_rate=1 / 5-tree model.
accuracy_score(y_true=y_test, y_pred=y_pred_high)

0.9210526315789473