# XGBoost

In [4]:
pip install xgboost

Collecting xgboost
  Downloading xgboost-1.3.3-py3-none-win_amd64.whl (95.2 MB)
Installing collected packages: xgboost
Successfully installed xgboost-1.3.3
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle
from sklearn import datasets
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

In [2]:
# reading the features and the labels
data= pd.read_csv('pima-indians-diabetes.csv')

In [3]:
data.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure (mm Hg),Triceps skinfold thickness (mm),2-Hour serum insulin (mu U/ml),Body mass index (weight in kg/(height in m)^2),Diabetes pedigree function,Age,Is Diabetic
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
data.columns

Index(['Number of times pregnant', 'Plasma glucose concentration',
       'Diastolic blood pressure (mm Hg)', 'Triceps skinfold thickness (mm)',
       '2-Hour serum insulin (mu U/ml)',
       'Body mass index (weight in kg/(height in m)^2)',
       'Diabetes pedigree function', 'Age', 'Is Diabetic'],
      dtype='object')

In [5]:
cols = ['Plasma glucose concentration',
       'Diastolic blood pressure (mm Hg)', 'Triceps skinfold thickness (mm)',
       '2-Hour serum insulin (mu U/ml)',
       'Body mass index (weight in kg/(height in m)^2)',
       'Diabetes pedigree function', 'Age']

In [6]:
# as mentioned in the data description, the missing values have been replaced by zeroes. So, we are replacing zeroes with nan
for col in cols:
    data[col]=data[col].replace(0, np.nan)

In [7]:
# checking for missing values
data.isna().sum()

Number of times pregnant                            0
Plasma glucose concentration                        5
Diastolic blood pressure (mm Hg)                   35
Triceps skinfold thickness (mm)                   227
2-Hour serum insulin (mu U/ml)                    374
Body mass index (weight in kg/(height in m)^2)     11
Diabetes pedigree function                          0
Age                                                 0
Is Diabetic                                         0
dtype: int64

In [18]:
# imputing the missing values
data['Plasma glucose concentration']=data['Plasma glucose concentration'].fillna(data['Plasma glucose concentration'].mode()[0])
data['Diastolic blood pressure (mm Hg)']=data['Diastolic blood pressure (mm Hg)'].fillna(data['Diastolic blood pressure (mm Hg)'].mode()[0])
data['Triceps skinfold thickness (mm)']=data['Triceps skinfold thickness (mm)'].fillna(data['Triceps skinfold thickness (mm)'].mean())
data['2-Hour serum insulin (mu U/ml)']=data['2-Hour serum insulin (mu U/ml)'].fillna(data['2-Hour serum insulin (mu U/ml)'].mean())
data['Body mass index (weight in kg/(height in m)^2)']=data['Body mass index (weight in kg/(height in m)^2)'].fillna(data['Body mass index (weight in kg/(height in m)^2)'].mean())

In [19]:
# checking for missing values after imputation
data.isna().sum()

Number of times pregnant                          0
Plasma glucose concentration                      0
Diastolic blood pressure (mm Hg)                  0
Triceps skinfold thickness (mm)                   0
2-Hour serum insulin (mu U/ml)                    0
Body mass index (weight in kg/(height in m)^2)    0
Diabetes pedigree function                        0
Age                                               0
Is Diabetic                                       0
dtype: int64

In [22]:
#Separating the feature and the Label columns 
x=data.drop(columns='Is Diabetic')
y=data['Is Diabetic']

In [23]:
x.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure (mm Hg),Triceps skinfold thickness (mm),2-Hour serum insulin (mu U/ml),Body mass index (weight in kg/(height in m)^2),Diabetes pedigree function,Age
0,6,148.0,72.0,35.0,155.548223,33.6,0.627,50
1,1,85.0,66.0,29.0,155.548223,26.6,0.351,31
2,8,183.0,64.0,29.15342,155.548223,23.3,0.672,32
3,1,89.0,66.0,23.0,94.0,28.1,0.167,21
4,0,137.0,40.0,35.0,168.0,43.1,2.288,33


In [56]:
y.head(8)

0    1
1    0
2    1
3    0
4    1
5    0
6    1
7    0
Name: Is Diabetic, dtype: int64

In [24]:
# as the datapoints differ a lot in magnitude, we'll scale them
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
scaled_data=scaler.fit_transform(x)

In [25]:
from sklearn.model_selection import train_test_split
train_x,test_x,train_y,test_y=train_test_split(scaled_data,y,test_size=0.3,random_state=42)

In [26]:
# fit model no training data
model = XGBClassifier(objective='binary:logistic')
model.fit(train_x, train_y)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=4, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [27]:
# cheking training accuracy
y_pred = model.predict(train_x)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(train_y,predictions)
accuracy

1.0

In [63]:
b=[x for x in scaled_data[0]]

In [64]:
model.predict(np.asarray(b).reshape(1,-1))

array([1], dtype=int64)

In [65]:
# cheking initial test accuracy
y_pred = model.predict(test_x)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(test_y,predictions)
accuracy

0.7272727272727273

In [66]:
test_x[0]

array([ 0.63994726, -0.77251205, -1.18156252,  0.43784695,  0.40547846,
        0.22451019, -0.1264714 ,  0.83038113])

In [67]:
# Now to increae the accuracy of the model, we'll do hyperparameter tuning using grid search

In [68]:
from sklearn.model_selection import GridSearchCV

In [69]:
param_grid={
   
    ' learning_rate':[1,0.5,0.1,0.01,0.001],
    'max_depth': [3,5,10,20],
    'n_estimators':[10,50,100,200]
    
}

In [70]:
grid= GridSearchCV(XGBClassifier(objective='binary:logistic'),param_grid, verbose=3) #objective= loss+regul.

In [71]:
grid.fit(train_x,train_y)

Fitting 5 folds for each of 80 candidates, totalling 400 fits
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=0.852, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=0.750, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s



[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=0.794, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=0.692, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=0.748, total=   0.0s
[CV]  learning_rate=1, max_depth=3

[CV]   learning_rate=1, max_depth=3, n_estimators=100, score=0.701, total=   0.1s
[CV]  learning_rate=1, max_depth=3, n_estimators=100 .................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=3, n_estimators=100, score=0.776, total=   0.1s
[CV]  learning_rate=1, max_depth=3, n_estimators=200 .................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=3, n_estimators=200, score=0.778, total=   0.2s
[CV]  learning_rate=1, max_depth

[CV]   learning_rate=1, max_depth=5, n_estimators=10, score=0.766, total=   0.0s
[CV]  learning_rate=1, max_depth=5, n_estimators=50 ..................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=5, n_estimators=50, score=0.778, total=   0.1s
[CV]  learning_rate=1, max_depth=5, n_estimators=50 ..................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=5, n_estimators=50, score=0.759, total=   0.1s
[CV]  learning_rate=1, max_depth=5,


[CV]   learning_rate=1, max_depth=5, n_estimators=100, score=0.729, total=   0.1s
[CV]  learning_rate=1, max_depth=5, n_estimators=200 .................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=5, n_estimators=200, score=0.778, total=   0.3s
[CV]  learning_rate=1, max_depth=5, n_estimators=200 .................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=5, n_estimators=200, score=0.750, total=   0.2s
[CV]  learning_rate=1, max_dept


[CV]   learning_rate=1, max_depth=10, n_estimators=10, score=0.738, total=   0.0s
[CV]  learning_rate=1, max_depth=10, n_estimators=50 .................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=10, n_estimators=50, score=0.778, total=   0.1s
[CV]  learning_rate=1, max_depth=10, n_estimators=50 .................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=10, n_estimators=50, score=0.713, total=   0.1s
[CV]  learning_rate=1, max_dept

[CV]   learning_rate=1, max_depth=10, n_estimators=100, score=0.710, total=   0.2s
[CV]  learning_rate=1, max_depth=10, n_estimators=200 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=10, n_estimators=200, score=0.796, total=   0.3s
[CV]  learning_rate=1, max_depth=10, n_estimators=200 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=10, n_estimators=200, score=0.731, total=   0.3s
[CV]  learning_rate=1, max_de

[CV]   learning_rate=1, max_depth=20, n_estimators=50, score=0.731, total=   0.1s
[CV]  learning_rate=1, max_depth=20, n_estimators=50 .................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=20, n_estimators=50, score=0.766, total=   0.1s
[CV]  learning_rate=1, max_depth=20, n_estimators=50 .................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=20, n_estimators=50, score=0.710, total=   0.1s
[CV]  learning_rate=1, max_depth

[CV]   learning_rate=1, max_depth=20, n_estimators=200, score=0.731, total=   0.3s
[CV]  learning_rate=1, max_depth=20, n_estimators=200 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=20, n_estimators=200, score=0.766, total=   0.3s
[CV]  learning_rate=1, max_depth=20, n_estimators=200 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=1, max_depth=20, n_estimators=200, score=0.701, total=   0.2s
[CV]  learning_rate=1, max_de

[CV]   learning_rate=0.5, max_depth=3, n_estimators=50, score=0.757, total=   0.1s
[CV]  learning_rate=0.5, max_depth=3, n_estimators=50 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=3, n_estimators=50, score=0.692, total=   0.1s
[CV]  learning_rate=0.5, max_depth=3, n_estimators=50 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=3, n_estimators=50, score=0.776, total=   0.1s
[CV]  learning_rate=0.5, max_

[CV]   learning_rate=0.5, max_depth=3, n_estimators=200, score=0.748, total=   0.2s
[CV]  learning_rate=0.5, max_depth=3, n_estimators=200 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=3, n_estimators=200, score=0.710, total=   0.4s
[CV]  learning_rate=0.5, max_depth=3, n_estimators=200 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=3, n_estimators=200, score=0.757, total=   0.2s
[CV]  learning_rate=0.5, m


[CV]   learning_rate=0.5, max_depth=5, n_estimators=50, score=0.692, total=   0.1s
[CV]  learning_rate=0.5, max_depth=5, n_estimators=100 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=5, n_estimators=100, score=0.778, total=   0.1s
[CV]  learning_rate=0.5, max_depth=5, n_estimators=100 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=5, n_estimators=100, score=0.741, total=   0.1s
[CV]  learning_rate=0.5, m

[CV]   learning_rate=0.5, max_depth=5, n_estimators=200, score=0.729, total=   0.2s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=10 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=10, n_estimators=10, score=0.796, total=   0.0s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=10 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=10, n_estimators=10, score=0.731, total=   0.0s
[CV]  learning_rate=0.5, m

[CV]   learning_rate=0.5, max_depth=10, n_estimators=100, score=0.796, total=   0.2s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=100 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=10, n_estimators=100, score=0.731, total=   0.2s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=100 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=10, n_estimators=100, score=0.757, total=   0.2s
[CV]  learning_rate=0.5

[CV]   learning_rate=0.5, max_depth=20, n_estimators=10, score=0.682, total=   0.0s
[CV]  learning_rate=0.5, max_depth=20, n_estimators=10 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=20, n_estimators=10, score=0.729, total=   0.0s
[CV]  learning_rate=0.5, max_depth=20, n_estimators=50 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=20, n_estimators=50, score=0.806, total=   0.1s
[CV]  learning_rate=0.5, m


[CV]   learning_rate=0.5, max_depth=20, n_estimators=100, score=0.692, total=   0.2s
[CV]  learning_rate=0.5, max_depth=20, n_estimators=100 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=20, n_estimators=100, score=0.738, total=   0.2s
[CV]  learning_rate=0.5, max_depth=20, n_estimators=200 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.5, max_depth=20, n_estimators=200, score=0.787, total=   0.3s
[CV]  learning_rate=0.

[CV]   learning_rate=0.1, max_depth=3, n_estimators=50, score=0.778, total=   0.1s
[CV]  learning_rate=0.1, max_depth=3, n_estimators=50 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=3, n_estimators=50, score=0.713, total=   0.1s
[CV]  learning_rate=0.1, max_depth=3, n_estimators=50 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=3, n_estimators=50, score=0.757, total=   0.1s
[CV]  learning_rate=0.1, max_

[CV]   learning_rate=0.1, max_depth=3, n_estimators=200, score=0.778, total=   0.2s
[CV]  learning_rate=0.1, max_depth=3, n_estimators=200 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=3, n_estimators=200, score=0.769, total=   0.5s
[CV]  learning_rate=0.1, max_depth=3, n_estimators=200 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=3, n_estimators=200, score=0.748, total=   0.2s
[CV]  learning_rate=0.1, m


[CV]   learning_rate=0.1, max_depth=5, n_estimators=50, score=0.778, total=   0.1s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=50 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=5, n_estimators=50, score=0.759, total=   0.1s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=50 ................
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=5, n_estimators=50, score=0.785, total=   0.1s
[CV]  learning_rate=0.1, max

[CV]   learning_rate=0.1, max_depth=5, n_estimators=200, score=0.778, total=   0.2s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=200 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=5, n_estimators=200, score=0.750, total=   0.3s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=200 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=5, n_estimators=200, score=0.785, total=   0.2s
[CV]  learning_rate=0.1, m

[CV]   learning_rate=0.1, max_depth=10, n_estimators=50, score=0.713, total=   0.1s
[CV]  learning_rate=0.1, max_depth=10, n_estimators=50 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=10, n_estimators=50, score=0.785, total=   0.1s
[CV]  learning_rate=0.1, max_depth=10, n_estimators=50 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=10, n_estimators=50, score=0.738, total=   0.1s
[CV]  learning_rate=0.1, m

[CV]   learning_rate=0.1, max_depth=10, n_estimators=200, score=0.731, total=   0.3s
[CV]  learning_rate=0.1, max_depth=10, n_estimators=200 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=10, n_estimators=200, score=0.757, total=   0.3s
[CV]  learning_rate=0.1, max_depth=10, n_estimators=200 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=10, n_estimators=200, score=0.738, total=   0.3s
[CV]  learning_rate=0.1

[CV]   learning_rate=0.1, max_depth=20, n_estimators=50, score=0.766, total=   0.1s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=50 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=20, n_estimators=50, score=0.710, total=   0.1s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=50 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=20, n_estimators=50, score=0.720, total=   0.1s
[CV]  learning_rate=0.1, m

[CV]   learning_rate=0.1, max_depth=20, n_estimators=200, score=0.766, total=   0.3s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=200 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=20, n_estimators=200, score=0.701, total=   0.2s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=200 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.1, max_depth=20, n_estimators=200, score=0.757, total=   0.3s
[CV]  learning_rate=0.0

[CV]   learning_rate=0.01, max_depth=3, n_estimators=50, score=0.692, total=   0.1s
[CV]  learning_rate=0.01, max_depth=3, n_estimators=50 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=3, n_estimators=50, score=0.776, total=   0.1s
[CV]  learning_rate=0.01, max_depth=3, n_estimators=100 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=3, n_estimators=100, score=0.759, total=   0.1s
[CV]  learning_rate=0.01,

[CV]   learning_rate=0.01, max_depth=3, n_estimators=200, score=0.710, total=   0.2s
[CV]  learning_rate=0.01, max_depth=3, n_estimators=200 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=3, n_estimators=200, score=0.757, total=   0.2s
[CV]  learning_rate=0.01, max_depth=5, n_estimators=10 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=5, n_estimators=10, score=0.787, total=   0.0s
[CV]  learning_rate=0.01

[CV]  learning_rate=0.01, max_depth=5, n_estimators=50 ...............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=5, n_estimators=50, score=0.692, total=   0.1s
[CV]  learning_rate=0.01, max_depth=5, n_estimators=100 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=5, n_estimators=100, score=0.778, total=   0.1s
[CV]  learning_rate=0.01, max_depth=5, n_estimators=100 ..............
Parameters: {  learning_rate } might n

[CV]   learning_rate=0.01, max_depth=5, n_estimators=200, score=0.720, total=   0.2s
[CV]  learning_rate=0.01, max_depth=5, n_estimators=200 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=5, n_estimators=200, score=0.729, total=   0.3s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=10 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=10, n_estimators=10, score=0.796, total=   0.0s
[CV]  learning_rate=0.0

[CV]   learning_rate=0.01, max_depth=10, n_estimators=50, score=0.738, total=   0.1s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=50 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=10, n_estimators=50, score=0.738, total=   0.1s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=100 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=10, n_estimators=100, score=0.796, total=   0.2s
[CV]  learning_rate=0.

[CV]   learning_rate=0.01, max_depth=10, n_estimators=200, score=0.738, total=   0.3s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=200 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=10, n_estimators=200, score=0.710, total=   0.3s
[CV]  learning_rate=0.01, max_depth=20, n_estimators=10 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=20, n_estimators=10, score=0.787, total=   0.0s
[CV]  learning_rate=0

[CV]   learning_rate=0.01, max_depth=20, n_estimators=50, score=0.720, total=   0.1s
[CV]  learning_rate=0.01, max_depth=20, n_estimators=100 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=20, n_estimators=100, score=0.778, total=   0.2s
[CV]  learning_rate=0.01, max_depth=20, n_estimators=100 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.01, max_depth=20, n_estimators=100, score=0.731, total=   0.2s
[CV]  learning_rate=0

[CV]   learning_rate=0.01, max_depth=20, n_estimators=200, score=0.757, total=   0.3s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=10 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=3, n_estimators=10, score=0.852, total=   0.0s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=10 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=3, n_estimators=10, score=0.750, total=   0.0s
[CV]  learning_rate=0.

[CV]   learning_rate=0.001, max_depth=3, n_estimators=100, score=0.759, total=   0.3s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=100 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=3, n_estimators=100, score=0.722, total=   0.3s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=100 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=3, n_estimators=100, score=0.738, total=   0.1s
[CV]  learning_rate=


[CV]   learning_rate=0.001, max_depth=5, n_estimators=50, score=0.778, total=   0.1s
[CV]  learning_rate=0.001, max_depth=5, n_estimators=50 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=5, n_estimators=50, score=0.759, total=   0.1s
[CV]  learning_rate=0.001, max_depth=5, n_estimators=50 ..............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=5, n_estimators=50, score=0.785, total=   0.1s
[CV]  learning_rate=0.

[CV]   learning_rate=0.001, max_depth=5, n_estimators=200, score=0.778, total=   0.3s
[CV]  learning_rate=0.001, max_depth=5, n_estimators=200 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=5, n_estimators=200, score=0.750, total=   0.2s
[CV]  learning_rate=0.001, max_depth=5, n_estimators=200 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=5, n_estimators=200, score=0.785, total=   0.2s
[CV]  learning_rate=


[CV]   learning_rate=0.001, max_depth=10, n_estimators=50, score=0.713, total=   0.1s
[CV]  learning_rate=0.001, max_depth=10, n_estimators=50 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=10, n_estimators=50, score=0.785, total=   0.1s
[CV]  learning_rate=0.001, max_depth=10, n_estimators=50 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=10, n_estimators=50, score=0.738, total=   0.1s
[CV]  learning_rate

[CV]   learning_rate=0.001, max_depth=10, n_estimators=200, score=0.731, total=   0.2s
[CV]  learning_rate=0.001, max_depth=10, n_estimators=200 ............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=10, n_estimators=200, score=0.757, total=   0.3s
[CV]  learning_rate=0.001, max_depth=10, n_estimators=200 ............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=10, n_estimators=200, score=0.738, total=   0.2s
[CV]  learning_ra

[CV]   learning_rate=0.001, max_depth=20, n_estimators=50, score=0.731, total=   0.1s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=50 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=20, n_estimators=50, score=0.766, total=   0.1s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=50 .............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=20, n_estimators=50, score=0.710, total=   0.1s
[CV]  learning_rate=

[CV]   learning_rate=0.001, max_depth=20, n_estimators=200, score=0.731, total=   0.3s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=200 ............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=20, n_estimators=200, score=0.766, total=   0.3s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=200 ............
Parameters: {  learning_rate } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV]   learning_rate=0.001, max_depth=20, n_estimators=200, score=0.701, total=   0.3s
[CV]  learning_ra

[Parallel(n_jobs=1)]: Done 400 out of 400 | elapsed:   56.7s finished


GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None,
                                     num_parallel_tree=None, random_state=None,
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=None,
                                     tree_method=None, validate_parameter

In [72]:
# To  find the parameters givingmaximum accuracy
grid.best_params_

{' learning_rate': 1, 'max_depth': 3, 'n_estimators': 10}

In [73]:
# Create new model using the same parameters
new_model=XGBClassifier(learning_rate= 1, max_depth= 5, n_estimators= 50)
new_model.fit(train_x, train_y)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=1, max_delta_step=0, max_depth=5,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=50, n_jobs=4, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [74]:
y_pred_new = new_model.predict(test_x)
predictions_new = [round(value) for value in y_pred_new]
accuracy_new = accuracy_score(test_y,predictions_new)
accuracy_new

0.7445887445887446

In [75]:
# As we have increased the accuracy of the model, we'll save this model

In [76]:
filename = 'xgboost_model1.pickle'
pickle.dump(new_model, open(filename, 'wb'))

loaded_model = pickle.load(open(filename, 'rb'))

In [84]:
# we'll save the scaler object as well for prediction
filename_scaler = 'scaler_model1.pickle'
pickle.dump(scaler, open(filename_scaler, 'wb'))

scaler_model = pickle.load(open(filename_scaler, 'rb'))

In [88]:
# Trying a random prediction
d=scaler_model.transform([[6,148,72,35,80,33.6,0.627,50]])
pred=loaded_model.predict(d)
print('This data belongs to class :',pred[0])

This data belongs to class : 1
