### This example is a notebook version of the XGBoost GPU acceleration demo: https://github.com/dmlc/xgboost/blob/release_0.82/demo/gpu_acceleration/cover_type.py

### Import xgboost and other required modules

In [None]:
import xgboost as xgb
import numpy as np
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
import time

### Check the installed version of xgboost

In [None]:
# Display the version string of the imported xgboost package.
xgb.__version__

### Fetch dataset using sklearn

In [None]:
# Load the covertype dataset through scikit-learn, then pull out the
# feature matrix (X) and the label vector (y).
cov = fetch_covtype()
X, y = cov.data, cov.target

### Create 0.75/0.25 train/test split

In [None]:
# Keep 75% of the samples for training and hold out 25% for evaluation;
# the fixed random_state makes the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    test_size=0.25,
    random_state=42,
)

### Convert input data from numpy to XGBoost format

In [None]:
# Wrap each split (features plus labels) in an xgb.DMatrix.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

### Specify sufficient boosting iterations to reach desirable accuracy

In [None]:
# Number of boosting rounds passed to xgb.train in the training cells.
# To train a model with higher accuracy, num_round has to be increased (e.g. 3000),
# as shown here: https://github.com/dmlc/xgboost/blob/bf3241368256ddd010d30d98ffc8a0a005f166e9/demo/gpu_acceleration/cover_type.py#L17
# but that takes a very long time to train on a CPU.
# num_round is therefore reduced to a very low value so that CPU training completes in a reasonable time.
num_round = 20

### Set parameters to use one GPU (leave most other parameters as default)

In [None]:
# Training parameters for the single-GPU run; anything not listed here is
# left at the XGBoost default.
# NOTE(review): covertype labels are presumably 1..7, which would explain
# num_class=8 (label 0 unused) -- confirm against the dataset.
# NOTE(review): 'gpu_hist' and 'n_gpus' look specific to older XGBoost
# releases (this notebook follows release_0.82) -- confirm against the
# installed version before upgrading.
param_gpu = dict(
    objective='multi:softmax',  # multiclass classification
    num_class=8,                # number of possible output classes
    tree_method='gpu_hist',     # GPU accelerated algorithm
    n_gpus=1,                   # use one GPU
    gpu_id=0,                   # use the GPU with device-id 0
)

### Train model (using GPU) 

In [None]:
# Train with the GPU parameters and report how long training took.
# gpu_res is handed to xgb.train as evals_result so the evaluation history
# for the 'test' set is stored in it.
gpu_res = {}
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the system clock, which can be adjusted
# while training runs and skew the reported duration.
tmp = time.perf_counter()
xgb.train(param_gpu, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
print("GPU Training Time: %s seconds" % (time.perf_counter() - tmp))

### Set parameters to use CPUs (leave most other parameters as default)


In [None]:
# Training parameters for the CPU run; anything not listed here is left at
# the XGBoost default.
# NOTE(review): covertype labels are presumably 1..7, which would explain
# num_class=8 (label 0 unused) -- confirm against the dataset.
param_cpus = dict(
    objective='multi:softmax',  # multiclass classification
    num_class=8,                # number of possible output classes
    tree_method='hist',         # CPU accelerated algorithm
)

### Train model using CPU algorithm

In [None]:
# Train with the CPU parameters and report how long training took.
# cpu_res is handed to xgb.train as evals_result so the evaluation history
# for the 'test' set is stored in it.
cpu_res = {}
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the system clock, which can be adjusted
# while training runs and skew the reported duration.
tmp = time.perf_counter()
xgb.train(param_cpus, dtrain, num_round, evals=[(dtest, 'test')], evals_result=cpu_res)
print("CPU Training Time: %s seconds" % (time.perf_counter() - tmp))