# Understanding the Dask-cuDF and Dask-XGBoost APIs

### Disable NCCL P2P. Only necessary for versions of NCCL < 2.4

In [None]:
# Set NCCL_P2P_DISABLE=1 in the kernel's environment to turn off NCCL
# peer-to-peer transport. Only needed for NCCL versions older than 2.4.
# NOTE(review): this presumably has to run before the cluster cell so the
# workers see the variable -- confirm.
%env NCCL_P2P_DISABLE=1

### Import necessary modules and initialize the Dask-cuDF Cluster

Use `LocalCUDACluster` from Dask-CUDA to instantiate the single-node cluster

In [None]:
import cudf
import dask
import dask_cudf
import dask_xgboost
import pandas as pd
import numpy as np

from dask.distributed import Client, wait
from dask_cuda import LocalCUDACluster

import subprocess

# Ask the OS for every IP address assigned to this host and bind the
# cluster to the first one reported.
# NOTE: `hostname --all-ip-addresses` is a Linux (net-tools/inetutils) flag.
cmd = "hostname --all-ip-addresses"

# check_output raises CalledProcessError on a non-zero exit status instead of
# silently handing back empty output.
output = subprocess.check_output(cmd.split())
addresses = output.decode().split()
if not addresses:
    raise RuntimeError(
        "'{}' returned no IP addresses; cannot choose an address "
        "for LocalCUDACluster".format(cmd)
    )
IPADDR = addresses[0]

# Start a single-node cluster through LocalCUDACluster and connect a client.
cluster = LocalCUDACluster(ip=IPADDR)
client = Client(cluster)
client

### Initialize a Random Dataset

Use `dask_cudf.DataFrame.query` to split the dataset into training and test sets

In [None]:
size = 1000000
npartitions = 8
seed = 42

# Draw from a dedicated, seeded generator so the dataset -- and therefore the
# split and the RMSE reported at the end -- is identical on every
# Restart & Run All.
rng = np.random.RandomState(seed)

# 'x' is an integer in [0, npartitions); 'y' is standard-normal noise.
pdf = pd.DataFrame({
    'x': rng.randint(0, npartitions, size=size),
    'y': rng.normal(size=size),
})

# Partition the pandas frame on the host, then convert it to dask_cudf.
# The Dask frame gets its own name so `pdf` always refers to the pandas frame.
host_ddf = dask.dataframe.from_pandas(pdf, npartitions=npartitions)
ddf = dask_cudf.from_dask_dataframe(host_ddf)

# Split on 'y'. The two predicates are complementary (< and >=) so that no
# row is silently dropped from both sets.
# NOTE(review): the feature frames keep the 'x' column, which is also the
# target -- confirm this label leakage is intended for the demo.
x_train = ddf.query('y < 0.5')
y_train = x_train[['x']]

x_test = ddf.query('y >= 0.5')
y_test = x_test[['x']]

### Define Parameters and Train with XGBoost

Use `dask_cudf.DataFrame.persist()` to ensure each GPU worker has ownership of its data before training, which gives optimal load balancing

In [None]:
# The number of boosting rounds is an argument of train(), not a booster
# parameter, so it is kept out of `params` (XGBoost would otherwise receive an
# unknown 'num_rounds' key).
num_rounds = 100

# Booster parameters forwarded to XGBoost.
params = {
  'max_depth':    8,
  'max_leaves':   2**8,
  'n_gpus':       1,
  'tree_method':  'gpu_hist',
  'objective':    'reg:squarederror',
  'grow_policy':  'lossguide'
}

# Persist features and labels together, then block until the work has
# finished. persist() returns immediately, so without wait() training could
# start before the data is resident on the workers.
x_train, y_train = dask.persist(x_train, y_train)
wait([x_train, y_train])

bst = dask_xgboost.train(client, params, x_train, y_train, num_boost_round=num_rounds)

### Compute Predictions and the RMSE Metric for the Model

Use `dask.dataframe.multi.concat` to build a `dask_cudf.DataFrame` from `[dask_cudf.Series]` to leverage a cleaner API for computing RMSE

In [None]:
# Run the trained booster over the test features.
pred = dask_xgboost.predict(client, bst, x_test)

# Wrap the predictions in a DataFrame (column label 0) so that the
# per-row error can be attached as an additional column.
test = dask.dataframe.multi.concat([pred], axis=1)

# Per-row difference between the prediction and the target column 'x'.
residual = test[0] - y_test['x']
test['squared_error'] = residual**2

In [None]:
# Calling .compute() on the mean triggers the evaluation of the squared-error
# column; the square root of that mean is the RMSE.
mean_squared_error = test.squared_error.mean().compute()
rmse = np.sqrt(mean_squared_error)
print('rmse value:', rmse)