In [1]:
############################################################################
##
## Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
##
## NVIDIA Sample Code
##
## Please refer to the NVIDIA end user license agreement (EULA) associated
## with this source code for terms and conditions that govern your use of
## this software. Any use, reproduction, disclosure, or distribution of
## this software and related documentation outside the terms of the EULA
## is strictly prohibited.
##
############################################################################

In this notebook, we train an XGBoost model for fraud detection on the TabFormer features.

In [1]:
import os
import subprocess

import dask_cudf
import cupy as cp
from dask_cuda import LocalCUDACluster
from dask.distributed import Client, wait
from utils import device_mem_size, get_rmm_size

import xgboost as xgb

## Set up paths

In [2]:
# Root folder under which this notebook keeps its data
BASE_DIR = "./basedir"

# Location of the processed dataset (train/test parquet files are read from here)
processed_path = os.path.join(BASE_DIR, "processed_data_1gpu")

# Directory passed to the Dask cluster as its local_directory
dask_space = os.path.join(BASE_DIR, "dask_space")

## Set up Dask Cluster

In [3]:
# Fraction of each GPU's total memory to reserve for the RMM pool (80%)
device_pool_frac = 0.8
device_size = device_mem_size(kind="total")
device_pool_size = int(device_pool_frac * device_size)

# Communication protocol for the cluster; the UCX-only options below are
# added only when this is "ucx".
protocol = "ucx"

# GPUs on which to place workers: honour CUDA_VISIBLE_DEVICES when it is set,
# otherwise fall back to GPUs 0 and 1.
visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0,1")

# Get the IP address. check=True makes a failing `hostname` call raise
# immediately instead of surfacing later as an unrelated IndexError.
cmd = "hostname --all-ip-addresses"
process = subprocess.run(cmd.split(), stdout=subprocess.PIPE, check=True)
addresses = process.stdout.decode().split()
if not addresses:
    raise RuntimeError(f"`{cmd}` returned no IP address; cannot start the Dask cluster")
IPADDR = addresses[0]

# Options shared by every protocol
cluster_kwargs = dict(
    ip=IPADDR,
    protocol=protocol,
    CUDA_VISIBLE_DEVICES=visible_devices,
    rmm_pool_size=get_rmm_size(device_pool_size),
    local_directory=dask_space,
    # NOTE(review): presumably a fraction of device memory — confirm against dask-cuda docs
    device_memory_limit=0.8,
)

# Transport options that are only passed when running over UCX
if protocol == "ucx":
    cluster_kwargs.update(enable_tcp_over_ucx=True, enable_nvlink=True)

cluster = LocalCUDACluster(**cluster_kwargs)

# Create the distributed client; the bare expression on the last line renders
# the cluster summary as the cell output.
client = Client(cluster)
client

distributed.preloading - INFO - Import preload module: dask_cuda.initialize
distributed.preloading - INFO - Import preload module: dask_cuda.initialize


0,1
Connection method: Cluster object,Cluster type: LocalCUDACluster
Dashboard: http://172.17.0.3:8787/status,

0,1
Status: running,Using processes: True
Dashboard: http://172.17.0.3:8787/status,Workers: 2
Total threads:  2,Total memory:  251.82 GiB

0,1
Comm: ucx://172.17.0.3:52139,Workers: 2
Dashboard: http://172.17.0.3:8787/status,Total threads:  2
Started:  Just now,Total memory:  251.82 GiB

0,1
Comm: ucx://172.17.0.3:59561,Total threads: 1
Dashboard: http://172.17.0.3:35733/status,Memory: 125.91 GiB
Nanny: ucx://172.17.0.3:54095,
Local directory: /workspace/basedir/dask_space/dask-worker-space/worker-jfr5obrg,Local directory: /workspace/basedir/dask_space/dask-worker-space/worker-jfr5obrg
GPU: Tesla V100-DGXS-32GB,GPU memory: 31.75 GiB

0,1
Comm: ucx://172.17.0.3:37657,Total threads: 1
Dashboard: http://172.17.0.3:37729/status,Memory: 125.91 GiB
Nanny: ucx://172.17.0.3:52437,
Local directory: /workspace/basedir/dask_space/dask-worker-space/worker-rhbtqtng,Local directory: /workspace/basedir/dask_space/dask-worker-space/worker-rhbtqtng
GPU: Tesla V100-DGXS-32GB,GPU memory: 31.75 GiB


In [4]:
# Open the train and test splits from the processed-data folder with dask_cudf
X_train, X_test = (
    dask_cudf.read_parquet(os.path.join(processed_path, fname))
    for fname in ('X_train.parquet', 'X_test.parquet')
)

In [5]:
# Pull the target column out of each split first ...
y_train, y_test = X_train['label'], X_test['label']

# ... then keep every column except the target as the feature set.
# NOTE(review): Index.difference() typically returns the names sorted, so the
# feature column order may differ from the parquet files — confirm if order matters.
X_train, X_test = (
    frame[frame.columns.difference(['label'])]
    for frame in (X_train, X_test)
)

In [6]:
# Remove the card_id and merchant_id columns from both feature sets
X_train, X_test = (
    frame.drop(columns=['card_id', 'merchant_id'])
    for frame in (X_train, X_test)
)

In [7]:
# Ask the cluster to persist all four collections, then rebind the names to
# the persisted versions.
persisted = client.persist([X_train, X_test, y_train, y_test])
X_train, X_test, y_train, y_test = persisted

# Block until the persisted collections have finished computing
# (trailing ';' suppresses the cell output).
wait(persisted);

In [8]:
# These hyper-parameters are a starting point, not a tuned configuration —
# feel free to experiment!
# NOTE(review): 'gpu_hist' / 'gpu_predictor' are the GPU settings of older
# XGBoost releases; confirm they are still accepted by the installed version.
xgb_params = dict(
    max_depth=3,
    learning_rate=0.1,
    gamma=0.1,
    tree_method='gpu_hist',
    objective='binary:logistic',
    eval_metric='aucpr',
    predictor='gpu_predictor',
)

In [9]:
# Wrap the features and labels of each split in a DaskDMatrix
dtrain = xgb.dask.DaskDMatrix(client, data=X_train, label=y_train)
dtest = xgb.dask.DaskDMatrix(client, data=X_test, label=y_test)

In [10]:
# Number of boosting rounds; passed to xgb.dask.train as num_boost_round
n_rounds: int = 100

In [11]:
# Train on the training DaskDMatrix for n_rounds boosting rounds.
# NOTE(review): per the XGBoost Dask docs the return value is a dict holding
# the booster and the evaluation history rather than a bare Booster — confirm.
model = xgb.dask.train(
    client,
    xgb_params,
    dtrain,
    num_boost_round=n_rounds,
)

[23:24:55] task [xgboost.dask]:ucx://172.17.0.3:59561 got new rank 0
[23:24:55] task [xgboost.dask]:ucx://172.17.0.3:37657 got new rank 1


## Predictions and assessment

In [12]:
from sklearn.metrics import average_precision_score

# Make predictions on the test set using our trained model
y_score = xgb.dask.predict(client, model, dtest).compute()

# Materialise the integer test labels and copy them to a host NumPy array.
# `values_host` replaces `Series.to_array()`, which is deprecated and no longer
# available in recent cuDF releases.
y_true = y_test.astype(int).compute().values_host

# Average precision summarises the precision-recall curve of the test scores
ap = average_precision_score(y_true, y_score)
print(f'Test Area Under Precision Recall Curve: {ap: 0.4f}')

Test Area Under Precision Recall Curve:  0.6933


## Important Cleanup

Run the code below to shut down the running Dask cluster and free GPU resources.

In [13]:
# Close the Dask client that was created for the local CUDA cluster
client.close()

In [14]:
# Shut down the Dask cluster so its GPU resources are released.
# NOTE(review): the preceding cell has already called client.close(); confirm
# shutdown() still takes effect on a closed client — otherwise run it before close().
client.shutdown()

**NOTE: Also please restart the kernel before moving on to the next notebook.**