# **Ray: Parallel & Distributed Computing**

In [1]:
%load_ext watermark
%watermark -a 'NavinKumarMNK' -v -p torch

Author: NavinKumarMNK

Python implementation: CPython
Python version       : 3.10.8
IPython version      : 8.8.0

torch: 1.13.1



# **Ray AI Runtime**

In [2]:
import ray
from pprint import PrettyPrinter

pp = PrettyPrinter(indent=4)
pprint = pp.pprint

## **Ray DataSet**

In [3]:
dataset = ray.data.read_csv('https://raw.githubusercontent.com/datasets/breast-cancer/master/data/breast-cancer.csv')

train_dataset, validation_dataset = dataset.train_test_split(test_size=0.2)
validation_dataset, test_dataset = validation_dataset.train_test_split(test_size=0.5)

print(train_dataset)
print(validation_dataset)
print(test_dataset)

2023-02-12 05:56:50,686	INFO worker.py:1538 -- Started a local Ray instance.
Read progress: 100%|████████████████████████████| 1/1 [00:00<00:00, 1138.52it/s]
Read progress: 100%|████████████████████████████| 1/1 [00:00<00:00, 1126.29it/s]

Dataset(num_blocks=1, num_rows=217, schema={age: string, mefalsepause: string, tumor-size: string, inv-falsedes: string, falsede-caps: bool, deg-malig: int64, breast: string, breast-quad: string, irradiat: bool, class: string})
Dataset(num_blocks=1, num_rows=27, schema={age: string, mefalsepause: string, tumor-size: string, inv-falsedes: string, falsede-caps: bool, deg-malig: int64, breast: string, breast-quad: string, irradiat: bool, class: string})
Dataset(num_blocks=1, num_rows=28, schema={age: string, mefalsepause: string, tumor-size: string, inv-falsedes: string, falsede-caps: bool, deg-malig: int64, breast: string, breast-quad: string, irradiat: bool, class: string})





In [6]:
train_dataset.show(1)

{'age': '40-49', 'mefalsepause': 'premefalse', 'tumor-size': '15-19', 'inv-falsedes': '0-2', 'falsede-caps': True, 'deg-malig': 3, 'breast': 'right', 'breast-quad': 'left_up', 'irradiat': False, 'class': 'recurrence-events'}


In [79]:
train_dataset.schema()

age: string
mefalsepause: string
tumor-size: string
inv-falsedes: string
falsede-caps: bool
deg-malig: int64
breast: string
breast-quad: string
irradiat: bool
class: string

In [98]:
from ray.data.preprocessors import *
preprocessor = Chain(
        SimpleImputer(columns=['falsede-caps'], strategy='constant', fill_value=False),
        StandardScaler(columns=["deg-malig"]),
        *(LabelEncoder(label_column=col) for col in ["age", "mefalsepause", "inv-falsedes", "tumor-size", "breast", "breast-quad", "class"]),
    )

In [99]:
print(preprocessor)

Chain(SimpleImputer(columns=['falsede-caps'], strategy='constant', fill_value=False), StandardScaler(columns=['deg-malig']), LabelEncoder(label_column='age'), LabelEncoder(label_column='mefalsepause'), LabelEncoder(label_column='inv-falsedes'), LabelEncoder(label_column='tumor-size'), LabelEncoder(label_column='breast'), LabelEncoder(label_column='breast-quad'), LabelEncoder(label_column='class'))


## **Ray XGBoost Scale-out**

In [100]:
from ray.air.config import ScalingConfig
try:
    from ray.train.xgboost import XGBoostTrainer
except ModuleNotFoundError:
    !pip install xgboost_ray
    from ray.train.xgboost import XGBoostTrainer

In [101]:
trainer = XGBoostTrainer(
                scaling_config=ScalingConfig(
                    num_workers = 2,
                    use_gpu=False,
                    
                ),
                label_column = "class",
                num_boost_round=20,
                params={
                    'objectie' :  'binary:logistics',
                    'eval_metric' : ["logloss", "error"],                   
                    'enable_categorical' : True
                },
                datasets={
                    "train" : train_dataset,
                    "valid" : test_dataset
                },
                preprocessor = preprocessor,
            )
           

In [103]:
model = trainer.fit()
print(model.metrics)

0,1
Current time:,2023-02-12 07:01:58
Running for:,00:00:06.30
Memory:,7.1/15.5 GiB

Trial name,status,loc,iter,total time (s),train-logloss,train-error,valid-logloss
XGBoostTrainer_072df_00000,TERMINATED,192.168.1.7:186380,21,4.33955,0.147306,0.0414747,1.78638


[2m[36m(_RemoteRayXGBoostActor pid=186452)[0m [07:01:57] task [xgboost.ray]:140490353421728 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=186451)[0m [07:01:57] task [xgboost.ray]:139633944096064 got new rank 1


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,train-error,train-logloss,training_iteration,trial_id,valid-error,valid-logloss,warmup_time
XGBoostTrainer_072df_00000,2023-02-12_07-01-58,True,,b86bab33f73746cf93e7e885552a851c,0,NavinKumarMNK,21,192.168.1.7,186380,True,4.33955,0.699057,4.33955,1676165518,0,,0.0414747,0.147306,21,072df_00000,0.25,1.78638,0.00866055


2023-02-12 07:01:59,079	INFO tune.py:762 -- Total run time: 6.41 seconds (6.30 seconds for the tuning loop).


{'train-logloss': 0.14730555501504702, 'train-error': 0.04147465437788019, 'valid-logloss': 1.7863764529382544, 'valid-error': 0.25, 'time_this_iter_s': 0.6990566253662109, 'should_checkpoint': True, 'done': True, 'timesteps_total': None, 'episodes_total': None, 'training_iteration': 21, 'trial_id': '072df_00000', 'experiment_id': 'b86bab33f73746cf93e7e885552a851c', 'date': '2023-02-12_07-01-58', 'timestamp': 1676165518, 'time_total_s': 4.339545965194702, 'pid': 186380, 'hostname': 'NavinKumarMNK', 'node_ip': '192.168.1.7', 'config': {}, 'time_since_restore': 4.339545965194702, 'timesteps_since_restore': 0, 'iterations_since_restore': 21, 'warmup_time': 0.008660554885864258, 'experiment_tag': '0'}


## **Ray Tune**

In [104]:
from ray import tune

param_space = {"params" : {
                "max_depth" : tune.randint(1, 9)
            }}

In [106]:
from ray.tune.tuner import Tuner, TuneConfig
tuner = Tuner (
    trainer, 
    param_space = param_space,
    tune_config = TuneConfig(
        num_samples=5,
        metric="train-logloss",
        mode="min"
    )
)
result_grid = tuner.fit()
best_result = result_grid.get_best_result()

  tuner = Tuner (


0,1
Current time:,2023-02-12 07:02:25
Running for:,00:00:14.74
Memory:,6.7/15.5 GiB

Trial name,status,loc,params/max_depth,iter,total time (s),train-logloss,train-error,valid-logloss
XGBoostTrainer_11b30_00000,TERMINATED,192.168.1.7:186759,2,21,9.02208,0.454106,0.198157,0.551062
XGBoostTrainer_11b30_00001,TERMINATED,192.168.1.7:186923,7,21,8.42697,0.0857333,0.0184332,1.94026
XGBoostTrainer_11b30_00002,TERMINATED,192.168.1.7:186925,1,21,8.63722,0.521009,0.253456,0.531917
XGBoostTrainer_11b30_00003,TERMINATED,192.168.1.7:186927,4,21,9.79514,0.290872,0.119816,0.460679
XGBoostTrainer_11b30_00004,TERMINATED,192.168.1.7:186929,1,21,9.57499,0.521009,0.253456,0.531917


[2m[36m(_RemoteRayXGBoostActor pid=187022)[0m [07:02:16] task [xgboost.ray]:140483418598768 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=187023)[0m [07:02:16] task [xgboost.ray]:139762663550224 got new rank 1


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,train-error,train-logloss,training_iteration,trial_id,valid-error,valid-logloss,warmup_time
XGBoostTrainer_11b30_00000,2023-02-12_07-02-21,True,,735c77eae0304c35a99eaef6e4d23ec0,0_max_depth=2,NavinKumarMNK,21,192.168.1.7,186759,True,9.02208,1.03867,9.02208,1676165541,0,,0.198157,0.454106,21,11b30_00000,0.214286,0.551062,0.00871205
XGBoostTrainer_11b30_00001,2023-02-12_07-02-24,True,,091a35d90c9e4bdf95f027a8fd7fe0b2,1_max_depth=7,NavinKumarMNK,21,192.168.1.7,186923,True,8.42697,0.386513,8.42697,1676165544,0,,0.0184332,0.0857333,21,11b30_00001,0.321429,1.94026,0.0138314
XGBoostTrainer_11b30_00002,2023-02-12_07-02-24,True,,04419bddf8cf49f99b289baece3baa62,2_max_depth=1,NavinKumarMNK,21,192.168.1.7,186925,True,8.63722,0.759419,8.63722,1676165544,0,,0.253456,0.521009,21,11b30_00002,0.214286,0.531917,0.0163476
XGBoostTrainer_11b30_00003,2023-02-12_07-02-25,True,,95ae9526c4134a389c3e2084bb18097d,3_max_depth=4,NavinKumarMNK,21,192.168.1.7,186927,True,9.79514,0.47133,9.79514,1676165545,0,,0.119816,0.290872,21,11b30_00003,0.214286,0.460679,0.010201
XGBoostTrainer_11b30_00004,2023-02-12_07-02-24,True,,5ef80c050aa547d1b71f9f0d681942cf,4_max_depth=1,NavinKumarMNK,21,192.168.1.7,186929,True,9.57499,0.537809,9.57499,1676165544,0,,0.253456,0.521009,21,11b30_00004,0.214286,0.531917,0.0109296


[2m[36m(_RemoteRayXGBoostActor pid=187438)[0m [07:02:22] task [xgboost.ray]:140542966230240 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=187439)[0m [07:02:22] task [xgboost.ray]:140314307374256 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=187907)[0m [07:02:22] task [xgboost.ray]:139997623184560 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=187661)[0m [07:02:22] task [xgboost.ray]:139850901361696 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=187660)[0m [07:02:22] task [xgboost.ray]:140624658345232 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=187857)[0m [07:02:22] task [xgboost.ray]:140256167521600 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=187444)[0m [07:02:23] task [xgboost.ray]:140276604202352 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=187445)[0m [07:02:23] task [xgboost.ray]:140489448418624 got new rank 1
2023-02-12 07:02:25,170	INFO tune.py:762 -- Total run time: 14.85 seconds (14.72 seconds for the tuning loop).


In [107]:
print(best_result)

Result(metrics={'train-logloss': 0.08573327591667683, 'train-error': 0.0184331797235023, 'valid-logloss': 1.9402604209691552, 'valid-error': 0.32142857142857145, 'should_checkpoint': True, 'done': True, 'trial_id': '11b30_00001', 'experiment_tag': '1_max_depth=7'}, error=None, log_dir=PosixPath('/home/mnk/ray_results/XGBoostTrainer_2023-02-12_07-02-10/XGBoostTrainer_11b30_00001_1_max_depth=7_2023-02-12_07-02-12'))


## **Ray Predict**

In [109]:
test_dataset.drop_columns(cols="class")

Map_Batches: 100%|████████████████████████████████| 1/1 [00:00<00:00,  1.17it/s]


VBox(children=(HTML(value='<h2>Dataset</h2>'), Tab(children=(HTML(value='<div class="scrollableTable jp-Render…

In [118]:
from ray.train.batch_predictor import BatchPredictor
from ray.train.xgboost import XGBoostPredictor

checkpoint = best_result.checkpoint
batch_prediction = BatchPredictor.from_checkpoint(checkpoint, XGBoostPredictor)
prediction = batch_prediction.predict(test_dataset)
prediction.show()

Map Progress (1 actors 1 pending): 100%|██████████| 1/1 [00:01<00:00,  1.30s/it]


RayTaskError(XGBoostError): [36mray::BlockWorker.map_block_nosplit()[39m (pid=192538, ip=192.168.1.7, repr=<ray.data._internal.compute.BlockWorker object at 0x7fd64eef08e0>)
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/ray/data/_internal/compute.py", line 315, in map_block_nosplit
    return _map_block_nosplit(
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/ray/data/_internal/compute.py", line 487, in _map_block_nosplit
    for new_block in block_fn(blocks, *fn_args, **fn_kwargs):
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/ray/data/dataset.py", line 595, in transform
    yield from process_next_batch(batch)
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/ray/data/dataset.py", line 577, in process_next_batch
    batch = batch_fn(batch, *fn_args, **fn_kwargs)
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/ray/train/batch_predictor.py", line 266, in __call__
    prediction_output_batch: DataBatchType = self._predictor.predict(
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/ray/train/xgboost/xgboost_predictor.py", line 122, in predict
    return Predictor.predict(
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/ray/train/predictor.py", line 204, in predict
    return self._predict_pandas(data, **kwargs)
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/ray/train/xgboost/xgboost_predictor.py", line 163, in _predict_pandas
    df = pd.DataFrame(self.model.predict(matrix, **predict_kwargs))
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/xgboost/core.py", line 2163, in predict
    _check_call(
  File "/home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/xgboost/core.py", line 279, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [07:20:21] ../src/learner.cc:1510: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (9 vs. 10) : Number of columns does not match number of features in booster.
Stack trace:
  [bt] (0) /home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2c7c79) [0x7fd63f0c7c79]
  [bt] (1) /home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2d32c5) [0x7fd63f0d32c5]
  [bt] (2) /home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2d3368) [0x7fd63f0d3368]
  [bt] (3) /home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2ddb0e) [0x7fd63f0ddb0e]
  [bt] (4) /home/mnk/python3/envs/pytorch/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2ab) [0x7fd63ef3237b]
  [bt] (5) /home/mnk/python3/envs/pytorch/lib/python3.10/lib-dynload/../../libffi.so.8(+0xa052) [0x7fd7d4077052]
  [bt] (6) /home/mnk/python3/envs/pytorch/lib/python3.10/lib-dynload/../../libffi.so.8(+0x88cd) [0x7fd7d40758cd]
  [bt] (7) /home/mnk/python3/envs/pytorch/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x93ef) [0x7fd7d40873ef]
  [bt] (8) /home/mnk/python3/envs/pytorch/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x865a) [0x7fd7d408665a]