# A Guided Tour of Ray Core

In [1]:
import ray
import logging
from icecream import ic

# Start the Ray runtime for this notebook session.
# `ignore_reinit_error=True` lets this cell be re-run without raising when Ray
# is already initialised; `logging_level=logging.ERROR` limits Ray's own log
# output to errors. The call is the cell's last expression, so the returned
# connection info is displayed as the cell output.
ray.init(logging_level=logging.ERROR, ignore_reinit_error=True)

{'node_ip_address': '192.168.1.248',
 'raylet_ip_address': '192.168.1.248',
 'redis_address': '192.168.1.248:6379',
 'object_store_address': '/tmp/ray/session_2021-02-23_14-09-02_485431_123184/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-02-23_14-09-02_485431_123184/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2021-02-23_14-09-02_485431_123184',
 'metrics_export_port': 58273,
 'node_id': '1beae4e1662e26609ca4aff57df443dfed1db531139e98e93f1db592'}

In [None]:
import time

# A plain, local Python function -- no Ray involved yet. The same name is
# redefined just below as a Ray remote function.
def my_function():
    """Return the constant 1."""
    return 1

# Decorating a regular Python function with `@ray.remote` turns it into a
# Ray remote function.
@ray.remote
def my_function():
    """Remote variant of `my_function`; its task result is the constant 1."""
    return 1

# to invoke this remote function, use the `remote` method.
# This will immediately return an object ref (a future) and then create
# a task that will be executed on a worker process.
obj_ref = my_function.remote()

# the result can be retrieved with `ray.get`
assert ray.get(obj_ref) == 1

@ray.remote
def slow_function():
    """Remote task that sleeps for ten seconds and then returns 1."""
    delay_seconds = 10
    time.sleep(delay_seconds)
    return 1

# invocations of Ray remote functions happen in parallel, and all computation
# gets performed in the background, driven by Ray's internal event loop
for i in range(4):
    # this does not block.
    # The object ref returned by `remote()` is discarded here, so the results
    # of these four tasks are never fetched in this cell.
    slow_function.remote()
    # `ic` prints the argument expression alongside its value (e.g. `ic| i: 0`).
    ic(i)

## Remote Objects

In [None]:
# put an object in Ray's object store
y = 1
# NOTE(review): `object_ref` is not read again in this cell; the `get` calls
# below use the separate `obj_ref` created by a second `ray.put`.
object_ref = ray.put(y)

# get the value of one object ref
obj_ref = ray.put(1)

# The same ref is fetched twice: once to display the value, once to check it.
ic(ray.get(obj_ref))
assert ray.get(obj_ref) == 1

# get the values of multiple object refs in parallel
# Passing a list of refs returns a list of values in the same order.
assert ray.get([ray.put(i) for i in range(3)]) == [0, 1, 2]

In [None]:
# set a timeout to return early from a `get` that is blocking for too long
from ray.exceptions import GetTimeoutError

@ray.remote
def long_running_function():
    """Remote task that sleeps for five seconds and produces no value."""
    time.sleep(5)
    return None

# Launch the task; `remote()` hands back an object ref right away.
obj_ref = long_running_function.remote()

# Ask for the result with a timeout shorter than the task's 5-second sleep.
# This has to be the first `get` on the ref: a plain blocking `ray.get` before
# it would wait for the task to finish, after which the timed call returns
# immediately and `GetTimeoutError` can never be raised.
try:
    ray.get(obj_ref, timeout=4)
except GetTimeoutError:
    print("`get` timed out")

# A blocking `get` (no timeout) afterwards waits for the task to complete and
# shows its result.
ic(ray.get(obj_ref))

## Remote Classes

In [None]:
@ray.remote
class Counter:
    """Actor holding an integer tally that starts at zero."""

    def __init__(self):
        self.value = 0

    def increment(self):
        """Add one to the tally and return the updated total."""
        self.value = self.value + 1
        return self.value

# create an actor from this class
counter = Counter.remote()

# call the actor
# The method call goes through `.remote()` and yields an object ref.
obj_ref = counter.increment.remote()

# Both `get` calls read the same ref (a single `increment` call), so both
# see the value 1.
ic(ray.get(obj_ref))
assert ray.get(obj_ref) == 1

## Parallel Iterators

In [None]:
import numpy as np

@ray.remote
def train(data_shard):
    """Iterate over ``data_shard`` and print each batch it yields."""
    for minibatch in data_shard:
        # perform model update with batch
        print("train on", minibatch)

# Parallel iterator over range(10), split across 2 shards, grouped into
# batches of 3, with each batch converted to a numpy array.
# `repeat` is left at its default (False). With `repeat=True` the shards cycle
# forever, so the `train` tasks never finish and `ray.get(work)` below would
# block the notebook indefinitely (breaking Restart & Run All).
para_iter = (
    ray.util.iter.from_range(10, num_shards=2)
        .batch(3)
        .for_each(np.array)
)

# One `train` task per shard; block until every task has consumed its shard.
work = [train.remote(shard) for shard in para_iter.shards()]
ray.get(work)

## Multiprocessing Pool

In [None]:
from ray.util.multiprocessing import Pool

def f(index):
    """Identity function: hand back ``index`` unchanged."""
    return index

# `Pool` here is the one imported from `ray.util.multiprocessing` above,
# created with default arguments.
pool = Pool()

# Apply `f` to every item of range(10) through the pool and echo each result.
for result in pool.map(f, range(10)):
    ic(result)

## JobLib

In [2]:
import numpy as np

from sklearn.datasets import load_digits
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

# Load the scikit-learn digits dataset.
digits = load_digits()

# Candidate hyperparameter values for the randomized search: 30 log-spaced
# values each for C, gamma and tol, plus two class_weight options.
param_space = {
    "C": np.logspace(-6, 6, 30),
    "gamma": np.logspace(-8, 8, 30),
    "tol": np.logspace(-4, -1, 30),
    "class_weight": [None, "balanced"],
}

# RBF-kernel SVM wrapped in a randomized search: 5-fold cross-validation over
# 300 sampled candidates (1500 fits in total), with verbose progress output.
# NOTE(review): no `random_state` is passed, so the sampled candidates (and the
# reported best parameters) may differ between runs.
model = SVC(kernel="rbf")
clf = RandomizedSearchCV(model, param_space, cv=5, n_iter=300, verbose=10)

# The trailing `;` suppresses the cell's own output value; `ic` still prints.
ic(clf);

ic| clf: RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=300,
                            param_distributions={'C': array([1.00000000e-06, 2.59294380e-06, 6.72335754e-06, 1.74332882e-05,
                4.52035366e-05, 1.17210230e-04, 3.03919538e-04, 7.88046282e-04,
                2.04335972e-03, 5.29831691e-03, 1.37382380e-02, 3.56224789e-02,
                9.23670857e-02, 2.39502662e-01, 6.21016942e-01, 1.61026203e+00,
                4.17531894e+00, 1.08263673e+...
                2.80721620e+07, 1.00000000e+08]),
                                                 'tol': array([0.0001    , 0.0001269 , 0.00016103, 0.00020434, 0.00025929,
                0.00032903, 0.00041753, 0.00052983, 0.00067234, 0.00085317,
                0.00108264, 0.00137382, 0.00174333, 0.00221222, 0.00280722,
                0.00356225, 0.00452035, 0.00573615, 0.00727895, 0.00923671,
                0.01172102, 0.01487352, 0.01887392, 0.02395027, 0.03039195,
                0.0385662 , 0.04893901, 0.06210

In [3]:
import joblib
from ray.util.joblib import register_ray

# Register Ray as a joblib backend so it can be selected by name below.
register_ray()

# Inside this context joblib uses the "ray" backend, so the cross-validation
# fits launched by `clf.fit` are dispatched through Ray.
with joblib.parallel_backend("ray"):
    # `fit` returns the fitted search object; it is kept as `search` for the
    # following cell.
    search = clf.fit(digits.data, digits.target)
    ic(search)
    ic(search.best_params_)

Fitting 5 folds for each of 300 candidates, totalling 1500 fits
[2m[36m(pid=123301)[0m [CV 3/5; 1/300] START C=72.78953843983146, class_weight=balanced, gamma=13738.23795883261, tol=0.014873521072935119
[2m[36m(pid=123295)[0m [CV 1/5; 1/300] START C=72.78953843983146, class_weight=balanced, gamma=13738.23795883261, tol=0.014873521072935119
[2m[36m(pid=123299)[0m [CV 2/5; 2/300] START C=0.00011721022975334806, class_weight=balanced, gamma=100000000.0, tol=0.004520353656360241
[2m[36m(pid=123296)[0m [CV 3/5; 2/300] START C=0.00011721022975334806, class_weight=balanced, gamma=100000000.0, tol=0.004520353656360241
[2m[36m(pid=123303)[0m [CV 2/5; 1/300] START C=72.78953843983146, class_weight=balanced, gamma=13738.23795883261, tol=0.014873521072935119
[2m[36m(pid=123300)[0m [CV 4/5; 2/300] START C=0.00011721022975334806, class_weight=balanced, gamma=100000000.0, tol=0.004520353656360241
[2m[36m(pid=123304)[0m [CV 3/5; 6/300] START C=1000000.0, class_weight=None, gamma=2

ic| search: RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=300,
                               param_distributions={'C': array([1.00000000e-06, 2.59294380e-06, 6.72335754e-06, 1.74332882e-05,
                   4.52035366e-05, 1.17210230e-04, 3.03919538e-04, 7.88046282e-04,
                   2.04335972e-03, 5.29831691e-03, 1.37382380e-02, 3.56224789e-02,
                   9.23670857e-02, 2.39502662e-01, 6.21016942e-01, 1.61026203e+00,
                   4.17531894e+00, 1.08263673e+...
                   2.80721620e+07, 1.00000000e+08]),
                                                    'tol': array([0.0001    , 0.0001269 , 0.00016103, 0.00020434, 0.00025929,
                   0.00032903, 0.00041753, 0.00052983, 0.00067234, 0.00085317,
                   0.00108264, 0.00137382, 0.00174333, 0.00221222, 0.00280722,
                   0.00356225, 0.00452035, 0.00573615, 0.00727895, 0.00923671,
                   0.01172102, 0.01487352, 0.01887392, 0.02395027, 0.03039195,
           

[2m[36m(pid=123299)[0m [CV 5/5; 300/300] END C=4.520353656360241e-05, class_weight=balanced, gamma=1.6102620275609392e-06, tol=0.02395026619987486; total time=   0.2s


class_weight': None,
                          'gamma': 0.0009236708571873865,
                          'tol': 0.02395026619987486}


In [4]:
# Display the best hyperparameter combination found by the fitted search.
search.best_params_

{'tol': 0.02395026619987486,
 'gamma': 0.0009236708571873865,
 'class_weight': None,
 'C': 1.6102620275609392}