# Setting up a Ray cluster with SmartSim

## 1. Start the cluster
We set up a SmartSim experiment, which will handle the launch of the Ray cluster.

First we import the relevant modules.

In [1]:
from smartsim import Experiment, slurm
from smartsim.ray import RayCluster

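On a Slurm system, we first need to get an allocation. We request one node more than the number of workers, so that the Ray head node has a node of its own.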

In [2]:
# Number of Ray workers in the cluster (reused below when building the RayCluster).
NUM_WORKERS = 5

# Request one node more than the worker count
# (presumably the extra node hosts the Ray head node — confirm),
# with a time limit of 12:00:00.
total_nodes = NUM_WORKERS + 1
alloc = slurm.get_allocation(nodes=total_nodes, time="12:00:00")

07:04:26 osprey.us.cray.com SmartSim[123519] INFO Allocation successful with Job ID: 230489


In [3]:
# Experiment object that launches the cluster through the Slurm launcher.
exp = Experiment("ray-cluster", launcher="slurm")

# Ray cluster with NUM_WORKERS workers, run inside the allocation obtained above.
cluster = RayCluster(
    name="ray-cluster",
    path="",
    launcher="slurm",
    workers=NUM_WORKERS,
    alloc=alloc,
)

# Generate the cluster's files; overwrite=True presumably replaces the output
# of a previous run of this experiment — confirm against the SmartSim docs.
exp.generate(cluster, overwrite=True)

# block=False: presumably returns once the cluster is launched instead of
# waiting for it to finish, so the following cells can use it — confirm.
exp.start(cluster, block=False, summary=False)

07:04:43 osprey.us.cray.com SmartSim[123519] INFO Working in previously created experiment
07:05:15 osprey.us.cray.com SmartSim[123519] INFO Ray cluster launched on nodes: ['prod-0031', 'prod-0035', 'prod-0033', 'prod-0036', 'prod-0034', 'prod-0032']


## 2. Start the Ray driver script

In [4]:
# Driver script to run on the Ray cluster.
# NOTE(review): this is an absolute, user-specific path — change it to the
# location of templates/ppo_tune.py in your own SmartSim checkout.
driver_script = '/lus/sonexion/arigazzi/smartsim-dev/SmartSim/tutorials/05_starting_ray/templates/ppo_tune.py'
cluster.start_ray_job(driver_script)

## 3. Stop cluster and release allocation

In [7]:
# Stop the Ray cluster started above; the log output shows both the
# worker_nodes and head_node jobs being stopped.
exp.stop(cluster)

06:24:50 osprey.us.cray.com SmartSim[27790] INFO Stopping model worker_nodes with job name worker_nodes-CB6YHA9U2VXG
06:24:53 osprey.us.cray.com SmartSim[27790] INFO Stopping model head_node with job name head_node-CB6YH8AR09GV


In [9]:
# Release the Slurm allocation obtained at the start of the notebook.
slurm.release_allocation(alloc)

05:51:53 osprey.us.cray.com SmartSim[131434] INFO Releasing allocation: 230194
05:51:53 osprey.us.cray.com SmartSim[131434] INFO Successfully freed allocation 230194


In [6]:
# List the jobs currently in the Slurm queue (shell command run from the notebook).
!squeue

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            230066     bdw18 interact  builder  R    1:15:25      8 prod-[0001-0008]
            230213     bdw18 SmartSim arigazzi  R      31:11      6 prod-[0031-0036]
            230364     bdw18 Chpl-dis  chapelu  R       0:40     16 prod-[0037-0052]
