# Run ATS Demo 04 Superslab on NERSC (Cori)

## Before running this notebook

1. You must first **use the nersc_login.ipynb notebook to login to NERSC**, which saves a "NEWT session id" to your home directory (`~/.newt_sessionid.txt`). That file is read in when using this notebook. The session id typically expires every 12-14 days.

2. The first code cell in this notebook sets the working directory. For this to work, you **must restart the kernel each time you run the first code cell**.

In [1]:
import os

# Change working dir to the parent folder so that we can import the local module.
# The following code works for me but might not be portable.
# The guard below makes this cell safe to re-run: working_dir is computed and
# the directory is changed only the first time the cell runs in a kernel
# session, so running it again no longer climbs one more level up the tree.
if 'working_dir' not in globals():
    working_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    os.chdir(working_dir)
print('Working directory {}'.format(working_dir))
from reshpc import nersc_interface as rn


Working directory /home/john/projects/resonant-hpc/git/resonantrpc/dev/jupyter


In [2]:
# Create the NERSC interface object used by every later cell, then log in.
# NOTE(review): presumably login() relies on the NEWT session id saved by
# nersc_login.ipynb (see "Before running this notebook") - confirm in reshpc.
nersc = rn.NerscInterface()
nersc.login()

sending command...


'OK'

In [3]:
# Get path to Cori $SCRATCH folder; the remote paths used later in this
# notebook are built from it.
# The bare name on the last line displays the value as the cell output.
scratch_folder = nersc.get_scratch_folder()
scratch_folder

sending command...


'/global/cscratch1/sd/johnt'

In [4]:
# Make sure root folder for superslab is on Cori.
# The model file and every per-run job folder are placed under this folder.
remote_root_folder = '{}/reshpc/demo04-superslab'.format(scratch_folder)
nersc.make_folder(remote_root_folder)

sending command...


'OK'

In [5]:
# Locate the model file in the local source tree (fail early if it is missing),
# then ask Cori whether a copy is already present in the remote root folder.
model_filename = 'super_slab.exo'
local_model_path = os.path.join(
    working_dir,
    os.pardir,
    'cmb',
    'simulation-workflows',
    'tests',
    'test_demos',
    model_filename,
)
assert os.path.exists(local_model_path), 'model file not found: {}'.format(local_model_path)

# The answer is kept in `exists` so the next cell can decide whether to upload.
remote_model_path = '{}/{}'.format(remote_root_folder, model_filename)
exists = nersc.file_exists(remote_model_path)
print('exists on Cori? {}'.format(exists))

sending command...
exists on Cori? True


In [6]:
# Upload the model file (1.1 MB) only when Cori does not already have it
if exists:
    print('OK')
else:
    nersc.upload_file(local_model_path, remote_root_folder)
    print('Done')

OK


In [7]:
# Generate new job folder, named yymmdd-hhmm from the current local time.
# The code has one-minute resolution, so two runs started within the same
# minute resolve to the same folder name.
import datetime
now = datetime.datetime.now()
datetime_code = now.strftime('%y%m%d-%H%M')

job_folder = '{}/{}'.format(remote_root_folder, datetime_code)
print('job_folder {}'.format(job_folder))

# Create the folder on Cori; the call's return value is the cell output
nersc.make_folder(job_folder)

job_folder /global/cscratch1/sd/johnt/reshpc/demo04-superslab/201026-1545
sending command...


'OK'

In [8]:
# Upload ATS spec file to the job folder.
# The file is expected under the "data" folder of the working directory;
# fail early with a readable message if it is not there.
xml_file = 'demo_04_superslab.xml'
local_file = os.path.join(working_dir, 'data', xml_file)
assert os.path.exists(local_file), 'spec file not found: {}'.format(local_file)

nersc.upload_file(local_file, job_folder)

sending command...


'OK'

In [9]:
# Build the Slurm batch script as text.
# Tunable settings come first, then the script is assembled in three parts:
# shebang, #SBATCH directives, and the shell commands that launch ATS.
ats = '/project/projectdirs/m2398/ideas/ats/install/cori/ats-0.88-basic/RelWithDebInfo/PrgEnv-gnu-6.0.5/bin/ats'
timeout_min = 15
nodes = 2
cores_per_node = 2

sbatch_directives = [
    '#SBATCH --account=m2398',
    '#SBATCH --chdir={}'.format(job_folder),  # run inside the job folder
    '#SBATCH --partition=debug',
    '#SBATCH --time=0:{}:0'.format(timeout_min),
    '#SBATCH --nodes={}'.format(nodes),
    '#SBATCH --tasks-per-node={}'.format(cores_per_node),
    '#SBATCH --constraint=haswell',
]
launch_commands = [
    'ulimit -s unlimited',
    # xml_file is given by name only; the script runs in the job folder
    'srun {} --xml_file={}'.format(ats, xml_file),
]

# The trailing empty entry makes the joined script end with a newline
slurm_commands = ['#!/bin/bash'] + sbatch_directives + launch_commands + ['']
slurm_string = '\n'.join(slurm_commands)
print(slurm_string)

#!/bin/bash
#SBATCH --account=m2398
#SBATCH --chdir=/global/cscratch1/sd/johnt/reshpc/demo04-superslab/201026-1545
#SBATCH --partition=debug
#SBATCH --time=0:15:0
#SBATCH --nodes=2
#SBATCH --tasks-per-node=2
#SBATCH --constraint=haswell
ulimit -s unlimited
srun /project/projectdirs/m2398/ideas/ats/install/cori/ats-0.88-basic/RelWithDebInfo/PrgEnv-gnu-6.0.5/bin/ats --xml_file=demo_04_superslab.xml



In [10]:
# Submit job: hand the Slurm script text and the job folder to NERSC.
# The returned result is printed here; the job-status cell reads its
# 'jobid' entry to poll the job.
result = nersc.submit_job(slurm_string, job_folder)
print(result)

uploading slurm script...
submitting job...
{'status': 'OK', 'error': '', 'jobid': '35534891'}


In [13]:
# Re-run this cell until the job has finished, which generally means its
# state is no longer one of "PD", "R" or "CG".
# All of the Slurm job state codes are listed at https://slurm.schedmd.com/squeue.html#lbAG

job_id = result.get('jobid')
if not job_id:
    # No job id in the submit result means there is nothing to poll
    print('Job was not submitted - go back and check for error in previous cell')
else:
    info = nersc.get_job_info(job_id, verbose=True)
    state = info.get('status')
    label = nersc.job_state_label(state)
    print('Job State: {} ({})'.format(state, label))
    if state in ('PD', 'R', 'CG'):
        print('NOT done => continue checking')

sending command...
Job State: CD (Completed)


In [14]:
# After the job has finished, print the size and name of every visdump
# file found in the job folder on NERSC (size is right-aligned, width 8).
visdump_items = nersc.list_folder(job_folder, glob_pattern='visdump_*')
for item in visdump_items:
    print('{:8}  {}'.format(int(item.get('size', 0)), item.get('name')))

sending command...
    9291  visdump_data.VisIt.xmf
130088040  visdump_data.h5
    4283  visdump_data.h5.0.xmf
    4315  visdump_data.h5.100.xmf
    4315  visdump_data.h5.101.xmf
    4315  visdump_data.h5.102.xmf
    4315  visdump_data.h5.103.xmf
    4315  visdump_data.h5.104.xmf
    4315  visdump_data.h5.105.xmf
    4315  visdump_data.h5.106.xmf
    4315  visdump_data.h5.107.xmf
    4315  visdump_data.h5.108.xmf
    4315  visdump_data.h5.109.xmf
    4299  visdump_data.h5.11.xmf
    4315  visdump_data.h5.110.xmf
    4315  visdump_data.h5.111.xmf
    4315  visdump_data.h5.112.xmf
    4315  visdump_data.h5.113.xmf
    4315  visdump_data.h5.114.xmf
    4315  visdump_data.h5.115.xmf
    4315  visdump_data.h5.116.xmf
    4315  visdump_data.h5.117.xmf
    4315  visdump_data.h5.118.xmf
    4315  visdump_data.h5.119.xmf
    4315  visdump_data.h5.120.xmf
    4315  visdump_data.h5.121.xmf
    4315  visdump_data.h5.122.xmf
    4315  visdump_data.h5.123.xmf
    4315  visdump_data.h5.124.xmf
    43

In [15]:
# Download the visdump files to a local folder
# With the current api, we explicitly generate a tgz file on Cori then download and untar it.
import shutil  # needed for rmtree below; not imported anywhere else in this notebook

tarfile_name = 'visdump.tgz'
nersc_tarfile = '{}/reshpc/{}'.format(scratch_folder, tarfile_name)
nersc.make_tgzfile(job_folder, glob_pattern='visdump_*', tarfile=nersc_tarfile)

# Create local data folder and download tar file.
# Any existing folder for this run is removed first so the download starts clean.
local_folder = os.path.expanduser('~/.reshpc/data/{}'.format(datetime_code))
print('Using local folder {}'.format(local_folder))
if os.path.exists(local_folder):
    shutil.rmtree(local_folder)
os.makedirs(local_folder)
nersc.download_file(nersc_tarfile, local_folder)


sending command...
Using local folder /home/john/.reshpc/data/201026-1545
sending command...


'OK'

In [16]:
# Expand the downloaded tarfile into the local folder
import tarfile

local_tarfile = os.path.join(local_folder, tarfile_name)
# The with-block closes the archive even if extraction fails part way.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# this is acceptable here only because the archive was created by this notebook.
with tarfile.open(local_tarfile) as tar:
    tar.extractall(local_folder)

# Optional
# os.remove(local_tarfile)


In [18]:
# Print the name of every entry now in the local folder, in sorted order
dirs = sorted(os.listdir(local_folder))
for entry in dirs:
    print(entry)


visdump.tgz
visdump_data.VisIt.xmf
visdump_data.h5
visdump_data.h5.0.xmf
visdump_data.h5.100.xmf
visdump_data.h5.101.xmf
visdump_data.h5.102.xmf
visdump_data.h5.103.xmf
visdump_data.h5.104.xmf
visdump_data.h5.105.xmf
visdump_data.h5.106.xmf
visdump_data.h5.107.xmf
visdump_data.h5.108.xmf
visdump_data.h5.109.xmf
visdump_data.h5.11.xmf
visdump_data.h5.110.xmf
visdump_data.h5.111.xmf
visdump_data.h5.112.xmf
visdump_data.h5.113.xmf
visdump_data.h5.114.xmf
visdump_data.h5.115.xmf
visdump_data.h5.116.xmf
visdump_data.h5.117.xmf
visdump_data.h5.118.xmf
visdump_data.h5.119.xmf
visdump_data.h5.120.xmf
visdump_data.h5.121.xmf
visdump_data.h5.122.xmf
visdump_data.h5.123.xmf
visdump_data.h5.124.xmf
visdump_data.h5.125.xmf
visdump_data.h5.126.xmf
visdump_data.h5.127.xmf
visdump_data.h5.128.xmf
visdump_data.h5.129.xmf
visdump_data.h5.13.xmf
visdump_data.h5.130.xmf
visdump_data.h5.131.xmf
visdump_data.h5.132.xmf
visdump_data.h5.133.xmf
visdump_data.h5.134.xmf
visdump_data.h5.135.xmf
visdump_data.h5.1