In [None]:
%%capture
%%bash

# %%capture suppresses the output of this cell; %%bash runs the rest of the
# cell as a bash script.

# Set up the environment

# Load the `unstable`, `python-dev` and `python` environment modules.
module load unstable python-dev python

# Create a virtual environment in ./python-venv and activate it for the
# remainder of this script.
# NOTE(review): the activation only applies to this bash subprocess, not to the
# notebook kernel -- presumably the batch scripts source ./python-venv
# themselves; confirm.
python -m venv ./python-venv
source ./python-venv/bin/activate

# Install psutil into the activated virtual environment.
pip install psutil

In [None]:
# Change the width of the current notebook so that it spans the whole page.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them through `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
%%capture

# Set up the environment

import os
import os.path
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
import matplotlib.ticker as ticker
import subprocess

# Default font size applied to every matplotlib figure created from here on.
plt.rcParams.update({'font.size': '16'})

# Handle on this notebook's global namespace dictionary.
# NOTE(review): presumably used further down to create or look up variables by
# name -- verify against the later cells.
gs = globals()

In [None]:
%%capture

# Submit jobs for the performance monitoring

# You can always call env and check the available environment variables

# CI has already allocated a job for the runner.
# Therefore we take all the relevant info from the already initialized env vars.
ACCOUNT_=!sacct --format=account -j $SLURM_JOBID | tail -n1 | tr -d ' '
ACCOUNT_ = ACCOUNT_[0]

SJP_ = !echo $SLURM_JOB_PARTITION
SJP_ = SJP_[0]

# submitted job ids
JOBS_ = []

# *****************************************************************************
# Strong Scaling
# *****************************************************************************
# --constraint=clx, i.e., Cascade Lake nodes (Intel codename clx)
# --cpus-per-task=2 -> no multithreading, i.e., one process/task every physical core
# --exclusive, i.e., exclusive use of the node
# --mem=0, i.e., use the whole memory of the node
common_strong_scaling_params="--account={} --partition={} --constraint=clx \
    --cpus-per-task=2 --exclusive --mem=0 --output=%x.out strong_scaling.batch".format(ACCOUNT_, SJP_)

'''
_sjob=!sbatch --time=0-08:00:00 --job-name=strong_scaling_32   --nodes=1   --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])

_sjob=!sbatch --time=0-04:00:00 --job-name=strong_scaling_64   --nodes=2   --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])

_sjob=!sbatch --time=0-02:00:00 --job-name=strong_scaling_128  --nodes=4   --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])

_sjob=!sbatch --time=0-01:00:00 --job-name=strong_scaling_256  --nodes=8   --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
'''

_sjob=!sbatch --time=0-00:05:00 --job-name=TEST1  --nodes=1  --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
        
'''
_sjob=!sbatch --time=0-00:15:00 --job-name=strong_scaling_1024 --nodes=32  --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
        
_sjob=!sbatch --time=0-00:10:00 --job-name=strong_scaling_2048 --nodes=64  --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])

_sjob=!sbatch --time=0-00:10:00 --job-name=strong_scaling_4096 --nodes=128 --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
        
_sjob=!sbatch --time=0-00:10:00 --job-name=strong_scaling_8192 --nodes=256 --ntasks-per-node=32 $common_strong_scaling_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
'''
        
# *****************************************************************************
# Caliper
# *****************************************************************************
common_caliper_params="--account={} --partition={} --constraint=clx \
    --cpus-per-task=2 --exclusive --mem=0 --output=%x.out caliper.batch".format(ACCOUNT_, SJP_)

'''
_sjob=!sbatch --time=0-04:00:00 --job-name=strong_scaling_32   --nodes=1   --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])

_sjob=!sbatch --time=0-02:00:00 --job-name=strong_scaling_64   --nodes=2   --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])

_sjob=!sbatch --time=0-01:00:00 --job-name=strong_scaling_128  --nodes=4   --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
'''

_sjob=!sbatch --time=0-00:05:00 --job-name=TEST2  --nodes=1   --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])

'''
_sjob=!sbatch --time=0-00:15:00 --job-name=strong_scaling_512  --nodes=16  --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
        
_sjob=!sbatch --time=0-00:10:00 --job-name=strong_scaling_1024 --nodes=32  --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])

_sjob=!sbatch --time=0-00:10:00 --job-name=strong_scaling_2048 --nodes=64  --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
        
_sjob=!sbatch --time=0-00:10:00 --job-name=strong_scaling_4096 --nodes=128 --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
        
_sjob=!sbatch --time=0-00:10:00 --job-name=strong_scaling_8192 --nodes=256 --ntasks-per-node=32 $common_caliper_params | tail -n 1 | grep -o "job.*" | cut -c 5-
JOBS_.append(_sjob[-1])
'''

In [None]:
%%capture
import time

# Wait until all jobs are finished

print("Waiting for all the jobs to finish ...")

user_ = !echo $SLURM_JOB_USER
print("user_ : ", user_[0])

perf_jobs = 1 # just an initialization
squeue = !squeue -u $SLURM_JOB_USER
while (perf_jobs != 0):
    time.sleep(1 * 60) # wait for 1 minute and check again if the jobs are finished!
    squeue = !squeue -u $SLURM_JOB_USER
    perf_jobs = 0
    for i,row in enumerate(squeue):
        row_ = row.split(" ")
        row_ = [el for el in row_ if el !=""]
        if i == 0:
            for j,name in enumerate(row_):
                if name == 'JOBID':
                    break
            continue
        if row_[j] in JOBS_:
            perf_jobs += 1

print("All the submitted jobs have finished.")
print("Next step: Graph Generation.")