# Benchmark Runner
After the prep work is done you can run the benchmark. First, pick your computer of choice.

In [None]:
import ipywidgets as widgets
# Dropdown listing the machines this notebook knows about; the selected
# entry is read back later through `w.value`.
w = widgets.Dropdown(
    options=["ims", "fugaku", "hokusai", "hokudai", "archer2"],
    description="Computer:",
)
display(w)

Then we have to do some basic configuration.

In [None]:
from remotemanager import Dataset
from remotemanager.transport import rsync, scp

from time import sleep
from os.path import join

from remotemanager import Logger
# Configure remotemanager's logger: presumably the log file location and
# verbosity (here 'debug') — confirm against the remotemanager Logger docs.
Logger.path = 'benchmark_remotemanager_log'
Logger.level = 'debug'

# Machine picked in the dropdown at the time this cell runs; it is appended
# to the database file names of the datasets created below.
computer = w.value

# Local directory holding the input geometries ("<name>.pdb" files).
indir = "inp"
# Number of atoms in a molecule.
natoms = 54

# Small system used for the single-node scan.
geom = "2CzPN_2"

# Large systems for the scaling study, all named "2CzPN_<size>".
geoms = ["2CzPN_" + str(size) for size in (
    14, 18, 22, 26,
    30, 34, 38, 42,
    50, 58, 66, 74,
    82, 90, 98, 106,
    114, 122, 138, 154,
)]

In [None]:
# Load remotemanager's IPython extension; presumably this is what provides the
# %%sanzu / %%sargs cell magics used by the fetch cells below.
%load_ext remotemanager

Set up your computer.

In [None]:
# None acts as a sentinel: a later cell replaces it with rsync(url) when no
# transport has been chosen here.
transport = None
from ims import IMS, IMSLogin
from remotemanager import URL
# `url` is the connection handed to the Datasets that run treat_system.
url = IMS()
url.mpi = 1
url.omp = 1
url.time = 1800  # presumably a wall-time limit in seconds — confirm against the IMS class
url.path_to_bigdft = "/home/users/anp/binaries/gcc"
# Remote working directory passed to every append_run / %%sanzu call below.
remote_dir = join("runs", "2023", "benchmark-technical")

# `furl` is the connection used by the %%sanzu cells that fetch the results.
furl = IMSLogin()
furl.path_to_bigdft = "/home/users/anp/binaries/gcc"

# For scanning the MPI/OMP choices
# Each (mpi, omp) pair multiplies to 128.
mpi_omp = [(4, 32), (8, 16), (16, 8), (32, 4), (64, 2)]

# Parameters for the large calculations
large_mpi = 128
large_omp = 8
# Assigned to url.time before the scaling runs are appended.
large_elapse = 14400

Set the transport to rsync if it was not overridden above.

In [None]:
# Keep a transport chosen in the setup cell; otherwise fall back to rsync.
transport = rsync(url) if transport is None else transport

Validate that the URL is working.

In [None]:
# Issue a harmless `ls` through the URL as a connection check; the result is
# displayed as the cell output.
url.cmd('ls')

A routine that runs a calculation using the correct parameters for benchmarking.

In [None]:
def treat_system(sname, mpi, omp, indir=indir, linear=False):
    """Run one BigDFT calculation of a geometry with the benchmark settings.

    Parameters
    ----------
    sname : str
        Base name of the geometry; the file ``<indir>/<sname>.pdb`` is read.
    mpi, omp : int
        Only used inside this function to build the run name
        ``<sname>_<mpi>_<omp>``.
    indir : str
        Directory containing the PDB files.
    linear : bool
        When true, the "linear" input profile is imported and the
        ``check_sumrho``, ``check_overlap``, ``charge_multipoles`` and
        ``output_mat`` options are set to 0.

    Returns
    -------
    None
        Nothing is returned; the calculation is run for the files it
        produces.
    """
    # All imports are local so that the function body is self-contained.
    from os.path import join
    from BigDFT.Calculators import SystemCalculator
    from BigDFT.Inputfiles import Inputfile
    from BigDFT.IO import read_pdb

    # Geometry
    pdb_path = join(indir, sname + ".pdb")
    with open(pdb_path) as pdb_file:
        system = read_pdb(pdb_file)

    # Input parameters shared by every benchmark run
    params = Inputfile()
    params.set_xc("PBE")
    params.set_hgrid(0.5)
    params.set_rmult(coarse=5.0, fine=7.0)
    params.set_psp_nlcc()

    # Extra settings for the linear scaling mode
    if linear:
        params["import"] = "linear"
        params["perf"] = {"check_sumrho": 0, "check_overlap": 0}
        params["lin_general"] = {"charge_multipoles": 0, "output_mat": 0}

    # The run name encodes the geometry and the MPI/OMP layout
    label = "_".join([sname, str(mpi), str(omp)])

    # NOTE(review): skip=True presumably lets BigDFT reuse an existing
    # result instead of recomputing — confirm against SystemCalculator docs.
    runner = SystemCalculator(skip=True)
    logfile = runner.run(sys=system, input=params, name=label)

## Cubic Single Node Performance
The first benchmark checks the single node performance using a few combinations of threads and mpi ranks.

In [None]:
# Dataset for the single-node scan: it runs treat_system through `url`, lists
# the input directory as an extra file to send, and keeps its state in a
# database file named after the selected machine.
single_ds = Dataset(
    function=treat_system,
    url=url,
    transport=transport,
    extra_files_send=[indir],
    dbfile="single_db_" + computer,
)

Add runs with each parameter.

In [None]:
# One run of the small system per (MPI ranks, OpenMP threads) combination.
for n_mpi, n_omp in mpi_omp:
    run_args = dict(sname=geom, mpi=n_mpi, omp=n_omp)
    single_ds.append_run(arguments=run_args, mpi=n_mpi, omp=n_omp,
                         remote_dir=remote_dir)

In [None]:
# Launch the runs appended to the single-node dataset above.
single_ds.run()

In [None]:
# Poll the single-node dataset every two minutes until all runs are finished.
while True:
    if single_ds.all_finished:
        break
    print('not finished, sleeping')
    sleep(120)

Fetch the time to solution data.

In [None]:
%%sanzu url=furl, transport=transport
%%sanzu remote_dir=remote_dir, dbfile="single_fetch_" + computer 
%%sargs mpi_omp=mpi_omp, sname=geom
# Read the timing file of every single-node run and keep the second entry of
# WFN_OPT -> Classes -> Total (plotted below as the time in seconds).
from yaml import safe_load
data = {}
for pair in mpi_omp:
    label = "_".join([sname, str(pair[0]), str(pair[1])])
    with open("time-" + label + ".yaml") as stream:
        report = safe_load(stream)
    data[label] = {"time": report["WFN_OPT"]["Classes"]["Total"][1]}
data

In [None]:
# Pull the results of the fetch cell back before reading them; without this
# the results list may not be populated on a fresh kernel.
magic_dataset.fetch_results()
# The magic dataset holds a single run, so its result is the first entry.
times = magic_dataset.results[0]

Plot.

In [None]:
from matplotlib import pyplot as plt

fig, axs = plt.subplots(1, 1, figsize=(4, 3))

# Look every run up by its name, in mpi_omp order, so that each point is
# guaranteed to sit above its own tick label even if the fetched dictionary
# comes back in a different key order.
single_times = [times[geom + "_" + str(n_mpi) + "_" + str(n_omp)]["time"]
                for n_mpi, n_omp in mpi_omp]

axs.plot(single_times, 'k.', markersize=14)
axs.set_ylabel("Time (s)", fontsize=14)
axs.set_xticks(range(len(mpi_omp)))
# The trailing semicolon suppresses the repr of the returned tick labels.
axs.set_xticklabels(["MPI:" + str(n_mpi) + "; OMP:" + str(n_omp)
                     for n_mpi, n_omp in mpi_omp], rotation=90);

## Linear Scaling Calculations
We will now switch to the linear scaling mode. We will run calculations of various sizes on a reasonably sized partition, and examine the performance.

In [None]:
# Dataset for the linear scaling runs. The transport is passed explicitly so
# that a transport override chosen in the setup cell is honoured here too.
# NOTE(review): no extra_files_send is given, so the inputs are presumably
# expected to be in remote_dir already — confirm.
LVC_set = Dataset(function=treat_system,
                  url=url,
                  transport=transport,
                  dbfile="scale_db_" + computer)
# The large runs get the longer time limit. `url` is the same object that was
# given to the single-node dataset.
url.time = large_elapse

In [None]:
# One linear scaling run per large system, all on the same MPI/OMP layout.
for system_name in geoms:
    run_args = dict(sname=system_name, mpi=large_mpi, omp=large_omp,
                    linear=True)
    LVC_set.append_run(arguments=run_args, remote_dir=remote_dir,
                       mpi=large_mpi, omp=large_omp)

In [None]:
# Launch the runs appended to the scaling dataset above.
LVC_set.run()

In [None]:
# Wait for the scaling dataset itself. Polling the single-node dataset here
# would return immediately, and the fetch cell below would run before the
# large calculations have finished.
while not LVC_set.all_finished:
    print('not finished, sleeping')
    sleep(120)

Fetch the times.

In [None]:
%%sanzu url=furl, remote_dir=remote_dir
%%sanzu dbfile="scale_fetch_" + computer
%%sargs geoms=geoms, mpi=large_mpi, omp=large_omp
# For every large run, read the timing entry from its "time-" file and the
# peak memory (MB) from its "log-" file; both dictionaries are keyed by the
# run name.
from yaml import safe_load
timing = {}
memory = {}
for g in geoms:
    label = "_".join([g, str(mpi), str(omp)])
    with open("time-" + label + ".yaml") as stream:
        time_report = safe_load(stream)
    timing[label] = {"time": time_report["WFN_OPT"]["Classes"]["Total"][1]}
    with open("log-" + label + ".yaml") as stream:
        log_report = safe_load(stream)
    usage = log_report["Memory Consumption Report"]["Memory occupation"]
    memory[label] = usage["Peak Value (MB)"]
timing, memory

In [None]:
# Retrieve the results of the fetch cell above, then unpack the
# (timing, memory) pair that its last expression produced.
magic_dataset.fetch_results()
times, memory = magic_dataset.results[0]

Plot the time vs. number of atoms.

In [None]:
from matplotlib import pyplot as plt

fig, axs = plt.subplots(2, 1, figsize=(5, 4))

# System size: the numeric suffix of each geometry name times the number of
# atoms per molecule (natoms from the configuration cell, not a literal).
nats = [int(x.split("_")[-1]) * natoms for x in geoms]
# Suffix that turns a geometry name into the run name used as dictionary key.
run_suffix = "_" + str(large_mpi) + "_" + str(large_omp)
# One tick per plotted system, so ticks and labels always have equal length.
ticks = range(len(nats))

# Top panel: wall time in minutes scaled by the core count, per atom.
minutes = [times[x + run_suffix]["time"] / 60 for x in geoms]
axs[0].plot([(m / a) * large_mpi * large_omp
             for m, a in zip(minutes, nats)],
            'k.', markersize=14, label="Time")
axs[0].set_title("CPU Minutes per Atom", fontsize=12)
axs[0].set_xticks(ticks)
axs[0].set_xticklabels(nats, rotation=90)
axs[0].set_yticks(range(12, 22, 2))

# Bottom panel: peak memory, converted from MB to GB.
mem = [memory[x + run_suffix] for x in geoms]
axs[1].plot([m / 1024 for m in mem],
            'k.', markersize=14, label="Memory")
axs[1].set_xlabel("Number of Atoms", fontsize=14)
axs[1].set_title("Peak Memory per Process (GB)", fontsize=12)
axs[1].set_xticks(ticks)
axs[1].set_xticklabels(nats, rotation=90)
axs[1].set_yticks(range(0, 20, 4))

fig.tight_layout()
fig.savefig("benchmark.png", dpi=300)