# Remote Compile
In this notebook, we demonstrate the use of `remotemanager` for compiling BigDFT on a remote machine. This is not something you would usually do; working in a shell on the remote machine is sufficient for most use cases. But consider the case where you want to compile with a few different sets of options. In that case, a notebook that performs the different builds in an automated way can be helpful.

First define the computer we want to use.

In [None]:
from spring import SpringLogin
from os.path import join

# Machine definition that is passed as `url=url` to the build cells below.
url = SpringLogin()
# NOTE(review): presumably the name of the conda environment to activate on
# the remote side — confirm against the spring module.
url.conda = "cmake"
# Relative locations; the remote cells join them onto the remote home directory.
install_loc = join("binaries", "rc", "build")
upstream_loc = join("binaries", "rc", "upstream")
# join() with a single component simply yields "devel".
source_loc = join("devel")

In [None]:
%load_ext remotemanager

## Compilation
First we need to download the release version of BigDFT.

In [None]:
%%sanzu url=url
%%sargs sdir = source_loc
from os.path import expanduser, join
from os import system, chdir, getcwd, makedirs

# Resolve the source directory under the home directory and create it when
# missing, so that the chdir below does not fail on a fresh machine.
target = join(expanduser("~"), sdir)
makedirs(target, exist_ok=True)

old_dir = getcwd()
try:
    chdir(target)
    burl = "https://gitlab.com/l_sim/bigdft-suite/-/archive/1.9.4/bigdft-suite-1.9.4.tar.gz"
    # -O pins the output name: without it a re-run stores the download as
    # "<name>.1" and tar would unpack the older (possibly partial) archive.
    # All output is discarded; only the exit status is checked.
    ret = system("wget -O bigdft-suite-1.9.4.tar.gz " + burl + " > /dev/null 2>&1")
    if ret != 0:
        raise Exception("Trouble with wget")
    ret = system("tar -xvf bigdft-suite-1.9.4.tar.gz > /dev/null 2>&1")
    if ret != 0:
        raise Exception("Trouble with untar")
finally:
    # Restore the working directory whether or not the download succeeded.
    chdir(old_dir)

join(expanduser("~"), sdir)

Now we need to create the `buildrc` file, which holds the configuration options for the build.

In [None]:
def env_configuration():
    '''
    Build the configure arguments for the Intel toolchain.

    The source of this function is copied into the generated buildrc, where
    `fcflags` and `algebra_flags` are defined as globals just above it.

    Returns a single string of space separated, double quoted "NAME=value"
    entries.
    '''
    settings = [
        ("FC", "mpiifort"),
        ("CC", "icc"),
        ("CXX", "icpc"),
        # The MKL include directory is appended to the user supplied flags.
        ("FCFLAGS", fcflags + ' -I"${MKLROOT}/include"'),
        ("CXXFLAGS", "-std=c++11"),
        ("LIBS", "-lstdc++"),
        ("--with-ext-linalg", algebra_flags),
    ]

    quoted = []
    for name, value in settings:
        quoted.append("".join(('"', name, "=", value, '"')))
    return " ".join(quoted)

In [None]:
def ntpoly_configuration():
    '''
    Build the cmake options used for NTPoly with the Intel MPI wrappers.

    Reads the global `fcflags`. The prefix path is the "install" directory
    below the working directory at the time of the call.

    Returns a single string of space separated -DNAME="value" options.
    '''
    from os import getcwd
    from os.path import join as join_path

    options = (
        ("CMAKE_Fortran_FLAGS_RELEASE", fcflags),
        ("CMAKE_Fortran_COMPILER", "mpiifort"),
        ("CMAKE_C_COMPILER", "mpiicc"),
        ("CMAKE_CXX_COMPILER", "mpiicpc"),
        ("CMAKE_PREFIX_PATH", join_path(getcwd(), "install")),
    )

    pieces = []
    for name, value in options:
        pieces.append("-D" + name + '="' + value + '"')
    return " ".join(pieces)

In [None]:
def buildrc(flags, algebra_flags):
    """
    Assemble the text of a buildrc file.

    The text defines the globals `fcflags` and `algebra_flags`, embeds the
    source of the currently defined `env_configuration` and
    `ntpoly_configuration` functions, and finally calls them to set
    `autogenargs` and the NTPoly entry of `module_cmakeargs`.

    Parameters
    ----------
    flags : str
        Fortran compiler flags, stored as `fcflags`.
    algebra_flags : str
        Linear algebra link flags, stored as `algebra_flags`.

    Returns
    -------
    str
        The buildrc content, terminated by a newline.
    """
    from inspect import getsource

    pieces = [
        # repr() always yields a valid Python literal, also when a flag
        # contains quotes or backslashes; for ordinary flags the result is
        # the same single quoted string as before.
        "fcflags = " + repr(flags),
        "algebra_flags = " + repr(algebra_flags),
        getsource(env_configuration),
        getsource(ntpoly_configuration),
        "autogenargs = env_configuration()",
        "module_cmakeargs.update({ 'ntpoly': ntpoly_configuration() })",
    ]

    return "\n".join(pieces) + "\n"

Let's write a function that does the building.

In [None]:
from remotemanager import RemoteFunction

@RemoteFunction
def builds(sdir, bdir, udir, buildrc, upstream=False):
    """
    Write a buildrc file and run either the upstream or the main build.

    All directories are given relative to the home directory of the machine
    the function runs on.

    Parameters
    ----------
    sdir : str
        Directory containing the unpacked "bigdft-suite-1.9.4" sources.
    bdir : str
        Directory of the main build (created if missing).
    udir : str
        Directory of the upstream build (created if missing).
    buildrc : str
        Content of the buildrc file.
    upstream : bool
        If True, run jhbuild for "upstream-suite" inside `udir`. Otherwise
        run Installer.py inside `bdir`, with `<udir>/install` registered
        through `extra_prefixes`.

    Raises
    ------
    Exception
        If the build command exits with a non-zero status.
    """
    from os.path import expanduser, join
    from os import makedirs
    from subprocess import call

    # Directory structure
    home = expanduser("~")
    source = join(home, sdir, "bigdft-suite-1.9.4")
    updir = join(home, udir)
    build = join(home, bdir)
    makedirs(build, exist_ok=True)
    makedirs(updir, exist_ok=True)

    # Write the buildrc and select the command for the requested stage.
    if upstream:
        workdir = updir
        with open(join(updir, "buildrc"), "w") as ofile:
            ofile.write(buildrc)
        command = ["python", join(source, "bundler", "jhbuild.py"),
                   "-f", "buildrc", "build", "upstream-suite"]
    else:
        workdir = build
        with open(join(build, "buildrc"), "w") as ofile:
            # repr() gives a valid Python string literal for any path.
            ofile.write("extra_prefixes=[" + repr(join(updir, "install")) + "]\n")
            ofile.write(buildrc)
        command = ["python", join(source, "Installer.py"),
                   "-y", "build", "-a", "no_upstream"]

    # An argument list avoids shell word splitting of the paths, and cwd=
    # runs the command inside the build directory without changing the
    # working directory of this process.
    ret = call(command, cwd=workdir)
    if ret != 0:
        raise Exception("compilation failed")

The upstream dependencies come first, since the main build picks them up from the upstream install directory.

In [None]:
# Link line for the threaded MKL; it is handed to buildrc() together with
# the Fortran flags of the baseline Intel build.
algebra_flags = (
    "-L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 "
    "-lmkl_intel_thread -lmkl_core -liomp5 "
    " -lpthread -lm -ldl"
)
fflags = "-O2 -qopenmp"

In [None]:
%%sanzu url=url
%%sargs idir = install_loc, udir = upstream_loc, sdir = source_loc
%%sargs rc = buildrc(fflags, algebra_flags)
builds(sdir, idir, udir, rc, True)

Now the main BigDFT source.

In [None]:
%%sanzu url=url
%%sargs idir = install_loc, udir = upstream_loc, sdir = source_loc
%%sargs rc = buildrc(fflags, algebra_flags)
builds(sdir, idir, udir, rc, False)

What about a version that uses `O3`? 

In [None]:
# Separate build directory for the -O3 variant; upstream_loc is left
# unchanged, so the next build reuses the existing upstream install.
install_loc = join("binaries", "rc", "build-O3")
fflags = "-O3 -qopenmp"

In [None]:
%%sanzu url=url
%%sargs idir = install_loc, udir = upstream_loc, sdir = source_loc
%%sargs rc = buildrc(fflags, algebra_flags)
builds(sdir, idir, udir, rc, False)

We can also try some processor specific optimizations for the Skylake processor we are targeting.

In [None]:
# Separate build directory for the processor specific variant; the flags are
# the baseline -O2 set plus -xSKYLAKE-AVX512. upstream_loc is left unchanged.
install_loc = join("binaries", "rc", "build-avx")
fflags = "-O2 -qopenmp -xSKYLAKE-AVX512"

In [None]:
%%sanzu url=url
%%sargs idir = install_loc, udir = upstream_loc, sdir = source_loc
%%sargs rc = buildrc(fflags, algebra_flags)
builds(sdir, idir, udir, rc, False)

And interprocedural optimization.

In [None]:
# Separate build directory for the interprocedural optimization variant; the
# flags are the baseline -O2 set plus -ipo. upstream_loc is left unchanged.
install_loc = join("binaries", "rc", "build-ipo")
fflags = "-O2 -qopenmp -ipo"

In [None]:
%%sanzu url=url
%%sargs idir = install_loc, udir = upstream_loc, sdir = source_loc
%%sargs rc = buildrc(fflags, algebra_flags)
builds(sdir, idir, udir, rc, False)

Let's try GCC as well.

In [None]:
url.scl = True

In [None]:
def env_configuration():
    '''
    Build the configure arguments for the GCC toolchain.

    This replaces the earlier Intel definition of the same name. The source
    of this function is copied into the generated buildrc, where `fcflags`
    and `algebra_flags` are defined as globals just above it.

    Returns a single string of space separated, double quoted "NAME=value"
    entries.
    '''
    settings = [
        ("FC", "mpif90"),
        ("CC", "gcc"),
        ("CXX", "g++"),
        # MKL include directory and static libgfortran are appended to the
        # user supplied flags.
        ("FCFLAGS", fcflags + ' -I"${MKLROOT}/include" -static-libgfortran'),
        ("LIBS", "-lstdc++"),
        ("--with-ext-linalg", algebra_flags),
    ]

    quoted = []
    for name, value in settings:
        quoted.append("".join(('"', name, "=", value, '"')))
    return " ".join(quoted)

In [None]:
def ntpoly_configuration():
    '''
    Build the cmake options used for NTPoly with the generic MPI wrappers.

    This replaces the earlier Intel definition of the same name. Reads the
    global `fcflags`. The prefix path is the "install" directory below the
    working directory at the time of the call.

    Returns a single string of space separated -DNAME="value" options.
    '''
    from os import getcwd
    from os.path import join as join_path

    options = (
        ("CMAKE_Fortran_FLAGS_RELEASE", fcflags),
        ("CMAKE_Fortran_COMPILER", "mpif90"),
        ("CMAKE_C_COMPILER", "mpicc"),
        ("CMAKE_CXX_COMPILER", "mpicxx"),
        ("CMAKE_PREFIX_PATH", join_path(getcwd(), "install")),
    )

    pieces = []
    for name, value in options:
        pieces.append("-D" + name + '="' + value + '"')
    return " ".join(pieces)

In [None]:
# The GCC variant gets its own build directory and its own upstream
# directory; the following cell rebuilds upstream there before the main build.
install_loc = join("binaries", "rc", "build-gcc")
upstream_loc = join("binaries", "rc", "upstream-gcc")
# GCC spelling of the options: -fopenmp instead of -qopenmp, and -march
# instead of -x for the Skylake target.
fflags = "-O2 -fopenmp -march=skylake-avx512"

In [None]:
%%sanzu url=url
%%sargs idir = install_loc, udir = upstream_loc, sdir = source_loc
%%sargs rc = buildrc(fflags, algebra_flags)
builds(sdir, idir, udir, rc, True)

In [None]:
%%sanzu url=url
%%sargs idir = install_loc, udir = upstream_loc, sdir = source_loc
%%sargs rc = buildrc(fflags, algebra_flags)
builds(sdir, idir, udir, rc, False)

## Running
Now that we've built several versions, let's try comparing the performance. First, define a computer to use with compute nodes.

In [None]:
from spring import Spring
# Machine definition used for the timing runs (passed as `url=curl` below).
curl = Spring()
# NOTE(review): presumably the conda environment activated for the runs —
# confirm against the spring module.
curl.conda = "thermal_iop"
# NOTE(review): presumably 4 MPI processes with 11 OpenMP threads each —
# confirm against the spring module.
curl.mpi = 4
curl.omp = 11
# NOTE(review): looks like the name of the batch queue — confirm.
curl.queue = "spring1"

Then a function that runs a complete calculation and returns the timings.

In [None]:
@RemoteFunction
def get_times(sname, geom):
    """
    Run a BigDFT calculation and collect its timing breakdown.

    Parameters
    ----------
    sname : str
        Label of the build; the run is named "<sname>_<geom>".
    geom : str
        Base name of the geometry, read from "<geom>.pdb".

    Returns
    -------
    dict
        For every entry under WFN_OPT / Classes of the "time-<run>.yaml"
        file, the element at index 1 of its value (presumably the time in
        seconds — verify against the BigDFT time file format).
    """
    from BigDFT.IO import read_pdb
    from BigDFT.Inputfiles import Inputfile
    from BigDFT.Calculators import SystemCalculator
    from yaml import load, SafeLoader

    # Geometry
    with open(geom + ".pdb") as pdb_file:
        system = read_pdb(pdb_file)

    # Input parameters
    params = Inputfile()
    params.set_xc("PBE")
    params.set_hgrid(0.5)
    params.set_rmult(coarse=5.0, fine=7.0)
    params.set_psp_nlcc()

    # Run; the run name also fixes the name of the time file read below.
    calculator = SystemCalculator(skip=True)
    run_name = sname + "_" + geom
    calculator.run(sys=system, input=params, name=run_name)

    # Extract one number per timing class.
    with open("time-" + run_name + ".yaml") as time_file:
        timings = load(time_file, Loader=SafeLoader)

    per_class = {}
    for label, values in timings["WFN_OPT"]["Classes"].items():
        per_class[label] = values[1]
    return per_class

We can run this using the various builds.

In [None]:
# Container for the timings; note that the comparison cell further down
# creates this dictionary again before filling it.
times = {}
# Base name of the geometry: "<geom>.pdb" is sent along with every run and
# the name is also part of each run name.
geom = "2CzPN_2"

In [None]:
curl.path_to_bigdft = "~/binaries/rc/build"

In [None]:
%%sanzu url=curl, extra_files_send=[geom + ".pdb"]
%%sargs sname = "intel-o2", geom=geom
get_times(sname, geom)

In [None]:
curl.path_to_bigdft = "~/binaries/rc/build-O3"

In [None]:
%%sanzu url=curl, extra_files_send=[geom + ".pdb"]
%%sargs sname = "intel-o3", geom=geom
get_times(sname, geom)

In [None]:
curl.path_to_bigdft = "~/binaries/rc/build-avx"

In [None]:
%%sanzu url=curl, extra_files_send=[geom + ".pdb"]
%%sargs sname = "intel-avx", geom=geom
get_times(sname, geom)

In [None]:
curl.path_to_bigdft = "~/binaries/rc/build-ipo"

In [None]:
%%sanzu url=curl, extra_files_send=[geom + ".pdb"]
%%sargs sname = "intel-ipo", geom=geom
get_times(sname, geom)

In [None]:
curl.path_to_bigdft = "~/binaries/rc/build-gcc"

In [None]:
%%sanzu url=curl, extra_files_send=[geom + ".pdb"]
%%sargs sname = "gcc", geom=geom
get_times(sname, geom)

Compare the results.

In [None]:
# Label each result by build; the indices follow the order in which the
# timing cells above were executed.
times = {
    "Intel/O2": magic_dataset.results[0],
    "Intel/O3": magic_dataset.results[1],
    "Intel/AVX-Skylake": magic_dataset.results[2],
    "Intel/IPO": magic_dataset.results[3],
    "GCC": magic_dataset.results[4],
}

In [None]:
# Build labels sorted by their "Total" time, largest first.
order = sorted(times, key=lambda label: times[label]["Total"], reverse=True)

In [None]:
from matplotlib import pyplot as plt

fig, axs = plt.subplots(figsize=(6, 3))
width = .1

# One shared category order for every build, so that each bar sits on the
# tick that carries its label even if the dictionaries differ in key order.
categories = list(times[order[0]]) if order else []
xvals = list(range(len(categories)))

# Centre the group of bars on each tick for any number of builds
# (this gives -2*width for the five builds compared here).
offset = -width * (len(order) - 1) / 2
for k in order:
    v = times[k]
    # A category missing from one build is drawn as a zero height bar.
    axs.bar([x + offset for x in xvals], [v.get(c, 0.0) for c in categories],
            width, label=k)
    offset += width

# Ticks and labels do not depend on the build, so they are set once.
axs.set_xticks(xvals)
axs.set_xticklabels(categories, rotation=90)
axs.set_ylabel("Time (s)", fontsize=14)
_ = axs.legend()
plt.savefig("compiler.png", dpi=300, bbox_inches="tight")