In [12]:
import os

# set env variables
# All runtime knobs are applied in one update; the commands launched later
# through os.system run with this modified environment.
os.environ.update(
    {
        # there are 56x2x2 = 224 threads available in the CPU
        "OMP_NUM_THREADS": "56",
        # set how many gpus we wish to utilize. should be 1-4, only used when use_gpu = True
        "NUM_GPUS_USED": "4",
        # controls the program behavior when offloading a target region, only relevant when use_gpu = True
        "OMP_TARGET_OFFLOAD": "MANDATORY",
        # maximize SIMD usage in the GPU
        "IGC_ForceOCLSIMDWidth": "16",
    }
)
# Optional diagnostics, left disabled:
# os.environ["LIBOMPTARGET_PLUGIN_PROFILE"] = "T,usec" # enables basic plugin profiling and displays the result when program finishes. can be used to get HtD and DtH timings
# os.environ["LIBOMPTARGET_DEBUG"] = "1" # controls whether debugging information will be displayed from the offload runtime

# set variables (edit these to choose what this cell builds and runs)
use_gpu = True  # should be False for CPU ONLY, and True for usage of GPU offloading
RUN_CLASS = "A"  # should be in S,W,A-D
# 0 = plain run; 3 = vtune hotspots; 4 = advisor survey; 5 = advisor offload
# modeling; 6 = vtune gpu-offload; 7 = vtune gpu-hotspots; 8 = advisor GPU
# roofline (6-8 only take effect when use_gpu is True)
VTUNE_OR_ADV_RUN = 0  # should be 0 or in 3-8
USE_TIMER = 0  # should be 0 or 1

# set constants
# Resolve the home directory via expanduser instead of formatting
# "/home/{USER}": os.environ.get("USER") is None when USER is unset, which
# used to yield the bogus path "/home/None/...".
HOME_DIR = os.path.expanduser("~")
# USER is kept for any later cell that reads it; fall back to the last
# component of the home directory when the variable is missing.
USER = os.environ.get("USER") or os.path.basename(HOME_DIR)
RUN_DIR = "NPB-OMP-GPU" if use_gpu else "NPB-OMP"
RUN_PATH = os.path.join(
    HOME_DIR,
    "Multicore_Processors_and_Embedded_Systems",
    "NAS-OMP_Project",
    RUN_DIR,
)
EXE_PATH = f"./bin/lu.{RUN_CLASS}"  # relative to RUN_PATH
TIMER_FLAG_FILE = "timer.flag"

# output directories of the profiler runs (relative to RUN_PATH)
VTUNE_CPU_HOTSPOTS_DIR = "./vtune_hotspots"
ADV_VEC_DIR = "./adv_vectorization"
ADV_OFFLOAD_DIR = "./adv_offload_model"
VTUNE_GPU_OFFLOAD_DIR = "./vtune_gpu_offload_unopt"
VTUNE_GPU_HOTSPOTS_DIR = "./vtune_gpu_hotspots_unopt"
ADV_GPU_ROOFLINE_DIR = "./adv_gpu_roofline_opt"

# Switch the working directory to the benchmark tree; the relative paths
# used by the rest of this cell are resolved from there.
os.chdir(RUN_PATH)

# set timer flag file
# The flag is toggled by renaming between "<flag>" and "<flag>.off"
# (presumably the benchmark turns its timers on when the flag file exists --
# TODO confirm). os.replace is used instead of shelling out to `mv` so a
# failed rename raises instead of being silently ignored.
if USE_TIMER == 0:
    flag_rename = (TIMER_FLAG_FILE, f"{TIMER_FLAG_FILE}.off")
elif USE_TIMER == 1:
    flag_rename = (f"{TIMER_FLAG_FILE}.off", TIMER_FLAG_FILE)
else:
    flag_rename = None  # any other value leaves the flag file untouched

# Only rename when the source file is actually present.
if flag_rename is not None and os.path.isfile(flag_rename[0]):
    os.replace(*flag_rename)

# compile code
# Stop on a failed build: otherwise a stale binary left in ./bin by an
# earlier build could be run or profiled as if it were current.
build_status = os.system(f"make lu CLASS={RUN_CLASS}")
if build_status != 0:
    raise RuntimeError(
        f"'make lu CLASS={RUN_CLASS}' failed (status {build_status})"
    )

# execute code
# Mode 0 is a plain, unprofiled run. EXE_PATH is used (instead of repeating
# the path literal) so this run and the profiler runs launch the same binary.
if VTUNE_OR_ADV_RUN == 0:
    run_status = os.system(EXE_PATH)
    if run_status != 0:
        # Surface failures that would otherwise go unnoticed.
        print(f"{EXE_PATH} exited with non-zero status {run_status}")

# analysis with vtune and advisor
import shutil

# mode -> (needs GPU build, result directory, command line)
profiler_runs = {
    3: (
        False,
        VTUNE_CPU_HOTSPOTS_DIR,
        f"vtune -collect hotspots -knob sampling-mode=hw --result-dir={VTUNE_CPU_HOTSPOTS_DIR} -- {EXE_PATH}",
    ),
    4: (
        False,
        ADV_VEC_DIR,
        f"advisor --collect=survey --project-dir={ADV_VEC_DIR} -- {EXE_PATH}",
    ),
    5: (
        False,
        ADV_OFFLOAD_DIR,
        f"advisor --collect=offload --config=gen12_tgl --project-dir={ADV_OFFLOAD_DIR} -- {EXE_PATH}",
    ),
    6: (
        True,
        VTUNE_GPU_OFFLOAD_DIR,
        f"vtune -collect gpu-offload  --result-dir={VTUNE_GPU_OFFLOAD_DIR} -- {EXE_PATH}",
    ),
    7: (
        True,
        VTUNE_GPU_HOTSPOTS_DIR,
        f"vtune -collect gpu-hotspots --result-dir={VTUNE_GPU_HOTSPOTS_DIR} -- {EXE_PATH}",
    ),
    8: (
        True,
        ADV_GPU_ROOFLINE_DIR,
        f"advisor --collect=roofline --profile-gpu --search-dir src:r=src --project-dir={ADV_GPU_ROOFLINE_DIR} -- {EXE_PATH}",
    ),
}

if VTUNE_OR_ADV_RUN in profiler_runs:
    needs_gpu, result_dir, profiler_cmd = profiler_runs[VTUNE_OR_ADV_RUN]
    if needs_gpu and not use_gpu:
        # Previously this combination silently did nothing.
        print(f"VTUNE_OR_ADV_RUN={VTUNE_OR_ADV_RUN} requires use_gpu = True; skipping analysis")
    else:
        # Clear the previous result directory before collecting; a missing
        # directory (first run) is not an error.
        shutil.rmtree(result_dir, ignore_errors=True)
        os.system(profiler_cmd)
elif VTUNE_OR_ADV_RUN != 0:
    # 0 is the plain run; every other value not in the table has no
    # analysis attached to it.
    print(f"VTUNE_OR_ADV_RUN={VTUNE_OR_ADV_RUN} is not a supported mode (expected 0 or 3-8); nothing was run")

   =      NAS PARALLEL BENCHMARKS 4.1        =
   =      OpenMP Versions                    =
   =      C++                                =

cd LU; make CLASS=A
make[1]: Entering directory '/home/u1ac0f5875ac3fb97b35b88ea34e5b24/Multicore_Processors_and_Embedded_Systems/NAS-OMP_Project/NPB-OMP-GPU/LU'
make[2]: Entering directory '/home/u1ac0f5875ac3fb97b35b88ea34e5b24/Multicore_Processors_and_Embedded_Systems/NAS-OMP_Project/NPB-OMP-GPU/sys'
make[2]: Nothing to be done for 'all'.
make[2]: Leaving directory '/home/u1ac0f5875ac3fb97b35b88ea34e5b24/Multicore_Processors_and_Embedded_Systems/NAS-OMP_Project/NPB-OMP-GPU/sys'
../sys/setparams lu A
icpx -std=c++14  -c -I../common  -Ofast -qopenmp -xSAPPHIRERAPIDS -ipo   -fopenmp-targets=spir64   lu.cpp
icpx -std=c++14 -Ofast -qopenmp -xSAPPHIRERAPIDS -ipo -fopenmp-targets=spir64   -o ../bin/lu.A lu.o ../common/c_print_results.o ../common/c_timers.o ../common/c_wtime.o 
make[1]: Leaving directory '/home/u1ac0f5875ac3fb97b35b88ea34e5b24/Multico