From 63c2ae4b030536e765fb19b4f0291d720ec8a0e9 Mon Sep 17 00:00:00 2001 From: Ansh Gupta <38015438+anshgupta1234@users.noreply.github.com> Date: Fri, 31 Mar 2023 15:15:20 -0400 Subject: [PATCH] Add docs for Nsight Update running.md Update running.md Co-Authored-By: Henry Le Berre --- docs/documentation/running.md | 6 ++++++ toolchain/mfc/args.py | 8 ++++---- toolchain/mfc/run/engines.py | 8 ++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/docs/documentation/running.md b/docs/documentation/running.md index a029c77aca..90ece94502 100644 --- a/docs/documentation/running.md +++ b/docs/documentation/running.md @@ -89,6 +89,12 @@ modified by users. **Disclaimer**: IBM's JSRUN on LSF-managed computers does not use the traditional node-based approach to allocate resources. Therefore, the MFC constructs equivalent resource-sets in task and GPU count. +### Profiling with NVIDIA Nsight + +MFC provides two different arguments to facilitate profiling with NVIDIA Nsight. **Please ensure that the chosen argument is placed at the end of the command so that its respective flags can be appended.** +- Nsight Systems (Nsys): `./mfc.sh run ... --nsys [nsys flags]` allows one to visualize MFC's system-wide performance with [NVIDIA Nsight Systems](https://developer.nvidia.com/nsight-systems). NSys is best for getting a general understanding of the order and execution times of major subroutines (WENO, Riemann, etc.) in MFC. When used, `--nsys` will run the simulation and generate `.nsys-rep` files in the case directory for all targets. These files can then be imported into Nsight Systems' GUI, which can be downloaded [here](https://developer.nvidia.com/nsight-systems/get-started#latest-Platforms). It is best to run case files with a few timesteps so that the report files remain small. Learn more about NVIDIA Nsight Systems [here](https://docs.nvidia.com/nsight-systems/UserGuide/index.html). +- Nsight Compute (NCU): `./mfc.sh run ... 
--ncu [ncu flags]` allows one to conduct kernel-level profiling with [NVIDIA Nsight Compute](https://developer.nvidia.com/nsight-compute). NCU provides profiling information for every subroutine called and is more detailed than NSys. When used, `--ncu` will output profiling information for all subroutines, including elapsed clock cycles, memory used, and more after the simulation is run. Please note that adding this argument will significantly slow down the simulation and should only be used on case files with a few timesteps. Learn more about NVIDIA Nsight Compute [here](https://docs.nvidia.com/nsight-compute/NsightCompute/index.html). + ### Restarting Cases When running a simulation, MFC generates a `./restart_data` folder in the case directory that contains `lustre_*.dat` files that can be used to restart a simulation from saved timesteps. This allows a user to run a simulation to some timestep $X$, then later continue it to run to another timestep $Y$, where $Y > X$. The user can also choose to add new patches at the intermediate timestep. 
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index ccbea11887..253346c099 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -69,7 +69,7 @@ def add_common_arguments(p, mask = None): test.add_argument("-o", "--only", nargs="+", type=str, default=[], metavar="L", help="Only run tests with UUIDs or hashes L.") test.add_argument("-b", "--binary", choices=binaries, type=str, default=None, help="(Serial) Override MPI execution binary") test.add_argument("-r", "--relentless", action="store_true", default=False, help="Run all tests, even if multiple fail.") - test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.") + test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.") test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") # === RUN === @@ -86,11 +86,11 @@ def add_common_arguments(p, mask = None): run.add_argument("-a", "--account", metavar="ACCOUNT", type=str, default="", help="(Batch) Account to charge.") run.add_argument("-@", "--email", metavar="EMAIL", type=str, default="", help="(Batch) Email for job notification.") run.add_argument("-#", "--name", metavar="NAME", type=str, default="MFC", help="(Batch) Job name.") - run.add_argument("-f", "--flags", metavar="FLAGS", nargs="+", type=str, default=[], help="(Batch) Additional batch options.") + run.add_argument("-f", "--flags", metavar="FLAGS", nargs='+', type=str, default=[], help="(Batch) Additional batch options.") run.add_argument("-b", "--binary", choices=binaries, type=str, default=None, help="(Interactive) Override MPI execution binary") run.add_argument("-s", "--scratch", action="store_true", default=False, help="Build from scratch.") - run.add_argument("--ncu", action="store_true", default=False, help="Profile with NVIDIA Nsight Compute.") - 
run.add_argument("--nsys", action="store_true", default=False, help="Profile with NVIDIA Nsight Systems.") + run.add_argument("--ncu", nargs=argparse.REMAINDER, type=str, help="Profile with NVIDIA Nsight Compute.") + run.add_argument("--nsys", nargs=argparse.REMAINDER, type=str, help="Profile with NVIDIA Nsight Systems.") run.add_argument( "--dry-run", action="store_true", default=False, help="(Batch) Run without submitting batch file.") run.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") run.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") diff --git a/toolchain/mfc/run/engines.py b/toolchain/mfc/run/engines.py index 2e9b0e66b8..8ecfc5e3d5 100644 --- a/toolchain/mfc/run/engines.py +++ b/toolchain/mfc/run/engines.py @@ -8,18 +8,18 @@ def profiler_prepend(): - if ARG("ncu"): + if ARG("ncu") is not None: if not common.does_command_exist("ncu"): raise common.MFCException("Failed to locate [bold green]NVIDIA Nsight Compute[/bold green] (ncu).") return ["ncu", "--nvtx", "--mode=launch-and-attach", - "--cache-control=none", "--clock-control=none"] + "--cache-control=none", "--clock-control=none"] + ARG("ncu") - if ARG("nsys"): + if ARG("nsys") is not None: if not common.does_command_exist("nsys"): raise common.MFCException("Failed to locate [bold green]NVIDIA Nsight Systems[/bold green] (nsys).") - return ["nsys", "profile", "--stats=true", "--trace=mpi,nvtx,openacc"] + return ["nsys", "profile", "--stats=true", "--trace=mpi,nvtx,openacc"] + ARG("nsys") return []