From 1bfa31202ae585e72f711259d3d8a3888317f86d Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Sun, 21 Sep 2025 23:42:34 -0400 Subject: [PATCH] update all the Makefiles to replace the deprecated nvprof with modern NVIDIA Nsight tools. --- modules/module1/examples/Makefile | 2 +- modules/module2/examples/Makefile | 6 +++--- modules/module3/examples/Makefile | 2 +- modules/module4/examples/Makefile | 8 ++++---- modules/module5/examples/Makefile | 11 ++++++----- modules/module6/examples/Makefile | 2 +- modules/module7/examples/Makefile | 2 +- modules/module8/examples/Makefile | 2 +- modules/module9/examples/Makefile | 2 +- 9 files changed, 19 insertions(+), 18 deletions(-) diff --git a/modules/module1/examples/Makefile b/modules/module1/examples/Makefile index 799301a..fe99f79 100644 --- a/modules/module1/examples/Makefile +++ b/modules/module1/examples/Makefile @@ -206,7 +206,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif diff --git a/modules/module2/examples/Makefile b/modules/module2/examples/Makefile index fa5edf4..e563026 100644 --- a/modules/module2/examples/Makefile +++ b/modules/module2/examples/Makefile @@ -159,7 +159,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif @@ -260,8 +260,8 @@ profile_memory: cuda @echo " ncu --metrics l1tex__throughput.avg.pct_of_peak_sustained_elapsed
./02_memory_coalescing_cuda" @echo " ncu --metrics dram__bytes_read.sum,dram__bytes_write.sum ./05_memory_bandwidth_optimization_cuda" @echo "" - @echo "Legacy nvprof (if available):" - @echo " nvprof --metrics achieved_occupancy,gld_efficiency,gst_efficiency ./03_texture_memory_cuda" + @echo "Modern NVIDIA Nsight Systems:" + @echo " nsys profile --cuda-event-trace=false -o profile.nsys-rep ./03_texture_memory_cuda" diff --git a/modules/module3/examples/Makefile b/modules/module3/examples/Makefile index 81a1f24..43893ad 100644 --- a/modules/module3/examples/Makefile +++ b/modules/module3/examples/Makefile @@ -164,7 +164,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif diff --git a/modules/module4/examples/Makefile b/modules/module4/examples/Makefile index f020cbe..53e3863 100644 --- a/modules/module4/examples/Makefile +++ b/modules/module4/examples/Makefile @@ -173,7 +173,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif @@ -419,9 +419,9 @@ profile_examples: all @echo " nsys profile --trace=cuda,nvtx --stats=true ./02_multi_gpu_programming" @echo " nsys profile --trace=cuda,nvtx,osrt --stats=true ./03_unified_memory" @echo "" - @echo "Legacy nvprof:" - @echo " nvprof --print-gpu-trace ./01_cuda_streams_basics" - @echo " nvprof --print-api-trace ./02_multi_gpu_programming" + @echo "Modern NVIDIA
Nsight Systems:" + @echo " nsys profile --cuda-event-trace=false -o trace.nsys-rep ./01_cuda_streams_basics" + @echo " nsys profile --cuda-event-trace=false -o multi_gpu.nsys-rep ./02_multi_gpu_programming" @echo "" @echo "Multi-GPU Analysis:" @echo " nsys profile --trace=cuda,nvtx --stats=true -o multi_gpu_trace ./02_multi_gpu_programming" diff --git a/modules/module5/examples/Makefile b/modules/module5/examples/Makefile index b495e4d..6f83c3e 100644 --- a/modules/module5/examples/Makefile +++ b/modules/module5/examples/Makefile @@ -57,7 +57,7 @@ HIP_DEBUG_FLAGS += --offload-arch=$(GPU_ARCH) CXX_FLAGS = -std=c++17 -O3 -fopenmp # Profiling flags -NVPROF_FLAGS = --print-gpu-trace --log-file %s.nvprof +NSYS_FLAGS = --cuda-event-trace=false --force-overwrite=true ROCPROF_FLAGS = --hip-trace --stats --output-file %s.csv # Directories @@ -144,7 +144,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif @@ -171,11 +171,12 @@ run: all # Performance profiling targets .PHONY: profile-cuda profile-cuda: $(CUDA_TARGETS) - @echo "Profiling CUDA examples with nvprof..." + @echo "Profiling CUDA examples with nsys..."
+ @mkdir -p $(PROFILE_DIR) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof $(NVPROF_FLAGS) $$target > $(PROFILE_DIR)/$$(basename $$target).nvprof 2>&1; \ + nsys profile $(NSYS_FLAGS) -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target > $(PROFILE_DIR)/$$(basename $$target).nsys.log 2>&1; \ fi; \ done @@ -330,7 +331,7 @@ help: @echo " validate - Validate optimization correctness" @echo "" @echo "Profiling Targets:" - @echo " profile-cuda - Profile CUDA examples with nvprof" + @echo " profile-cuda - Profile CUDA examples with nsys" @echo " profile-hip - Profile HIP examples with rocprof" @echo " profile-detailed-cuda - Detailed profiling with Nsight Compute" @echo " memcheck-cuda - Run CUDA memory checker" diff --git a/modules/module6/examples/Makefile b/modules/module6/examples/Makefile index 393c985..c91787f 100644 --- a/modules/module6/examples/Makefile +++ b/modules/module6/examples/Makefile @@ -142,7 +142,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif diff --git a/modules/module7/examples/Makefile b/modules/module7/examples/Makefile index 95c778c..48c803c 100644 --- a/modules/module7/examples/Makefile +++ b/modules/module7/examples/Makefile @@ -150,7 +150,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif diff --git
a/modules/module8/examples/Makefile b/modules/module8/examples/Makefile index 0ef37ab..12244a2 100644 --- a/modules/module8/examples/Makefile +++ b/modules/module8/examples/Makefile @@ -207,7 +207,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif diff --git a/modules/module9/examples/Makefile b/modules/module9/examples/Makefile index 8fa458a..48f397b 100644 --- a/modules/module9/examples/Makefile +++ b/modules/module9/examples/Makefile @@ -222,7 +222,7 @@ ifeq ($(BUILD_CUDA),1) @for target in $(CUDA_TARGETS); do \ if [ -f $$target ]; then \ echo "Profiling $$target..."; \ - nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \ + nsys profile --cuda-event-trace=false --force-overwrite=true -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \ fi; \ done endif