Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/module1/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand Down
6 changes: 3 additions & 3 deletions modules/module2/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand Down Expand Up @@ -260,8 +260,8 @@ profile_memory: cuda
@echo " ncu --metrics l1tex__throughput.avg.pct_of_peak_sustained_elapsed ./02_memory_coalescing_cuda"
@echo " ncu --metrics dram__bytes_read.sum,dram__bytes_write.sum ./05_memory_bandwidth_optimization_cuda"
@echo ""
@echo "Legacy nvprof (if available):"
@echo " nvprof --metrics achieved_occupancy,gld_efficiency,gst_efficiency ./03_texture_memory_cuda"
@echo "Modern NVIDIA Nsight Systems:"
@echo " nsys profile --cuda-event-trace=false -o profile.nsys-rep ./03_texture_memory_cuda"



Expand Down
2 changes: 1 addition & 1 deletion modules/module3/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand Down
8 changes: 4 additions & 4 deletions modules/module4/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand Down Expand Up @@ -419,9 +419,9 @@ profile_examples: all
@echo " nsys profile --trace=cuda,nvtx --stats=true ./02_multi_gpu_programming"
@echo " nsys profile --trace=cuda,nvtx,osrt --stats=true ./03_unified_memory"
@echo ""
@echo "Legacy nvprof:"
@echo " nvprof --print-gpu-trace ./01_cuda_streams_basics"
@echo " nvprof --print-api-trace ./02_multi_gpu_programming"
@echo "Modern NVIDIA Nsight Systems:"
@echo " nsys profile --cuda-event-trace=false -o trace.nsys-rep ./01_cuda_streams_basics"
@echo " nsys profile --cuda-event-trace=false -o multi_gpu.nsys-rep ./02_multi_gpu_programming"
@echo ""
@echo "Multi-GPU Analysis:"
@echo " nsys profile --trace=cuda,nvtx --stats=true -o multi_gpu_trace ./02_multi_gpu_programming"
Expand Down
11 changes: 6 additions & 5 deletions modules/module5/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ HIP_DEBUG_FLAGS += --offload-arch=$(GPU_ARCH)
CXX_FLAGS = -std=c++17 -O3 -fopenmp

# Profiling flags
NVPROF_FLAGS = --print-gpu-trace --log-file %s.nvprof
NSYS_FLAGS = --cuda-event-trace=false --force-overwrite
ROCPROF_FLAGS = --hip-trace --stats --output-file %s.csv

# Directories
Expand Down Expand Up @@ -144,7 +144,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand All @@ -171,11 +171,12 @@ run: all
# Performance profiling targets
.PHONY: profile-cuda
profile-cuda: $(CUDA_TARGETS)
@echo "Profiling CUDA examples with nvprof..."
@echo "Profiling CUDA examples with nsys..."
@mkdir -p $(PROFILE_DIR)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof $(NVPROF_FLAGS) $$target > $(PROFILE_DIR)/$$(basename $$target).nvprof 2>&1; \
nsys profile $(NSYS_FLAGS) -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target > $(PROFILE_DIR)/$$(basename $$target).nsys.log 2>&1; \
fi; \
done

Expand Down Expand Up @@ -330,7 +331,7 @@ help:
@echo " validate - Validate optimization correctness"
@echo ""
@echo "Profiling Targets:"
@echo " profile-cuda - Profile CUDA examples with nvprof"
@echo " profile-cuda - Profile CUDA examples with nsys"
@echo " profile-hip - Profile HIP examples with rocprof"
@echo " profile-detailed-cuda - Detailed profiling with Nsight Compute"
@echo " memcheck-cuda - Run CUDA memory checker"
Expand Down
2 changes: 1 addition & 1 deletion modules/module6/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand Down
2 changes: 1 addition & 1 deletion modules/module7/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand Down
2 changes: 1 addition & 1 deletion modules/module8/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand Down
2 changes: 1 addition & 1 deletion modules/module9/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ ifeq ($(BUILD_CUDA),1)
@for target in $(CUDA_TARGETS); do \
if [ -f $$target ]; then \
echo "Profiling $$target..."; \
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
fi; \
done
endif
Expand Down