From 8e03d100354a90a9121cb16c6ad6a40bdd81533d Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Mon, 11 May 2020 15:24:47 -0500 Subject: [PATCH] Add ref counting for rsmi init and shutdown Also, clean lint from kfd_ioctl.h file. Change-Id: I5a2ae127ab6ab6676a1b075ed10858d0ebfe13c1 --- include/rocm_smi/kfd_ioctl.h | 711 +++++++++--------- include/rocm_smi/rocm_smi.h | 2 + include/rocm_smi/rocm_smi_main.h | 20 +- include/rocm_smi/rocm_smi_utils.h | 79 ++ src/rocm_smi.cc | 71 +- src/rocm_smi_main.cc | 13 +- .../functional/init_shutdown_refcount.cc | 226 ++++++ .../functional/init_shutdown_refcount.h | 74 ++ tests/rocm_smi_test/main.cc | 19 +- 9 files changed, 837 insertions(+), 378 deletions(-) create mode 100755 tests/rocm_smi_test/functional/init_shutdown_refcount.cc create mode 100755 tests/rocm_smi_test/functional/init_shutdown_refcount.h diff --git a/include/rocm_smi/kfd_ioctl.h b/include/rocm_smi/kfd_ioctl.h index 7eb58269..9356cd16 100755 --- a/include/rocm_smi/kfd_ioctl.h +++ b/include/rocm_smi/kfd_ioctl.h @@ -28,82 +28,82 @@ #define KFD_IOCTL_MAJOR_VERSION 1 #define KFD_IOCTL_MINOR_VERSION 2 -#define KFD_IOCTL_DBG_MAJOR_VERSION 1 -#define KFD_IOCTL_DBG_MINOR_VERSION 0 +#define KFD_IOCTL_DBG_MAJOR_VERSION 1 +#define KFD_IOCTL_DBG_MINOR_VERSION 0 struct kfd_ioctl_get_version_args { - __u32 major_version; /* from KFD */ - __u32 minor_version; /* from KFD */ + __u32 major_version; /* from KFD */ + __u32 minor_version; /* from KFD */ }; /* For kfd_ioctl_create_queue_args.queue_type. */ -#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 -#define KFD_IOC_QUEUE_TYPE_SDMA 0x1 -#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 -#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 +#define KFD_IOC_QUEUE_TYPE_SDMA 0x1 +#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 +#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 -#define KFD_MAX_QUEUE_PERCENTAGE 100 -#define KFD_MAX_QUEUE_PRIORITY 15 +#define KFD_MAX_QUEUE_PERCENTAGE 100 +#define KFD_MAX_QUEUE_PRIORITY 15 struct kfd_ioctl_create_queue_args { - __u64 ring_base_address; /* to KFD */ - __u64 write_pointer_address; /* from KFD */ - __u64 read_pointer_address; /* from KFD */ - __u64 doorbell_offset; /* from KFD */ + __u64 ring_base_address; /* to KFD */ + __u64 write_pointer_address; /* from KFD */ + __u64 read_pointer_address; /* from KFD */ + __u64 doorbell_offset; /* from KFD */ - __u32 ring_size; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 queue_type; /* to KFD */ - __u32 queue_percentage; /* to KFD */ - __u32 queue_priority; /* to KFD */ - __u32 queue_id; /* from KFD */ + __u32 ring_size; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 queue_type; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ + __u32 queue_id; /* from KFD */ - __u64 eop_buffer_address; /* to KFD */ - __u64 eop_buffer_size; /* to KFD */ - __u64 ctx_save_restore_address; /* to KFD */ - __u32 ctx_save_restore_size; /* to KFD */ - __u32 ctl_stack_size; /* to KFD */ + __u64 eop_buffer_address; /* to KFD */ + __u64 eop_buffer_size; /* to KFD */ + __u64 ctx_save_restore_address; /* to KFD */ + __u32 ctx_save_restore_size; /* to KFD */ + __u32 ctl_stack_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { - __u32 queue_id; /* to KFD */ - __u32 pad; + __u32 queue_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_update_queue_args { - __u64 ring_base_address; /* to KFD */ + __u64 ring_base_address; /* to KFD */ - __u32 queue_id; /* to KFD */ - __u32 ring_size; /* to KFD */ - __u32 queue_percentage; /* to KFD */ - __u32 
queue_priority; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 ring_size; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ }; struct kfd_ioctl_set_cu_mask_args { - __u32 queue_id; /* to KFD */ - __u32 num_cu_mask; /* to KFD */ - __u64 cu_mask_ptr; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 num_cu_mask; /* to KFD */ + __u64 cu_mask_ptr; /* to KFD */ }; struct kfd_ioctl_get_queue_wave_state_args { - __u64 ctl_stack_address; /* to KFD */ - __u32 ctl_stack_used_size; /* from KFD */ - __u32 save_area_used_size; /* from KFD */ - __u32 queue_id; /* to KFD */ - __u32 pad; + __u64 ctl_stack_address; /* to KFD */ + __u32 ctl_stack_used_size; /* from KFD */ + __u32 save_area_used_size; /* from KFD */ + __u32 queue_id; /* to KFD */ + __u32 pad; }; struct kfd_queue_snapshot_entry { - __u64 ring_base_address; - __u64 write_pointer_address; - __u64 read_pointer_address; - __u64 ctx_save_restore_address; - __u32 queue_id; - __u32 gpu_id; - __u32 ring_size; - __u32 queue_type; - __u32 queue_status; - __u32 reserved[19]; + __u64 ring_base_address; + __u64 write_pointer_address; + __u64 read_pointer_address; + __u64 ctx_save_restore_address; + __u32 queue_id; + __u32 gpu_id; + __u32 ring_size; + __u32 queue_type; + __u32 queue_status; + __u32 reserved[19]; }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ @@ -111,13 +111,13 @@ struct kfd_queue_snapshot_entry { #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 struct kfd_ioctl_set_memory_policy_args { - __u64 alternate_aperture_base; /* to KFD */ - __u64 alternate_aperture_size; /* to KFD */ + __u64 alternate_aperture_base; /* to KFD */ + __u64 alternate_aperture_size; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 default_policy; /* to KFD */ - __u32 alternate_policy; /* to KFD */ - __u32 pad; + __u32 gpu_id; /* to KFD */ + __u32 default_policy; /* to KFD */ + __u32 alternate_policy; /* to KFD */ + __u32 pad; }; /* @@ -128,24 +128,24 @@ struct kfd_ioctl_set_memory_policy_args { */ struct kfd_ioctl_get_clock_counters_args { - __u64 gpu_clock_counter; /* from KFD */ - __u64 cpu_clock_counter; /* from KFD */ - __u64 system_clock_counter; /* from KFD */ - __u64 system_clock_freq; /* from KFD */ + __u64 gpu_clock_counter; /* from KFD */ + __u64 cpu_clock_counter; /* from KFD */ + __u64 system_clock_counter; /* from KFD */ + __u64 system_clock_freq; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_process_device_apertures { - __u64 lds_base; /* from KFD */ - __u64 lds_limit; /* from KFD */ - __u64 scratch_base; /* from KFD */ - __u64 scratch_limit; /* from KFD */ - __u64 gpuvm_base; /* from KFD */ - __u64 gpuvm_limit; /* from KFD */ - __u32 gpu_id; /* from KFD */ - __u32 pad; + __u64 lds_base; /* from KFD */ + __u64 lds_limit; /* from KFD */ + __u64 scratch_base; /* from KFD */ + __u64 scratch_limit; /* from KFD */ + __u64 gpuvm_base; /* from KFD */ + __u64 gpuvm_limit; /* from KFD */ + __u32 gpu_id; /* from KFD */ + __u32 pad; }; /* @@ -155,25 +155,25 @@ struct kfd_process_device_apertures { */ #define NUM_OF_SUPPORTED_GPUS 7 struct kfd_ioctl_get_process_apertures_args { - struct kfd_process_device_apertures - process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ + struct kfd_process_device_apertures + process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ - /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */ - __u32 num_of_nodes; - __u32 pad; + /* from KFD, should be in the range [1 - 
NUM_OF_SUPPORTED_GPUS] */ + __u32 num_of_nodes; + __u32 pad; }; struct kfd_ioctl_get_process_apertures_new_args { - /* User allocated. Pointer to struct kfd_process_device_apertures - * filled in by Kernel - */ - __u64 kfd_process_device_apertures_ptr; - /* to KFD - indicates amount of memory present in - * kfd_process_device_apertures_ptr - * from KFD - Number of entries filled by KFD. - */ - __u32 num_of_nodes; - __u32 pad; + /* User allocated. Pointer to struct kfd_process_device_apertures + * filled in by Kernel + */ + __u64 kfd_process_device_apertures_ptr; + /* to KFD - indicates amount of memory present in + * kfd_process_device_apertures_ptr + * from KFD - Number of entries filled by KFD. + */ + __u32 num_of_nodes; + __u32 pad; }; #define MAX_ALLOWED_NUM_POINTS 100 @@ -181,35 +181,35 @@ struct kfd_ioctl_get_process_apertures_new_args { #define MAX_ALLOWED_WAC_BUFF_SIZE 128 struct kfd_ioctl_dbg_register_args { - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_dbg_unregister_args { - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_dbg_address_watch_args { - __u64 content_ptr; /* a pointer to the actual content */ - __u32 gpu_id; /* to KFD */ - __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ + __u64 content_ptr; /* a pointer to the actual content */ + __u32 gpu_id; /* to KFD */ + __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; struct kfd_ioctl_dbg_wave_control_args { - __u64 content_ptr; /* a pointer to the actual content */ - __u32 gpu_id; /* to KFD */ - __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ + __u64 content_ptr; /* a pointer to the actual content */ + __u32 gpu_id; /* to KFD */ + __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; /* mapping event types to API spec */ -#define KFD_DBG_EV_STATUS_TRAP 1 -#define KFD_DBG_EV_STATUS_VMFAULT 2 -#define KFD_DBG_EV_STATUS_SUSPENDED 4 -#define KFD_DBG_EV_STATUS_NEW_QUEUE 8 -#define KFD_DBG_EV_FLAG_CLEAR_STATUS 1 +#define KFD_DBG_EV_STATUS_TRAP 1 +#define KFD_DBG_EV_STATUS_VMFAULT 2 +#define KFD_DBG_EV_STATUS_SUSPENDED 4 +#define KFD_DBG_EV_STATUS_NEW_QUEUE 8 +#define KFD_DBG_EV_FLAG_CLEAR_STATUS 1 -#define KFD_INVALID_QUEUEID 0xffffffff +#define KFD_INVALID_QUEUEID 0xffffffff /* KFD_IOC_DBG_TRAP_ENABLE: * ptr: unused @@ -273,179 +273,177 @@ struct kfd_ioctl_dbg_wave_control_args { * data2: minor version (OUT) * data3: unused */ -#define KFD_IOC_DBG_TRAP_GET_VERSION 7 +#define KFD_IOC_DBG_TRAP_GET_VERSION 7 struct kfd_ioctl_dbg_trap_args { - __u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */ - __u32 pid; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 op; /* to KFD */ - __u32 data1; /* to KFD */ - __u32 data2; /* to KFD */ - __u32 data3; /* to KFD */ + __u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */ + __u32 pid; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 op; /* to KFD */ + __u32 data1; /* to KFD */ + __u32 data2; /* to KFD */ + __u32 data3; /* to KFD */ }; /* Matching HSA_EVENTTYPE */ -#define KFD_IOC_EVENT_SIGNAL 0 -#define KFD_IOC_EVENT_NODECHANGE 1 -#define KFD_IOC_EVENT_DEVICESTATECHANGE 2 -#define KFD_IOC_EVENT_HW_EXCEPTION 3 -#define KFD_IOC_EVENT_SYSTEM_EVENT 4 -#define KFD_IOC_EVENT_DEBUG_EVENT 5 -#define KFD_IOC_EVENT_PROFILE_EVENT 6 -#define KFD_IOC_EVENT_QUEUE_EVENT 7 -#define KFD_IOC_EVENT_MEMORY 8 - -#define KFD_IOC_WAIT_RESULT_COMPLETE 0 -#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 -#define KFD_IOC_WAIT_RESULT_FAIL 2 - 
-#define KFD_SIGNAL_EVENT_LIMIT 4096 +#define KFD_IOC_EVENT_SIGNAL 0 +#define KFD_IOC_EVENT_NODECHANGE 1 +#define KFD_IOC_EVENT_DEVICESTATECHANGE 2 +#define KFD_IOC_EVENT_HW_EXCEPTION 3 +#define KFD_IOC_EVENT_SYSTEM_EVENT 4 +#define KFD_IOC_EVENT_DEBUG_EVENT 5 +#define KFD_IOC_EVENT_PROFILE_EVENT 6 +#define KFD_IOC_EVENT_QUEUE_EVENT 7 +#define KFD_IOC_EVENT_MEMORY 8 + +#define KFD_IOC_WAIT_RESULT_COMPLETE 0 +#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 +#define KFD_IOC_WAIT_RESULT_FAIL 2 + +#define KFD_SIGNAL_EVENT_LIMIT 4096 /* For kfd_event_data.hw_exception_data.reset_type. */ -#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0 -#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1 +#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0 +#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1 /* For kfd_event_data.hw_exception_data.reset_cause. */ -#define KFD_HW_EXCEPTION_GPU_HANG 0 -#define KFD_HW_EXCEPTION_ECC 1 +#define KFD_HW_EXCEPTION_GPU_HANG 0 +#define KFD_HW_EXCEPTION_ECC 1 /* For kfd_hsa_memory_exception_data.ErrorType */ -#define KFD_MEM_ERR_NO_RAS 0 -#define KFD_MEM_ERR_SRAM_ECC 1 -#define KFD_MEM_ERR_POISON_CONSUMED 2 -#define KFD_MEM_ERR_GPU_HANG 3 +#define KFD_MEM_ERR_NO_RAS 0 +#define KFD_MEM_ERR_SRAM_ECC 1 +#define KFD_MEM_ERR_POISON_CONSUMED 2 +#define KFD_MEM_ERR_GPU_HANG 3 struct kfd_ioctl_create_event_args { - __u64 event_page_offset; /* from KFD */ - __u32 event_trigger_data; /* from KFD - signal events only */ - __u32 event_type; /* to KFD */ - __u32 auto_reset; /* to KFD */ - __u32 node_id; /* to KFD - only valid for certain - event types */ - __u32 event_id; /* from KFD */ - __u32 event_slot_index; /* from KFD */ + __u64 event_page_offset; /* from KFD */ + __u32 event_trigger_data; /* from KFD - signal events only */ + __u32 event_type; /* to KFD */ + __u32 auto_reset; /* to KFD */ + __u32 node_id; /* to KFD - only valid for certain event types */ + __u32 event_id; /* from KFD */ + __u32 event_slot_index; /* from KFD */ }; struct kfd_ioctl_destroy_event_args { - __u32 event_id; /* to KFD */ - __u32 pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_set_event_args { - __u32 event_id; /* to KFD */ - __u32 pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_reset_event_args { - __u32 event_id; /* to KFD */ - __u32 pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_memory_exception_failure { - __u32 NotPresent; /* Page not present or supervisor privilege */ - __u32 ReadOnly; /* Write access to a read-only page */ - __u32 NoExecute; /* Execute access to a page marked NX */ - __u32 imprecise; /* Can't determine the exact fault address */ + __u32 NotPresent; /* Page not present or supervisor privilege */ + __u32 ReadOnly; /* Write access to a read-only page */ + __u32 NoExecute; /* Execute access to a page marked NX */ + __u32 imprecise; /* Can't determine the exact fault address */ }; /* memory exception data */ struct kfd_hsa_memory_exception_data { - struct kfd_memory_exception_failure failure; - __u64 va; - __u32 gpu_id; - __u32 ErrorType; /* 0 = no RAS error, - * 1 = ECC_SRAM, - * 2 = Link_SYNFLOOD (poison), - * 3 = GPU hang (not attributable to a specific cause), - * other values reserved - */ + struct kfd_memory_exception_failure failure; + __u64 va; + __u32 gpu_id; + __u32 ErrorType; // 0 = no RAS error, + // 1 = ECC_SRAM, + // 2 = Link_SYNFLOOD (poison), + // 3 = GPU hang (not attributable to a specific cause), + // other values reserved }; /* hw exception data */ struct kfd_hsa_hw_exception_data { - __u32 reset_type; - __u32 reset_cause; - __u32 memory_lost; - 
__u32 gpu_id; + __u32 reset_type; + __u32 reset_cause; + __u32 memory_lost; + __u32 gpu_id; }; /* Event data */ struct kfd_event_data { - union { - struct kfd_hsa_memory_exception_data memory_exception_data; - struct kfd_hsa_hw_exception_data hw_exception_data; - }; /* From KFD */ - __u64 kfd_event_data_ext; /* pointer to an extension structure - for future exception types */ - __u32 event_id; /* to KFD */ - __u32 pad; + union { + struct kfd_hsa_memory_exception_data memory_exception_data; + struct kfd_hsa_hw_exception_data hw_exception_data; + }; /* From KFD */ + __u64 kfd_event_data_ext; // pointer to an extension structure + // for future exception types + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_wait_events_args { - __u64 events_ptr; /* pointed to struct - kfd_event_data array, to KFD */ - __u32 num_events; /* to KFD */ - __u32 wait_for_all; /* to KFD */ - __u32 timeout; /* to KFD */ - __u32 wait_result; /* from KFD */ + __u64 events_ptr; // pointed to struct + // kfd_event_data array, to KFD + __u32 num_events; /* to KFD */ + __u32 wait_for_all; /* to KFD */ + __u32 timeout; /* to KFD */ + __u32 wait_result; /* from KFD */ }; struct kfd_ioctl_set_scratch_backing_va_args { - __u64 va_addr; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u64 va_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_get_tile_config_args { - /* to KFD: pointer to tile array */ - __u64 tile_config_ptr; - /* to KFD: pointer to macro tile array */ - __u64 macro_tile_config_ptr; - /* to KFD: array size allocated by user mode - * from KFD: array size filled by kernel - */ - __u32 num_tile_configs; - /* to KFD: array size allocated by user mode - * from KFD: array size filled by kernel - */ - __u32 num_macro_tile_configs; - - __u32 gpu_id; /* to KFD */ - __u32 gb_addr_config; /* from KFD */ - __u32 num_banks; /* from KFD */ - __u32 num_ranks; /* from KFD */ - /* struct size can be extended later if needed - * without breaking ABI compatibility - */ + /* to KFD: pointer to tile array */ + __u64 tile_config_ptr; + /* to KFD: pointer to macro tile array */ + __u64 macro_tile_config_ptr; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + __u32 num_tile_configs; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + __u32 num_macro_tile_configs; + + __u32 gpu_id; /* to KFD */ + __u32 gb_addr_config; /* from KFD */ + __u32 num_banks; /* from KFD */ + __u32 num_ranks; /* from KFD */ + /* struct size can be extended later if needed + * without breaking ABI compatibility + */ }; struct kfd_ioctl_set_trap_handler_args { - __u64 tba_addr; /* to KFD */ - __u64 tma_addr; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u64 tba_addr; /* to KFD */ + __u64 tma_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_acquire_vm_args { - __u32 drm_fd; /* to KFD */ - __u32 gpu_id; /* to KFD */ + __u32 drm_fd; /* to KFD */ + __u32 gpu_id; /* to KFD */ }; /* Allocation flags: memory types */ -#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) -#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) -#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) -#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) -#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) +#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) +#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) +#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) +#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) +#define 
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) /* Allocation flags: attributes/access options */ -#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) -#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) -#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) -#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) -#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) -#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26) +#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) +#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) +#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) +#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) +#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) +#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26) /* Allocate memory for later SVM (shared virtual memory) mapping. * @@ -460,12 +458,12 @@ struct kfd_ioctl_acquire_vm_args { * @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above */ struct kfd_ioctl_alloc_memory_of_gpu_args { - __u64 va_addr; /* to KFD */ - __u64 size; /* to KFD */ - __u64 handle; /* from KFD */ - __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */ - __u32 gpu_id; /* to KFD */ - __u32 flags; + __u64 va_addr; /* to KFD */ + __u64 size; /* to KFD */ + __u64 handle; /* from KFD */ + __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */ + __u32 gpu_id; /* to KFD */ + __u32 flags; }; /* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu @@ -473,7 +471,7 @@ struct kfd_ioctl_alloc_memory_of_gpu_args { * @handle: memory handle returned by alloc */ struct kfd_ioctl_free_memory_of_gpu_args { - __u64 handle; /* to KFD */ + __u64 handle; /* to KFD */ }; /* Map memory to one or more GPUs @@ -492,10 +490,10 @@ struct kfd_ioctl_free_memory_of_gpu_args { * n_devices. */ struct kfd_ioctl_map_memory_to_gpu_args { - __u64 handle; /* to KFD */ - __u64 device_ids_array_ptr; /* to KFD */ - __u32 n_devices; /* to KFD */ - __u32 n_success; /* to/from KFD */ + __u64 handle; /* to KFD */ + __u64 device_ids_array_ptr; /* to KFD */ + __u32 n_devices; /* to KFD */ + __u32 n_success; /* to/from KFD */ }; /* Unmap memory from one or more GPUs @@ -503,10 +501,10 @@ struct kfd_ioctl_map_memory_to_gpu_args { * same arguments as for mapping */ struct kfd_ioctl_unmap_memory_from_gpu_args { - __u64 handle; /* to KFD */ - __u64 device_ids_array_ptr; /* to KFD */ - __u32 n_devices; /* to KFD */ - __u32 n_success; /* to/from KFD */ + __u64 handle; /* to KFD */ + __u64 device_ids_array_ptr; /* to KFD */ + __u32 n_devices; /* to KFD */ + __u32 n_success; /* to/from KFD */ }; /* Allocate GWS for specific queue @@ -517,28 +515,27 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { * only support contiguous GWS allocation */ struct kfd_ioctl_alloc_queue_gws_args { - __u32 queue_id; /* to KFD */ - __u32 num_gws; /* to KFD */ - __u32 first_gws; /* from KFD */ - __u32 pad; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 num_gws; /* to KFD */ + __u32 first_gws; /* from KFD */ + __u32 pad; /* to KFD */ }; struct kfd_ioctl_get_dmabuf_info_args { - __u64 size; /* from KFD */ - __u64 metadata_ptr; /* to KFD */ - __u32 metadata_size; /* to KFD (space allocated by user) - * from KFD (actual metadata size) - */ - __u32 gpu_id; /* from KFD */ - __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */ - __u32 dmabuf_fd; /* to KFD */ + __u64 size; /* from KFD */ + __u64 metadata_ptr; /* to KFD */ + __u32 metadata_size; // to KFD (space allocated by user) + // from KFD (actual metadata size) + __u32 gpu_id; /* from KFD */ + __u32 flags; /* from KFD 
(KFD_IOC_ALLOC_MEM_FLAGS) */ + __u32 dmabuf_fd; /* to KFD */ }; struct kfd_ioctl_import_dmabuf_args { - __u64 va_addr; /* to KFD */ - __u64 handle; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 dmabuf_fd; /* to KFD */ + __u64 va_addr; /* to KFD */ + __u64 handle; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 dmabuf_fd; /* to KFD */ }; /* @@ -548,36 +545,36 @@ struct kfd_ioctl_import_dmabuf_args { #define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 struct kfd_ioctl_smi_events_args { - __u32 gpuid; /* to KFD */ - __u32 anon_fd; /* from KFD */ + __u32 gpuid; /* to KFD */ + __u32 anon_fd; /* from KFD */ }; /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { - KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0, - KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0, + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, }; struct kfd_ioctl_ipc_export_handle_args { - __u64 handle; /* to KFD */ - __u32 share_handle[4]; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u64 handle; /* to KFD */ + __u32 share_handle[4]; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_ipc_import_handle_args { - __u64 handle; /* from KFD */ - __u64 va_addr; /* to KFD */ - __u64 mmap_offset; /* from KFD */ - __u32 share_handle[4]; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u64 handle; /* from KFD */ + __u64 va_addr; /* to KFD */ + __u64 mmap_offset; /* from KFD */ + __u32 share_handle[4]; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_memory_range { - __u64 va_addr; - __u64 size; + __u64 va_addr; + __u64 size; }; /* flags definitions @@ -587,143 +584,143 @@ struct kfd_memory_range { #define KFD_CROSS_MEMORY_RW_BIT (1 << 0) #define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT) #define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT) -#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT) +#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT) // NOLINT struct kfd_ioctl_cross_memory_copy_args { - /* to KFD: Process ID of the remote process */ - __u32 pid; - /* to KFD: See above definition */ - __u32 flags; - /* to KFD: Source GPU VM range */ - __u64 src_mem_range_array; - /* to KFD: Size of above array */ - __u64 src_mem_array_size; - /* to KFD: Destination GPU VM range */ - __u64 dst_mem_range_array; - /* to KFD: Size of above array */ - __u64 dst_mem_array_size; - /* from KFD: Total amount of bytes copied */ - __u64 bytes_copied; + /* to KFD: Process ID of the remote process */ + __u32 pid; + /* to KFD: See above definition */ + __u32 flags; + /* to KFD: Source GPU VM range */ + __u64 src_mem_range_array; + /* to KFD: Size of above array */ + __u64 src_mem_array_size; + /* to KFD: Destination GPU VM range */ + __u64 dst_mem_range_array; + /* to KFD: Size of above array */ + __u64 dst_mem_array_size; + /* from KFD: Total amount of bytes copied */ + __u64 bytes_copied; }; #define AMDKFD_IOCTL_BASE 'K' -#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) -#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) -#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type) -#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) +#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type) -#define AMDKFD_IOC_GET_VERSION \ - AMDKFD_IOR(0x01, 
struct kfd_ioctl_get_version_args) +#define AMDKFD_IOC_GET_VERSION \ + AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args) -#define AMDKFD_IOC_CREATE_QUEUE \ - AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args) +#define AMDKFD_IOC_CREATE_QUEUE \ + AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args) -#define AMDKFD_IOC_DESTROY_QUEUE \ - AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args) +#define AMDKFD_IOC_DESTROY_QUEUE \ + AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args) -#define AMDKFD_IOC_SET_MEMORY_POLICY \ - AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args) +#define AMDKFD_IOC_SET_MEMORY_POLICY \ + AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args) -#define AMDKFD_IOC_GET_CLOCK_COUNTERS \ - AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args) +#define AMDKFD_IOC_GET_CLOCK_COUNTERS \ + AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args) -#define AMDKFD_IOC_GET_PROCESS_APERTURES \ - AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args) +#define AMDKFD_IOC_GET_PROCESS_APERTURES \ + AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args) -#define AMDKFD_IOC_UPDATE_QUEUE \ - AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) +#define AMDKFD_IOC_UPDATE_QUEUE \ + AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) -#define AMDKFD_IOC_CREATE_EVENT \ - AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args) +#define AMDKFD_IOC_CREATE_EVENT \ + AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args) -#define AMDKFD_IOC_DESTROY_EVENT \ - AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args) +#define AMDKFD_IOC_DESTROY_EVENT \ + AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args) -#define AMDKFD_IOC_SET_EVENT \ - AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args) +#define AMDKFD_IOC_SET_EVENT \ + AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args) -#define AMDKFD_IOC_RESET_EVENT \ - AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args) +#define AMDKFD_IOC_RESET_EVENT \ + AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args) -#define AMDKFD_IOC_WAIT_EVENTS \ - AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) +#define AMDKFD_IOC_WAIT_EVENTS \ + AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ - AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) +#define AMDKFD_IOC_DBG_REGISTER \ + AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ - AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) +#define AMDKFD_IOC_DBG_UNREGISTER \ + AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ - AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) +#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ + AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ - AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) +#define AMDKFD_IOC_DBG_WAVE_CONTROL \ + AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) -#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ - AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) +#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ + AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) -#define AMDKFD_IOC_GET_TILE_CONFIG \ - AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) +#define AMDKFD_IOC_GET_TILE_CONFIG \ + AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) -#define AMDKFD_IOC_SET_TRAP_HANDLER \ - AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) +#define AMDKFD_IOC_SET_TRAP_HANDLER \ + AMDKFD_IOW(0x13, struct 
kfd_ioctl_set_trap_handler_args) -#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ - AMDKFD_IOWR(0x14, \ - struct kfd_ioctl_get_process_apertures_new_args) +#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ + AMDKFD_IOWR(0x14, \ + struct kfd_ioctl_get_process_apertures_new_args) -#define AMDKFD_IOC_ACQUIRE_VM \ - AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) +#define AMDKFD_IOC_ACQUIRE_VM \ + AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) -#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ - AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) +#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ + AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) -#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ - AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) +#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ + AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) -#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \ - AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args) +#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \ + AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args) -#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ - AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) +#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ + AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) -#define AMDKFD_IOC_SET_CU_MASK \ - AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) +#define AMDKFD_IOC_SET_CU_MASK \ + AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) -#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ - AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) +#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ + AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) -#define AMDKFD_IOC_GET_DMABUF_INFO \ - AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args) +#define AMDKFD_IOC_GET_DMABUF_INFO \ + AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args) -#define AMDKFD_IOC_IMPORT_DMABUF \ - AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) +#define AMDKFD_IOC_IMPORT_DMABUF \ + AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) -#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ - AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) +#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ + AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) -#define AMDKFD_IOC_SMI_EVENTS \ - AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) +#define AMDKFD_IOC_SMI_EVENTS \ + AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) -#define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x20 +#define AMDKFD_COMMAND_START 0x01 +#define AMDKFD_COMMAND_END 0x20 /* non-upstream ioctls */ #define AMDKFD_IOC_IPC_IMPORT_HANDLE \ - AMDKFD_IOWR(0x1F, struct kfd_ioctl_ipc_import_handle_args) + AMDKFD_IOWR(0x1F, struct kfd_ioctl_ipc_import_handle_args) -#define AMDKFD_IOC_IPC_EXPORT_HANDLE \ - AMDKFD_IOWR(0x20, struct kfd_ioctl_ipc_export_handle_args) +#define AMDKFD_IOC_IPC_EXPORT_HANDLE \ + AMDKFD_IOWR(0x20, struct kfd_ioctl_ipc_export_handle_args) -#define AMDKFD_IOC_DBG_TRAP \ - AMDKFD_IOWR(0x21, struct kfd_ioctl_dbg_trap_args) +#define AMDKFD_IOC_DBG_TRAP \ + AMDKFD_IOWR(0x21, struct kfd_ioctl_dbg_trap_args) -#define AMDKFD_IOC_CROSS_MEMORY_COPY \ - AMDKFD_IOWR(0x22, struct kfd_ioctl_cross_memory_copy_args) +#define AMDKFD_IOC_CROSS_MEMORY_COPY \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_cross_memory_copy_args) -#define AMDKFD_COMMAND_START 0x01 +#define AMDKFD_COMMAND_START 0x01 #undef AMDKFD_COMMAND_END -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x22 #endif // INCLUDE_ROCM_SMI_KFD_IOCTL_H_ diff --git a/include/rocm_smi/rocm_smi.h 
b/include/rocm_smi/rocm_smi.h index 5cf748a6..a6b66b30 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -123,6 +123,8 @@ typedef enum { RSMI_STATUS_BUSY, //!< A resource or mutex could not be //!< acquired because it is already //!< being used + RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter + //!< exceeded INT32_MAX RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred } rsmi_status_t; diff --git a/include/rocm_smi/rocm_smi_main.h b/include/rocm_smi/rocm_smi_main.h index bb8b326d..3503c483 100755 --- a/include/rocm_smi/rocm_smi_main.h +++ b/include/rocm_smi/rocm_smi_main.h @@ -94,8 +94,18 @@ class RocmSMI { int kfd_notif_evt_fh(void) const {return kfd_notif_evt_fh_;} void set_kfd_notif_evt_fh(int fd) {kfd_notif_evt_fh_ = fd;} std::mutex *kfd_notif_evt_fh_mutex(void) {return &kfd_notif_evt_fh_mutex_;} - int kfd_notif_evt_fh_refcnt_inc() {return ++kfd_notif_evt_fh_refcnt_;} - int kfd_notif_evt_fh_refcnt_dec() {return --kfd_notif_evt_fh_refcnt_;} + std::mutex *bootstrap_mutex(void) {return &bootstrap_mutex_;} + + uint32_t ref_count(void) const {return ref_count_;} + uint32_t ref_count_inc(void) {return ++ref_count_;} + uint32_t ref_count_dec(void) {return --ref_count_;} + + uint32_t kfd_notif_evt_fh_refcnt(void) const { + return kfd_notif_evt_fh_refcnt_;} + uint32_t kfd_notif_evt_fh_refcnt_inc(void) { + return ++kfd_notif_evt_fh_refcnt_;} + uint32_t kfd_notif_evt_fh_refcnt_dec(void) { + return --kfd_notif_evt_fh_refcnt_;} private: std::vector<std::shared_ptr<Device>> devices_; @@ -114,8 +124,12 @@ class RocmSMI { uint32_t euid_; int kfd_notif_evt_fh_; - int kfd_notif_evt_fh_refcnt_; std::mutex kfd_notif_evt_fh_mutex_; + uint32_t kfd_notif_evt_fh_refcnt_; // Access to this should be protected + // by kfd_notif_evt_fh_mutex_ + std::mutex bootstrap_mutex_; + uint32_t ref_count_; // Access to this should be protected + // by bootstrap_mutex_ }; } // namespace smi diff --git a/include/rocm_smi/rocm_smi_utils.h b/include/rocm_smi/rocm_smi_utils.h index be5b75c2..4c042fd1 100755 --- a/include/rocm_smi/rocm_smi_utils.h +++ b/include/rocm_smi/rocm_smi_utils.h @@ -105,6 +105,85 @@ struct ScopedPthread { pthread_wrap& pthrd_ref_; bool mutex_not_acquired_; // Use for AcquireNB (not for Acquire()) }; + + +#define PASTE2(x, y) x##y +#define PASTE(x, y) PASTE2(x, y) + +#define __forceinline __inline__ __attribute__((always_inline)) + +template <typename lambda> +class ScopeGuard { + public: + explicit __forceinline ScopeGuard(const lambda& release) + : release_(release), dismiss_(false) {} + + ScopeGuard(ScopeGuard& rhs) {*this = rhs; } + + __forceinline ~ScopeGuard() { + if (!dismiss_) release_(); + } + __forceinline ScopeGuard& operator=(ScopeGuard& rhs) { + dismiss_ = rhs.dismiss_; + release_ = rhs.release_; + rhs.dismiss_ = true; + return *this; + } + __forceinline void Dismiss() { dismiss_ = true; } + + private: + lambda release_; + bool dismiss_; +}; + +template <typename lambda> +static __forceinline ScopeGuard<lambda> MakeScopeGuard(lambda rel) { + return ScopeGuard<lambda>(rel); +} + +#define MAKE_SCOPE_GUARD_HELPER(lname, sname, ...) \ + auto lname = __VA_ARGS__; \ + amd::smi::ScopeGuard<decltype(lname)> sname(lname); +#define MAKE_SCOPE_GUARD(...) \ + MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), \ + PASTE(scopeGuard, __COUNTER__), __VA_ARGS__) +#define MAKE_NAMED_SCOPE_GUARD(name, ...)
\ + MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), name, \ + __VA_ARGS__) + + +// A macro to disallow the copy and move constructor and operator= functions +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&) = delete; \ + TypeName(TypeName&&) = delete; \ + void operator=(const TypeName&) = delete; \ + void operator=(TypeName&&) = delete; + +template <class LockType> +class ScopedAcquire { + public: + /// @brief: When constructing, acquire the lock. + /// @param: lock(Input), pointer to an existing lock. + explicit ScopedAcquire(LockType* lock) : lock_(lock), doRelease(true) { + lock_->Acquire();} + + /// @brief: when destructing, release the lock. + ~ScopedAcquire() { + if (doRelease) lock_->Release(); + } + + /// @brief: Release the lock early. Avoid using when possible. + void Release() { + lock_->Release(); + doRelease = false; + } + + private: + LockType* lock_; + bool doRelease; + /// @brief: Disable copy and assignment. + DISALLOW_COPY_AND_ASSIGN(ScopedAcquire); +}; + } // namespace smi } // namespace amd diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index 1cb73a6f..cb0df0e0 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -165,20 +165,20 @@ static rsmi_status_t handleException() { return RSMI_STATUS_NOT_SUPPORTED; \ } \ return RSMI_STATUS_INVALID_ARGS; \ - } \ + } #define CHK_SUPPORT(RT_PTR, VR, SUB_VR) \ GET_DEV_FROM_INDX \ CHK_API_SUPPORT_ONLY((RT_PTR), (VR), (SUB_VR)) #define CHK_SUPPORT_NAME_ONLY(RT_PTR) \ - CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, RSMI_DEFAULT_VARIANT) \ + CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, RSMI_DEFAULT_VARIANT) #define CHK_SUPPORT_VAR(RT_PTR, VR) \ - CHK_SUPPORT((RT_PTR), (VR), RSMI_DEFAULT_VARIANT) \ + CHK_SUPPORT((RT_PTR), (VR), RSMI_DEFAULT_VARIANT) #define CHK_SUPPORT_SUBVAR_ONLY(RT_PTR, SUB_VR) \ - CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, (SUB_VR)) \ + CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, (SUB_VR)) static pthread_mutex_t *get_mutex(uint32_t dv_ind) { amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); @@ -540,9 +540,29 @@ static bool is_power_of_2(uint64_t n) { rsmi_status_t rsmi_init(uint64_t flags) { TRY - amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); - smi.Initialize(flags); + std::lock_guard<std::mutex> guard(*smi.bootstrap_mutex()); + + if (smi.ref_count() == INT32_MAX) { + return RSMI_STATUS_REFCOUNT_OVERFLOW; + } + + (void)smi.ref_count_inc(); + + // If smi.Initialize() throws, we should clean up and dec. ref_count_. + // Otherwise, if no issues, the Dismiss() will prevent the ref_count_ + // decrement. + MAKE_NAMED_SCOPE_GUARD(refGuard, [&]() { (void)smi.ref_count_dec(); }); + + if (smi.ref_count() == 1) { + try { + smi.Initialize(flags); + } catch(...)
{ + smi.Cleanup(); + throw; + } + } + refGuard.Dismiss(); return RSMI_STATUS_SUCCESS; CATCH @@ -555,9 +575,17 @@ rsmi_shut_down(void) { TRY amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); + std::lock_guard<std::mutex> guard(*smi.bootstrap_mutex()); + + if (smi.ref_count() == 0) { + return RSMI_STATUS_INIT_ERROR; + } - smi.Cleanup(); + (void)smi.ref_count_dec(); + if (smi.ref_count() == 0) { + smi.Cleanup(); + } return RSMI_STATUS_SUCCESS; CATCH } @@ -2371,6 +2399,15 @@ rsmi_status_string(rsmi_status_t status, const char **status_string) { "type that was expected"; break; + case RSMI_STATUS_BUSY: + *status_string = "A resource or mutex could not be acquired " + "because it is already being used"; + break; + + case RSMI_STATUS_REFCOUNT_OVERFLOW: + *status_string = "An internal reference counter exceeded INT32_MAX"; + break; + case RSMI_STATUS_UNKNOWN_ERROR: *status_string = "An unknown error prevented the call from completing" " successfully"; @@ -3186,6 +3223,7 @@ rsmi_event_notification_init(uint32_t dv_ind) { std::lock_guard<std::mutex> guard(*smi.kfd_notif_evt_fh_mutex()); if (smi.kfd_notif_evt_fh() == -1) { + assert(smi.kfd_notif_evt_fh_refcnt() == 0); int kfd_fd = open(kPathKFDIoctl, O_RDWR | O_CLOEXEC); if (kfd_fd <= 0) { @@ -3199,8 +3237,7 @@ rsmi_event_notification_init(uint32_t dv_ind) { smi.set_kfd_notif_evt_fh(kfd_fd); } - smi.kfd_notif_evt_fh_refcnt_inc(); - + (void)smi.kfd_notif_evt_fh_refcnt_inc(); struct kfd_ioctl_smi_events_args args; assert(dev->kfd_gpu_id() <= UINT32_MAX); @@ -3354,7 +3391,7 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind) { dev->set_evt_notif_anon_file_ptr(nullptr); dev->set_evt_notif_anon_fd(-1); - if (!smi.kfd_notif_evt_fh_refcnt_dec()) { + if (smi.kfd_notif_evt_fh_refcnt_dec() == 0) { int ret = close(smi.kfd_notif_evt_fh()); smi.set_kfd_notif_evt_fh(-1); if (ret < 0) { @@ -3385,3 +3422,17 @@ rsmi_test_sleep(uint32_t dv_ind, uint32_t seconds) { sleep(seconds); return RSMI_STATUS_SUCCESS; } + +int32_t +rsmi_test_refcount(uint64_t refcnt_type) { + (void)refcnt_type; + + amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); + std::lock_guard<std::mutex> guard(*smi.bootstrap_mutex()); + + if (smi.ref_count() == 0 && smi.monitor_devices().size() != 0) { + return -1; + } + + return smi.ref_count(); +} diff --git a/src/rocm_smi_main.cc b/src/rocm_smi_main.cc index 9cd02244..75d98ce9 100755 --- a/src/rocm_smi_main.cc +++ b/src/rocm_smi_main.cc @@ -244,6 +244,12 @@ RocmSMI::Initialize(uint64_t flags) { auto i = 0; uint32_t ret; + assert(ref_count_ == 1); + if (ref_count_ != 1) { + throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR, + "Unexpected: RocmSMI ref_count_ != 1"); + } + init_options_ = flags; euid_ = geteuid(); @@ -299,6 +305,10 @@ RocmSMI::Initialize(uint64_t flags) { void RocmSMI::Cleanup() { + s_monitor_devices.clear(); + devices_.clear(); + monitors_.clear(); + if (kfd_notif_evt_fh() >= 0) { int ret = close(kfd_notif_evt_fh()); if (ret < 0) { @@ -306,9 +316,6 @@ RocmSMI::Cleanup() { "Failed to close kfd file handle on shutdown."); } } - s_monitor_devices.clear(); - devices_.clear(); - monitors_.clear(); } RocmSMI::RocmSMI(uint64_t flags) : init_options_(flags), diff --git a/tests/rocm_smi_test/functional/init_shutdown_refcount.cc b/tests/rocm_smi_test/functional/init_shutdown_refcount.cc new file mode 100755 index 00000000..ac0ff97d --- /dev/null +++ b/tests/rocm_smi_test/functional/init_shutdown_refcount.cc @@ -0,0 +1,226 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release
License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2020, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of <Name of Development Group, Name of Institution>, + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE.
+ * + */ + +#include <pthread.h> + +#include <iostream> +#include <thread> // NOLINT +#include <random> +#include <chrono> // NOLINT + +#include "rocm_smi_test/functional/init_shutdown_refcount.h" +#include "gtest/gtest.h" +#include "rocm_smi/rocm_smi.h" +#include "rocm_smi_test/test_common.h" + +extern int32_t +rsmi_test_refcount(uint64_t refcnt_type); + +static void rand_sleep_mod(int msec) { + unsigned int seed = time(NULL); + std::mt19937_64 eng{seed}; + std::uniform_int_distribution<> dist{10, msec}; + std::this_thread::sleep_for(std::chrono::milliseconds{dist(eng)}); +} + +static void* RSMIInitFunction(void* args) { + rsmi_status_t status; + + (void)args; + rand_sleep_mod(100); + status = rsmi_init(0); + EXPECT_EQ(RSMI_STATUS_SUCCESS, status); + pthread_exit(nullptr); + return nullptr; +} + +static void* RSMIShutDownFunction(void* args) { + rsmi_status_t status; + + (void)args; + rand_sleep_mod(100); + status = rsmi_shut_down(); + EXPECT_EQ(RSMI_STATUS_SUCCESS, status); + pthread_exit(nullptr); + return nullptr; +} + +static void *RSMIInitShutDownFunction(void* args) { + rsmi_status_t status; + + (void)args; + rand_sleep_mod(100); + status = rsmi_init(0); + EXPECT_EQ(RSMI_STATUS_SUCCESS, status); + + rand_sleep_mod(100); + + status = rsmi_shut_down(); + EXPECT_EQ(RSMI_STATUS_SUCCESS, status); + pthread_exit(nullptr); + return nullptr; +} + +static const int NumOfThreads = 100; + +TestConcurrentInit::TestConcurrentInit(void) : TestBase() { + set_title("RSMI Concurrent Init Test"); + set_description("This test initializes RSMI concurrently to verify " + "reference counting functionality."); +} + +TestConcurrentInit::~TestConcurrentInit(void) { +} + +void TestConcurrentInit::SetUp(void) { + // TestBase::SetUp(); // Skip usual SetUp to avoid doing the usual rsmi_init + return; +} + +// Compare required profile for this test case with what we're actually +// running on +void TestConcurrentInit::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestConcurrentInit::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestConcurrentInit::Close() { + // This will close handles opened within rsmitst utility calls and call + // rsmi_shut_down(), so it should be done after other rsmi cleanup + TestBase::Close(); +} + +// Compare required profile for this test case with what we're actually +// running on +void TestConcurrentInit::Run(void) { + if (setup_failed_) { + std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl; + return; + } + + pthread_t ThreadId[NumOfThreads]; + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + std::cout << "Testing concurrent rsmi_init()..." << std::endl; + for (int Id = 0; Id < NumOfThreads; ++Id) { + int ThreadStatus = pthread_create(&ThreadId[Id], &attr, + RSMIInitFunction, nullptr); + ASSERT_EQ(0, ThreadStatus) << "pthread_create failed."; + } + + for (int Id = 0; Id < NumOfThreads; ++Id) { + int err = pthread_join(ThreadId[Id], nullptr); + ASSERT_EQ(0, err) << "pthread_join failed."; + } + + // Invoke rsmi_shut_down() and verify that all the rsmi_init() calls were + // counted. RSMI should be fully shut down after exactly NumOfThreads calls.
+ for (int Id = 0; Id < NumOfThreads; ++Id) { + rsmi_status_t err = rsmi_shut_down(); + ASSERT_EQ(RSMI_STATUS_SUCCESS, err) << "An rsmi_init was missed."; + } + + rsmi_status_t err = rsmi_shut_down(); + ASSERT_EQ(RSMI_INITIALIZATION_ERROR, err) << + "rsmi_init reference count was too high."; + + int32_t refcnt = rsmi_test_refcount(0); + ASSERT_EQ(0, refcnt); + + std::cout << "Concurrent rsmi_init() test passed." << std::endl << std::endl; + std::cout << "Testing concurrent rsmi_shut_down()..." << std::endl; + + // Call rsmi_init() NumOfThreads times so that each of the concurrent + // rsmi_shut_down() calls below has an rsmi_init() to balance. + for (int Id = 0; Id < NumOfThreads; ++Id) { + rsmi_status_t err = rsmi_init(0); + ASSERT_EQ(RSMI_STATUS_SUCCESS, err); + } + + for (int Id = 0; Id < NumOfThreads; ++Id) { + int ThreadStatus = + pthread_create(&ThreadId[Id], &attr, RSMIShutDownFunction, nullptr); + ASSERT_EQ(0, ThreadStatus) << "pthread_create failed."; + } + + for (int Id = 0; Id < NumOfThreads; ++Id) { + int err = pthread_join(ThreadId[Id], nullptr); + ASSERT_EQ(0, err) << "pthread_join failed."; + } + + refcnt = rsmi_test_refcount(0); + ASSERT_EQ(0, refcnt); + + std::cout << "Concurrent rsmi_shut_down() passed." << std::endl; + + std::cout << + "Testing concurrent rsmi_init() followed by rsmi_shut_down()..." << + std::endl; + + for (int Id = 0; Id < NumOfThreads; ++Id) { + int ThreadStatus = + pthread_create(&ThreadId[Id], &attr, RSMIInitShutDownFunction, nullptr); + ASSERT_EQ(0, ThreadStatus) << "pthread_create failed."; + } + + for (int Id = 0; Id < NumOfThreads; ++Id) { + int err = pthread_join(ThreadId[Id], nullptr); + ASSERT_EQ(0, err) << "pthread_join failed."; + } + + refcnt = rsmi_test_refcount(0); + ASSERT_EQ(0, refcnt); + + std::cout << + "Concurrent rsmi_init() followed by rsmi_shut_down() passed." << + std::endl; +} diff --git a/tests/rocm_smi_test/functional/init_shutdown_refcount.h b/tests/rocm_smi_test/functional/init_shutdown_refcount.h new file mode 100755 index 00000000..9d7c3212 --- /dev/null +++ b/tests/rocm_smi_test/functional/init_shutdown_refcount.h @@ -0,0 +1,74 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2020, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of <Name of Development Group, Name of Institution>, + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_INIT_SHUTDOWN_REFCOUNT_H_ +#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_INIT_SHUTDOWN_REFCOUNT_H_ + +#include "rocm_smi_test/test_base.h" + +class TestConcurrentInit : public TestBase { + public: + TestConcurrentInit(); + + // @Brief: Destructor for the TestConcurrentInit class + virtual ~TestConcurrentInit(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrieve the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); +}; + +#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_INIT_SHUTDOWN_REFCOUNT_H_ diff --git a/tests/rocm_smi_test/main.cc b/tests/rocm_smi_test/main.cc index 4ad0fe8f..f8c50d54 100755 --- a/tests/rocm_smi_test/main.cc +++ b/tests/rocm_smi_test/main.cc @@ -79,6 +79,7 @@ #include "functional/api_support_read.h" #include "functional/mutual_exclusion.h" #include "functional/evt_notif_read_write.h" +#include "functional/init_shutdown_refcount.h" static RSMITstGlobals *sRSMIGlvalues = nullptr; @@ -226,17 +227,25 @@ TEST(rsmitstReadOnly, TestAPISupportRead) { RunGenericTest(&tst); } TEST(rsmitstReadOnly, TestMutualExclusion) { - TestMutualExclusion test; + TestMutualExclusion tst; - test.DisplayTestInfo(); - test.SetUp(); - test.Run(); - RunCustomTestEpilog(&test); + tst.DisplayTestInfo(); + tst.SetUp(); + tst.Run(); + RunCustomTestEpilog(&tst); } TEST(rsmitstReadWrite, TestEvtNotifReadWrite) { TestEvtNotifReadWrite tst; RunGenericTest(&tst); } +TEST(rsmitstReadOnly, TestConcurrentInit) { + TestConcurrentInit tst; + tst.DisplayTestInfo(); + // tst.SetUp(); // Avoid extra rsmi_init + tst.Run(); + // RunCustomTestEpilog(&tst); // Avoid extra rsmi_shut_down + tst.DisplayResults(); +} int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv);
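Usage sketch (illustrative, not part of the patch): with this change, rsmi_init() and rsmi_shut_down() are reference counted. Only the first rsmi_init() actually runs RocmSMI::Initialize(), and only the rsmi_shut_down() that drops the count back to zero runs RocmSMI::Cleanup(); both entry points serialize on bootstrap_mutex_. The following is a minimal sketch of the resulting behavior, assuming only the public API touched by this patch:

    #include <cassert>
    #include "rocm_smi/rocm_smi.h"

    int main() {
      // First init: ref count 0 -> 1, RocmSMI::Initialize() runs.
      assert(rsmi_init(0) == RSMI_STATUS_SUCCESS);
      // Nested init: ref count 1 -> 2, no re-initialization occurs.
      assert(rsmi_init(0) == RSMI_STATUS_SUCCESS);
      // Ref count 2 -> 1: the library stays initialized.
      assert(rsmi_shut_down() == RSMI_STATUS_SUCCESS);
      // Ref count 1 -> 0: RocmSMI::Cleanup() runs here.
      assert(rsmi_shut_down() == RSMI_STATUS_SUCCESS);
      // Unbalanced shutdown: count is already 0, so an error is returned.
      assert(rsmi_shut_down() == RSMI_STATUS_INIT_ERROR);
      return 0;
    }

rsmi_init() additionally returns the new RSMI_STATUS_REFCOUNT_OVERFLOW once the counter reaches INT32_MAX. The TestConcurrentInit threads above exercise exactly these paths from many threads at once.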