diff --git a/cuda_core/cuda/core/system/_clock.pxi b/cuda_core/cuda/core/system/_clock.pxi index 89e9020f4c..5d08d2a567 100644 --- a/cuda_core/cuda/core/system/_clock.pxi +++ b/cuda_core/cuda/core/system/_clock.pxi @@ -129,7 +129,7 @@ cdef class ClockInfo: Returns ------- - ClockOffsets + :obj:`~_device.ClockOffsets` An object with the min, max and current clock offset. """ return ClockOffsets(nvml.device_get_clock_offsets(self._handle, self._clock_type, pstate)) diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index f661c4e685..23fcf81e92 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -159,7 +159,7 @@ cdef class Device: @property def arch(self) -> DeviceArch: """ - Device architecture. + :obj:`~DeviceArch` device architecture. For example, a Tesla V100 will report ``DeviceArchitecture.name == "VOLTA"``, and RTX A6000 will report ``DeviceArchitecture.name == @@ -177,7 +177,7 @@ cdef class Device: @property def brand(self) -> BrandType: """ - Brand of the device + :obj:`~BrandType` brand of the device """ return BrandType(nvml.device_get_brand(self._handle)) @@ -289,7 +289,7 @@ cdef class Device: Returns ------- - Iterator of Device + Iterator over :obj:`~Device` An iterator over available devices. """ for device_id in range(nvml.device_get_count_v2()): @@ -301,7 +301,7 @@ cdef class Device: @property def addressing_mode(self) -> AddressingMode: """ - Get the addressing mode of the device. + Get the :obj:`~AddressingMode` of the device. Addressing modes can be one of: @@ -334,7 +334,7 @@ cdef class Device: Returns ------- - Iterator of Device + Iterator of :obj:`~Device` An iterator over available devices. """ cdef Device device @@ -411,7 +411,7 @@ cdef class Device: def clock(self, clock_type: ClockType) -> ClockInfo: """ - Get information about and manage a specific clock on a device. 
+ :obj:`~_device.ClockInfo` object to get information about and manage a specific clock on a device. """ return ClockInfo(self._handle, clock_type) @@ -442,7 +442,7 @@ cdef class Device: def get_current_clock_event_reasons(self) -> list[ClocksEventReasons]: """ - Retrieves the current clocks event reasons. + Retrieves the current :obj:`~ClocksEventReasons`. For all fully supported products. """ @@ -452,7 +452,7 @@ cdef class Device: def get_supported_clock_event_reasons(self) -> list[ClocksEventReasons]: """ - Retrieves supported clocks event reasons that can be returned by + Retrieves supported :obj:`~ClocksEventReasons` that can be returned by :meth:`get_current_clock_event_reasons`. For all fully supported products. @@ -470,7 +470,7 @@ cdef class Device: @property def cooler(self) -> CoolerInfo: """ - Get information about cooler on a device. + :obj:`~_device.CoolerInfo` object with cooler information for the device. """ return CoolerInfo(nvml.device_get_cooler_info(self._handle)) @@ -481,7 +481,7 @@ cdef class Device: @property def attributes(self) -> DeviceAttributes: """ - Get various device attributes. + :obj:`~_device.DeviceAttributes` object with various device attributes. For Ampere™ or newer fully supported devices. Only available on Linux systems. @@ -549,9 +549,9 @@ cdef class Device: Returns ------- - :class:`DeviceEvents` + :obj:`~_device.DeviceEvents` An object representing the registered events. Call - :meth:`DeviceEvents.wait` on this object to wait for events. + :meth:`~_device.DeviceEvents.wait` on this object to wait for events. Raises ------ @@ -582,7 +582,7 @@ cdef class Device: def fan(self, fan: int = 0) -> FanInfo: """ - Get information and manage a specific fan on a device. + :obj:`~_device.FanInfo` object to get information about and manage a specific fan on a device. 
""" if fan < 0 or fan >= self.num_fans: raise ValueError(f"Fan index {fan} is out of range [0, {self.num_fans})") @@ -605,14 +605,14 @@ cdef class Device: Each value specified can raise its own exception. That exception will be raised when attempting to access the corresponding ``value`` from the - returned :class:`FieldValues` container. + returned :obj:`~_device.FieldValues` container. To confirm that there are no exceptions in the entire container, call - :meth:`FieldValues.validate`. + :meth:`~_device.FieldValues.validate`. Parameters ---------- - field_ids: list of int or tuple of (int, int) + field_ids: list[int | tuple[int, int]] List of field IDs to query. Each item may be either a single value from the :class:`FieldId` @@ -620,7 +620,7 @@ cdef class Device: Returns ------- - :class:`FieldValues` + :obj:`~_device.FieldValues` Container of field values corresponding to the requested field IDs. """ return FieldValues(nvml.device_get_field_values(self._handle, field_ids)) @@ -631,7 +631,7 @@ cdef class Device: Parameters ---------- - field_ids: list of int or tuple of (int, int) + field_ids: list[int | tuple[int, int]] List of field IDs to clear. Each item may be either a single value from the :class:`FieldId` @@ -646,7 +646,7 @@ cdef class Device: @property def inforom(self) -> InforomInfo: """ - Accessor for InfoROM information. + :obj:`~_device.InforomInfo` object with InfoROM information. For all products with an InfoROM. """ @@ -659,7 +659,7 @@ cdef class Device: @property def bar1_memory_info(self) -> BAR1MemoryInfo: """ - Get information about BAR1 memory. + :obj:`~_device.BAR1MemoryInfo` object with BAR1 memory information. BAR1 is used to map the FB (device memory) so that it can be directly accessed by the CPU or by 3rd party devices (peer-to-peer on the PCIE @@ -670,7 +670,7 @@ cdef class Device: @property def memory_info(self) -> MemoryInfo: """ - Object with memory information. + :obj:`~_device.MemoryInfo` object with memory information. 
""" return MemoryInfo(nvml.device_get_memory_info_v2(self._handle)) @@ -681,7 +681,7 @@ cdef class Device: @property def pci_info(self) -> PciInfo: """ - The PCI attributes of this device. + :obj:`~_device.PciInfo` object with the PCI attributes of this device. """ return PciInfo(nvml.device_get_pci_info_ext(self._handle), self._handle) @@ -703,7 +703,7 @@ cdef class Device: @property def dynamic_pstates_info(self) -> GpuDynamicPstatesInfo: """ - Retrieve performance monitor samples from the associated subdevice. + :obj:`~_device.GpuDynamicPstatesInfo` object with performance monitor samples from the associated subdevice. """ return GpuDynamicPstatesInfo(nvml.device_get_dynamic_pstates_info(self._handle)) @@ -713,6 +713,11 @@ cdef class Device: The returned list contains a contiguous list of valid P-States supported by the device. + + Returns + ------- + list[Pstates] + A list of supported P-States for the device. """ return [Pstates(x) for x in nvml.device_get_supported_performance_states(self._handle)] @@ -723,7 +728,7 @@ cdef class Device: @property def repair_status(self) -> RepairStatus: """ - Get the repair status for TPC/Channel repair. + :obj:`~_device.RepairStatus` object with TPC/Channel repair status. For Ampere™ or newer fully supported devices. """ @@ -736,7 +741,7 @@ cdef class Device: @property def temperature(self) -> Temperature: """ - Get information about temperatures on a device. + :obj:`~_device.Temperature` object with temperature information for the device. 
""" return Temperature(self._handle) @@ -822,46 +827,27 @@ def get_p2p_status(device1: Device, device2: Device, index: GpuP2PCapsIndex) -> __all__ = [ "AddressingMode", "AffinityScope", - "BAR1MemoryInfo", "BrandType", "ClockId", - "ClockInfo", - "ClockOffsets", "ClocksEventReasons", "ClockType", "CoolerControl", - "CoolerInfo", "CoolerTarget", "Device", "DeviceArch", - "DeviceAttributes", - "DeviceEvents", - "EventData", "EventType", "FanControlPolicy", - "FanInfo", "FieldId", - "FieldValue", - "FieldValues", "get_p2p_status", "get_topology_common_ancestor", - "GpuDynamicPstatesInfo", - "GpuDynamicPstatesUtilization", "GpuP2PCapsIndex", "GpuP2PStatus", "GpuTopologyLevel", - "InforomInfo", "InforomObject", - "MemoryInfo", "PcieUtilCounter", - "PciInfo", "Pstates", - "RepairStatus", - "Temperature", "TemperatureSensors", "TemperatureThresholds", "ThermalController", - "ThermalSensor", - "ThermalSettings", "ThermalTarget", ] diff --git a/cuda_core/cuda/core/system/_system_events.pyx b/cuda_core/cuda/core/system/_system_events.pyx index d8a64b619b..81f69d872a 100644 --- a/cuda_core/cuda/core/system/_system_events.pyx +++ b/cuda_core/cuda/core/system/_system_events.pyx @@ -26,7 +26,7 @@ cdef class SystemEvent: @property def event_type(self) -> SystemEventType: """ - The type of event that was triggered. + The :obj:`~SystemEventType` that was triggered. """ return SystemEventType(self._event_data.event_type) @@ -40,7 +40,7 @@ cdef class SystemEvent: @property def device(self) -> _device.Device: """ - The device associated with this event. + The :obj:`~_device.Device` associated with this event. """ return _device.Device(pci_bus_id=self.gpu_id) @@ -56,6 +56,9 @@ cdef class SystemEvents: return len(self._event_data) def __getitem__(self, idx: int) -> SystemEvent: + """ + Get the :obj:`~_system_events.SystemEvent` at the specified index. 
+ """ return SystemEvent(self._event_data[idx]) @@ -107,6 +110,12 @@ cdef class RegisteredSystemEvents: buffer_size: int The maximum number of events to retrieve. Must be at least 1. + Returns + ------- + :obj:`~_system_events.SystemEvents` + A set of events that were received. The number of events returned may + be less than the specified buffer size if fewer events were available. + Raises ------ :class:`cuda.core.system.TimeoutError` @@ -142,9 +151,9 @@ def register_events(events: SystemEventType | int | list[SystemEventType | int]) Returns ------- - :class:`RegisteredSystemEvents` + :obj:`~_system_events.RegisteredSystemEvents` An object representing the registered events. Call - :meth:`RegisteredSystemEvents.wait` on this object to wait for events. + :meth:`~_system_events.RegisteredSystemEvents.wait` on this object to wait for events. Raises ------ @@ -156,8 +165,5 @@ def register_events(events: SystemEventType | int | list[SystemEventType | int]) __all__ = [ "register_events", - "RegisteredSystemEvents", - "SystemEvent", - "SystemEvents", "SystemEventType", ] diff --git a/cuda_core/cuda/core/system/_temperature.pxi b/cuda_core/cuda/core/system/_temperature.pxi index c56eb719d1..8f8e10a570 100644 --- a/cuda_core/cuda/core/system/_temperature.pxi +++ b/cuda_core/cuda/core/system/_temperature.pxi @@ -140,7 +140,7 @@ cdef class Temperature: Returns ------- - :class:`ThermalSettings` + :obj:`~_device.ThermalSettings` The thermal settings for the specified sensor. 
""" return ThermalSettings(nvml.device_get_thermal_settings(self._handle, sensor_index)) diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index 005866ddb2..8bd3638da0 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -199,9 +199,6 @@ Events :toctree: generated/ system.register_events - system.RegisteredSystemEvents - system.SystemEvent - system.SystemEvents system.SystemEventType Enums @@ -215,6 +212,7 @@ Enums system.BrandType system.ClockId system.ClocksEventReasons + system.ClockType system.CoolerControl system.CoolerTarget system.DeviceArch @@ -238,29 +236,6 @@ Types :template: autosummary/cyclass.rst system.Device - system.BAR1MemoryInfo - system.ClockInfo - system.ClockOffsets - system.ClockType - system.CoolerInfo - system.DeviceAttributes - system.DeviceEvents - system.EventData - system.FanInfo - system.FieldValue - system.FieldValues - system.GpuDynamicPstatesInfo - system.GpuDynamicPstatesUtilization - system.GpuP2PCapsIndex - system.GpuP2PStatus - system.GpuTopologyLevel - system.InforomInfo - system.MemoryInfo - system.PciInfo - system.RepairStatus - system.Temperature - system.ThermalSensor - system.ThermalSettings .. module:: cuda.core.utils diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index becd1746cc..de3e6bf77f 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -51,3 +51,36 @@ CUDA protocols :template: protocol.rst typing.IsStreamT + +NVML +---- + +.. 
autosummary:: + :toctree: generated/ + :template: autosummary/cyclass.rst + + system._device.BAR1MemoryInfo + system._device.ClockInfo + system._device.ClockOffsets + system._device.CoolerInfo + system._device.DeviceAttributes + system._device.DeviceEvents + system._device.EventData + system._device.FanInfo + system._device.FieldValue + system._device.FieldValues + system._device.GpuDynamicPstatesInfo + system._device.GpuDynamicPstatesUtilization + system._device.GpuP2PCapsIndex + system._device.GpuP2PStatus + system._device.GpuTopologyLevel + system._device.InforomInfo + system._device.MemoryInfo + system._device.PciInfo + system._device.RepairStatus + system._device.Temperature + system._device.ThermalSensor + system._device.ThermalSettings + system._system_events.RegisteredSystemEvents + system._system_events.SystemEvent + system._system_events.SystemEvents diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py index 2a094d8211..85a541018d 100644 --- a/cuda_core/tests/system/test_system_device.py +++ b/cuda_core/tests/system/test_system_device.py @@ -83,7 +83,7 @@ def test_device_bar1_memory(): bar1_memory_info.used, ) - assert isinstance(bar1_memory_info, system.BAR1MemoryInfo) + assert isinstance(bar1_memory_info, _device.BAR1MemoryInfo) assert isinstance(free, int) assert isinstance(total, int) assert isinstance(used, int) @@ -140,7 +140,7 @@ def test_device_memory(): memory_info = device.memory_info free, total, used, reserved = memory_info.free, memory_info.total, memory_info.used, memory_info.reserved - assert isinstance(memory_info, system.MemoryInfo) + assert isinstance(memory_info, _device.MemoryInfo) assert isinstance(free, int) assert isinstance(total, int) assert isinstance(used, int) @@ -163,7 +163,7 @@ def test_device_name(): def test_device_pci_info(): for device in system.Device.get_all_devices(): pci_info = device.pci_info - assert isinstance(pci_info, system.PciInfo) + assert 
isinstance(pci_info, _device.PciInfo) assert isinstance(pci_info.bus_id, str) assert re.match("[a-f0-9]{8}:[a-f0-9]{2}:[a-f0-9]{2}.[a-f0-9]", pci_info.bus_id.lower()) @@ -317,7 +317,7 @@ def test_device_attributes(): # that's not the case. with unsupported_before(device, None): attributes = device.attributes - assert isinstance(attributes, system.DeviceAttributes) + assert isinstance(attributes, _device.DeviceAttributes) assert isinstance(attributes.multiprocessor_count, int) assert attributes.multiprocessor_count > 0 @@ -371,7 +371,7 @@ def test_field_values(): with pytest.raises(TypeError): field_values["invalid_index"] - assert isinstance(field_values, system.FieldValues) + assert isinstance(field_values, _device.FieldValues) assert len(field_values) == len(field_ids) raw_values = field_values.get_all_values() @@ -453,7 +453,7 @@ def test_repair_status(): # this seems to also work on some TURING systems. with unsupported_before(device, None): repair_status = device.repair_status - assert isinstance(repair_status, system.RepairStatus) + assert isinstance(repair_status, _device.RepairStatus) assert isinstance(repair_status.channel_repair_pending, bool) assert isinstance(repair_status.tpc_repair_pending, bool) @@ -557,7 +557,7 @@ def test_clock(): for device in system.Device.get_all_devices(): for clock_type in system.ClockType: clock = device.clock(clock_type) - assert isinstance(clock, system.ClockInfo) + assert isinstance(clock, _device.ClockInfo) # These are ordered from oldest API to newest API so we test as much # as we can on each hardware architecture. 
@@ -589,7 +589,7 @@ def test_clock(): except (system.InvalidArgumentError, system.NotFoundError): pass else: - assert isinstance(offsets, system.ClockOffsets) + assert isinstance(offsets, _device.ClockOffsets) assert isinstance(offsets.clock_offset_mhz, int) assert isinstance(offsets.max_offset_mhz, int) assert isinstance(offsets.min_offset_mhz, int) @@ -622,7 +622,7 @@ def test_fan(): for fan_idx in range(device.num_fans): fan_info = device.fan(fan_idx) - assert isinstance(fan_info, system.FanInfo) + assert isinstance(fan_info, _device.FanInfo) speed = fan_info.speed assert isinstance(speed, int) @@ -663,7 +663,7 @@ def test_cooler(): with unsupported_before(device, DeviceArch.MAXWELL): cooler_info = device.cooler - assert isinstance(cooler_info, system.CoolerInfo) + assert isinstance(cooler_info, _device.CoolerInfo) signal_type = cooler_info.signal_type assert isinstance(signal_type, system.CoolerControl) @@ -675,7 +675,7 @@ def test_cooler(): def test_temperature(): for device in system.Device.get_all_devices(): temperature = device.temperature - assert isinstance(temperature, system.Temperature) + assert isinstance(temperature, _device.Temperature) sensor = temperature.sensor() assert isinstance(sensor, int) @@ -696,10 +696,10 @@ def test_temperature(): with unsupported_before(device, None): thermals = temperature.thermal_settings(system.ThermalTarget.ALL) - assert isinstance(thermals, system.ThermalSettings) + assert isinstance(thermals, _device.ThermalSettings) for i, sensor in enumerate(thermals): - assert isinstance(sensor, system.ThermalSensor) + assert isinstance(sensor, _device.ThermalSensor) assert isinstance(sensor.target, system.ThermalTarget) assert isinstance(sensor.controller, system.ThermalController) assert isinstance(sensor.default_min_temp, int) @@ -720,7 +720,7 @@ def test_pstates(): assert all(isinstance(p, system.Pstates) for p in pstates) dynamic_pstates_info = device.dynamic_pstates_info - assert isinstance(dynamic_pstates_info, 
system.GpuDynamicPstatesInfo) + assert isinstance(dynamic_pstates_info, _device.GpuDynamicPstatesInfo) assert len(dynamic_pstates_info) == nvml.MAX_GPU_UTILIZATIONS