Skip to content

Commit

Permalink
cpufeatures: check OS register support (#919)
Browse files Browse the repository at this point in the history
  • Loading branch information
newpavlov committed Jun 15, 2023
1 parent 6ad2abf commit 95219ac
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 46 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions cpufeatures/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.2.8 (2023-06-15)
### Fixed
- Check OS register support on x86 targets ([#919])

[#919]: https://github.com/RustCrypto/utils/issues/919

## 0.2.7 (2023-04-20)
### Added
- Support freestanding/UEFI `x86` targets ([#821])
Expand Down
2 changes: 1 addition & 1 deletion cpufeatures/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cpufeatures"
version = "0.2.7"
version = "0.2.8"
description = """
Lightweight runtime CPU feature detection for x86/x86_64 and aarch64 with
no_std support and support for mobile targets including Android and iOS
Expand Down
119 changes: 75 additions & 44 deletions cpufeatures/src/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,23 @@
//! Portable, `no_std`-friendly implementation that relies on the x86 `CPUID`
//! instruction for feature detection.

// Evaluate the given `$body` expression any of the supplied target features
// are not enabled. Otherwise returns true.
//
// The `$body` expression is not evaluated on SGX targets, and returns false
// on these targets unless *all* supplied target features are enabled.
/// Evaluate the given `$body` expression any of the supplied target features
/// are not enabled. Otherwise returns true.
///
/// The `$body` expression is not evaluated on SGX targets, and returns false
/// on these targets unless *all* supplied target features are enabled.
#[macro_export]
#[doc(hidden)]
macro_rules! __unless_target_features {
($($tf:tt),+ => $body:expr ) => {{
#[cfg(not(all($(target_feature=$tf,)*)))]
{
#[cfg(not(any(target_env = "sgx", target_os = "none", target_os = "uefi")))]
#[cfg(not(any(target_env = "sgx", target_os = "", target_os = "uefi")))]
$body

// CPUID is not available on SGX. Freestanding and UEFI targets
// do not support SIMD features with default compilation flags.
#[cfg(any(target_env = "sgx", target_os = "none", target_os = "uefi"))]
#[cfg(any(target_env = "sgx", target_os = "", target_os = "uefi"))]
false
}

Expand All @@ -28,7 +28,7 @@ macro_rules! __unless_target_features {
}};
}

// Use CPUID to detect the presence of all supplied target features.
/// Use CPUID to detect the presence of all supplied target features.
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
Expand Down Expand Up @@ -61,54 +61,85 @@ macro_rules! __detect_target_features {
}};
}

/// Check that OS supports required SIMD registers
#[macro_export]
#[doc(hidden)]
macro_rules! __xgetbv {
($cr:expr, $mask:expr) => {{
#[cfg(target_arch = "x86")]
use core::arch::x86 as arch;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64 as arch;

// Check bits 26 and 27
let xmask = 0b11 << 26;
let xsave = $cr[0].ecx & xmask == xmask;
if xsave {
let xcr0 = unsafe { arch::_xgetbv(arch::_XCR_XFEATURE_ENABLED_MASK) };
(xcr0 & $mask) == $mask
} else {
false
}
}};
}

macro_rules! __expand_check_macro {
($(($name:tt $(, $i:expr, $reg:ident, $offset:expr)*)),* $(,)?) => {
($(($name:tt, $reg_cap:tt $(, $i:expr, $reg:ident, $offset:expr)*)),* $(,)?) => {
#[macro_export]
#[doc(hidden)]
macro_rules! check {
$(
($cr:expr, $name) => {
true
($cr:expr, $name) => {{
// Register bits are listed here:
// https://wiki.osdev.org/CPU_Registers_x86#Extended_Control_Registers
let reg_cap = match $reg_cap {
// Bit 1
"xmm" => $crate::__xgetbv!($cr, 0b10),
// Bits 1 and 2
"ymm" => $crate::__xgetbv!($cr, 0b110),
// Bits 1, 2, 5, 6, and 7
"zmm" => $crate::__xgetbv!($cr, 0b1110_0110),
_ => true,
};
reg_cap
$(
& ($cr[$i].$reg & (1 << $offset) != 0)
)*
};
}};
)*
}
};
}

// Note that according to the [Intel manual][0] AVX2 and FMA require
// that we check availability of AVX before using them.
//
// [0]: https://www.intel.com/content/dam/develop/external/us/en/documents/36945
__expand_check_macro! {
("mmx", 0, edx, 23),
("sse", 0, edx, 25),
("sse2", 0, edx, 26),
("sse3", 0, ecx, 0),
("pclmulqdq", 0, ecx, 1),
("ssse3", 0, ecx, 9),
("fma", 0, ecx, 28, 0, ecx, 12),
("sse4.1", 0, ecx, 19),
("sse4.2", 0, ecx, 20),
("popcnt", 0, ecx, 23),
("aes", 0, ecx, 25),
("avx", 0, ecx, 28),
("rdrand", 0, ecx, 30),
("sgx", 1, ebx, 2),
("bmi1", 1, ebx, 3),
("avx2", 0, ecx, 28, 1, ebx, 5),
("bmi2", 1, ebx, 8),
("avx512f", 1, ebx, 16),
("avx512dq", 1, ebx, 17),
("rdseed", 1, ebx, 18),
("adx", 1, ebx, 19),
("avx512ifma", 1, ebx, 21),
("avx512pf", 1, ebx, 26),
("avx512er", 1, ebx, 27),
("avx512cd", 1, ebx, 28),
("sha", 1, ebx, 29),
("avx512bw", 1, ebx, 30),
("avx512vl", 1, ebx, 31),
("sse3", "xmm", 0, ecx, 0),
("pclmulqdq", "xmm", 0, ecx, 1),
("ssse3", "xmm", 0, ecx, 9),
("fma", "xmm", 0, ecx, 12, 0, ecx, 28),
("sse4.1", "xmm", 0, ecx, 19),
("sse4.2", "xmm", 0, ecx, 20),
("popcnt", "", 0, ecx, 23),
("aes", "xmm", 0, ecx, 25),
("avx", "xmm", 0, ecx, 28),
("rdrand", "", 0, ecx, 30),

("mmx", "", 0, edx, 23),
("sse", "xmm", 0, edx, 25),
("sse2", "xmm", 0, edx, 26),

("sgx", "", 1, ebx, 2),
("bmi1", "", 1, ebx, 3),
("bmi2", "", 1, ebx, 8),
("avx2", "ymm", 1, ebx, 5, 0, ecx, 28),
("avx512f", "zmm", 1, ebx, 16),
("avx512dq", "zmm", 1, ebx, 17),
("rdseed", "", 1, ebx, 18),
("adx", "", 1, ebx, 19),
("avx512ifma", "zmm", 1, ebx, 21),
("avx512pf", "zmm", 1, ebx, 26),
("avx512er", "zmm", 1, ebx, 27),
("avx512cd", "zmm", 1, ebx, 28),
("sha", "xmm", 1, ebx, 29),
("avx512bw", "zmm", 1, ebx, 30),
("avx512vl", "zmm", 1, ebx, 31),
}

0 comments on commit 95219ac

Please sign in to comment.