Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cpufeatures: check OS register support #919

Merged
merged 5 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions cpufeatures/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.2.8 (2023-06-15)
### Fixed
- Check OS register support on x86 targets ([#919])

[#919]: https://github.com/RustCrypto/utils/issues/919

## 0.2.7 (2023-04-20)
### Added
- Support freestanding/UEFI `x86` targets ([#821])
Expand Down
2 changes: 1 addition & 1 deletion cpufeatures/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cpufeatures"
version = "0.2.7"
version = "0.2.8"
description = """
Lightweight runtime CPU feature detection for x86/x86_64 and aarch64 with
no_std support and support for mobile targets including Android and iOS
Expand Down
119 changes: 75 additions & 44 deletions cpufeatures/src/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,23 @@
//! Portable, `no_std`-friendly implementation that relies on the x86 `CPUID`
//! instruction for feature detection.

// Evaluate the given `$body` expression if any of the supplied target
// features are not enabled. Otherwise returns true.
//
// The `$body` expression is not evaluated on SGX targets, and returns false
// on these targets unless *all* supplied target features are enabled.
/// Evaluate the given `$body` expression if any of the supplied target
/// features are not enabled. Otherwise returns true.
///
/// The `$body` expression is not evaluated on SGX targets, and returns false
/// on these targets unless *all* supplied target features are enabled.
#[macro_export]
#[doc(hidden)]
macro_rules! __unless_target_features {
($($tf:tt),+ => $body:expr ) => {{
#[cfg(not(all($(target_feature=$tf,)*)))]
{
#[cfg(not(any(target_env = "sgx", target_os = "none", target_os = "uefi")))]
#[cfg(not(any(target_env = "sgx", target_os = "", target_os = "uefi")))]
$body

// CPUID is not available on SGX. Freestanding and UEFI targets
// do not support SIMD features with default compilation flags.
#[cfg(any(target_env = "sgx", target_os = "none", target_os = "uefi"))]
#[cfg(any(target_env = "sgx", target_os = "", target_os = "uefi"))]
false
}

Expand All @@ -28,7 +28,7 @@ macro_rules! __unless_target_features {
}};
}

// Use CPUID to detect the presence of all supplied target features.
/// Use CPUID to detect the presence of all supplied target features.
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
Expand Down Expand Up @@ -61,54 +61,85 @@ macro_rules! __detect_target_features {
}};
}

/// Report whether the OS has enabled every extended-register state bit in `$mask`.
///
/// `$cr` must be indexable so that `$cr[0].ecx` yields an integer register
/// value. `$mask` is compared against the value returned by `_xgetbv` for
/// `_XCR_XFEATURE_ENABLED_MASK`.
///
/// Evaluates to `false` without executing `_xgetbv` unless bits 26 and 27 of
/// `$cr[0].ecx` are both set.
#[macro_export]
#[doc(hidden)]
macro_rules! __xgetbv {
    ($cr:expr, $mask:expr) => {{
        #[cfg(target_arch = "x86")]
        use core::arch::x86 as arch;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64 as arch;

        // Both bit 26 and bit 27 have to be set before the extended control
        // register may be queried (presumably the XSAVE/OSXSAVE flags of the
        // CPUID leaf stored in `$cr[0]` -- confirm against the code that
        // fills `$cr`).
        let xsave_bits = 0b11 << 26;
        let xsave_usable = ($cr[0].ecx & xsave_bits) == xsave_bits;

        // `&&` short-circuits, so `_xgetbv` only runs when the guard passed.
        xsave_usable && {
            let enabled = unsafe { arch::_xgetbv(arch::_XCR_XFEATURE_ENABLED_MASK) };
            (enabled & $mask) == $mask
        }
    }};
}

// Generates the exported, doc-hidden `check!` macro from a list of feature
// entries, producing one `($cr:expr, $name)` arm per entry.
//
// Every `(index, register, bit offset)` triple of an entry becomes a
// `$cr[index].register & (1 << offset) != 0` test in the generated arm, and
// all tests of one entry are combined with `&`.
//
// NOTE(review): this span shows two alternative matcher lines and two
// alternative arm openings/closings back to back (old and new revision of the
// same code). The variant that takes `$reg_cap` additionally gates the result
// on `$crate::__xgetbv!` for the "xmm", "ymm" and "zmm" strings; any other
// string leaves the result ungated (`true`).
macro_rules! __expand_check_macro {
($(($name:tt $(, $i:expr, $reg:ident, $offset:expr)*)),* $(,)?) => {
($(($name:tt, $reg_cap:tt $(, $i:expr, $reg:ident, $offset:expr)*)),* $(,)?) => {
#[macro_export]
#[doc(hidden)]
macro_rules! check {
$(
($cr:expr, $name) => {
true
($cr:expr, $name) => {{
// Register bits are listed here:
// https://wiki.osdev.org/CPU_Registers_x86#Extended_Control_Registers
// Each mask below is passed to `__xgetbv!` as its `$mask` argument.
let reg_cap = match $reg_cap {
// Bit 1
"xmm" => $crate::__xgetbv!($cr, 0b10),
// Bits 1 and 2
"ymm" => $crate::__xgetbv!($cr, 0b110),
// Bits 1, 2, 5, 6, and 7
"zmm" => $crate::__xgetbv!($cr, 0b1110_0110),
// Any other string: no register check is applied.
_ => true,
};
// Non-short-circuiting `&`: every listed register bit test is evaluated.
reg_cap
$(
& ($cr[$i].$reg & (1 << $offset) != 0)
)*
};
}};
)*
}
};
}

// Note that according to the [Intel manual][0] AVX2 and FMA require
// that we check availability of AVX before using them.
//
// [0]: https://www.intel.com/content/dam/develop/external/us/en/documents/36945
__expand_check_macro! {
("mmx", 0, edx, 23),
("sse", 0, edx, 25),
("sse2", 0, edx, 26),
("sse3", 0, ecx, 0),
("pclmulqdq", 0, ecx, 1),
("ssse3", 0, ecx, 9),
("fma", 0, ecx, 28, 0, ecx, 12),
("sse4.1", 0, ecx, 19),
("sse4.2", 0, ecx, 20),
("popcnt", 0, ecx, 23),
("aes", 0, ecx, 25),
("avx", 0, ecx, 28),
("rdrand", 0, ecx, 30),
("sgx", 1, ebx, 2),
("bmi1", 1, ebx, 3),
("avx2", 0, ecx, 28, 1, ebx, 5),
("bmi2", 1, ebx, 8),
("avx512f", 1, ebx, 16),
("avx512dq", 1, ebx, 17),
("rdseed", 1, ebx, 18),
("adx", 1, ebx, 19),
("avx512ifma", 1, ebx, 21),
("avx512pf", 1, ebx, 26),
("avx512er", 1, ebx, 27),
("avx512cd", 1, ebx, 28),
("sha", 1, ebx, 29),
("avx512bw", 1, ebx, 30),
("avx512vl", 1, ebx, 31),
("sse3", "xmm", 0, ecx, 0),
newpavlov marked this conversation as resolved.
Show resolved Hide resolved
("pclmulqdq", "xmm", 0, ecx, 1),
("ssse3", "xmm", 0, ecx, 9),
("fma", "xmm", 0, ecx, 12, 0, ecx, 28),
("sse4.1", "xmm", 0, ecx, 19),
("sse4.2", "xmm", 0, ecx, 20),
("popcnt", "", 0, ecx, 23),
("aes", "xmm", 0, ecx, 25),
("avx", "xmm", 0, ecx, 28),
("rdrand", "", 0, ecx, 30),

("mmx", "", 0, edx, 23),
("sse", "xmm", 0, edx, 25),
("sse2", "xmm", 0, edx, 26),

("sgx", "", 1, ebx, 2),
("bmi1", "", 1, ebx, 3),
("bmi2", "", 1, ebx, 8),
("avx2", "ymm", 1, ebx, 5, 0, ecx, 28),
("avx512f", "zmm", 1, ebx, 16),
("avx512dq", "zmm", 1, ebx, 17),
("rdseed", "", 1, ebx, 18),
("adx", "", 1, ebx, 19),
("avx512ifma", "zmm", 1, ebx, 21),
("avx512pf", "zmm", 1, ebx, 26),
("avx512er", "zmm", 1, ebx, 27),
("avx512cd", "zmm", 1, ebx, 28),
("sha", "xmm", 1, ebx, 29),
("avx512bw", "zmm", 1, ebx, 30),
("avx512vl", "zmm", 1, ebx, 31),
}
Loading