Skip to content

Commit

Permalink
Replace some as casts with .cast()
Browse files Browse the repository at this point in the history
  • Loading branch information
newpavlov committed Jan 10, 2024
1 parent 7ca3d97 commit 596f639
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 32 deletions.
2 changes: 1 addition & 1 deletion jh/benches/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ macro_rules! mach_bench {
input: *const [u8; 64],
) {
for _ in 0..160 {
jh::f8_impl(m, state, input as *const _);
jh::f8_impl(m, state, input.cast());
}
}
b.iter(|| unsafe { runner(m, &mut state, &input) });
Expand Down
2 changes: 1 addition & 1 deletion jh/src/compressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ union X2Bytes<M: Machine> {
#[doc(hidden)]
pub fn f8_impl<M: Machine>(mach: M, state: &mut [vec128_storage; 8], data: *const u8) {
#[allow(clippy::cast_ptr_alignment)]
let data = data as *const M::u128x1;
let data: *const M::u128x1 = data.cast();
let mut y = X8::<M>(
mach.unpack(state[0]),
mach.unpack(state[1]),
Expand Down
18 changes: 5 additions & 13 deletions sha1/src/compress/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,12 @@ unsafe fn digest_blocks(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
#[allow(non_snake_case)]
let MASK: __m128i = _mm_set_epi64x(0x0001_0203_0405_0607, 0x0809_0A0B_0C0D_0E0F);

let mut state_abcd = _mm_set_epi32(
state[0] as i32,
state[1] as i32,
state[2] as i32,
state[3] as i32,
);
let mut state_abcd = _mm_loadu_si128(state.as_ptr().cast());
state_abcd = _mm_shuffle_epi32(state_abcd, 0b00011011);
let mut state_e = _mm_set_epi32(state[4] as i32, 0, 0, 0);

for block in blocks {
// SAFETY: we use only unaligned loads with this pointer
#[allow(clippy::cast_ptr_alignment)]
let block_ptr = block.as_ptr() as *const __m128i;
let block_ptr: *const __m128i = block.as_ptr().cast();

let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(0)), MASK);
let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(1)), MASK);
Expand Down Expand Up @@ -90,10 +84,8 @@ unsafe fn digest_blocks(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
state_e = _mm_sha1nexte_epu32(h1, state_e);
}

state[0] = _mm_extract_epi32(state_abcd, 3) as u32;
state[1] = _mm_extract_epi32(state_abcd, 2) as u32;
state[2] = _mm_extract_epi32(state_abcd, 1) as u32;
state[3] = _mm_extract_epi32(state_abcd, 0) as u32;
state_abcd = _mm_shuffle_epi32(state_abcd, 0b00011011);
_mm_storeu_si128(state.as_mut_ptr().cast(), state_abcd);
state[4] = _mm_extract_epi32(state_e, 3) as u32;
}

Expand Down
14 changes: 7 additions & 7 deletions sha2/src/sha256/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
0x0405_0607_0001_0203u64 as i64,
);

let state_ptr = state.as_ptr() as *const __m128i;
let state_ptr: *const __m128i = state.as_ptr().cast();
let dcba = _mm_loadu_si128(state_ptr.add(0));
let efgh = _mm_loadu_si128(state_ptr.add(1));

Expand All @@ -59,11 +59,11 @@ unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
let abef_save = abef;
let cdgh_save = cdgh;

let data_ptr = block.as_ptr() as *const __m128i;
let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(0)), MASK);
let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(1)), MASK);
let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(2)), MASK);
let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(3)), MASK);
let block_ptr: *const __m128i = block.as_ptr().cast();
let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(0)), MASK);
let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(1)), MASK);
let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(2)), MASK);
let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(3)), MASK);
let mut w4;

rounds4!(abef, cdgh, w0, 0);
Expand Down Expand Up @@ -92,7 +92,7 @@ unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
let dcba = _mm_blend_epi16(feba, dchg, 0xF0);
let hgef = _mm_alignr_epi8(dchg, feba, 8);

let state_ptr_mut = state.as_mut_ptr() as *mut __m128i;
let state_ptr_mut: *mut __m128i = state.as_mut_ptr().cast();
_mm_storeu_si128(state_ptr_mut.add(0), dcba);
_mm_storeu_si128(state_ptr_mut.add(1), hgef);
}
Expand Down
20 changes: 10 additions & 10 deletions sha2/src/sha512/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]
let mut x = [_mm256_setzero_si256(); 8];

for i in (start_block..blocks.len()).step_by(2) {
load_data_avx2(&mut x, &mut ms, &mut t2, blocks.as_ptr().add(i) as *const _);
load_data_avx2(&mut x, &mut ms, &mut t2, blocks.as_ptr().add(i).cast());

// First block
let mut current_state = *state;
Expand All @@ -61,7 +61,7 @@ unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) {

// Reduced to single iteration
let mut current_state = *state;
load_data_avx(&mut x, &mut ms, block.as_ptr() as *const _);
load_data_avx(&mut x, &mut ms, block.as_ptr().cast());
rounds_0_63_avx(&mut current_state, &mut x, &mut ms);
rounds_64_79(&mut current_state, &ms);
accumulate_state(state, &current_state);
Expand All @@ -74,12 +74,12 @@ unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const

macro_rules! unrolled_iterations {
($($i:literal),*) => {$(
x[$i] = _mm_loadu_si128(data.add($i) as *const _);
x[$i] = _mm_loadu_si128(data.add($i).cast());
x[$i] = _mm_shuffle_epi8(x[$i], MASK);

let y = _mm_add_epi64(
x[$i],
_mm_loadu_si128(&K64[2 * $i] as *const u64 as *const _),
_mm_loadu_si128(K64.as_ptr().add(2 * $i).cast()),
);

ms[$i] = y;
Expand All @@ -106,12 +106,12 @@ unsafe fn load_data_avx2(

macro_rules! unrolled_iterations {
($($i:literal),*) => {$(
x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add(8 + $i) as *const _), 1);
x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add($i) as *const _), 0);
x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add(8 + $i).cast()), 1);
x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add($i).cast()), 0);

x[$i] = _mm256_shuffle_epi8(x[$i], MASK);

let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _);
let t = _mm_loadu_si128(K64.as_ptr().add($i * 2).cast());
let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t));

ms[$i] = _mm256_extracti128_si256(y, 0);
Expand All @@ -128,7 +128,7 @@ unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &

for _ in 0..4 {
for j in 0..8 {
let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _);
let k64 = _mm_loadu_si128(K64.as_ptr().add(k64_idx).cast());
let y = sha512_update_x_avx(x, k64);

{
Expand Down Expand Up @@ -338,12 +338,12 @@ fn_sha512_update_x!(sha512_update_x_avx2, __m256i, {

#[inline(always)]
fn cast_ms(ms: &MsgSchedule) -> &[u64; SHA512_BLOCK_WORDS_NUM] {
unsafe { &*(ms as *const MsgSchedule as *const _) }
unsafe { &*(ms.as_ptr().cast()) }
}

#[inline(always)]
fn cast_rs(rs: &RoundStates) -> &[u64; SHA512_ROUNDS_NUM] {
unsafe { &*(rs as *const RoundStates as *const _) }
unsafe { &*(rs.as_ptr().cast()) }
}

type State = [u64; SHA512_HASH_WORDS_NUM];
Expand Down

0 comments on commit 596f639

Please sign in to comment.