From ca9534bdcc54aa0ad7494ed7df011cfb347e1bc0 Mon Sep 17 00:00:00 2001
From: Tony Arcieri
Date: Sun, 25 Oct 2020 07:48:14 -0700
Subject: [PATCH] aes-soft: use fixslicing for AES-128/AES-256 encryption

Fixslicing is defined for these operations only, but we can replace
bitslicing in these capacities with the faster fixslicing approach.
This is useful for AES-CTR, which needs only the encryption operation.

AES-192, as well as AES-128/AES-256 decryption, still leverages the
previous bitsliced implementation.
---
 aes/aes-soft/benches/aes128.rs |  14 +-
 aes/aes-soft/benches/aes256.rs |  14 +-
 aes/aes-soft/src/aes128.rs     |  94 ++++++
 aes/aes-soft/src/aes192.rs     |  98 ++++++
 aes/aes-soft/src/aes256.rs     |  94 ++++++
 aes/aes-soft/src/fixslice.rs   | 580 +++++++++++++++------------------
 aes/aes-soft/src/impls.rs      | 120 -------
 aes/aes-soft/src/lib.rs        |  15 +-
 aes/aes-soft/tests/lib.rs      |  87 -----
 9 files changed, 553 insertions(+), 563 deletions(-)
 create mode 100644 aes/aes-soft/src/aes128.rs
 create mode 100644 aes/aes-soft/src/aes192.rs
 create mode 100644 aes/aes-soft/src/aes256.rs
 delete mode 100644 aes/aes-soft/src/impls.rs

diff --git a/aes/aes-soft/benches/aes128.rs b/aes/aes-soft/benches/aes128.rs
index 8a0beedd..c04b48bb 100644
--- a/aes/aes-soft/benches/aes128.rs
+++ b/aes/aes-soft/benches/aes128.rs
@@ -2,7 +2,7 @@
 extern crate test;
 
 use aes_soft::cipher::{BlockCipher, NewBlockCipher};
-use aes_soft::{Aes128, Aes128Fixsliced};
+use aes_soft::Aes128;
 
 #[bench]
 pub fn aes128_encrypt(bh: &mut test::Bencher) {
@@ -40,18 +40,6 @@ pub fn aes128_encrypt8(bh: &mut test::Bencher) {
     bh.bytes = (input[0].len() * input.len()) as u64;
 }
 
-#[bench]
-pub fn aes128_encrypt2_fixsliced(bh: &mut test::Bencher) {
-    let cipher = Aes128Fixsliced::new(&Default::default());
-    let mut input = Default::default();
-
-    bh.iter(|| {
-        cipher.encrypt_blocks(&mut input);
-        test::black_box(&input);
-    });
-    bh.bytes = (input[0].len() * input.len()) as u64;
-}
-
 #[bench]
 pub fn aes128_decrypt8(bh: &mut test::Bencher) {
     let cipher = Aes128::new(&Default::default());
diff --git a/aes/aes-soft/benches/aes256.rs b/aes/aes-soft/benches/aes256.rs
index 5d0c1cae..4235c34e 100644
--- a/aes/aes-soft/benches/aes256.rs
+++ b/aes/aes-soft/benches/aes256.rs
@@ -2,7 +2,7 @@
 extern crate test;
 
 use aes_soft::cipher::{BlockCipher, NewBlockCipher};
-use aes_soft::{Aes256, Aes256Fixsliced};
+use aes_soft::Aes256;
 
 #[bench]
 pub fn aes256_encrypt(bh: &mut test::Bencher) {
@@ -40,18 +40,6 @@ pub fn aes256_encrypt8(bh: &mut test::Bencher) {
     bh.bytes = (input[0].len() * input.len()) as u64;
 }
 
-#[bench]
-pub fn aes256_encrypt2_fixsliced(bh: &mut test::Bencher) {
-    let cipher = Aes256Fixsliced::new(&Default::default());
-    let mut input = Default::default();
-
-    bh.iter(|| {
-        cipher.encrypt_blocks(&mut input);
-        test::black_box(&input);
-    });
-    bh.bytes = (input[0].len() * input.len()) as u64;
-}
-
 #[bench]
 pub fn aes256_decrypt8(bh: &mut test::Bencher) {
     let cipher = Aes256::new(&Default::default());
diff --git a/aes/aes-soft/src/aes128.rs b/aes/aes-soft/src/aes128.rs
new file mode 100644
index 00000000..411f1c06
--- /dev/null
+++ b/aes/aes-soft/src/aes128.rs
@@ -0,0 +1,94 @@
+//! 
AES-128 + +use cipher::{ + consts::{U11, U16, U8}, + BlockCipher, NewBlockCipher, +}; + +use crate::{ + bitslice::{ + bit_slice_1x128_with_u32x4, bit_slice_1x16_with_u16, bit_slice_4x4_with_u16, + bit_slice_fill_4x4_with_u32x4, decrypt_core, un_bit_slice_1x128_with_u32x4, + un_bit_slice_1x16_with_u16, Bs8State, + }, + consts::U32X4_0, + expand::expand_key, + fixslice::{self, FixsliceKeys128}, + simd::u32x4, + Block, ParBlocks, +}; + +/// AES-128 key +pub type Key = cipher::block::Key; + +/// AES-128 block cipher instance +#[derive(Clone)] +pub struct Aes128 { + enc_keys: FixsliceKeys128, + dec_keys: [Bs8State; 11], + dec_keys8: [Bs8State; 11], +} + +impl NewBlockCipher for Aes128 { + type KeySize = U16; + + #[inline] + fn new(key: &Key) -> Self { + let dk = expand_key::(key).1; + + let k8 = Bs8State( + U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, + ); + + let mut c = Self { + enc_keys: fixslice::aes128_key_schedule(key), + dec_keys: [Bs8State(0, 0, 0, 0, 0, 0, 0, 0); 11], + dec_keys8: [k8; 11], + }; + + for i in 0..11 { + c.dec_keys[i] = bit_slice_4x4_with_u16(dk[i][0], dk[i][1], dk[i][2], dk[i][3]); + c.dec_keys8[i] = bit_slice_fill_4x4_with_u32x4(dk[i][0], dk[i][1], dk[i][2], dk[i][3]); + } + + c + } +} + +impl BlockCipher for Aes128 { + type BlockSize = U16; + type ParBlocks = U8; + + #[inline] + fn encrypt_block(&self, block: &mut Block) { + let mut blocks = [Block::default(); 2]; + blocks[0].copy_from_slice(block); + fixslice::aes128_encrypt(&self.enc_keys, &mut blocks); + block.copy_from_slice(&blocks[0]); + } + + #[inline] + fn decrypt_block(&self, block: &mut Block) { + let mut bs = bit_slice_1x16_with_u16(block); + bs = decrypt_core(&bs, &self.dec_keys); + un_bit_slice_1x16_with_u16(&bs, block); + } + + #[inline] + fn encrypt_blocks(&self, blocks: &mut ParBlocks) { + for chunk in blocks.chunks_mut(2) { + fixslice::aes128_encrypt(&self.enc_keys, chunk); + } + } + + #[inline] + fn decrypt_blocks(&self, blocks: &mut ParBlocks) { + #[allow(unsafe_code)] + let blocks: &mut [u8; 16 * 8] = unsafe { &mut *(blocks as *mut _ as *mut [u8; 128]) }; + let bs = bit_slice_1x128_with_u32x4(blocks); + let bs2 = decrypt_core(&bs, &self.dec_keys8); + un_bit_slice_1x128_with_u32x4(bs2, blocks); + } +} + +opaque_debug::implement!(Aes128); diff --git a/aes/aes-soft/src/aes192.rs b/aes/aes-soft/src/aes192.rs new file mode 100644 index 00000000..70f907cf --- /dev/null +++ b/aes/aes-soft/src/aes192.rs @@ -0,0 +1,98 @@ +//! 
AES-192 + +use cipher::{ + consts::{U13, U16, U24, U8}, + BlockCipher, NewBlockCipher, +}; + +use crate::{ + bitslice::{ + bit_slice_1x128_with_u32x4, bit_slice_1x16_with_u16, bit_slice_4x4_with_u16, + bit_slice_fill_4x4_with_u32x4, decrypt_core, encrypt_core, un_bit_slice_1x128_with_u32x4, + un_bit_slice_1x16_with_u16, Bs8State, + }, + consts::U32X4_0, + expand::expand_key, + simd::u32x4, + Block, ParBlocks, +}; + +/// AES-192 key +pub type Key = cipher::block::Key; + +/// AES-192 block cipher instance +#[derive(Clone)] +pub struct Aes192 { + enc_keys: [Bs8State; 13], + dec_keys: [Bs8State; 13], + enc_keys8: [Bs8State; 13], + dec_keys8: [Bs8State; 13], +} + +impl NewBlockCipher for Aes192 { + type KeySize = U24; + + #[inline] + fn new(key: &Key) -> Self { + let (ek, dk) = expand_key::(key); + + let k8 = Bs8State( + U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, + ); + + let mut c = Self { + enc_keys: [Bs8State(0, 0, 0, 0, 0, 0, 0, 0); 13], + dec_keys: [Bs8State(0, 0, 0, 0, 0, 0, 0, 0); 13], + enc_keys8: [k8; 13], + dec_keys8: [k8; 13], + }; + + for i in 0..13 { + c.enc_keys[i] = bit_slice_4x4_with_u16(ek[i][0], ek[i][1], ek[i][2], ek[i][3]); + c.dec_keys[i] = bit_slice_4x4_with_u16(dk[i][0], dk[i][1], dk[i][2], dk[i][3]); + c.enc_keys8[i] = bit_slice_fill_4x4_with_u32x4(ek[i][0], ek[i][1], ek[i][2], ek[i][3]); + c.dec_keys8[i] = bit_slice_fill_4x4_with_u32x4(dk[i][0], dk[i][1], dk[i][2], dk[i][3]); + } + + c + } +} + +impl BlockCipher for Aes192 { + type BlockSize = U16; + type ParBlocks = U8; + + #[inline] + fn encrypt_block(&self, block: &mut Block) { + let mut bs = bit_slice_1x16_with_u16(block); + bs = encrypt_core(&bs, &self.enc_keys); + un_bit_slice_1x16_with_u16(&bs, block); + } + + #[inline] + fn decrypt_block(&self, block: &mut Block) { + let mut bs = bit_slice_1x16_with_u16(block); + bs = decrypt_core(&bs, &self.dec_keys); + un_bit_slice_1x16_with_u16(&bs, block); + } + + #[inline] + fn encrypt_blocks(&self, blocks: &mut ParBlocks) { + #[allow(unsafe_code)] + let blocks: &mut [u8; 16 * 8] = unsafe { &mut *(blocks as *mut _ as *mut [u8; 128]) }; + let bs = bit_slice_1x128_with_u32x4(blocks); + let bs2 = encrypt_core(&bs, &self.enc_keys8); + un_bit_slice_1x128_with_u32x4(bs2, blocks); + } + + #[inline] + fn decrypt_blocks(&self, blocks: &mut ParBlocks) { + #[allow(unsafe_code)] + let blocks: &mut [u8; 16 * 8] = unsafe { &mut *(blocks as *mut _ as *mut [u8; 128]) }; + let bs = bit_slice_1x128_with_u32x4(blocks); + let bs2 = decrypt_core(&bs, &self.dec_keys8); + un_bit_slice_1x128_with_u32x4(bs2, blocks); + } +} + +opaque_debug::implement!(Aes192); diff --git a/aes/aes-soft/src/aes256.rs b/aes/aes-soft/src/aes256.rs new file mode 100644 index 00000000..163bf7ff --- /dev/null +++ b/aes/aes-soft/src/aes256.rs @@ -0,0 +1,94 @@ +//! 
AES-256 + +use cipher::{ + consts::{U15, U16, U32, U8}, + BlockCipher, NewBlockCipher, +}; + +use crate::{ + bitslice::{ + bit_slice_1x128_with_u32x4, bit_slice_1x16_with_u16, bit_slice_4x4_with_u16, + bit_slice_fill_4x4_with_u32x4, decrypt_core, un_bit_slice_1x128_with_u32x4, + un_bit_slice_1x16_with_u16, Bs8State, + }, + consts::U32X4_0, + expand::expand_key, + fixslice::{self, FixsliceKeys256}, + simd::u32x4, + Block, ParBlocks, +}; + +/// AES-256 key +pub type Key = cipher::block::Key; + +/// AES-256 block cipher instance +#[derive(Clone)] +pub struct Aes256 { + enc_keys: FixsliceKeys256, + dec_keys: [Bs8State; 15], + dec_keys8: [Bs8State; 15], +} + +impl NewBlockCipher for Aes256 { + type KeySize = U32; + + #[inline] + fn new(key: &Key) -> Self { + let dk = expand_key::(key).1; + + let k8 = Bs8State( + U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, U32X4_0, + ); + + let mut c = Self { + enc_keys: fixslice::aes256_key_schedule(key), + dec_keys: [Bs8State(0, 0, 0, 0, 0, 0, 0, 0); 15], + dec_keys8: [k8; 15], + }; + + for i in 0..15 { + c.dec_keys[i] = bit_slice_4x4_with_u16(dk[i][0], dk[i][1], dk[i][2], dk[i][3]); + c.dec_keys8[i] = bit_slice_fill_4x4_with_u32x4(dk[i][0], dk[i][1], dk[i][2], dk[i][3]); + } + + c + } +} + +impl BlockCipher for Aes256 { + type BlockSize = U16; + type ParBlocks = U8; + + #[inline] + fn encrypt_block(&self, block: &mut Block) { + let mut blocks = [Block::default(); 2]; + blocks[0].copy_from_slice(block); + fixslice::aes256_encrypt(&self.enc_keys, &mut blocks); + block.copy_from_slice(&blocks[0]); + } + + #[inline] + fn decrypt_block(&self, block: &mut Block) { + let mut bs = bit_slice_1x16_with_u16(block); + bs = decrypt_core(&bs, &self.dec_keys); + un_bit_slice_1x16_with_u16(&bs, block); + } + + #[inline] + fn encrypt_blocks(&self, blocks: &mut ParBlocks) { + for chunk in blocks.chunks_mut(2) { + fixslice::aes256_encrypt(&self.enc_keys, chunk); + } + } + + #[inline] + fn decrypt_blocks(&self, blocks: &mut ParBlocks) { + #[allow(unsafe_code)] + let blocks: &mut [u8; 16 * 8] = unsafe { &mut *(blocks as *mut _ as *mut [u8; 128]) }; + let bs = bit_slice_1x128_with_u32x4(blocks); + let bs2 = decrypt_core(&bs, &self.dec_keys8); + un_bit_slice_1x128_with_u32x4(bs2, blocks); + } +} + +opaque_debug::implement!(Aes256); diff --git a/aes/aes-soft/src/fixslice.rs b/aes/aes-soft/src/fixslice.rs index 96732a1a..3d53b760 100644 --- a/aes/aes-soft/src/fixslice.rs +++ b/aes/aes-soft/src/fixslice.rs @@ -13,363 +13,292 @@ //! //! Originally licensed MIT. Relicensed as Apache 2.0+MIT with permission. 
-use cipher::{ - block::Key, - consts::{U16, U2, U32}, - generic_array::GenericArray, - BlockCipher, NewBlockCipher, -}; +use super::Block; +use crate::{aes128, aes256}; use core::convert::TryInto; -/// 128-bit AES block -// TODO(tarcieri): unify with other `Block` type aliases -type Block = GenericArray; - -/// 2x128-bit AES blocks -// TODO(tarcieri): unify with other `ParBlocks` type aliases -type ParBlocks = GenericArray; - /// AES-128 round keys -type RKeys128 = [u32; 88]; +pub(crate) type FixsliceKeys128 = [u32; 88]; /// AES-256 round keys -type RKeys256 = [u32; 120]; +pub(crate) type FixsliceKeys256 = [u32; 120]; /// 256-bit internal state type State = [u32; 8]; -/// AES-128 with a fully fixsliced implementation -#[derive(Clone)] -pub struct Aes128Fixsliced { - /// Round keys - rkeys: RKeys128, -} - -impl NewBlockCipher for Aes128Fixsliced { - type KeySize = U16; - - #[inline] - fn new(key: &Key) -> Self { - // TODO(tarcieri): use `::default()` after MSRV 1.47+ - let mut rkeys = [0u32; 88]; - - // Pack the keys into the bitsliced state - packing(&mut rkeys[..8], key, key); - memshift32(&mut rkeys, 0); - sbox(&mut rkeys[8..16]); - - rkeys[15] ^= 0x00000300; // 1st rconst - xor_columns(&mut rkeys, 8, 8, 2); // Rotword and XOR between the columns - memshift32(&mut rkeys, 8); - sbox(&mut rkeys[16..24]); - - rkeys[22] ^= 0x00000300; // 2nd rconst - xor_columns(&mut rkeys, 16, 8, 2); // Rotword and XOR between the columns - inv_shiftrows_1(&mut rkeys[8..16]); // to match fixslicing - memshift32(&mut rkeys, 16); - sbox(&mut rkeys[24..32]); - - rkeys[29] ^= 0x00000300; // 3rd rconst - xor_columns(&mut rkeys, 24, 8, 2); // Rotword and XOR between the columns - inv_shiftrows_2(&mut rkeys[16..24]); // to match fixslicing - memshift32(&mut rkeys, 24); - sbox(&mut rkeys[32..40]); - - rkeys[36] ^= 0x00000300; // 4th rconst - xor_columns(&mut rkeys, 32, 8, 2); // Rotword and XOR between the columns - inv_shiftrows_3(&mut rkeys[24..32]); // to match fixslicing - memshift32(&mut rkeys, 32); - sbox(&mut rkeys[40..48]); - - rkeys[43] ^= 0x00000300; // 5th rconst - xor_columns(&mut rkeys, 40, 8, 2); // Rotword and XOR between the columns - memshift32(&mut rkeys, 40); - sbox(&mut rkeys[48..56]); - - rkeys[50] ^= 0x00000300; // 6th rconst - xor_columns(&mut rkeys, 48, 8, 2); // Rotword and XOR between the columns - inv_shiftrows_1(&mut rkeys[40..48]); // to match fixslicing - memshift32(&mut rkeys, 48); - sbox(&mut rkeys[56..64]); - - rkeys[57] ^= 0x00000300; // 7th rconst - xor_columns(&mut rkeys, 56, 8, 2); // Rotword and XOR between the columns - inv_shiftrows_2(&mut rkeys[48..56]); // to match fixslicing - memshift32(&mut rkeys, 56); - sbox(&mut rkeys[64..72]); - - rkeys[64] ^= 0x00000300; // 8th rconst - xor_columns(&mut rkeys, 64, 8, 2); // Rotword and XOR between the columns - inv_shiftrows_3(&mut rkeys[56..64]); // to match fixslicing - memshift32(&mut rkeys, 64); - sbox(&mut rkeys[72..80]); - - rkeys[79] ^= 0x00000300; // 9th rconst - rkeys[78] ^= 0x00000300; // 9th rconst - rkeys[76] ^= 0x00000300; // 9th rconst - rkeys[75] ^= 0x00000300; // 9th rconst - xor_columns(&mut rkeys, 72, 8, 2); // Rotword and XOR between the columns - memshift32(&mut rkeys, 72); - sbox(&mut rkeys[80..]); - - rkeys[86] ^= 0x00000300; // 10th rconst - rkeys[85] ^= 0x00000300; // 10th rconst - rkeys[83] ^= 0x00000300; // 10th rconst - rkeys[82] ^= 0x00000300; // 10th rconst - xor_columns(&mut rkeys, 80, 8, 2); // Rotword and XOR between the columns - inv_shiftrows_1(&mut rkeys[72..80]); - - // Bitwise NOT to speed up 
SBox calculations - for i in 1..11 { - rkeys[i * 8 + 1] ^= 0xffffffff; - rkeys[i * 8 + 2] ^= 0xffffffff; - rkeys[i * 8 + 6] ^= 0xffffffff; - rkeys[i * 8 + 7] ^= 0xffffffff; - } - - Self { rkeys } +/// Fully bitsliced AES-128 key schedule to match the fully-fixsliced +/// representation. +pub(crate) fn aes128_key_schedule(key: &aes128::Key) -> FixsliceKeys128 { + // TODO(tarcieri): use `::default()` after MSRV 1.47+ + let mut rkeys = [0u32; 88]; + + // Pack the keys into the bitsliced state + packing(&mut rkeys[..8], key, key); + memshift32(&mut rkeys, 0); + sbox(&mut rkeys[8..16]); + + rkeys[15] ^= 0x00000300; // 1st rconst + xor_columns(&mut rkeys, 8, 8, 2); // Rotword and XOR between the columns + memshift32(&mut rkeys, 8); + sbox(&mut rkeys[16..24]); + + rkeys[22] ^= 0x00000300; // 2nd rconst + xor_columns(&mut rkeys, 16, 8, 2); // Rotword and XOR between the columns + inv_shiftrows_1(&mut rkeys[8..16]); // to match fixslicing + memshift32(&mut rkeys, 16); + sbox(&mut rkeys[24..32]); + + rkeys[29] ^= 0x00000300; // 3rd rconst + xor_columns(&mut rkeys, 24, 8, 2); // Rotword and XOR between the columns + inv_shiftrows_2(&mut rkeys[16..24]); // to match fixslicing + memshift32(&mut rkeys, 24); + sbox(&mut rkeys[32..40]); + + rkeys[36] ^= 0x00000300; // 4th rconst + xor_columns(&mut rkeys, 32, 8, 2); // Rotword and XOR between the columns + inv_shiftrows_3(&mut rkeys[24..32]); // to match fixslicing + memshift32(&mut rkeys, 32); + sbox(&mut rkeys[40..48]); + + rkeys[43] ^= 0x00000300; // 5th rconst + xor_columns(&mut rkeys, 40, 8, 2); // Rotword and XOR between the columns + memshift32(&mut rkeys, 40); + sbox(&mut rkeys[48..56]); + + rkeys[50] ^= 0x00000300; // 6th rconst + xor_columns(&mut rkeys, 48, 8, 2); // Rotword and XOR between the columns + inv_shiftrows_1(&mut rkeys[40..48]); // to match fixslicing + memshift32(&mut rkeys, 48); + sbox(&mut rkeys[56..64]); + + rkeys[57] ^= 0x00000300; // 7th rconst + xor_columns(&mut rkeys, 56, 8, 2); // Rotword and XOR between the columns + inv_shiftrows_2(&mut rkeys[48..56]); // to match fixslicing + memshift32(&mut rkeys, 56); + sbox(&mut rkeys[64..72]); + + rkeys[64] ^= 0x00000300; // 8th rconst + xor_columns(&mut rkeys, 64, 8, 2); // Rotword and XOR between the columns + inv_shiftrows_3(&mut rkeys[56..64]); // to match fixslicing + memshift32(&mut rkeys, 64); + sbox(&mut rkeys[72..80]); + + rkeys[79] ^= 0x00000300; // 9th rconst + rkeys[78] ^= 0x00000300; // 9th rconst + rkeys[76] ^= 0x00000300; // 9th rconst + rkeys[75] ^= 0x00000300; // 9th rconst + xor_columns(&mut rkeys, 72, 8, 2); // Rotword and XOR between the columns + memshift32(&mut rkeys, 72); + sbox(&mut rkeys[80..]); + + rkeys[86] ^= 0x00000300; // 10th rconst + rkeys[85] ^= 0x00000300; // 10th rconst + rkeys[83] ^= 0x00000300; // 10th rconst + rkeys[82] ^= 0x00000300; // 10th rconst + xor_columns(&mut rkeys, 80, 8, 2); // Rotword and XOR between the columns + inv_shiftrows_1(&mut rkeys[72..80]); + + // Bitwise NOT to speed up SBox calculations + for i in 1..11 { + rkeys[i * 8 + 1] ^= 0xffffffff; + rkeys[i * 8 + 2] ^= 0xffffffff; + rkeys[i * 8 + 6] ^= 0xffffffff; + rkeys[i * 8 + 7] ^= 0xffffffff; } -} - -impl BlockCipher for Aes128Fixsliced { - type BlockSize = U16; - type ParBlocks = U2; - #[inline] - fn encrypt_block(&self, block: &mut Block) { - let mut blocks = ParBlocks::default(); - blocks[0].copy_from_slice(&block); - self.encrypt_blocks(&mut blocks); - block.copy_from_slice(&blocks[0]); - } + rkeys +} - #[inline] - fn decrypt_block(&self, _block: &mut Block) { - todo!() +/// 
Fully bitsliced AES-256 key schedule to match the fully-fixsliced +/// representation. +pub(crate) fn aes256_key_schedule(key: &aes256::Key) -> FixsliceKeys256 { + // TODO(tarcieri): use `::default()` after MSRV 1.47+ + let mut rkeys = [0u32; 120]; + + // Pack the keys into the bitsliced state + packing(&mut rkeys[..8], &key[..16], &key[..16]); + packing(&mut rkeys[8..16], &key[16..], &key[16..]); + memshift32(&mut rkeys, 8); + sbox(&mut rkeys[16..24]); + + rkeys[23] ^= 0x00000300; // 1st rconst + xor_columns(&mut rkeys, 16, 16, 2); // Rotword and XOR between the columns + memshift32(&mut rkeys, 16); + sbox(&mut rkeys[24..32]); + xor_columns(&mut rkeys, 24, 16, 26); // XOR between the columns + inv_shiftrows_1(&mut rkeys[8..16]); // to match fixslicing + memshift32(&mut rkeys, 24); + sbox(&mut rkeys[32..40]); + + rkeys[38] ^= 0x00000300; // 2nd rconst + xor_columns(&mut rkeys, 32, 16, 2); // Rotword and XOR between the columns + inv_shiftrows_2(&mut rkeys[16..24]); // to match fixslicing + memshift32(&mut rkeys, 32); + sbox(&mut rkeys[40..48]); + xor_columns(&mut rkeys, 40, 16, 26); // XOR between the columns + inv_shiftrows_3(&mut rkeys[24..32]); // to match fixslicing + memshift32(&mut rkeys, 40); + sbox(&mut rkeys[48..56]); + + rkeys[53] ^= 0x00000300; // 3rd rconst + xor_columns(&mut rkeys, 48, 16, 2); // Rotword and XOR between the columns + memshift32(&mut rkeys, 48); + sbox(&mut rkeys[56..64]); + xor_columns(&mut rkeys, 56, 16, 26); // XOR between the columns + inv_shiftrows_1(&mut rkeys[40..48]); // to match fixslicing + memshift32(&mut rkeys, 56); + sbox(&mut rkeys[64..72]); + + rkeys[68] ^= 0x00000300; // 4th rconst + xor_columns(&mut rkeys, 64, 16, 2); // Rotword and XOR between the columns + inv_shiftrows_2(&mut rkeys[48..56]); // to match fixslicing + memshift32(&mut rkeys, 64); + sbox(&mut rkeys[72..80]); + xor_columns(&mut rkeys, 72, 16, 26); // XOR between the columns + inv_shiftrows_3(&mut rkeys[56..64]); // to match fixslicing + memshift32(&mut rkeys, 72); + sbox(&mut rkeys[80..88]); + + rkeys[83] ^= 0x00000300; // 5th rconst + xor_columns(&mut rkeys, 80, 16, 2); // Rotword and XOR between the columns + memshift32(&mut rkeys, 80); + sbox(&mut rkeys[88..96]); + xor_columns(&mut rkeys, 88, 16, 26); // XOR between the columns + inv_shiftrows_1(&mut rkeys[72..80]); // to match fixslicing + memshift32(&mut rkeys, 88); + sbox(&mut rkeys[96..104]); + + rkeys[98] ^= 0x00000300; // 6th rconst + xor_columns(&mut rkeys, 96, 16, 2); // Rotword and XOR between the columns + inv_shiftrows_2(&mut rkeys[80..88]); // to match fixslicing + memshift32(&mut rkeys, 96); + sbox(&mut rkeys[104..112]); + xor_columns(&mut rkeys, 104, 16, 26); // XOR between the columns + inv_shiftrows_3(&mut rkeys[88..96]); // to match fixslicing + memshift32(&mut rkeys, 104); + sbox(&mut rkeys[112..]); + + rkeys[113] ^= 0x00000300; // 7th rconst + xor_columns(&mut rkeys, 112, 16, 2); // Rotword and XOR between the columns + inv_shiftrows_1(&mut rkeys[104..112]); // to match fixslicing + + // Bitwise NOT to speed up SBox calculations + for i in 1..15 { + rkeys[i * 8 + 1] ^= 0xffffffff; + rkeys[i * 8 + 2] ^= 0xffffffff; + rkeys[i * 8 + 6] ^= 0xffffffff; + rkeys[i * 8 + 7] ^= 0xffffffff; } - #[inline] - fn encrypt_blocks(&self, blocks: &mut ParBlocks) { - let mut state = State::default(); - - // packs into bitsliced representation - packing(&mut state, blocks[0].as_ref(), blocks[1].as_ref()); - ark(&mut state, &self.rkeys[..8]); + rkeys +} - // 1st round - sbox(&mut state); - mixcolumns_0(&mut state); - ark(&mut 
state, &self.rkeys[8..16]); +/// Fully-fixsliced AES-128 encryption (the ShiftRows is completely omitted). +/// +/// Encrypts two blocks in-place and in parallel. +pub(crate) fn aes128_encrypt(rkeys: &FixsliceKeys128, blocks: &mut [Block]) { + debug_assert_eq!(blocks.len(), 2); + let mut state = State::default(); + + // Pack into bitsliced representation + packing(&mut state, blocks[0].as_ref(), blocks[1].as_ref()); + ark(&mut state, &rkeys[..8]); + + // 1st round + sbox(&mut state); + mixcolumns_0(&mut state); + ark(&mut state, &rkeys[8..16]); + + // 2nd round + sbox(&mut state); + mixcolumns_1(&mut state); + ark(&mut state, &rkeys[16..24]); + + // 3rd round + sbox(&mut state); + mixcolumns_2(&mut state); + ark(&mut state, &rkeys[24..32]); + + // 4th round + sbox(&mut state); + mixcolumns_3(&mut state); + ark(&mut state, &rkeys[32..40]); + + // 5th round + sbox(&mut state); + mixcolumns_0(&mut state); + ark(&mut state, &rkeys[40..48]); + + // 6th round + sbox(&mut state); + mixcolumns_1(&mut state); + ark(&mut state, &rkeys[48..56]); + + // 7th round + sbox(&mut state); + mixcolumns_2(&mut state); + ark(&mut state, &rkeys[56..64]); + + // 8th round + sbox(&mut state); + mixcolumns_3(&mut state); + ark(&mut state, &rkeys[64..72]); + + // 9th round + sbox(&mut state); + mixcolumns_0(&mut state); + ark(&mut state, &rkeys[72..80]); + + // 10th round + sbox(&mut state); + double_shiftrows(&mut state); // resynchronization + ark(&mut state, &rkeys[80..]); + + // Unpack state into output + unpacking(&mut state, blocks); +} - // 2nd round - sbox(&mut state); - mixcolumns_1(&mut state); - ark(&mut state, &self.rkeys[16..24]); +/// Fully-fixsliced AES-256 encryption (the ShiftRows is completely omitted). +/// +/// Encrypts two blocks in-place and in parallel. 
+pub(crate) fn aes256_encrypt(rkeys: &FixsliceKeys256, blocks: &mut [Block]) { + debug_assert!(blocks.len() == 1 || blocks.len() == 2); - // 3rd round - sbox(&mut state); - mixcolumns_2(&mut state); - ark(&mut state, &self.rkeys[24..32]); + let mut state = State::default(); - // 4th round - sbox(&mut state); - mixcolumns_3(&mut state); - ark(&mut state, &self.rkeys[32..40]); + // Pack into bitsliced representation + packing(&mut state, &blocks[0], &blocks[1]); - // 5th round + // Loop over quadruple rounds + for i in (0..96).step_by(32) { + ark(&mut state, &rkeys[i..(i + 8)]); sbox(&mut state); mixcolumns_0(&mut state); - ark(&mut state, &self.rkeys[40..48]); - // 6th round + ark(&mut state, &rkeys[(i + 8)..(i + 16)]); sbox(&mut state); mixcolumns_1(&mut state); - ark(&mut state, &self.rkeys[48..56]); - // 7th round + ark(&mut state, &rkeys[(i + 16)..(i + 24)]); sbox(&mut state); mixcolumns_2(&mut state); - ark(&mut state, &self.rkeys[56..64]); - // 8th round + ark(&mut state, &rkeys[(i + 24)..(i + 32)]); sbox(&mut state); mixcolumns_3(&mut state); - ark(&mut state, &self.rkeys[64..72]); - - // 9th round - sbox(&mut state); - mixcolumns_0(&mut state); - ark(&mut state, &self.rkeys[72..80]); - - // 10th round - sbox(&mut state); - double_shiftrows(&mut state); // resynchronization - ark(&mut state, &self.rkeys[80..]); - - // Unpack state into output - unpacking(&mut state, blocks); - } - - #[inline] - fn decrypt_blocks(&self, _blocks: &mut ParBlocks) { - todo!() - } -} - -/// AES-256 with a fully fixsliced implementation -#[derive(Clone)] -pub struct Aes256Fixsliced { - /// Round keys - rkeys: RKeys256, -} - -impl NewBlockCipher for Aes256Fixsliced { - type KeySize = U32; - - #[inline] - fn new(key: &Key) -> Self { - // TODO(tarcieri): use `::default()` after MSRV 1.47+ - let mut rkeys = [0u32; 120]; - - // Pack the keys into the bitsliced state - packing(&mut rkeys[..8], &key[..16], &key[..16]); - packing(&mut rkeys[8..16], &key[16..], &key[16..]); - memshift32(&mut rkeys, 8); - sbox(&mut rkeys[16..24]); - - rkeys[23] ^= 0x00000300; // 1st rconst - xor_columns(&mut rkeys, 16, 16, 2); // Rotword and XOR between the columns - memshift32(&mut rkeys, 16); - sbox(&mut rkeys[24..32]); - xor_columns(&mut rkeys, 24, 16, 26); // XOR between the columns - inv_shiftrows_1(&mut rkeys[8..16]); // to match fixslicing - memshift32(&mut rkeys, 24); - sbox(&mut rkeys[32..40]); - - rkeys[38] ^= 0x00000300; // 2nd rconst - xor_columns(&mut rkeys, 32, 16, 2); // Rotword and XOR between the columns - inv_shiftrows_2(&mut rkeys[16..24]); // to match fixslicing - memshift32(&mut rkeys, 32); - sbox(&mut rkeys[40..48]); - xor_columns(&mut rkeys, 40, 16, 26); // XOR between the columns - inv_shiftrows_3(&mut rkeys[24..32]); // to match fixslicing - memshift32(&mut rkeys, 40); - sbox(&mut rkeys[48..56]); - - rkeys[53] ^= 0x00000300; // 3rd rconst - xor_columns(&mut rkeys, 48, 16, 2); // Rotword and XOR between the columns - memshift32(&mut rkeys, 48); - sbox(&mut rkeys[56..64]); - xor_columns(&mut rkeys, 56, 16, 26); // XOR between the columns - inv_shiftrows_1(&mut rkeys[40..48]); // to match fixslicing - memshift32(&mut rkeys, 56); - sbox(&mut rkeys[64..72]); - - rkeys[68] ^= 0x00000300; // 4th rconst - xor_columns(&mut rkeys, 64, 16, 2); // Rotword and XOR between the columns - inv_shiftrows_2(&mut rkeys[48..56]); // to match fixslicing - memshift32(&mut rkeys, 64); - sbox(&mut rkeys[72..80]); - xor_columns(&mut rkeys, 72, 16, 26); // XOR between the columns - inv_shiftrows_3(&mut rkeys[56..64]); // to match 
fixslicing - memshift32(&mut rkeys, 72); - sbox(&mut rkeys[80..88]); - - rkeys[83] ^= 0x00000300; // 5th rconst - xor_columns(&mut rkeys, 80, 16, 2); // Rotword and XOR between the columns - memshift32(&mut rkeys, 80); - sbox(&mut rkeys[88..96]); - xor_columns(&mut rkeys, 88, 16, 26); // XOR between the columns - inv_shiftrows_1(&mut rkeys[72..80]); // to match fixslicing - memshift32(&mut rkeys, 88); - sbox(&mut rkeys[96..104]); - - rkeys[98] ^= 0x00000300; // 6th rconst - xor_columns(&mut rkeys, 96, 16, 2); // Rotword and XOR between the columns - inv_shiftrows_2(&mut rkeys[80..88]); // to match fixslicing - memshift32(&mut rkeys, 96); - sbox(&mut rkeys[104..112]); - xor_columns(&mut rkeys, 104, 16, 26); // XOR between the columns - inv_shiftrows_3(&mut rkeys[88..96]); // to match fixslicing - memshift32(&mut rkeys, 104); - sbox(&mut rkeys[112..]); - - rkeys[113] ^= 0x00000300; // 7th rconst - xor_columns(&mut rkeys, 112, 16, 2); // Rotword and XOR between the columns - inv_shiftrows_1(&mut rkeys[104..112]); // to match fixslicing - - // Bitwise NOT to speed up SBox calculations - for i in 1..15 { - rkeys[i * 8 + 1] ^= 0xffffffff; - rkeys[i * 8 + 2] ^= 0xffffffff; - rkeys[i * 8 + 6] ^= 0xffffffff; - rkeys[i * 8 + 7] ^= 0xffffffff; - } - - Self { rkeys } - } -} - -impl BlockCipher for Aes256Fixsliced { - type BlockSize = U16; - type ParBlocks = U2; - - #[inline] - fn encrypt_block(&self, block: &mut Block) { - let mut blocks = ParBlocks::default(); - blocks[0].copy_from_slice(&block); - self.encrypt_blocks(&mut blocks); - block.copy_from_slice(&blocks[0]); - } - - #[inline] - fn decrypt_block(&self, _block: &mut Block) { - todo!() } - #[inline] - fn encrypt_blocks(&self, blocks: &mut ParBlocks) { - let mut state = State::default(); - - // Pack into bitsliced representation - packing(&mut state, &blocks[0], &blocks[1]); - - // Loop over quadruple rounds - for i in (0..96).step_by(32) { - ark(&mut state, &self.rkeys[i..(i + 8)]); - sbox(&mut state); - mixcolumns_0(&mut state); - - ark(&mut state, &self.rkeys[(i + 8)..(i + 16)]); - sbox(&mut state); - mixcolumns_1(&mut state); + ark(&mut state, &rkeys[96..104]); + sbox(&mut state); + mixcolumns_0(&mut state); - ark(&mut state, &self.rkeys[(i + 16)..(i + 24)]); - sbox(&mut state); - mixcolumns_2(&mut state); + ark(&mut state, &rkeys[104..112]); + sbox(&mut state); + double_shiftrows(&mut state); // resynchronization + ark(&mut state, &rkeys[112..]); - ark(&mut state, &self.rkeys[(i + 24)..(i + 32)]); - sbox(&mut state); - mixcolumns_3(&mut state); - } - - ark(&mut state, &self.rkeys[96..104]); - sbox(&mut state); - mixcolumns_0(&mut state); - - ark(&mut state, &self.rkeys[104..112]); - sbox(&mut state); - double_shiftrows(&mut state); // resynchronization - ark(&mut state, &self.rkeys[112..]); - - // Unpack state into output - unpacking(&mut state, blocks); - } - - #[inline] - fn decrypt_blocks(&self, _blocks: &mut ParBlocks) { - todo!() - } + // Unpack state into output + unpacking(&mut state, blocks); } /// Bitsliced implementation of the AES Sbox based on Boyar, Peralta and Calik. @@ -817,9 +746,10 @@ fn packing(output: &mut [u32], input0: &[u8], input1: &[u8]) { swapmove!(output[7], output[3], 0x0f0f0f0f, 4); } -/// Unpacks the 256-bit internal state in two 128-bit blocs out0, out1. 
-fn unpacking(input: &mut [u32], output: &mut ParBlocks) { +/// Unpacks the 256-bit internal state in two 128-bit blocks +fn unpacking(input: &mut [u32], output: &mut [Block]) { debug_assert_eq!(input.len(), 8); + debug_assert_eq!(output.len(), 2); swapmove!(input[4], input[0], 0x0f0f0f0f, 4); swapmove!(input[5], input[1], 0x0f0f0f0f, 4); diff --git a/aes/aes-soft/src/impls.rs b/aes/aes-soft/src/impls.rs deleted file mode 100644 index 89085f01..00000000 --- a/aes/aes-soft/src/impls.rs +++ /dev/null @@ -1,120 +0,0 @@ -pub use cipher::{BlockCipher, NewBlockCipher}; - -use cipher::{ - consts::{U11, U13, U15, U16, U24, U32, U8}, - generic_array::GenericArray, -}; - -use crate::{ - bitslice::{ - bit_slice_1x128_with_u32x4, bit_slice_1x16_with_u16, bit_slice_4x4_with_u16, - bit_slice_fill_4x4_with_u32x4, decrypt_core, encrypt_core, un_bit_slice_1x128_with_u32x4, - un_bit_slice_1x16_with_u16, Bs8State, - }, - consts::U32X4_0, - expand::expand_key, - simd::u32x4, -}; - -pub type Block128 = GenericArray; -pub type Block128x8 = GenericArray, U8>; - -macro_rules! define_aes_impl { - ( - $name:ident, - $key_size:ty, - $rounds:expr, - $rounds2:ty, - $doc:expr - ) => { - #[doc=$doc] - #[derive(Clone)] - pub struct $name { - enc_keys: [Bs8State; $rounds], - dec_keys: [Bs8State; $rounds], - enc_keys8: [Bs8State; $rounds], - dec_keys8: [Bs8State; $rounds], - } - - impl NewBlockCipher for $name { - type KeySize = $key_size; - - #[inline] - fn new(key: &GenericArray) -> Self { - let (ek, dk) = expand_key::<$key_size, $rounds2>(key); - let k8 = Bs8State( - U32X4_0, U32X4_0, U32X4_0, U32X4_0, - U32X4_0, U32X4_0, U32X4_0, U32X4_0 - ); - let mut c = Self { - enc_keys: [Bs8State(0, 0, 0, 0, 0, 0, 0, 0); $rounds], - dec_keys: [Bs8State(0, 0, 0, 0, 0, 0, 0, 0); $rounds], - enc_keys8: [k8; $rounds], - dec_keys8: [k8; $rounds], - }; - for i in 0..$rounds { - c.enc_keys[i] = bit_slice_4x4_with_u16( - ek[i][0], ek[i][1], ek[i][2], ek[i][3], - ); - c.dec_keys[i] = bit_slice_4x4_with_u16( - dk[i][0], dk[i][1], dk[i][2], dk[i][3], - ); - c.enc_keys8[i] = bit_slice_fill_4x4_with_u32x4( - ek[i][0], ek[i][1], ek[i][2], ek[i][3], - ); - c.dec_keys8[i] = bit_slice_fill_4x4_with_u32x4( - dk[i][0], dk[i][1], dk[i][2], dk[i][3], - ); - } - c - } - } - - impl BlockCipher for $name { - type BlockSize = U16; - type ParBlocks = U8; - - #[inline] - fn encrypt_block(&self, block: &mut Block128) { - let mut bs = bit_slice_1x16_with_u16(block); - bs = encrypt_core(&bs, &self.enc_keys); - un_bit_slice_1x16_with_u16(&bs, block); - } - - #[inline] - fn decrypt_block(&self, block: &mut Block128) { - let mut bs = bit_slice_1x16_with_u16(block); - bs = decrypt_core(&bs, &self.dec_keys); - un_bit_slice_1x16_with_u16(&bs, block); - } - - #[inline] - fn encrypt_blocks(&self, blocks: &mut Block128x8) { - #[allow(unsafe_code)] - let blocks: &mut [u8; 16*8] = unsafe { - &mut *(blocks as *mut _ as *mut [u8; 128]) - }; - let bs = bit_slice_1x128_with_u32x4(blocks); - let bs2 = encrypt_core(&bs, &self.enc_keys8); - un_bit_slice_1x128_with_u32x4(bs2, blocks); - } - - #[inline] - fn decrypt_blocks(&self, blocks: &mut Block128x8) { - #[allow(unsafe_code)] - let blocks: &mut [u8; 16*8] = unsafe { - &mut *(blocks as *mut _ as *mut [u8; 128]) - }; - let bs = bit_slice_1x128_with_u32x4(blocks); - let bs2 = decrypt_core(&bs, &self.dec_keys8); - un_bit_slice_1x128_with_u32x4(bs2, blocks); - } - } - - opaque_debug::implement!($name); - } -} - -define_aes_impl!(Aes128, U16, 11, U11, "AES-128 block cipher instance"); -define_aes_impl!(Aes192, U24, 13, U13, 
"AES-192 block cipher instance"); -define_aes_impl!(Aes256, U32, 15, U15, "AES-256 block cipher instance"); diff --git a/aes/aes-soft/src/lib.rs b/aes/aes-soft/src/lib.rs index 054aee92..25b86bc9 100644 --- a/aes/aes-soft/src/lib.rs +++ b/aes/aes-soft/src/lib.rs @@ -43,14 +43,19 @@ pub use cipher; +pub mod aes128; +pub mod aes192; +pub mod aes256; mod bitslice; mod consts; mod expand; mod fixslice; -mod impls; mod simd; -pub use crate::{ - fixslice::{Aes128Fixsliced, Aes256Fixsliced}, - impls::{Aes128, Aes192, Aes256}, -}; +pub use crate::{aes128::Aes128, aes192::Aes192, aes256::Aes256}; + +/// 128-bit AES block +pub type Block = cipher::generic_array::GenericArray; + +/// 8x128-bit AES blocks to be processed in parallel +pub type ParBlocks = cipher::block::ParBlocks; diff --git a/aes/aes-soft/tests/lib.rs b/aes/aes-soft/tests/lib.rs index 369c73c0..3735a2a6 100644 --- a/aes/aes-soft/tests/lib.rs +++ b/aes/aes-soft/tests/lib.rs @@ -4,90 +4,3 @@ cipher::new_test!(aes128_test, "aes128", aes_soft::Aes128); cipher::new_test!(aes192_test, "aes192", aes_soft::Aes192); cipher::new_test!(aes256_test, "aes256", aes_soft::Aes256); - -macro_rules! new_encrypt_only_test { - ($name:ident, $test_name:expr, $cipher:ty) => { - #[test] - fn $name() { - use cipher::block::{dev::blobby::Blob3Iterator, BlockCipher, NewBlockCipher}; - use cipher::generic_array::{typenum::Unsigned, GenericArray}; - - fn run_test(key: &[u8], pt: &[u8], ct: &[u8]) -> bool { - let state = <$cipher as NewBlockCipher>::new_varkey(key).unwrap(); - - let mut block = GenericArray::clone_from_slice(pt); - state.encrypt_block(&mut block); - if ct != block.as_slice() { - return false; - } - - true - } - - fn run_par_test(key: &[u8], pt: &[u8]) -> bool { - type ParBlocks = <$cipher as BlockCipher>::ParBlocks; - type BlockSize = <$cipher as BlockCipher>::BlockSize; - type Block = GenericArray; - type ParBlock = GenericArray; - - let state = <$cipher as NewBlockCipher>::new_varkey(key).unwrap(); - - let block = Block::clone_from_slice(pt); - let mut blocks1 = ParBlock::default(); - for (i, b) in blocks1.iter_mut().enumerate() { - *b = block; - b[0] = b[0].wrapping_add(i as u8); - } - let mut blocks2 = blocks1.clone(); - - // check that `encrypt_blocks` and `encrypt_block` - // result in the same ciphertext - state.encrypt_blocks(&mut blocks1); - for b in blocks2.iter_mut() { - state.encrypt_block(b); - } - if blocks1 != blocks2 { - return false; - } - - true - } - - let pb = <$cipher as BlockCipher>::ParBlocks::to_usize(); - let data = include_bytes!(concat!("data/", $test_name, ".blb")); - for (i, row) in Blob3Iterator::new(data).unwrap().enumerate() { - let [key, pt, ct] = row.unwrap(); - if !run_test(key, pt, ct) { - panic!( - "\n\ - Failed test №{}\n\ - key:\t{:?}\n\ - plaintext:\t{:?}\n\ - ciphertext:\t{:?}\n", - i, key, pt, ct, - ); - } - - // test parallel blocks encryption/decryption - if pb != 1 { - if !run_par_test(key, pt) { - panic!( - "\n\ - Failed parallel test №{}\n\ - key:\t{:?}\n\ - plaintext:\t{:?}\n\ - ciphertext:\t{:?}\n", - i, key, pt, ct, - ); - } - } - } - // test if cipher can be cloned - let key = Default::default(); - let _ = <$cipher as NewBlockCipher>::new(&key).clone(); - } - }; -} - -new_encrypt_only_test!(aes128_fixsliced_test, "aes128", aes_soft::Aes128Fixsliced); -new_encrypt_only_test!(aes256_fixsliced_test, "aes256", aes_soft::Aes256Fixsliced);