Skip to content

Commit

Permalink
Add Intel SHA1 extension support
Browse files Browse the repository at this point in the history
  • Loading branch information
noloader committed Dec 1, 2016
1 parent 6970ef7 commit 7ab9b00
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 30 deletions.
7 changes: 3 additions & 4 deletions config.compat
Expand Up @@ -502,11 +502,10 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 0
#endif

// AVX2 in MSC 18.00
#if !defined(CRYPTOPP_DISABLE_ASM) && !defined(CRYPTOPP_DISABLE_AVX) && !defined(_M_ARM) && ((_MSC_VER >= 1600) || (defined(__RDRND__) || defined(__RDSEED__) || defined(__AVX__)))
#define CRYPTOPP_BOOL_AVX_AVAILABLE 1
#if !defined(CRYPTOPP_DISABLE_ASM) && !defined(CRYPTOPP_DISABLE_SHA) && !defined(_M_ARM) && ((_MSC_VER >= 1900) || (CRYPTOPP_GCC_VERSION >= 50000) || defined(__SHA__))
#define CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_AVX_AVAILABLE 0
#define CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE 0
#endif

// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
Expand Down
7 changes: 3 additions & 4 deletions config.h
Expand Up @@ -502,11 +502,10 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 0
#endif

// AVX2 in MSC 18.00
#if !defined(CRYPTOPP_DISABLE_ASM) && !defined(CRYPTOPP_DISABLE_AVX) && !defined(_M_ARM) && ((_MSC_VER >= 1600) || (defined(__RDRND__) || defined(__RDSEED__) || defined(__AVX__)))
#define CRYPTOPP_BOOL_AVX_AVAILABLE 1
#if !defined(CRYPTOPP_DISABLE_ASM) && !defined(CRYPTOPP_DISABLE_SHA) && !defined(_M_ARM) && ((_MSC_VER >= 1900) || (CRYPTOPP_GCC_VERSION >= 50000) || defined(__SHA__))
#define CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_AVX_AVAILABLE 0
#define CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE 0
#endif

// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
Expand Down
9 changes: 3 additions & 6 deletions cpu.h
Expand Up @@ -47,12 +47,9 @@
#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
# include <wmmintrin.h> // aesenc, aesdec, etc
#endif // wmmintrin.h
#if CRYPTOPP_BOOL_AVX_INTRINSICS_AVAILABLE
# include <immintrin.h> // RDRAND, RDSEED and AVX
#endif
#if CRYPTOPP_BOOL_AVX2_INTRINSICS_AVAILABLE
# include <zmmintrin.h> // AVX 512-bit extensions
#endif
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
# include <immintrin.h> // RDRAND, RDSEED, AVX, SHA
#endif // immintrin.h
#endif // X86/X64/X32 Headers

// Applies to both X86/X32/X64 and ARM32/ARM64. And we've got MIPS devices on the way.
Expand Down
237 changes: 223 additions & 14 deletions sha.cpp
@@ -1,7 +1,7 @@
// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c

// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2.
// Both are in the public domain.
// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
// implemented Intel SHA extensions. All are in the public domain.

// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code

Expand Down Expand Up @@ -29,20 +29,13 @@

NAMESPACE_BEGIN(CryptoPP)

// start of Steve Reid's code
////////////////////////////////
// start of Steve Reid's code //
////////////////////////////////

#define blk0(i) (W[i] = data[i])
#define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))

void SHA1::InitState(HashWordType *state)
{
state[0] = 0x67452301L;
state[1] = 0xEFCDAB89L;
state[2] = 0x98BADCFEL;
state[3] = 0x10325476L;
state[4] = 0xC3D2E1F0L;
}

#define f1(x,y,z) (z^(x&(y^z)))
#define f2(x,y,z) (x^y^z)
#define f3(x,y,z) ((x&y)|(z&(x|y)))
Expand All @@ -55,7 +48,7 @@ void SHA1::InitState(HashWordType *state)
#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);

void SHA1::Transform(word32 *state, const word32 *data)
static void SHA1_CXX_Transform(word32 *state, const word32 *data)
{
word32 W[16];
/* Copy context->state[] to working vars */
Expand Down Expand Up @@ -93,7 +86,223 @@ void SHA1::Transform(word32 *state, const word32 *data)
state[4] += e;
}

// end of Steve Reid's code
//////////////////////////////
// end of Steve Reid's code //
//////////////////////////////

#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
static void SHA1_SHAEXT_Transform(word32 *state, const word32 *data)
{
__m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
__m128i MASK, MSG0, MSG1, MSG2, MSG3;

word32 T[16];
ByteReverse(T, data, 64);

// Load initial values
ABCD = _mm_loadu_si128((__m128i*) state);
E0 = _mm_set_epi32(state[4], 0, 0, 0);
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
MASK = _mm_set_epi64x(W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f));

// Save current hash
ABCD_SAVE = ABCD;
E0_SAVE = E0;

// Rounds 0-3
MSG0 = _mm_loadu_si128((__m128i*) T+0);
MSG0 = _mm_shuffle_epi8(MSG0, MASK);
E0 = _mm_add_epi32(E0, MSG0);
E1 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);

// Rounds 4-7
MSG1 = _mm_loadu_si128((__m128i*) (T+4));
MSG1 = _mm_shuffle_epi8(MSG1, MASK);
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);

// Rounds 8-11
MSG2 = _mm_loadu_si128((__m128i*) (T+8));
MSG2 = _mm_shuffle_epi8(MSG2, MASK);
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
MSG0 = _mm_xor_si128(MSG0, MSG2);

// Rounds 12-15
MSG3 = _mm_loadu_si128((__m128i*) (T+12));
MSG3 = _mm_shuffle_epi8(MSG3, MASK);
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
MSG1 = _mm_xor_si128(MSG1, MSG3);

// Rounds 16-19
E0 = _mm_sha1nexte_epu32(E0, MSG0);
E1 = ABCD;
MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
MSG2 = _mm_xor_si128(MSG2, MSG0);

// Rounds 20-23
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
MSG3 = _mm_xor_si128(MSG3, MSG1);

// Rounds 24-27
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
MSG0 = _mm_xor_si128(MSG0, MSG2);

// Rounds 28-31
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
MSG1 = _mm_xor_si128(MSG1, MSG3);

// Rounds 32-35
E0 = _mm_sha1nexte_epu32(E0, MSG0);
E1 = ABCD;
MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
MSG2 = _mm_xor_si128(MSG2, MSG0);

// Rounds 36-39
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
MSG3 = _mm_xor_si128(MSG3, MSG1);

// Rounds 40-43
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
MSG0 = _mm_xor_si128(MSG0, MSG2);

// Rounds 44-47
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
MSG1 = _mm_xor_si128(MSG1, MSG3);

// Rounds 48-51
E0 = _mm_sha1nexte_epu32(E0, MSG0);
E1 = ABCD;
MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
MSG2 = _mm_xor_si128(MSG2, MSG0);

// Rounds 52-55
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
MSG3 = _mm_xor_si128(MSG3, MSG1);

// Rounds 56-59
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
MSG0 = _mm_xor_si128(MSG0, MSG2);

// Rounds 60-63
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
MSG1 = _mm_xor_si128(MSG1, MSG3);

// Rounds 64-67
E0 = _mm_sha1nexte_epu32(E0, MSG0);
E1 = ABCD;
MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
MSG2 = _mm_xor_si128(MSG2, MSG0);

// Rounds 68-71
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
MSG3 = _mm_xor_si128(MSG3, MSG1);

// Rounds 72-75
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);

// Rounds 76-79
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);

// Add values back to state
E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);

// Save state
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
_mm_storeu_si128((__m128i*) state, ABCD);
*(state+4) = _mm_extract_epi32(E0, 3);
}
#endif

typedef void (*pfnSHA1Transform)(word32 *state, const word32 *data);

pfnSHA1Transform InitializeSHA1Transform()
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
if (HasSHA())
return &SHA1_SHAEXT_Transform;
else
#endif

return &SHA1_CXX_Transform;
}

void SHA1::InitState(HashWordType *state)
{
state[0] = 0x67452301L;
state[1] = 0xEFCDAB89L;
state[2] = 0x98BADCFEL;
state[3] = 0x10325476L;
state[4] = 0xC3D2E1F0L;
}

void SHA1::Transform(word32 *state, const word32 *data)
{
static const pfnSHA1Transform s_pfn = InitializeSHA1Transform();
s_pfn(state, data);
}

// *************************************************************

Expand Down
3 changes: 1 addition & 2 deletions sha.h
@@ -1,7 +1,6 @@
// sha.h - written and placed in the public domain by Wei Dai

//! \file
//! \headerfile sha.h
//! \file sha.h
//! \brief Classes for SHA-1 and SHA-2 family of message digests

#ifndef CRYPTOPP_SHA_H
Expand Down

1 comment on commit 7ab9b00

@noloader
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also see Issue 139.

Please sign in to comment.