Skip to content

Commit

Permalink
MDEV-9872: New Power8 crc32(ieee) optimized functions
Browse files Browse the repository at this point in the history
These are different from the existing crc32 functions, which
were really crc32c.
  • Loading branch information
grooverdan authored and svoj committed Aug 5, 2016
1 parent e7e313f commit a2c826b
Show file tree
Hide file tree
Showing 12 changed files with 1,038 additions and 152 deletions.
2 changes: 1 addition & 1 deletion extra/crc32-vpmsum/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
ENABLE_LANGUAGE(ASM)
ADD_CONVENIENCE_LIBRARY(${CRC32_VPMSUM_LIBRARY} crc32.S crc32_wrapper.c)
ADD_CONVENIENCE_LIBRARY(${CRC32_VPMSUM_LIBRARY} crc32c.S crc32c_wrapper.c crc32ieee.S crc32ieee_wrapper.c)
58 changes: 9 additions & 49 deletions extra/crc32-vpmsum/crc32.S → extra/crc32-vpmsum/crc32.iS
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,8 @@
/* byte reverse permute constant */
.octa 0x0F0E0D0C0B0A09080706050403020100

#define __ASSEMBLY__
#include "crc32_constants.h"

.text

#if defined(__BIG_ENDIAN__) && defined(REFLECT)
#define BYTESWAP_DATA
#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
#define BYTESWAP_DATA
#else
#undef BYTESWAP_DATA
#endif

#define off16 r25
#define off32 r26
#define off48 r27
Expand All @@ -82,7 +71,7 @@
#endif

/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */
FUNC_START(__crc32_vpmsum)
FUNC_START(__F)
std r31,-8(r1)
std r30,-16(r1)
std r29,-24(r1)
Expand Down Expand Up @@ -126,19 +115,14 @@ FUNC_START(__crc32_vpmsum)
/* Get the initial value into v8 */
vxor v8,v8,v8
MTVRD(v8, r3)
#ifdef REFLECT

vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
#else
vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
#endif

#ifdef BYTESWAP_DATA
addis r3,r2,.byteswap_constant@toc@ha
addi r3,r3,.byteswap_constant@toc@l

lvx byteswap,0,r3
addi r3,r3,16
#endif

cmpdi r5,256
blt .Lshort
Expand Down Expand Up @@ -170,8 +154,8 @@ FUNC_START(__crc32_vpmsum)
addi r7,r7,-1
mtctr r7

addis r3,r2,.constants@toc@ha
addi r3,r3,.constants@toc@l
addis r3,r2,CONSTANTS@toc@ha
addi r3,r3,CONSTANTS@toc@l

/* Find the start of our constants */
add r3,r3,r8
Expand Down Expand Up @@ -376,7 +360,6 @@ FUNC_START(__crc32_vpmsum)
vxor v6,v6,v14
vxor v7,v7,v15

#ifdef REFLECT
/*
* vpmsumd produces a 96 bit result in the least significant bits
* of the register. Since we are bit reflected we have to shift it
Expand All @@ -391,7 +374,6 @@ FUNC_START(__crc32_vpmsum)
vsldoi v5,v5,zeroes,4
vsldoi v6,v6,zeroes,4
vsldoi v7,v7,zeroes,4
#endif

/* xor with last 1024 bits */
lvx v8,0,r4
Expand Down Expand Up @@ -526,42 +508,21 @@ FUNC_START(__crc32_vpmsum)

.Lbarrett_reduction:
/* Barrett constants */
addis r3,r2,.barrett_constants@toc@ha
addi r3,r3,.barrett_constants@toc@l
addis r3,r2,BARRETT_CONSTANTS@toc@ha
addi r3,r3,BARRETT_CONSTANTS@toc@l

lvx const1,0,r3
lvx const2,off16,r3

vsldoi v1,v0,v0,8
vxor v0,v0,v1 /* xor two 64 bit results together */

#ifdef REFLECT
/* shift left one bit */
vspltisb v1,1
vsl v0,v0,v1
#endif

vand v0,v0,mask_64bit

#ifndef REFLECT
/*
* Now for the Barrett reduction algorithm. The idea is to calculate q,
* the multiple of our polynomial that we need to subtract. By
* doing the computation 2x bits higher (ie 64 bits) and shifting the
* result back down 2x bits, we round down to the nearest multiple.
*/
VPMSUMD(v1,v0,const1) /* ma */
vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */
VPMSUMD(v1,v1,const2) /* qn */
vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */

/*
* Get the result into r3. We need to shift it left 8 bytes:
* V0 [ 0 1 2 X ]
* V0 [ 0 X 2 3 ]
*/
vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
#else
/*
* The reflected version of Barrett reduction. Instead of bit
* reflecting our data (which is expensive to do), we bit reflect our
Expand All @@ -582,7 +543,6 @@ FUNC_START(__crc32_vpmsum)
* V0 [ 0 X 2 3 ]
*/
vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */
#endif

.Lout:
subi r6,r1,56+10*16
Expand Down Expand Up @@ -631,8 +591,8 @@ FUNC_START(__crc32_vpmsum)
cmpdi r5,0
beq .Lzero

addis r3,r2,.short_constants@toc@ha
addi r3,r3,.short_constants@toc@l
addis r3,r2,SHORT_CONSTANTS@toc@ha
addi r3,r3,SHORT_CONSTANTS@toc@l

/* Calculate where in the constant table we need to start */
subfic r6,r5,256
Expand Down Expand Up @@ -770,6 +730,6 @@ FUNC_START(__crc32_vpmsum)
blr
b .Lout

FUNC_END(__crc32_vpmsum)
FUNC_END(__F)

#endif /* __powerpc__ */
Original file line number Diff line number Diff line change
@@ -1,41 +1,27 @@
#ifdef __powerpc__

#define CRC_TABLE
#include "crc32_constants.h"

#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN-1)

#ifdef REFLECT
static unsigned int crc32_align(unsigned int crc, unsigned char *p,
unsigned long len)
{
while (len--)
crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
return crc;
}
#else
static unsigned int crc32_align(unsigned int crc, unsigned char *p,
unsigned long len)
{
while (len--)
crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
return crc;
}
#endif

unsigned int __crc32_vpmsum(unsigned int crc, unsigned char *p,
unsigned int __F(unsigned int crc, unsigned char *p,
unsigned long len);

unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p,
unsigned int F(unsigned int crc, unsigned char *p,
unsigned long len)
{
unsigned int prealign;
unsigned int tail;

#ifdef CRC_XOR
crc ^= 0xffffffff;
#endif

if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
crc = crc32_align(crc, p, len);
Expand All @@ -49,7 +35,7 @@ unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p,
p += prealign;
}

crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
crc = __F(crc, p, len & ~VMX_ALIGN_MASK);

tail = len & VMX_ALIGN_MASK;
if (tail) {
Expand All @@ -58,9 +44,7 @@ unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p,
}

out:
#ifdef CRC_XOR
crc ^= 0xffffffff;
#endif

return crc;
}
Expand Down
14 changes: 14 additions & 0 deletions extra/crc32-vpmsum/crc32c.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifdef __powerpc__

#define CONSTANTS .crc32c_constants
#define SHORT_CONSTANTS .crc32c_short_constants
#define BARRETT_CONSTANTS .crc32c_barrett_constants

#include "crc32c_constants.h"

#define __F __crc32c_vpmsum

#include "crc32.iS"

#endif

Original file line number Diff line number Diff line change
Expand Up @@ -3,83 +3,10 @@

#ifdef __powerpc__


#define CRC 0x1edc6f41
#define CRC_XOR
#define REFLECT

#ifndef __ASSEMBLY__
#ifdef CRC_TABLE
static const unsigned int crc_table[] = {
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,};

#endif
#else
#define MAX_SIZE 32768
.constants:
CONSTANTS:

/* Reduce 262144 bits (MAX_SIZE = 32768 bytes) to 1024 bits */
/* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
Expand Down Expand Up @@ -847,7 +774,7 @@ static const unsigned int crc_table[] = {
/* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
.octa 0x0000000171fb63ce00000001609bc4b4

.short_constants:
SHORT_CONSTANTS:

/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
/* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
Expand Down Expand Up @@ -899,12 +826,11 @@ static const unsigned int crc_table[] = {
.octa 0x82f63b786ea2d55ca66805eb18b8ea18


.barrett_constants:
BARRETT_CONSTANTS:
/* 33 bit reflected Barrett constant m - (4^32)/n */
.octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */
/* 33 bit reflected Barrett constant n */
.octa 0x00000000000000000000000105ec76f1
#endif

#endif /* __powerpc__ */

Expand Down
Loading

0 comments on commit a2c826b

Please sign in to comment.