diff --git a/doc/safegcd_implementation.md b/doc/safegcd_implementation.md index efb61b06c..3ae556f9a 100644 --- a/doc/safegcd_implementation.md +++ b/doc/safegcd_implementation.md @@ -244,8 +244,8 @@ def modinv(M, Mi, x): This means that in practice we'll always perform a multiple of *N* divsteps. This is not a problem because once *g=0*, further divsteps do not affect *f*, *g*, *d*, or *e* anymore (only *δ* keeps -increasing). For variable time code such excess iterations will be mostly optimized away in -section 6. +increasing). For variable time code such excess iterations will be mostly optimized away in later +sections. ## 4. Avoiding modulus operations @@ -519,6 +519,20 @@ computation: g >>= 1 ``` +A variant of divsteps with better worst-case performance can be used instead: starting *δ* at +*1/2* instead of *1*. This reduces the worst case number of iterations to *590* for *256*-bit inputs +(which can be shown using convex hull analysis). In this case, the substitution *ζ=-(δ+1/2)* +is used instead to keep the variable integral. Incrementing *δ* by *1* still translates to +decrementing *ζ* by *1*, but negating *δ* now corresponds to going from *ζ* to *-(ζ+1)*, or +*~ζ*. Doing that conditionally based on *c3* is simply: + +```python + ... + c3 = c1 & c2 + zeta ^= c3 + ... +``` + By replacing the loop in `divsteps_n_matrix` with a variant of the divstep code above (extended to also apply all *f* operations to *u*, *v* and all *g* operations to *q*, *r*), a constant-time version of `divsteps_n_matrix` is obtained. The full code will be in section 7. @@ -535,7 +549,8 @@ other cases, it slows down calculations unnecessarily. In this section, we will faster non-constant time `divsteps_n_matrix` function. To do so, first consider yet another way of writing the inner loop of divstep operations in -`gcd` from section 1. This decomposition is also explained in the paper in section 8.2. +`gcd` from section 1. This decomposition is also explained in the paper in section 8.2. We use +the original version with initial *δ=1* and *η=-δ* here. ```python for _ in range(N): @@ -643,24 +658,24 @@ All together we need the following functions: section 5, extended to handle *u*, *v*, *q*, *r*: ```python -def divsteps_n_matrix(eta, f, g): - """Compute eta and transition matrix t after N divsteps (multiplied by 2^N).""" +def divsteps_n_matrix(zeta, f, g): + """Compute zeta and transition matrix t after N divsteps (multiplied by 2^N).""" u, v, q, r = 1, 0, 0, 1 # start with identity matrix for _ in range(N): - c1 = eta >> 63 + c1 = zeta >> 63 # Compute x, y, z as conditionally-negated versions of f, u, v. x, y, z = (f ^ c1) - c1, (u ^ c1) - c1, (v ^ c1) - c1 c2 = -(g & 1) # Conditionally add x, y, z to g, q, r. g, q, r = g + (x & c2), q + (y & c2), r + (z & c2) c1 &= c2 # reusing c1 here for the earlier c3 variable - eta = (eta ^ c1) - (c1 + 1) # inlining the unconditional eta decrement here + zeta = (zeta ^ c1) - 1 # inlining the unconditional zeta decrement here # Conditionally add g, q, r to f, u, v. f, u, v = f + (g & c1), u + (q & c1), v + (r & c1) # When shifting g down, don't shift q, r, as we construct a transition matrix multiplied # by 2^N. Instead, shift f's coefficients u and v up. g, u, v = g >> 1, u << 1, v << 1 - return eta, (u, v, q, r) + return zeta, (u, v, q, r) ``` - The functions to update *f* and *g*, and *d* and *e*, from section 2 and section 4, with the constant-time @@ -702,15 +717,15 @@ def normalize(sign, v, M): return v ``` -- And finally the `modinv` function too, adapted to use *η* instead of *δ*, and using the fixed +- And finally the `modinv` function too, adapted to use *ζ* instead of *δ*, and using the fixed iteration count from section 5: ```python def modinv(M, Mi, x): """Compute the modular inverse of x mod M, given Mi=1/M mod 2^N.""" - eta, f, g, d, e = -1, M, x, 0, 1 - for _ in range((724 + N - 1) // N): - eta, t = divsteps_n_matrix(-eta, f % 2**N, g % 2**N) + zeta, f, g, d, e = -1, M, x, 0, 1 + for _ in range((590 + N - 1) // N): + zeta, t = divsteps_n_matrix(zeta, f % 2**N, g % 2**N) f, g = update_fg(f, g, t) d, e = update_de(d, e, t, M, Mi) return normalize(f, d, M) diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h index aa7988c4b..661c5fc04 100644 --- a/src/modinv32_impl.h +++ b/src/modinv32_impl.h @@ -168,17 +168,17 @@ typedef struct { int32_t u, v, q, r; } secp256k1_modinv32_trans2x2; -/* Compute the transition matrix and eta for 30 divsteps. +/* Compute the transition matrix and zeta for 30 divsteps. * - * Input: eta: initial eta - * f0: bottom limb of initial f - * g0: bottom limb of initial g + * Input: zeta: initial zeta + * f0: bottom limb of initial f + * g0: bottom limb of initial g * Output: t: transition matrix - * Return: final eta + * Return: final zeta * * Implements the divsteps_n_matrix function from the explanation. */ -static int32_t secp256k1_modinv32_divsteps_30(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) { +static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) { /* u,v,q,r are the elements of the transformation matrix being built up, * starting with the identity matrix. Semantically they are signed integers * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This @@ -193,8 +193,8 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t eta, uint32_t f0, uint32_t VERIFY_CHECK((f & 1) == 1); /* f must always be odd */ VERIFY_CHECK((u * f0 + v * g0) == f << i); VERIFY_CHECK((q * f0 + r * g0) == g << i); - /* Compute conditional masks for (eta < 0) and for (g & 1). */ - c1 = eta >> 31; + /* Compute conditional masks for (zeta < 0) and for (g & 1). */ + c1 = zeta >> 31; c2 = -(g & 1); /* Compute x,y,z, conditionally negated versions of f,u,v. */ x = (f ^ c1) - c1; @@ -204,10 +204,10 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t eta, uint32_t f0, uint32_t g += x & c2; q += y & c2; r += z & c2; - /* In what follows, c1 is a condition mask for (eta < 0) and (g & 1). */ + /* In what follows, c1 is a condition mask for (zeta < 0) and (g & 1). */ c1 &= c2; - /* Conditionally negate eta, and unconditionally subtract 1. */ - eta = (eta ^ c1) - (c1 + 1); + /* Conditionally change zeta into -zeta-2 or zeta-1. */ + zeta = (zeta ^ c1) - 1; /* Conditionally add g,q,r to f,u,v. */ f += g & c1; u += q & c1; @@ -216,8 +216,8 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t eta, uint32_t f0, uint32_t g >>= 1; u <<= 1; v <<= 1; - /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */ - VERIFY_CHECK(eta >= -751 && eta <= 751); + /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */ + VERIFY_CHECK(zeta >= -601 && zeta <= 601); } /* Return data in t and return value. */ t->u = (int32_t)u; @@ -229,7 +229,7 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t eta, uint32_t f0, uint32_t * will be divided out again). As each divstep's individual matrix has determinant 2, the * aggregate of 30 of them will have determinant 2^30. */ VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30); - return eta; + return zeta; } /* Compute the transition matrix and eta for 30 divsteps (variable time). @@ -453,19 +453,19 @@ static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_sign /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */ static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) { - /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */ + /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */ secp256k1_modinv32_signed30 d = {{0}}; secp256k1_modinv32_signed30 e = {{1}}; secp256k1_modinv32_signed30 f = modinfo->modulus; secp256k1_modinv32_signed30 g = *x; int i; - int32_t eta = -1; + int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */ - /* Do 25 iterations of 30 divsteps each = 750 divsteps. 724 suffices for 256-bit inputs. */ - for (i = 0; i < 25; ++i) { - /* Compute transition matrix and new eta after 30 divsteps. */ + /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */ + for (i = 0; i < 20; ++i) { + /* Compute transition matrix and new zeta after 30 divsteps. */ secp256k1_modinv32_trans2x2 t; - eta = secp256k1_modinv32_divsteps_30(eta, f.v[0], g.v[0], &t); + zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t); /* Update d,e using that transition matrix. */ secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo); /* Update f,g using that transition matrix. */ @@ -515,7 +515,7 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256 int i = 0; #endif int j, len = 9; - int32_t eta = -1; + int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */ int32_t cond, fn, gn; /* Do iterations of 30 divsteps each until g=0. */ diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h index 78505fa18..a88184361 100644 --- a/src/modinv64_impl.h +++ b/src/modinv64_impl.h @@ -145,17 +145,17 @@ typedef struct { int64_t u, v, q, r; } secp256k1_modinv64_trans2x2; -/* Compute the transition matrix and eta for 62 divsteps. +/* Compute the transition matrix and zeta for 62 divsteps (where zeta=-(delta+1/2)). * - * Input: eta: initial eta - * f0: bottom limb of initial f - * g0: bottom limb of initial g + * Input: zeta: initial zeta + * f0: bottom limb of initial f + * g0: bottom limb of initial g * Output: t: transition matrix - * Return: final eta + * Return: final zeta * * Implements the divsteps_n_matrix function from the explanation. */ -static int64_t secp256k1_modinv64_divsteps_62(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) { +static int64_t secp256k1_modinv64_divsteps_62(int64_t zeta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) { /* u,v,q,r are the elements of the transformation matrix being built up, * starting with the identity matrix. Semantically they are signed integers * in range [-2^62,2^62], but here represented as unsigned mod 2^64. This @@ -170,8 +170,8 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t eta, uint64_t f0, uint64_t VERIFY_CHECK((f & 1) == 1); /* f must always be odd */ VERIFY_CHECK((u * f0 + v * g0) == f << i); VERIFY_CHECK((q * f0 + r * g0) == g << i); - /* Compute conditional masks for (eta < 0) and for (g & 1). */ - c1 = eta >> 63; + /* Compute conditional masks for (zeta < 0) and for (g & 1). */ + c1 = zeta >> 63; c2 = -(g & 1); /* Compute x,y,z, conditionally negated versions of f,u,v. */ x = (f ^ c1) - c1; @@ -181,10 +181,10 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t eta, uint64_t f0, uint64_t g += x & c2; q += y & c2; r += z & c2; - /* In what follows, c1 is a condition mask for (eta < 0) and (g & 1). */ + /* In what follows, c1 is a condition mask for (zeta < 0) and (g & 1). */ c1 &= c2; - /* Conditionally negate eta, and unconditionally subtract 1. */ - eta = (eta ^ c1) - (c1 + 1); + /* Conditionally change zeta into -zeta-2 or zeta-1. */ + zeta = (zeta ^ c1) - 1; /* Conditionally add g,q,r to f,u,v. */ f += g & c1; u += q & c1; @@ -193,8 +193,8 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t eta, uint64_t f0, uint64_t g >>= 1; u <<= 1; v <<= 1; - /* Bounds on eta that follow from the bounds on iteration count (max 12*62 divsteps). */ - VERIFY_CHECK(eta >= -745 && eta <= 745); + /* Bounds on zeta that follow from the bounds on iteration count (max 10*62 divsteps). */ + VERIFY_CHECK(zeta >= -621 && zeta <= 621); } /* Return data in t and return value. */ t->u = (int64_t)u; @@ -206,10 +206,10 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t eta, uint64_t f0, uint64_t * will be divided out again). As each divstep's individual matrix has determinant 2, the * aggregate of 62 of them will have determinant 2^62. */ VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62); - return eta; + return zeta; } -/* Compute the transition matrix and eta for 62 divsteps (variable time). +/* Compute the transition matrix and eta for 62 divsteps (variable time, eta=-delta). * * Input: eta: initial eta * f0: bottom limb of initial f @@ -455,19 +455,19 @@ static void secp256k1_modinv64_update_fg_62_var(int len, secp256k1_modinv64_sign /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */ static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) { - /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */ + /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */ secp256k1_modinv64_signed62 d = {{0, 0, 0, 0, 0}}; secp256k1_modinv64_signed62 e = {{1, 0, 0, 0, 0}}; secp256k1_modinv64_signed62 f = modinfo->modulus; secp256k1_modinv64_signed62 g = *x; int i; - int64_t eta = -1; + int64_t zeta = -1; /* zeta = -(delta+1/2); delta starts at 1/2. */ - /* Do 12 iterations of 62 divsteps each = 744 divsteps. 724 suffices for 256-bit inputs. */ - for (i = 0; i < 12; ++i) { - /* Compute transition matrix and new eta after 62 divsteps. */ + /* Do 10 iterations of 62 divsteps each = 620 divsteps. 590 suffices for 256-bit inputs. */ + for (i = 0; i < 10; ++i) { + /* Compute transition matrix and new zeta after 62 divsteps. */ secp256k1_modinv64_trans2x2 t; - eta = secp256k1_modinv64_divsteps_62(eta, f.v[0], g.v[0], &t); + zeta = secp256k1_modinv64_divsteps_62(zeta, f.v[0], g.v[0], &t); /* Update d,e using that transition matrix. */ secp256k1_modinv64_update_de_62(&d, &e, &t, modinfo); /* Update f,g using that transition matrix. */ @@ -517,7 +517,7 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256 int i = 0; #endif int j, len = 5; - int64_t eta = -1; + int64_t eta = -1; /* eta = -delta; delta is initially 1 */ int64_t cond, fn, gn; /* Do iterations of 62 divsteps each until g=0. */