Skip to content
Browse files

libmpa: Implement Montgomery ladder

The mpa_exp_mod() function implements an LtoR algorithm. The LtoR
algorithm is sensitive to timing attacks because it leaks information
about the exponent: it does a different amount of work in each loop
iteration of the modular exponentiation. It always does a square in
each iteration, but it also does an additional multiply when the
exponent bit k=1.

This patch implements the Montgomery ladder (and thereby replaces the
old LtoR implementation), which always does the same number of
operations in each loop iteration and thereby makes it more robust to
timing attacks.

Fixes: OP-TEE-2016-0002 which was reported by Applus+ Laboratories.

Signed-off-by: Joakim Bech <>
Acked-by: Jerome Forissier <>
Acked-by: Jens Wiklander <>
Acked-by: Etienne Carriere <>
Tested-by: Jerome Forissier <> (HiKey, GP)
Tested-by: Etienne Carriere <> (b2260, GP)
  • Loading branch information
jbech-linaro authored and jforissier committed Jun 19, 2017
1 parent 93b0a70 commit 40b1b281a6f85f8658be749dc92b57d6a8bd5e78
Showing with 59 additions and 24 deletions.
  1. +59 −24 lib/libmpa/mpa_expmod.c
@@ -26,60 +26,95 @@
#include "mpa.h"

/*
 * swp - exchange the two "mpanum *" values that a and b point to.
 *
 * a and b are expressions of type "mpanum **". Each argument is expanded
 * twice, so it must not have side effects. The arguments are parenthesized
 * so that compound expressions (e.g. "arr + 1") are dereferenced as a whole,
 * and the temporary has a macro-specific name so it cannot capture a caller
 * variable called "tmp".
 */
#define swp(a, b) do { \
	mpanum *swp_tmp_ = *(a); \
	*(a) = *(b); \
	*(b) = swp_tmp_; \
} while (0)

* mpa_exp_mod
* Calculates dest = op1 ^ op2 mod n
* This function uses the Montgomery ladder concept as proposed by Marc Joye and
* Sun-Ming Yen, which makes the function more resistant to timing attacks.
void mpa_exp_mod(mpanum dest,
const mpanum op1,
const mpanum op2,
const mpanum n,
const mpanum r_modn,
const mpanum r2_modn,
const mpa_word_t n_inv, mpa_scratch_mem pool)
const mpanum op1,
const mpanum op2,
const mpanum n,
const mpanum r_modn,
const mpanum r2_modn,
const mpa_word_t n_inv, mpa_scratch_mem pool)
mpanum A;
mpanum B;
mpanum tmp_a;
mpanum xtilde;
mpanum tmp_xtilde;
mpanum *ptr_a;
mpanum *ptr_b;
mpanum *swapper;
mpanum *ptr_tmp_a;
mpanum *ptr_xtilde;
mpanum *ptr_tmp_xtilde;
int idx;

mpa_alloc_static_temp_var(&A, pool);
mpa_alloc_static_temp_var(&B, pool);
mpa_alloc_static_temp_var(&tmp_a, pool);
mpa_alloc_static_temp_var(&xtilde, pool);
mpa_alloc_static_temp_var(&tmp_xtilde, pool);

/* transform to Montgomery space */
/* use internal version since xtidle is big enough */
* Transform the base (op1) into Montgomery space. Use internal version
* since xtilde is big enough.
__mpa_montgomery_mul(xtilde, op1, r2_modn, n, n_inv);

mpa_copy(A, r_modn);

ptr_a = &A;
ptr_b = &B;
ptr_tmp_a = &tmp_a;
ptr_xtilde = &xtilde;
ptr_tmp_xtilde = &tmp_xtilde;


for (idx = mpa_highest_bit_index(op2); idx >= 0; idx--) {
__mpa_montgomery_mul(*ptr_b, *ptr_a, *ptr_a, n, n_inv);
if (mpa_get_bit(op2, idx) == 1) {
__mpa_montgomery_mul(*ptr_a, *ptr_b, xtilde, n, n_inv);
if (mpa_get_bit(op2, idx) == 0) {
/* x' = A*x' */
__mpa_montgomery_mul(*ptr_tmp_xtilde, *ptr_a,
*ptr_xtilde, n, n_inv);

/* A = A^2 */
__mpa_montgomery_mul(*ptr_tmp_a, *ptr_a, *ptr_a, n,
} else {
swapper = ptr_a;
ptr_a = ptr_b;
ptr_b = swapper;
/* A = A*x' */
__mpa_montgomery_mul(*ptr_tmp_a, *ptr_a, *ptr_xtilde, n,

/* x' = x'^2 */
__mpa_montgomery_mul(*ptr_tmp_xtilde, *ptr_xtilde,
*ptr_xtilde, n, n_inv);

* The reason for swapping here is simply to avoid copying the
* intermediate results; instead we just exchange the pointers.
swp(&ptr_tmp_a, &ptr_a);
swp(&ptr_tmp_xtilde, &ptr_xtilde);

/* transform back form Montgomery space */
__mpa_montgomery_mul(*ptr_b, (const mpanum)&const_one, *ptr_a,
/* Transform back from Montgomery space */
__mpa_montgomery_mul(*ptr_tmp_a, (const mpanum)&const_one, *ptr_a,
n, n_inv);

mpa_copy(dest, *ptr_b);
mpa_copy(dest, *ptr_tmp_a);

mpa_free_static_temp_var(&A, pool);
mpa_free_static_temp_var(&B, pool);
mpa_free_static_temp_var(&tmp_a, pool);
mpa_free_static_temp_var(&xtilde, pool);
mpa_free_static_temp_var(&tmp_xtilde, pool);

0 comments on commit 40b1b28

Please sign in to comment.