New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Split off the base64 utilities into a jose-b64
crate
#3
Conversation
8d9afbf
to
231cf5c
Compare
Here's an <T as jose_b64::codec::Codec>::e2d:
Lfunc_begin7:
.cfi_startproc
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
push rbx
.cfi_offset rbx, -24
mov ecx, esi
Ltmp39:
movzx eax, cl
Ltmp40:
lea r8, [rip + l___unnamed_7]
movzx eax, byte ptr [rax + r8]
cmp eax, 255
je LBB7_6
Ltmp41:
mov ebx, esi
shr ebx, 8
Ltmp42:
movzx ebx, bl
Ltmp43:
movzx ebx, byte ptr [rbx + r8]
cmp ebx, 255
Ltmp44:
je LBB7_8
Ltmp45:
shr esi, 16
Ltmp46:
movzx esi, sil
Ltmp47:
movzx esi, byte ptr [rsi + r8]
cmp esi, 255
Ltmp48:
je LBB7_9
Ltmp49:
shr rcx, 24
Ltmp50:
mov cl, byte ptr [rcx + r8]
Ltmp51:
cmp cl, -1
Ltmp52:
je LBB7_5
Ltmp53:
shl eax, 18
shl ebx, 12
or eax, ebx
shl esi, 6
or ebx, esi
Ltmp54:
shr eax, 16
Ltmp55:
or cl, sil
mov byte ptr [rdi + 1], al
mov byte ptr [rdi + 2], bh
mov byte ptr [rdi + 3], cl
xor eax, eax
Ltmp56:
mov byte ptr [rdi], al
pop rbx
pop rbp
ret
Ltmp57:
LBB7_8:
inc rdx
Ltmp58:
jmp LBB7_6
Ltmp59:
LBB7_9:
add rdx, 2
Ltmp60:
jmp LBB7_6
Ltmp61:
LBB7_5:
add rdx, 3
Ltmp62:
LBB7_6:
mov qword ptr [rdi + 8], rdx
mov al, 1
mov byte ptr [rdi], al
pop rbx
pop rbp
ret
Ltmp63:
Lfunc_end7:
.cfi_endproc
<T as jose_b64::codec::Codec>::d2e:
Lfunc_begin6:
.cfi_startproc
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
Ltmp34:
shl edi, 8
Ltmp35:
bswap edi
Ltmp36:
mov eax, edi
shr eax, 18
lea rcx, [rip + l___unnamed_6]
movzx edx, byte ptr [rax + rcx]
mov eax, edi
shr eax, 12
and eax, 63
movzx eax, byte ptr [rax + rcx]
mov esi, edi
and esi, 63
shr edi, 6
and edi, 63
movzx edi, byte ptr [rdi + rcx]
Ltmp37:
movzx ecx, byte ptr [rsi + rcx]
shl ecx, 24
shl edi, 16
shl eax, 8
or eax, edx
or eax, edi
or eax, ecx
pop rbp
ret
Ltmp38:
Lfunc_end6:
.cfi_endproc |
Likewise, here's an aarch64 ASM dump: <T as jose_b64::codec::Codec>::e2d:
Lfunc_begin7:
.cfi_startproc
mov w8, w1
Ltmp101:
and x9, x8, #0xff
Ltmp102:
Lloh4:
adrp x10, l___unnamed_7@PAGE
Lloh5:
add x10, x10, l___unnamed_7@PAGEOFF
ldrb w9, [x10, x9]
cmp w9, #255
b.eq LBB7_5
Ltmp103:
ubfx x11, x1, #8, #8
Ltmp104:
ldrb w11, [x10, x11]
cmp w11, #255
Ltmp105:
b.eq LBB7_6
Ltmp106:
ubfx x12, x1, #16, #8
Ltmp107:
ldrb w12, [x10, x12]
cmp w12, #255
Ltmp108:
b.eq LBB7_7
Ltmp109:
lsr x8, x8, #24
Ltmp110:
ldrb w10, [x10, x8]
cmp w10, #255
Ltmp111:
b.ne LBB7_8
Ltmp112:
add x8, x2, #3
Ltmp113:
str x8, [x0, #8]
mov w8, #1
Ltmp114:
strb w8, [x0]
ret
Ltmp115:
LBB7_5:
str x2, [x0, #8]
mov w8, #1
Ltmp116:
strb w8, [x0]
ret
Ltmp117:
LBB7_6:
add x8, x2, #1
Ltmp118:
str x8, [x0, #8]
mov w8, #1
Ltmp119:
strb w8, [x0]
ret
Ltmp120:
LBB7_7:
add x8, x2, #2
Ltmp121:
str x8, [x0, #8]
mov w8, #1
Ltmp122:
strb w8, [x0]
ret
Ltmp123:
LBB7_8:
lsl w11, w11, #12
orr w9, w11, w9, lsl #18
lsl w12, w12, #6
orr w11, w12, w11
Ltmp124:
lsr w9, w9, #16
lsr w11, w11, #8
strb w9, [x0, #1]
orr w9, w10, w12
strb w11, [x0, #2]
strb w9, [x0, #3]
Ltmp125:
strb wzr, [x0]
ret
Ltmp126:
.loh AdrpAdd Lloh4, Lloh5
Lfunc_end7:
.cfi_endproc
<T as jose_b64::codec::Codec>::d2e:
Lfunc_begin6:
.cfi_startproc
lsl w8, w0, #8
Ltmp95:
rev w8, w8
Ltmp96:
lsr x9, x8, #18
Lloh2:
adrp x10, l___unnamed_6@PAGE
Lloh3:
add x10, x10, l___unnamed_6@PAGEOFF
ldrb w0, [x10, x9]
Ltmp97:
ubfx x9, x8, #12, #6
ldrb w9, [x10, x9]
and x11, x8, #0x3f
ubfx x8, x8, #6, #6
Ltmp98:
ldrb w8, [x10, x8]
Ltmp99:
ldrb w10, [x10, x11]
bfi w0, w9, #8, #8
bfi w0, w8, #16, #8
bfi w0, w10, #24, #8
ret
Ltmp100:
.loh AdrpAdd Lloh2, Lloh3
Lfunc_end6:
.cfi_endproc |
The only other place we could branch on the contents is during |
By this, I take it you mean streaming the output of the encoder?
Augmenting it to support streaming outputs rather than a contiguous slice is probably not too difficult. |
@npmccallum regarding constant-time operation, the main difference between something like this implementation and
LUTs use data-dependent memory access and are not expected to operate in constant time on any architecture and generally prone to microarchitectural sidechannels (e.g. cache timing attacks) |
@tarcieri You are, of course, correct that memory access isn't constant time. Complete brain fart on my side. I have now replaced the core base64 functionality with |
f882963
to
a81dca9
Compare
MERGE THIS FIRST: #5 |
Signed-off-by: Nathaniel McCallum <nathaniel@profian.com>
Signed-off-by: Nathaniel McCallum <nathaniel@profian.com>
Additionally, move the types into logical modules for better organization. Signed-off-by: Nathaniel McCallum <nathaniel@profian.com>
The b64 still provides a variety of types useful for JOSE implementations. However, the actual base64 encoding is now done by base64ct. This means that we hopefully won't leak key values via timing attacks. One notable exception is for the streaming Encoder/Decoder. We still use the base64ct implementation here. However, we currently report value errors on every block rather than accumulating the error flag until the end of the stream. This ensures that we don't pass on invalid blocks to the inner stream. Otherwise, this encoder should still be constant time. Signed-off-by: Nathaniel McCallum <nathaniel@profian.com>
|
||
/// A serde wrapper for base64-encoded bytes. | ||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] | ||
pub struct Bytes<T = Box<[u8]>, C = UrlSafe> { | ||
pub struct Bytes<T = Box<[u8]>, E = Base64UrlUnpadded> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like the use of E
to avoid code duplication for different base64 encodings 👍 Good idea
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is an idea that emerged out of necessity. :) Specifically, the alternate encoding is required by the x5c
parameter which requires standard Base64. See: https://www.rfc-editor.org/rfc/rfc7515#section-4.1.6
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I know. We just hide this implementation detail: https://github.com/minkan-chat/jose/blob/jws-refactor/src/header.rs#L234-L253
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { | ||
f.debug_tuple("Bytes").field(&self.buf).finish() | ||
} | ||
} | ||
|
||
impl<T, C> Deref for Bytes<T, C> { | ||
impl<T, E> Deref for Bytes<T, E> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm kind of torn with the Deref
and DerefMut
implementation, look at the std docs:
[...] Deref should only be implemented for smart pointers to avoid confusion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the only question is how broadly we interpret smart pointer
. For example, Zeroizing
uses Deref
while not being a smart pointer in the strictest sense. In the looser sense, Bytes
is a smart pointer in the same way Zeroizing
is (i.e. a wrapper around a contiguous collection of bytes).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I'm not sure if it is alright or if it isn't. We might have to ask some other rust folks. For example, we have a similar thing in our implementation: https://github.com/minkan-chat/jose/blob/jws-refactor/src/base64_url.rs#L75
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW, I'd consider both this application and Zeroizing
to be "smart pointer" cases.
To me a smart pointer is a special container that is generic around some contained data it owns and mediates access to. Having a special Drop
impl is definitely one of the intended use cases.
FYI: I'll try to go through this over the weekend |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks OK as a wrapper around base64ct
.
- I would still suggest trying to upstream
StreamingDecoder
/StreamingEncoder
types tobase64ct
. - I'm still unclear what streaming use cases you aim to support with this but assume they exist.
- All of the rest of the RustCrypto projects have migrated away from
mod.rs
to use 2018 edition module style. I would suggest using that if only for consistency. - We have the
serdect
crate for optional constant-time Base64 decoding/encoding withbase64ct
, although I'm not sure it fulfills the same use cases. - The
zero
module seems a little weird but fine as an internal implementation detail
👍 If we can merge this PR with the goal of making that change, it would give you time to review the higher level patches while I work on porting this in parallel.
JOSE supports detached signing / encryption. This is commonly used for things like signing OS images where the signature itself is JSON but the payload sits as binary on the disk (and may be GiBs in size). Streaming signature creation/verification is the only way this scales.
I will comply. Nevertheless, the proposed reasoning for this in Rust change is that editors can align the module directories with the module files. However, to my knowledge, no editors currently do this. Instead, they all align the module directories at the top and the module files at the bottom. In my experience, this style has never caught on broadly in the Rust ecosystem because of this problem.
This only supports hex, not base64(url).
What's weird about it? It is just an internal implementation detail as a way to make |
Please focus on reviewing the ENTIRE new crate; not just what appears in the diff. This is the first time this code has gone through a RustCrypto review.
This contains an independent implementation of Base64 from
base64ct
. One particular focus of this new crate is the ability to stream base64 encoding. This is essential for the streamed signing/verification in the future JWS crate.The
Bytes
type is the most commonly used type in this crate besides the codec types. It decodes bytes from a base64 string in serde.The
Secret
type is similar toBytes
, but adds additional invariants for secret handling.The
Json
type is used forJWS
(andJWE
in the future). It is for nested JSON.The
Optional
type is used to make encoding runtime optional. This is important for theb64
header value inJWS
.The
Update
trait is useful for implementing streaming in downstream crates. For example, it isn't only used for base64, but also for signing and verification.MERGE THIS FIRST: #5
@tarcieri @CBenoit @Erik1000