Skip to content

Commit

Permalink
pem-rfc7468: buffered Base64 decoder (#406)
Browse files Browse the repository at this point in the history
Modifies `pem_rfc7468::Decoder` to be a wrapper around the buffered
`base64ct::Decoder` type.

This allows PEM documents to be decoded incrementally rather than only in
one shot. One-shot decoding is still supported through the set of static
functions which previously served as the crate's primary API, so this
should not be a breaking change for most current applications.

Additionally implements `std::io::Read` for `pem_rfc7468::Decoder`,
delegating to the `std::io::Read` implementation added to
`base64ct::Decoder` in #404.
  • Loading branch information
tarcieri committed Feb 9, 2022
1 parent a1f08fd commit cd23518
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 171 deletions.
4 changes: 2 additions & 2 deletions pem-rfc7468/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ rust-version = "1.56"
base64ct = { version = "=1.4.0-pre.0", path = "../base64ct" }

[features]
alloc = []
std = ["alloc"]
alloc = ["base64ct/alloc"]
std = ["alloc", "base64ct/std"]

[package.metadata.docs.rs]
all-features = true
Expand Down
251 changes: 108 additions & 143 deletions pem-rfc7468/src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,48 @@
//!
//! [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648

use crate::{
grammar, Base64Decoder, Error, Result, BASE64_WRAP_WIDTH, POST_ENCAPSULATION_BOUNDARY,
PRE_ENCAPSULATION_BOUNDARY,
};
use core::str;

#[cfg(feature = "alloc")]
use alloc::vec::Vec;

use crate::{grammar, Error, Result, POST_ENCAPSULATION_BOUNDARY, PRE_ENCAPSULATION_BOUNDARY};
use base64ct::{Base64, Encoding};
use core::str;
#[cfg(feature = "std")]
use std::io;

/// Decode a PEM document according to RFC 7468's "Strict" grammar.
///
/// On success, writes the decoded document into the provided buffer, returning
/// the decoded label and the portion of the provided buffer containing the
/// decoded message.
pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
Decoder::new().decode(pem, buf)
let mut decoder = Decoder::new(pem).map_err(|e| check_for_headers(pem, e))?;
let type_label = decoder.type_label();
let buf = buf.get_mut(..decoder.decoded_len()).ok_or(Error::Length)?;
let decoded = decoder.decode(buf).map_err(|e| check_for_headers(pem, e))?;

if decoder.base64.is_finished() {
Ok((type_label, decoded))
} else {
Err(Error::Length)
}
}

/// Decode a PEM document according to RFC 7468's "Strict" grammar, returning
/// the result as a [`Vec`] upon success.
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub fn decode_vec(pem: &[u8]) -> Result<(&str, Vec<u8>)> {
Decoder::new().decode_vec(pem)
let mut decoder = Decoder::new(pem).map_err(|e| check_for_headers(pem, e))?;
let type_label = decoder.type_label();
let mut buf = Vec::new();
decoder
.decode_to_end(&mut buf)
.map_err(|e| check_for_headers(pem, e))?;
Ok((type_label, buf))
}

/// Decode the encapsulation boundaries of a PEM document according to RFC 7468's "Strict" grammar.
Expand All @@ -41,64 +61,103 @@ pub fn decode_label(pem: &[u8]) -> Result<&str> {
Ok(Encapsulation::try_from(pem)?.label())
}

/// Check for PEM headers in the input, as they are disallowed by RFC7468.
///
/// Returns `Error::HeaderDisallowed` if headers are encountered.
fn check_for_headers(pem: &[u8], err: Error) -> Error {
if err == Error::Base64(base64ct::Error::InvalidEncoding)
&& pem.iter().any(|&b| b == grammar::CHAR_COLON)
{
Error::HeaderDisallowed
} else {
err
}
}

/// PEM decoder.
///
/// This type provides a degree of configurability for how PEM is decoded.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Decoder {
/// Number of characters at which to line-wrap Base64-encoded data
/// (default `64`).
///
/// Must be a multiple of `4`, or otherwise decoding operations will return
/// `Error::Base64`.
// TODO(tarcieri): support for wrap widths which aren't multiples of 4?
pub wrap_width: usize,
/// Stateful buffered decoder type which decodes an input PEM document according
/// to RFC 7468's "Strict" grammar.
#[derive(Clone)]
pub struct Decoder<'i> {
/// PEM type label.
type_label: &'i str,

/// Buffered Base64 decoder.
base64: Base64Decoder<'i>,
}

impl Decoder {
/// Create a new [`Decoder`] with the default options.
pub fn new() -> Self {
Self::default()
impl<'i> Decoder<'i> {
/// Create a new PEM [`Decoder`] with the default options.
///
/// Uses the default 64-character line wrapping.
pub fn new(pem: &'i [u8]) -> Result<Self> {
Self::new_wrapped(pem, BASE64_WRAP_WIDTH)
}

/// Decode a PEM document according to RFC 7468's "Strict" grammar.
///
/// On success, writes the decoded document into the provided buffer, returning
/// the decoded label and the portion of the provided buffer containing the
/// decoded message.
pub fn decode<'i, 'o>(&self, pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
/// Create a new PEM [`Decoder`] which wraps at the given line width.
pub fn new_wrapped(pem: &'i [u8], line_width: usize) -> Result<Self> {
let encapsulation = Encapsulation::try_from(pem)?;
let label = encapsulation.label();
let decoded_bytes = encapsulation.decode(self, buf)?;
Ok((label, decoded_bytes))
let type_label = encapsulation.label();
let base64 = Base64Decoder::new_wrapped(encapsulation.encapsulated_text, line_width)?;
Ok(Self { type_label, base64 })
}

/// Get the PEM type label for the input document.
pub fn type_label(&self) -> &'i str {
self.type_label
}

/// Decode data into the provided output buffer.
///
/// There must be at least as much remaining Base64 input to be decoded
/// in order to completely fill `buf`.
pub fn decode<'o>(&mut self, buf: &'o mut [u8]) -> Result<&'o [u8]> {
Ok(self.base64.decode(buf)?)
}

/// Decode a PEM document according to RFC 7468's "Strict" grammar, returning
/// the result as a [`Vec`] upon success.
/// Decode all of the remaining data in the input buffer into `buf`.
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub fn decode_vec<'a>(&self, pem: &'a [u8]) -> Result<(&'a str, Vec<u8>)> {
let encapsulation = Encapsulation::try_from(pem)?;
let label = encapsulation.label();
pub fn decode_to_end<'o>(&mut self, buf: &'o mut Vec<u8>) -> Result<&'o [u8]> {
Ok(self.base64.decode_to_end(buf)?)
}

// count all chars (gives over-estimation, due to whitespace)
let max_len = encapsulation.encapsulated_text.len() * 3 / 4;
/// Get the decoded length of the remaining PEM data after Base64 decoding.
pub fn decoded_len(&self) -> usize {
self.base64.decoded_len()
}

let mut result = vec![0u8; max_len];
let decoded_len = encapsulation.decode(self, &mut result)?.len();
/// Are we finished decoding the PEM input?
pub fn is_finished(&self) -> bool {
self.base64.is_finished()
}

// Actual encoded length can be slightly shorter than estimated
// TODO(tarcieri): more reliable length estimation
result.truncate(decoded_len);
Ok((label, result))
/// Consume this PEM decoder and return the inner [`Base64Decoder`].
pub fn into_base64_decoder(self) -> Base64Decoder<'i> {
self.base64
}
}

impl Default for Decoder {
fn default() -> Self {
Self {
wrap_width: crate::BASE64_WRAP_WIDTH,
}
impl<'i> From<Decoder<'i>> for Base64Decoder<'i> {
fn from(decoder: Decoder<'i>) -> Base64Decoder<'i> {
decoder.base64
}
}

#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl<'i> io::Read for Decoder<'i> {
    /// Read decoded bytes into `dst` by forwarding to the inner Base64
    /// decoder's `read`.
    fn read(&mut self, dst: &mut [u8]) -> io::Result<usize> {
        self.base64.read(dst)
    }

    /// Append all remaining decoded bytes to `sink` by forwarding to the
    /// inner Base64 decoder's `read_to_end`.
    fn read_to_end(&mut self, sink: &mut Vec<u8>) -> io::Result<usize> {
        self.base64.read_to_end(sink)
    }

    /// Fill `dst` exactly by forwarding to the inner Base64 decoder's
    /// `read_exact`.
    fn read_exact(&mut self, dst: &mut [u8]) -> io::Result<()> {
        self.base64.read_exact(dst)
    }
}

Expand Down Expand Up @@ -185,51 +244,6 @@ impl<'a> Encapsulation<'a> {
pub fn label(self) -> &'a str {
self.label
}

/// Get an iterator over the (allegedly) Base64-encoded lines of the
/// encapsulated text.
pub fn encapsulated_text(self, wrap_width: usize) -> Result<Lines<'a>> {
if (wrap_width > 0) && (wrap_width % 4 == 0) {
Ok(Lines {
bytes: self.encapsulated_text,
is_start: true,
wrap_width,
})
} else {
Err(Error::Base64)
}
}

/// Decode the "encapsulated text", i.e. Base64-encoded data which lies between
/// the pre/post-encapsulation boundaries.
fn decode<'o>(&self, decoder: &Decoder, buf: &'o mut [u8]) -> Result<&'o [u8]> {
// Ensure wrap width is supported.
if (decoder.wrap_width == 0) || (decoder.wrap_width % 4 != 0) {
return Err(Error::Base64);
}

let mut out_len = 0;

for line in self.encapsulated_text(decoder.wrap_width)? {
let line = line?;

match Base64::decode(line, &mut buf[out_len..]) {
Err(error) => {
// in the case that we are decoding the first line
// and we error, then attribute the error to an unsupported header
// if a colon char is present in the line
if out_len == 0 && line.iter().any(|&b| b == grammar::CHAR_COLON) {
return Err(Error::HeaderDisallowed);
} else {
return Err(error.into());
}
}
Ok(out) => out_len += out.len(),
}
}

Ok(&buf[..out_len])
}
}

impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
Expand All @@ -240,73 +254,24 @@ impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
}
}

/// Iterator over the lines in the encapsulated text.
struct Lines<'a> {
/// Remaining data being iterated over.
bytes: &'a [u8],

/// `true` if no lines have been read.
is_start: bool,

/// Base64 line-wrapping width in bytes.
wrap_width: usize,
}

impl<'a> Iterator for Lines<'a> {
type Item = Result<&'a [u8]>;

fn next(&mut self) -> Option<Self::Item> {
if self.bytes.len() > self.wrap_width {
let (line, rest) = self.bytes.split_at(self.wrap_width);
if let Some(rest) = grammar::strip_leading_eol(rest) {
self.is_start = false;
self.bytes = rest;
Some(Ok(line))
} else {
// if bytes remaining does not split at `wrap_width` such
// that the next char(s) in the rest is vertical whitespace
// then attribute the error generically as `EncapsulatedText`
// unless we are at the first line and the line contains a colon
// then it may be a unsupported header
Some(Err(
if self.is_start && line.iter().any(|&b| b == grammar::CHAR_COLON) {
Error::HeaderDisallowed
} else {
Error::EncapsulatedText
},
))
}
} else if !self.bytes.is_empty() {
let line = self.bytes;
self.bytes = &[];
Some(Ok(line))
} else {
None
}
}
}

#[cfg(test)]
mod tests {
use super::Encapsulation;
use crate::BASE64_WRAP_WIDTH;

#[test]
fn pkcs8_example() {
let pem = include_bytes!("../tests/examples/pkcs8.pem");
let result = Encapsulation::parse(pem).unwrap();
assert_eq!(result.label, "PRIVATE KEY");
let encapsulation = Encapsulation::parse(pem).unwrap();
assert_eq!(encapsulation.label, "PRIVATE KEY");

let mut lines = result.encapsulated_text(BASE64_WRAP_WIDTH).unwrap();
assert_eq!(
lines.next().unwrap().unwrap(),
encapsulation.encapsulated_text,
&[
77, 67, 52, 67, 65, 81, 65, 119, 66, 81, 89, 68, 75, 50, 86, 119, 66, 67, 73, 69,
73, 66, 102, 116, 110, 72, 80, 112, 50, 50, 83, 101, 119, 89, 109, 109, 69, 111,
77, 99, 88, 56, 86, 119, 73, 52, 73, 72, 119, 97, 113, 100, 43, 57, 76, 70, 80,
106, 47, 49, 53, 101, 113, 70
]
);
assert_eq!(lines.next(), None);
}
}
32 changes: 18 additions & 14 deletions pem-rfc7468/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub type Result<T> = core::result::Result<T, Error>;
#[non_exhaustive]
pub enum Error {
/// Base64-related errors.
Base64,
Base64(base64ct::Error),

/// Character encoding-related errors.
CharacterEncoding,
Expand Down Expand Up @@ -39,26 +39,30 @@ pub enum Error {

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(match self {
Error::Base64 => "PEM Base64 error",
Error::CharacterEncoding => "PEM character encoding error",
Error::EncapsulatedText => "PEM error in encapsulated text",
Error::HeaderDisallowed => "PEM headers disallowed by RFC7468",
Error::Label => "PEM type label invalid",
Error::Length => "PEM length invalid",
Error::Preamble => "PEM preamble contains invalid data (NUL byte)",
Error::PreEncapsulationBoundary => "PEM error in pre-encapsulation boundary",
Error::PostEncapsulationBoundary => "PEM error in post-encapsulation boundary",
})
match self {
Error::Base64(err) => write!(f, "PEM Base64 error: {}", err),
Error::CharacterEncoding => f.write_str("PEM character encoding error"),
Error::EncapsulatedText => f.write_str("PEM error in encapsulated text"),
Error::HeaderDisallowed => f.write_str("PEM headers disallowed by RFC7468"),
Error::Label => f.write_str("PEM type label invalid"),
Error::Length => f.write_str("PEM length invalid"),
Error::Preamble => f.write_str("PEM preamble contains invalid data (NUL byte)"),
Error::PreEncapsulationBoundary => {
f.write_str("PEM error in pre-encapsulation boundary")
}
Error::PostEncapsulationBoundary => {
f.write_str("PEM error in post-encapsulation boundary")
}
}
}
}

#[cfg(feature = "std")]
impl std::error::Error for Error {}

impl From<base64ct::Error> for Error {
fn from(_: base64ct::Error) -> Error {
Error::Base64
fn from(err: base64ct::Error) -> Error {
Error::Base64(err)
}
}

Expand Down
Loading

0 comments on commit cd23518

Please sign in to comment.