Skip to content

Commit

Permalink
Merge pull request #229 from miam-miam100/main
Browse files Browse the repository at this point in the history
Add support for zstd dictionaries
  • Loading branch information
NobodyXu committed Jul 10, 2023
2 parents 0e2d166 + 2bd88a7 commit 24556b1
Show file tree
Hide file tree
Showing 18 changed files with 521 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0),

## Unreleased

- Add `Zstd{Encoder,Decoder}::with_dict()` constructors.
- Add `zstdmt` crate feature that enables `zstd-safe/zstdmt`, allowing multi-threaded functionality to work as expected.

## 0.4.0 - 2023-05-10
Expand Down
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ required-features = ["zlib"]
name = "zstd"
required-features = ["zstd"]

[[test]]
name = "zstd-dict"
required-features = ["zstd", "tokio"]

[[example]]
name = "zlib_tokio_write"
required-features = ["zlib", "tokio"]
Expand Down
8 changes: 8 additions & 0 deletions src/codec/zstd/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::io;
use std::io::Result;

use crate::{codec::Decode, unshared::Unshared, util::PartialBuffer};
Expand All @@ -14,6 +15,13 @@ impl ZstdDecoder {
decoder: Unshared::new(Decoder::new().unwrap()),
}
}

/// Creates a zstd decoder codec that decompresses using the given pre-trained
/// `dictionary`.
///
/// # Errors
///
/// Propagates the error returned by `Decoder::with_dictionary` when the raw
/// decoder cannot be created from `dictionary`.
pub(crate) fn new_with_dict(dictionary: &[u8]) -> io::Result<Self> {
    // The decoder is moved straight into `Unshared` and never mutated here,
    // so the binding does not need to be `mut`.
    let decoder = Decoder::with_dictionary(dictionary)?;
    Ok(Self {
        decoder: Unshared::new(decoder),
    })
}
}

impl Decode for ZstdDecoder {
Expand Down
8 changes: 8 additions & 0 deletions src/codec/zstd/encoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::{codec::Encode, unshared::Unshared, util::PartialBuffer};
use libzstd::stream::raw::{CParameter, Encoder, Operation};
use std::io;
use std::io::Result;

#[derive(Debug)]
Expand All @@ -23,6 +24,13 @@ impl ZstdEncoder {
encoder: Unshared::new(encoder),
}
}

/// Creates a zstd encoder codec that compresses at `level` using the given
/// pre-trained `dictionary`.
///
/// # Errors
///
/// Propagates the error returned by `Encoder::with_dictionary` when the raw
/// encoder cannot be created from `level` and `dictionary`.
pub(crate) fn new_with_dict(level: i32, dictionary: &[u8]) -> io::Result<Self> {
    // The encoder is moved straight into `Unshared` and never mutated here,
    // so the binding does not need to be `mut`.
    let encoder = Encoder::with_dictionary(level, dictionary)?;
    Ok(Self {
        encoder: Unshared::new(encoder),
    })
}
}

impl Encode for ZstdEncoder {
Expand Down
4 changes: 3 additions & 1 deletion src/futures/bufread/macros/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
macro_rules! decoder {
($(#[$attr:meta])* $name:ident) => {
($(#[$attr:meta])* $name:ident $({ $($inherent_methods:tt)* })*) => {
pin_project_lite::pin_project! {
$(#[$attr])*
///
Expand All @@ -21,6 +21,8 @@ macro_rules! decoder {
}
}

$($($inherent_methods)*)*

/// Configure multi-member/frame decoding, if enabled this will reset the decoder state
/// when reaching the end of a compressed member/frame and expect either EOF or another
/// compressed member/frame to follow it in the stream.
Expand Down
4 changes: 2 additions & 2 deletions src/futures/bufread/macros/encoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
macro_rules! encoder {
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($inherent_methods:tt)* })*) => {
pin_project_lite::pin_project! {
$(#[$attr])*
///
Expand All @@ -17,7 +17,7 @@ macro_rules! encoder {
/// Creates a new encoder which will read uncompressed data from the given stream
/// and emit a compressed stream.
///
$($constructor)*
$($inherent_methods)*
)*

/// Acquires a reference to the underlying reader that this encoder is wrapping.
Expand Down
4 changes: 3 additions & 1 deletion src/futures/write/macros/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
macro_rules! decoder {
($(#[$attr:meta])* $name:ident) => {
($(#[$attr:meta])* $name:ident $({ $($inherent_methods:tt)* })*) => {
pin_project_lite::pin_project! {
$(#[$attr])*
///
Expand All @@ -21,6 +21,8 @@ macro_rules! decoder {
}
}

$($($inherent_methods)*)*

/// Acquires a reference to the underlying writer that this decoder is wrapping.
pub fn get_ref(&self) -> &W {
self.inner.get_ref()
Expand Down
4 changes: 2 additions & 2 deletions src/futures/write/macros/encoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
macro_rules! encoder {
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($inherent_methods:tt)* })*) => {
pin_project_lite::pin_project! {
$(#[$attr])*
///
Expand All @@ -17,7 +17,7 @@ macro_rules! encoder {
/// Creates a new encoder which will take in uncompressed data and write it
/// compressed to the given stream.
///
$($constructor)*
$($inherent_methods)*
)*

/// Acquires a reference to the underlying writer that this encoder is wrapping.
Expand Down
111 changes: 93 additions & 18 deletions src/macros.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
macro_rules! algos {
(@algo $algo:ident [$algo_s:expr] $decoder:ident $encoder:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
(@algo $algo:ident [$algo_s:expr] $decoder:ident $encoder:ident<$inner:ident>
{ @enc $($encoder_methods:tt)* }
{ @dec $($decoder_methods:tt)* }
) => {
#[cfg(feature = $algo_s)]
decoder! {
#[doc = concat!("A ", $algo_s, " decoder, or decompressor")]
#[cfg(feature = $algo_s)]
$decoder

{ $($decoder_methods)* }
}

#[cfg(feature = $algo_s)]
Expand All @@ -15,12 +20,15 @@ macro_rules! algos {
pub fn new(inner: $inner) -> Self {
Self::with_quality(inner, crate::Level::Default)
}
} $({ $($constructor)* })*
}

{ $($encoder_methods)* }
}
};

($($mod:ident)::+<$inner:ident>) => {
algos!(@algo brotli ["brotli"] BrotliDecoder BrotliEncoder<$inner> {
algos!(@algo brotli ["brotli"] BrotliDecoder BrotliEncoder<$inner>
{ @enc
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
let params = brotli::enc::backward_references::BrotliEncoderParams::default();
Self {
Expand All @@ -30,9 +38,13 @@ macro_rules! algos {
),
}
}
});
}
{ @dec }
);

algos!(@algo bzip2 ["bzip2"] BzDecoder BzEncoder<$inner>
{ @enc

algos!(@algo bzip2 ["bzip2"] BzDecoder BzEncoder<$inner> {
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
Self {
inner: crate::$($mod::)+generic::Encoder::new(
Expand All @@ -41,9 +53,12 @@ macro_rules! algos {
),
}
}
});
}
{ @dec }
);

algos!(@algo deflate ["deflate"] DeflateDecoder DeflateEncoder<$inner> {
algos!(@algo deflate ["deflate"] DeflateDecoder DeflateEncoder<$inner>
{ @enc
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
Self {
inner: crate::$($mod::)+generic::Encoder::new(
Expand All @@ -52,9 +67,13 @@ macro_rules! algos {
),
}
}
});
}
{ @dec }
);

algos!(@algo gzip ["gzip"] GzipDecoder GzipEncoder<$inner>
{ @enc

algos!(@algo gzip ["gzip"] GzipDecoder GzipEncoder<$inner> {
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
Self {
inner: crate::$($mod::)+generic::Encoder::new(
Expand All @@ -63,9 +82,12 @@ macro_rules! algos {
),
}
}
});
}
{ @dec }
);

algos!(@algo zlib ["zlib"] ZlibDecoder ZlibEncoder<$inner> {
algos!(@algo zlib ["zlib"] ZlibDecoder ZlibEncoder<$inner>
{ @enc
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
Self {
inner: crate::$($mod::)+generic::Encoder::new(
Expand All @@ -74,9 +96,13 @@ macro_rules! algos {
),
}
}
});
}
{ @dec }
);

algos!(@algo zstd ["zstd"] ZstdDecoder ZstdEncoder<$inner>
{ @enc

algos!(@algo zstd ["zstd"] ZstdDecoder ZstdEncoder<$inner> {
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
Self {
inner: crate::$($mod::)+generic::Encoder::new(
Expand Down Expand Up @@ -106,9 +132,52 @@ macro_rules! algos {
),
}
}
});

algos!(@algo xz ["xz"] XzDecoder XzEncoder<$inner> {
/// Creates a new encoder, using the specified compression level and pre-trained
/// dictionary, which will read uncompressed data from the given stream and emit a
/// compressed stream.
///
/// Dictionaries provide better compression ratios for small files, but the same
/// dictionary is required to be present during decompression.
///
/// # Errors
///
/// Returns an error if `dictionary` is not valid.
pub fn with_dict(inner: $inner, level: crate::Level, dictionary: &[u8]) -> ::std::io::Result<Self> {
Ok(Self {
inner: crate::$($mod::)+generic::Encoder::new(
inner,
crate::codec::ZstdEncoder::new_with_dict(level.into_zstd(), dictionary)?,
),
})
}
}
{ @dec
/// Creates a new decoder, using the specified pre-trained dictionary, which will
/// read compressed data from the given stream and emit an uncompressed stream.
///
/// Dictionaries provide better compression ratios for small files, but are required to
/// be present during decompression. The dictionary used must be the same as the one
/// used for compression.
///
/// # Errors
///
/// Returns an error if `dictionary` is not valid.
pub fn with_dict(inner: $inner, dictionary: &[u8]) -> ::std::io::Result<Self> {
Ok(Self {
inner: crate::$($mod::)+generic::Decoder::new(
inner,
crate::codec::ZstdDecoder::new_with_dict(dictionary)?,
),
})
}
}
);

algos!(@algo xz ["xz"] XzDecoder XzEncoder<$inner>
{ @enc

pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
Self {
inner: crate::$($mod::)+generic::Encoder::new(
Expand All @@ -117,9 +186,13 @@ macro_rules! algos {
),
}
}
});
}
{ @dec }
);

algos!(@algo lzma ["lzma"] LzmaDecoder LzmaEncoder<$inner>
{ @enc

algos!(@algo lzma ["lzma"] LzmaDecoder LzmaEncoder<$inner> {
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
Self {
inner: crate::$($mod::)+generic::Encoder::new(
Expand All @@ -128,6 +201,8 @@ macro_rules! algos {
),
}
}
});
}
{ @dec }
);
}
}
4 changes: 3 additions & 1 deletion src/tokio/bufread/macros/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
macro_rules! decoder {
($(#[$attr:meta])* $name:ident) => {
($(#[$attr:meta])* $name:ident $({ $($inherent_methods:tt)* })*) => {
pin_project_lite::pin_project! {
$(#[$attr])*
///
Expand All @@ -21,6 +21,8 @@ macro_rules! decoder {
}
}

$($($inherent_methods)*)*

/// Configure multi-member/frame decoding, if enabled this will reset the decoder state
/// when reaching the end of a compressed member/frame and expect either EOF or another
/// compressed member/frame to follow it in the stream.
Expand Down
4 changes: 2 additions & 2 deletions src/tokio/bufread/macros/encoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
macro_rules! encoder {
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($inherent_methods:tt)* })*) => {
pin_project_lite::pin_project! {
$(#[$attr])*
///
Expand All @@ -17,7 +17,7 @@ macro_rules! encoder {
/// Creates a new encoder which will read uncompressed data from the given stream
/// and emit a compressed stream.
///
$($constructor)*
$($inherent_methods)*
)*

/// Acquires a reference to the underlying reader that this encoder is wrapping.
Expand Down
4 changes: 3 additions & 1 deletion src/tokio/write/macros/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
macro_rules! decoder {
($(#[$attr:meta])* $name:ident) => {
($(#[$attr:meta])* $name:ident $({ $($inherent_methods:tt)* })*) => {
pin_project_lite::pin_project! {
$(#[$attr])*
///
Expand All @@ -21,6 +21,8 @@ macro_rules! decoder {
}
}

$($($inherent_methods)*)*

/// Acquires a reference to the underlying writer that this decoder is wrapping.
pub fn get_ref(&self) -> &W {
self.inner.get_ref()
Expand Down
4 changes: 2 additions & 2 deletions src/tokio/write/macros/encoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
macro_rules! encoder {
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($inherent_methods:tt)* })*) => {
pin_project_lite::pin_project! {
$(#[$attr])*
///
Expand All @@ -17,7 +17,7 @@ macro_rules! encoder {
/// Creates a new encoder which will take in uncompressed data and write it
/// compressed to the given stream.
///
$($constructor)*
$($inherent_methods)*
)*

/// Acquires a reference to the underlying writer that this encoder is wrapping.
Expand Down
Binary file added tests/artifacts/dictionary-rust
Binary file not shown.
Binary file added tests/artifacts/dictionary-rust-other
Binary file not shown.

0 comments on commit 24556b1

Please sign in to comment.