Skip to content

Commit

Permalink
Fix unpacking of filenames which contain UTF-8 characters
Browse files Browse the repository at this point in the history
Change from the C to the C.UTF-8 locale, allowing libarchive to handle
filenames in UTF-8. We restrict the change to LC_CTYPE only, since
libarchive only needs the character set to be set.

See the libarchive website for a more complete description of the issue:

  libarchive/libarchive#587
  https://github.com/libarchive/libarchive/wiki/Filenames

Once the uncompress operation completes, we restore the original
LC_CTYPE to avoid side effects.

Signed-off-by: Otavio Salvador <otavio@ossystems.com.br>
  • Loading branch information
otavio committed Mar 1, 2021
1 parent dbed3fe commit 233a1fd
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -33,6 +33,7 @@ futures-executor = { version = "0.3.5", optional = true }
blocking = { version = "1.0.0", optional = true }
tokio = { version = "1.0.0", features = ["rt-multi-thread", "macros", "fs", "net"], optional = true }
tokio-util = { version = "0.6.0", features = ["compat"], optional = true }
libc = "0.2.86"

[features]
async_support = ["async-trait", "futures-channel", "futures-core", "futures-io", "futures-util", "futures-executor"]
Expand Down
63 changes: 63 additions & 0 deletions src/ffi/mod.rs
Expand Up @@ -5,3 +5,66 @@
mod generated;

pub(crate) use crate::ffi::generated::*;

// Wrapper around the generated `archive_read_next_header` binding.
//
// The binding is invoked between a call to `set_utf8_locale` and a call to
// `restore_locale`, so the locale switch only spans this single header read.
// The value returned is exactly what the generated binding returned.
pub(crate) unsafe fn archive_read_next_header(
    archive: *mut archive,
    entry: *mut *mut archive_entry,
) -> ::std::os::raw::c_int {
    let saved_locale = set_utf8_locale();
    let status = generated::archive_read_next_header(archive, entry);
    // Hand the previous locale back before reporting the result.
    restore_locale(saved_locale);
    status
}

// Change from the C to a UTF-8 locale, allowing libarchive to handle
// filenames in UTF-8. Only LC_CTYPE is changed, since libarchive only
// needs the character set to be set.
//
// See the libarchive project for a more complete description of the issue:
//
// https://github.com/libarchive/libarchive/issues/587
// https://github.com/libarchive/libarchive/wiki/Filenames
//
// Returns the handle reported by `uselocale`, i.e. the locale that was in
// effect before the switch, so the caller can pass it to `restore_locale`.
#[cfg(unix)]
unsafe fn set_utf8_locale() -> libc::locale_t {
    // macOS names its UTF-8 character-set locale "UTF-8"; every other Unix
    // target falls back to "C.UTF-8". Using `not(macos)` instead of `linux`
    // keeps `locale` defined on the remaining `cfg(unix)` targets, which
    // previously failed to compile.
    #[cfg(target_os = "macos")]
    let locale = b"UTF-8\0";

    #[cfg(not(target_os = "macos"))]
    let locale = b"C.UTF-8\0";

    let utf8_locale = libc::newlocale(
        libc::LC_CTYPE_MASK,
        // SAFETY: the literal above is terminated by exactly one NUL byte.
        std::ffi::CStr::from_bytes_with_nul_unchecked(locale).as_ptr(),
        std::ptr::null_mut(),
    );

    // NOTE(review): per POSIX, `newlocale` yields a null handle on failure and
    // `uselocale` with a null handle only queries the current locale, so a
    // missing UTF-8 locale should leave the locale untouched — confirm for
    // the targets you ship on.
    libc::uselocale(utf8_locale)
}

// Restore the original LC_CTYPE after extraction to avoid side effects, and
// release the temporary locale object that was active in the meantime.
//
// `old_locale` is the handle previously returned by `set_utf8_locale`.
#[cfg(unix)]
unsafe fn restore_locale(old_locale: libc::locale_t) {
    // A null handle would make `uselocale` a pure query; there is nothing to
    // restore and we must not free whatever locale happens to be current.
    if old_locale.is_null() {
        return;
    }

    // `uselocale` hands back the locale it replaces, which is the object that
    // `set_utf8_locale` created with `newlocale`.
    let temporary = libc::uselocale(old_locale);

    // Free that object so one locale is not leaked per call. If the UTF-8
    // locale could not be created nothing was switched, the replaced handle is
    // `old_locale` itself, and it must be left alone.
    if !temporary.is_null() && temporary != old_locale {
        libc::freelocale(temporary);
    }
}

// Windows counterpart of the locale switch: sets LC_CTYPE to ".UTF-8" via
// `setlocale` and returns an owned copy of the locale name that was reported
// before the switch, so it can later be given to `restore_locale`.
#[cfg(windows)]
unsafe fn set_utf8_locale() -> std::ffi::CString {
    // Query first (null locale argument) and copy the name into an owned
    // `CString` before `setlocale` is called again.
    let current_name = libc::setlocale(libc::LC_CTYPE, std::ptr::null());
    let saved_name = std::ffi::CStr::from_ptr(current_name).to_owned();

    // SAFETY: the literal is terminated by exactly one NUL byte.
    let utf8_name = std::ffi::CStr::from_bytes_with_nul_unchecked(b".UTF-8\0");
    libc::setlocale(libc::LC_CTYPE, utf8_name.as_ptr());

    saved_name
}

// Windows counterpart of the restore step: sets LC_CTYPE back to the locale
// name saved by `set_utf8_locale`.
#[cfg(windows)]
unsafe fn restore_locale(old_locale: std::ffi::CString) {
    // `old_locale` is owned by this function, so the pointer stays valid for
    // the duration of the call below.
    let saved_name = old_locale.as_ptr();
    libc::setlocale(libc::LC_CTYPE, saved_name);
}
Binary file added tests/fixtures/utf8.tar
Binary file not shown.
15 changes: 15 additions & 0 deletions tests/integration_test.rs
Expand Up @@ -309,6 +309,21 @@ fn uncompress_to_dir_not_preserve_owner() {
);
}

// Extracts the `utf8.tar` fixture into a temporary directory and checks that
// the entry whose name contains non-ASCII characters exists on disk.
#[test]
fn uncompress_to_dir_with_utf8_pathname() {
    let dir = tempfile::TempDir::new().expect("Failed to create the tmp directory");
    let mut source = std::fs::File::open("tests/fixtures/utf8.tar").unwrap();

    uncompress_archive(&mut source, dir.path(), Ownership::Ignore)
        .expect("Failed to uncompress the file");

    // `assert!` on the boolean directly instead of comparing it with `true`.
    let extracted = dir.path().join("utf-8-file-name-őúíá");
    assert!(extracted.exists(), "the path doesn't exist");
}

#[test]
fn uncompress_same_file_not_preserve_owner() {
uncompress_archive(
Expand Down

0 comments on commit 233a1fd

Please sign in to comment.