Skip to content

Commit

Permalink
Fix unpacking of filenames that contain UTF-8 characters
Browse files Browse the repository at this point in the history
Change from the C to the C.UTF-8 locale, allowing libarchive to handle
filenames in UTF-8. We change only LC_CTYPE, since libarchive only
needs the character set to be set.

See the libarchive website for a more complete description of the issue:

  libarchive/libarchive#587
  https://github.com/libarchive/libarchive/wiki/Filenames

Once the uncompress operation completes, we restore the original
LC_CTYPE to avoid side effects.

Signed-off-by: Otavio Salvador <otavio@ossystems.com.br>
  • Loading branch information
otavio committed Feb 28, 2021
1 parent d84609b commit 02dabea
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ futures-executor = { version = "0.3.5", optional = true }
blocking = { version = "1.0.0", optional = true }
tokio = { version = "1.0.0", features = ["rt-multi-thread", "macros", "fs", "net"], optional = true }
tokio-util = { version = "0.6.0", features = ["compat"], optional = true }
libc = "0.2.86"

[features]
async_support = ["async-trait", "futures-channel", "futures-core", "futures-io", "futures-util", "futures-executor"]
Expand Down
57 changes: 55 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,11 @@ where
archive_reader,
)?;

f(archive_reader, archive_writer, archive_entry)
let old_locale = set_utf8_locale();
let r = f(archive_reader, archive_writer, archive_entry);
restore_locale(old_locale);

r
})();

archive_result(ffi::archive_read_close(archive_reader), archive_reader)?;
Expand Down Expand Up @@ -421,7 +425,11 @@ where
archive_reader,
)?;

f(archive_reader, archive_writer, archive_entry)
let old_locale = set_utf8_locale();
let r = f(archive_reader, archive_writer, archive_entry);
restore_locale(old_locale);

r
})();

archive_result(ffi::archive_read_close(archive_reader), archive_reader)?;
Expand Down Expand Up @@ -547,3 +555,48 @@ unsafe extern "C" fn libarchive_read_callback(
}
}
}

/// Switches the calling thread's `LC_CTYPE` from the C locale to a UTF-8
/// locale, allowing libarchive to handle filenames in UTF-8, and returns the
/// locale handle that was in effect before the switch.
///
/// Only `LC_CTYPE` is changed, since libarchive only needs the charset set.
///
/// See the libarchive project for a more complete description of the issue:
///
/// * <https://github.com/libarchive/libarchive/issues/587>
/// * <https://github.com/libarchive/libarchive/wiki/Filenames>
///
/// # Safety
///
/// Calls the C locale API through FFI. The returned handle must be passed to
/// `restore_locale` on the same thread once the archive operation is done.
#[cfg(unix)]
unsafe fn set_utf8_locale() -> libc::locale_t {
    // macOS names its charset-only locale "UTF-8"; every other Unix target
    // uses "C.UTF-8". Selecting with `not(macos)` (instead of `linux` only)
    // keeps the function compiling on the remaining `cfg(unix)` targets.
    #[cfg(target_os = "macos")]
    const UTF8_LOCALE: &[u8] = b"UTF-8\0";
    #[cfg(not(target_os = "macos"))]
    const UTF8_LOCALE: &[u8] = b"C.UTF-8\0";

    let utf8_locale = libc::newlocale(
        libc::LC_CTYPE_MASK,
        UTF8_LOCALE.as_ptr().cast(),
        std::ptr::null_mut(),
    );

    // If `newlocale` failed it returned a null handle; `uselocale` with a null
    // handle only queries the current locale, so the thread locale is left
    // untouched and the caller still gets a handle it can safely restore.
    libc::uselocale(utf8_locale)
}

/// Restores the original `LC_CTYPE` after extraction to avoid side effects,
/// and releases the temporary locale that was installed in the meantime.
///
/// # Safety
///
/// `old_locale` must be the handle returned by the matching
/// `set_utf8_locale` call on this same thread, and nothing else may have
/// replaced the thread's locale between the two calls.
#[cfg(unix)]
unsafe fn restore_locale(old_locale: libc::locale_t) {
    // A null handle means the previous locale was never captured; passing it
    // to `uselocale` would only query, so there is nothing to restore.
    if old_locale.is_null() {
        return;
    }

    // `uselocale` hands back the locale that was installed until now, i.e.
    // the temporary UTF-8 locale created for the extraction.
    let installed = libc::uselocale(old_locale);

    // Free it so repeated extractions do not leak one locale object each.
    // When it equals `old_locale` no switch ever happened (creating the UTF-8
    // locale failed), and the handle is not ours to free.
    if !installed.is_null() && installed != old_locale {
        libc::freelocale(installed);
    }
}

/// Switches `LC_CTYPE` to the `.UTF-8` locale and returns an owned copy of the
/// locale name that was in effect before the switch.
///
/// The returned pointer is null when the previous locale could not be
/// queried; otherwise it owns a heap allocation that must be handed to
/// `restore_locale`, which both restores the locale and frees the copy.
///
/// # Safety
///
/// Calls the C runtime's `setlocale` through FFI. NOTE(review): unless
/// per-thread locales are enabled in the CRT, `setlocale` changes the locale
/// for the whole process, so concurrent locale users may observe the switch.
#[cfg(windows)]
unsafe fn set_utf8_locale() -> *mut i8 {
    // A null locale argument only queries the current setting.
    let current = libc::setlocale(libc::LC_CTYPE, std::ptr::null());

    // The CRT may reuse the returned buffer on the next `setlocale` call, so
    // take an owned copy before switching.
    let saved = if current.is_null() {
        std::ptr::null_mut()
    } else {
        std::ffi::CStr::from_ptr(current).to_owned().into_raw()
    };

    libc::setlocale(libc::LC_CTYPE, b".UTF-8\0".as_ptr().cast());

    saved
}

/// Restores the `LC_CTYPE` locale saved by `set_utf8_locale` and frees the
/// saved copy of its name.
///
/// # Safety
///
/// `old_locale` must be null or a pointer returned by `set_utf8_locale` that
/// has not been passed to this function before.
#[cfg(windows)]
unsafe fn restore_locale(old_locale: *mut i8) {
    if old_locale.is_null() {
        return;
    }

    // Reclaim ownership so the copy is dropped once the locale is restored.
    let saved = std::ffi::CString::from_raw(old_locale);
    libc::setlocale(libc::LC_CTYPE, saved.as_ptr());
}
Binary file added tests/fixtures/utf8.tar
Binary file not shown.
15 changes: 15 additions & 0 deletions tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,21 @@ fn uncompress_to_dir_not_preserve_owner() {
);
}

/// Extracting an archive whose entry name contains non-ASCII UTF-8 characters
/// must create that entry under the destination directory.
#[test]
fn uncompress_to_dir_with_utf8_pathname() {
    let dir = tempfile::TempDir::new().expect("Failed to create the tmp directory");
    let mut source = std::fs::File::open("tests/fixtures/utf8.tar")
        .expect("Failed to open the fixture archive");

    uncompress_archive(&mut source, dir.path(), Ownership::Ignore)
        .expect("Failed to uncompress the file");

    assert!(
        dir.path().join("utf-8-file-name-őúíá").exists(),
        "the path doesn't exist"
    );
}

#[test]
fn uncompress_same_file_not_preserve_owner() {
uncompress_archive(
Expand Down

0 comments on commit 02dabea

Please sign in to comment.