Skip to content

Commit

Permalink
Fix unpacking of filenames which contain UTF-8 characters
Browse files Browse the repository at this point in the history
Change from the C to C.UTF-8 locale, allowing libarchive to handle
filenames in UTF-8. We restrict the change to LC_CTYPE only, since
libarchive only needs the character set to be configured.

See the libarchive website for a more complete description of the issue:

  libarchive/libarchive#587
  https://github.com/libarchive/libarchive/wiki/Filenames

Once the uncompress operation completes, we restore the original
LC_CTYPE to avoid side effects.

Signed-off-by: Otavio Salvador <otavio@ossystems.com.br>
  • Loading branch information
otavio committed Feb 28, 2021
1 parent 2d62dee commit 6434866
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ futures-executor = { version = "0.3.5", optional = true }
blocking = { version = "1.0.0", optional = true }
tokio = { version = "1.0.0", features = ["rt-multi-thread", "macros", "fs", "net"], optional = true }
tokio-util = { version = "0.6.0", features = ["compat"], optional = true }
libc = "0.2.86"

[features]
async_support = ["async-trait", "futures-channel", "futures-core", "futures-io", "futures-util", "futures-executor"]
Expand Down
57 changes: 55 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,11 @@ where
archive_reader,
)?;

f(archive_reader, archive_writer, archive_entry)
let old_locale = set_utf8_locale();
let r = f(archive_reader, archive_writer, archive_entry);
restore_locale(old_locale);

r
})();

archive_result(ffi::archive_read_close(archive_reader), archive_reader)?;
Expand Down Expand Up @@ -421,7 +425,11 @@ where
archive_reader,
)?;

f(archive_reader, archive_writer, archive_entry)
let old_locale = set_utf8_locale();
let r = f(archive_reader, archive_writer, archive_entry);
restore_locale(old_locale);

r
})();

archive_result(ffi::archive_read_close(archive_reader), archive_reader)?;
Expand Down Expand Up @@ -547,3 +555,48 @@ unsafe extern "C" fn libarchive_read_callback(
}
}
}

/// Switches the calling thread to a UTF-8 `LC_CTYPE` locale, allowing
/// libarchive to handle filenames in UTF-8.
///
/// Only the `LC_CTYPE` category is requested from the UTF-8 locale, since
/// libarchive only needs the character set. Because the new locale object is
/// built without a base locale, POSIX fills the remaining categories from the
/// default "POSIX" locale while it is installed.
///
/// See the libarchive website for a more complete description of the issue:
///
///   https://github.com/libarchive/libarchive/issues/587
///   https://github.com/libarchive/libarchive/wiki/Filenames
///
/// Several locale names are tried in order, because no single UTF-8 locale
/// name is available on every Unix system. If none of them can be created the
/// thread locale is left untouched.
///
/// # Returns
///
/// The locale handle that was in effect for the thread before the call (the
/// result of `uselocale`). It must be handed to `restore_locale` once the
/// extraction is finished.
///
/// # Safety
///
/// Calls into the C locale API. The returned handle must only be used to
/// restore the locale on the same thread that called this function.
#[cfg(unix)]
unsafe fn set_utf8_locale() -> libc::locale_t {
    // Keep each platform's historical first choice, then fall back to other
    // commonly installed UTF-8 locales so every Unix target compiles and has
    // a chance of finding a usable charset.
    #[cfg(target_os = "macos")]
    const CANDIDATES: [&[u8]; 3] = [b"UTF-8\0", b"C.UTF-8\0", b"en_US.UTF-8\0"];

    #[cfg(not(target_os = "macos"))]
    const CANDIDATES: [&[u8]; 3] = [b"C.UTF-8\0", b"UTF-8\0", b"en_US.UTF-8\0"];

    for name in CANDIDATES.iter() {
        let utf8_locale = libc::newlocale(
            libc::LC_CTYPE_MASK,
            name.as_ptr() as *const libc::c_char,
            std::ptr::null_mut(),
        );

        // `newlocale` returns a null handle when the named locale is missing.
        if !utf8_locale.is_null() {
            return libc::uselocale(utf8_locale);
        }
    }

    // No UTF-8 locale is available: a null argument only queries the current
    // thread locale, so restoring the returned handle later is a no-op.
    libc::uselocale(std::ptr::null_mut())
}

/// Restores the thread locale that was active before `set_utf8_locale`, to
/// avoid side effects after extraction, and releases the temporary locale.
///
/// `old_locale` is the handle returned by `set_utf8_locale`.
///
/// # Safety
///
/// Must be called on the same thread that obtained `old_locale`, and the
/// thread locale must not have been replaced by other code in between: the
/// locale found installed at this point is assumed to be the one created by
/// `set_utf8_locale` and is freed.
#[cfg(unix)]
unsafe fn restore_locale(old_locale: libc::locale_t) {
    // A null handle means there is nothing to restore; `uselocale(null)` would
    // only query the current locale, which must not be freed.
    if old_locale.is_null() {
        return;
    }

    // `uselocale` hands back the locale it replaced, i.e. the temporary one.
    let replaced = libc::uselocale(old_locale);

    // Free the temporary locale object so repeated extractions do not leak
    // one per call. When no switch actually happened the replaced handle is
    // the original one, which is not ours to free.
    if !replaced.is_null() && replaced != old_locale {
        libc::freelocale(replaced);
    }
}

/// Switches `LC_CTYPE` to the `.UTF-8` locale so libarchive can handle
/// filenames in UTF-8, and returns the previous setting.
///
/// # Returns
///
/// An owned copy of the `LC_CTYPE` locale name that was active before the
/// call, or `None` if the C runtime could not report it. Pass the value to
/// `restore_locale` once the extraction is finished.
///
/// # Safety
///
/// Calls the C runtime's `setlocale`.
/// NOTE(review): unless per-thread locales were enabled in the CRT, this
/// setting is presumably process-wide — confirm that concurrent callers are
/// acceptable.
#[cfg(windows)]
unsafe fn set_utf8_locale() -> Option<std::ffi::CString> {
    // A null locale argument only queries the current setting.
    let current = libc::setlocale(libc::LC_CTYPE, std::ptr::null());

    // Copy the name right away: the runtime owns the returned buffer and a
    // later `setlocale` call may overwrite it.
    let saved = if current.is_null() {
        None
    } else {
        Some(std::ffi::CStr::from_ptr(current).to_owned())
    };

    libc::setlocale(
        libc::LC_CTYPE,
        b".UTF-8\0".as_ptr() as *const libc::c_char,
    );

    saved
}

/// Restores the `LC_CTYPE` locale saved by `set_utf8_locale`, to avoid side
/// effects after extraction. Does nothing when no previous locale was saved.
///
/// # Safety
///
/// Calls the C runtime's `setlocale`.
#[cfg(windows)]
unsafe fn restore_locale(old_locale: Option<std::ffi::CString>) {
    if let Some(name) = old_locale {
        libc::setlocale(libc::LC_CTYPE, name.as_ptr());
    }
}

0 comments on commit 6434866

Please sign in to comment.