Skip to content

Commit

Permalink
ntfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option
Browse files Browse the repository at this point in the history
NLS table for utf8 is broken and cannot be fixed.

So instead of the broken utf8 NLS functions char2uni() and uni2char(), use
the functions utf8s_to_utf16s() and utf16s_to_utf8s(), which implement correct
conversion between UTF-16 and UTF-8.

These functions also implement correct processing of UTF-16 surrogate
pairs, so after this change the ntfs driver will also be able to correctly
handle file names with 4-byte UTF-8 sequences.

When iocharset=utf8 is used, set vol->nls_map to NULL and use that to
distinguish whether the NLS table or the native UTF-8 functions should
be used.

Signed-off-by: Pali Rohár <pali@kernel.org>
  • Loading branch information
pali authored and intel-lab-lkp committed Aug 8, 2021
1 parent fb17de5 commit b4774ff
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 23 deletions.
6 changes: 4 additions & 2 deletions fs/ntfs/dir.c
Expand Up @@ -1034,7 +1034,8 @@ static inline int ntfs_filldir(ntfs_volume *vol,
}
name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name,
ie->key.file_name.file_name_length, &name,
NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
NTFS_MAX_NAME_LEN *
(vol->nls_map ? NLS_MAX_CHARSET_SIZE : 4) + 1);
if (name_len <= 0) {
ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
(long long)MREF_LE(ie->data.dir.indexed_file));
Expand Down Expand Up @@ -1118,7 +1119,8 @@ static int ntfs_readdir(struct file *file, struct dir_context *actor)
* Allocate a buffer to store the current name being processed
* converted to format determined by current NLS.
*/
name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
name = kmalloc(NTFS_MAX_NAME_LEN *
(vol->nls_map ? NLS_MAX_CHARSET_SIZE : 4) + 1, GFP_NOFS);
if (unlikely(!name)) {
err = -ENOMEM;
goto err_out;
Expand Down
5 changes: 4 additions & 1 deletion fs/ntfs/inode.c
Expand Up @@ -2303,7 +2303,10 @@ int ntfs_show_options(struct seq_file *sf, struct dentry *root)
seq_printf(sf, ",fmask=0%o", vol->fmask);
seq_printf(sf, ",dmask=0%o", vol->dmask);
}
seq_printf(sf, ",iocharset=%s", vol->nls_map->charset);
if (vol->nls_map)
seq_printf(sf, ",iocharset=%s", vol->nls_map->charset);
else
seq_puts(sf, ",iocharset=utf8");
if (NVolCaseSensitive(vol))
seq_printf(sf, ",case_sensitive");
if (NVolShowSystemFiles(vol))
Expand Down
41 changes: 24 additions & 17 deletions fs/ntfs/super.c
Expand Up @@ -84,7 +84,7 @@ static int simple_getbool(char *s, bool *setval)
*
* Parse the recognized options in @opt for the ntfs volume described by @vol.
*/
static bool parse_options(ntfs_volume *vol, char *opt)
static bool parse_options(ntfs_volume *vol, char *opt, int remount)
{
char *p, *v, *ov;
static char *utf8 = "utf8";
Expand All @@ -95,6 +95,7 @@ static bool parse_options(ntfs_volume *vol, char *opt)
int mft_zone_multiplier = -1, on_errors = -1;
int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
struct nls_table *nls_map = NULL;
int have_iocharset = 0;

/* I am lazy... (-8 */
#define NTFS_GETOPT_WITH_DEFAULT(option, variable, default_value) \
Expand Down Expand Up @@ -196,12 +197,16 @@ static bool parse_options(ntfs_volume *vol, char *opt)
goto needs_arg;
use_utf8:
unload_nls(nls_map);
nls_map = load_nls(v);
if (!nls_map) {
ntfs_error(vol->sb, "NLS character set "
"%s not found.", v);
return false;
nls_map = NULL;
if (strcmp(v, "utf8") != 0) {
nls_map = load_nls(v);
if (!nls_map) {
ntfs_error(vol->sb, "NLS character set "
"%s not found.", v);
return false;
}
}
have_iocharset = 1;
} else if (!strcmp(p, "utf8")) {
bool val = false;
ntfs_warning(vol->sb, "Option utf8 is no longer "
Expand Down Expand Up @@ -241,25 +246,27 @@ static bool parse_options(ntfs_volume *vol, char *opt)
return false;
}
}
if (nls_map) {
if (vol->nls_map && vol->nls_map != nls_map) {
if (have_iocharset) {
if (remount && vol->nls_map != nls_map) {
ntfs_error(vol->sb, "Cannot change NLS character set "
"on remount.");
return false;
} /* else (!vol->nls_map) */
ntfs_debug("Using NLS character set %s.", nls_map->charset);
vol->nls_map = nls_map;
} else /* (!nls_map) */ {
if (!vol->nls_map) {
} else (!remount) {
ntfs_debug("Using NLS character set %s.",
nls_map ? nls_map->charset : "utf8");
vol->nls_map = nls_map;
}
} else if (!remount) {
if (strcmp(CONFIG_NLS_DEFAULT, "utf8") != 0) {
vol->nls_map = load_nls_default();
if (!vol->nls_map) {
ntfs_error(vol->sb, "Failed to load default "
"NLS character set.");
return false;
}
ntfs_debug("Using default NLS character set (%s).",
vol->nls_map->charset);
}
ntfs_debug("Using default NLS character set (%s).",
vol->nls_map ? vol->nls_map->charset : "utf8");
}
if (mft_zone_multiplier != -1) {
if (vol->mft_zone_multiplier && vol->mft_zone_multiplier !=
Expand Down Expand Up @@ -534,7 +541,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)

// TODO: Deal with *flags.

if (!parse_options(vol, opt))
if (!parse_options(vol, opt, 1))
return -EINVAL;

ntfs_debug("Done.");
Expand Down Expand Up @@ -2731,7 +2738,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
NVolSetSparseEnabled(vol);

/* Important to get the mount options dealt with now. */
if (!parse_options(vol, (char*)opt))
if (!parse_options(vol, (char*)opt, 0))
goto err_out_now;

/* We support sector sizes up to the PAGE_SIZE. */
Expand Down
27 changes: 24 additions & 3 deletions fs/ntfs/unistr.c
Expand Up @@ -254,6 +254,16 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
if (likely(ins)) {
ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS);
if (likely(ucs)) {
if (!nls) {
wc_len = utf8s_to_utf16s(ins, ins_len,
UTF16_LITTLE_ENDIAN, ucs,
NTFS_MAX_NAME_LEN);
if (wc_len < 0 || wc_len >= NTFS_MAX_NAME_LEN)
goto name_err;
ucs[wc_len] = 0;
*outs = ucs;
return o;
}
for (i = o = 0; i < ins_len; i += wc_len) {
wc_len = nls->char2uni(ins + i, ins_len - i,
&wc);
Expand Down Expand Up @@ -283,7 +293,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
if (wc_len < 0) {
ntfs_error(vol->sb, "Name using character set %s contains "
"characters that cannot be converted to "
"Unicode.", nls->charset);
"Unicode.", nls ? nls->charset : "utf8");
i = -EILSEQ;
} else /* if (o >= NTFS_MAX_NAME_LEN) */ {
ntfs_error(vol->sb, "Name is too long (maximum length for a "
Expand Down Expand Up @@ -335,11 +345,22 @@ int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
goto conversion_err;
}
if (!ns) {
ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
ns_len = ins_len * (nls ? NLS_MAX_CHARSET_SIZE : 4);
ns = kmalloc(ns_len + 1, GFP_NOFS);
if (!ns)
goto mem_err_out;
}
if (!nls) {
o = utf16s_to_utf8s(ins, ins_len, UTF16_LITTLE_ENDIAN,
ns, ns_len);
if (o >= ns_len) {
wc = -ENAMETOOLONG;
goto conversion_err;
}
ns[o] = 0;
*outs = ns;
return o;
}
for (i = o = 0; i < ins_len; i++) {
retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
ns_len - o);
Expand Down Expand Up @@ -373,7 +394,7 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
"converted to character set %s. You might want to "
"try to use the mount option iocharset=utf8.",
nls->charset);
nls ? nls->charset : "utf8");
if (ns != *outs)
kfree(ns);
if (wc != -ENAMETOOLONG)
Expand Down

0 comments on commit b4774ff

Please sign in to comment.