Skip to content

Commit

Permalink
fs: unicode: Make UTF-8 encoding loadable
Browse files Browse the repository at this point in the history
utf8data.h_shipped has a large database table which is an auto-generated
decoding trie for the Unicode normalization functions.
It is not necessary to carry this large table in the kernel, so make
UTF-8 encoding loadable by converting it into a module.
Also, modify the file called unicode-core, which will act as a layer for
the Unicode subsystem. It will load the UTF-8 module and access its functions
whenever any filesystem that needs Unicode is mounted.

Signed-off-by: Shreeya Patel <shreeya.patel@collabora.com>
  • Loading branch information
Shreeya Patel authored and intel-lab-lkp committed Mar 13, 2021
1 parent f5d42a6 commit 85f4765
Show file tree
Hide file tree
Showing 6 changed files with 427 additions and 164 deletions.
7 changes: 6 additions & 1 deletion fs/unicode/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ config UNICODE
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
support.

config UNICODE_UTF8
tristate "UTF-8 module"
depends on UNICODE
default m

config UNICODE_NORMALIZATION_SELFTEST
tristate "Test UTF-8 normalization support"
depends on UNICODE
depends on UNICODE_UTF8
default n
5 changes: 4 additions & 1 deletion fs/unicode/Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# SPDX-License-Identifier: GPL-2.0

obj-$(CONFIG_UNICODE) += unicode.o
obj-$(CONFIG_UNICODE_UTF8) += utf8.o
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o

unicode-y := utf8-norm.o unicode-core.o
unicode-y := unicode-core.o
utf8-y := utf8mod.o utf8-norm.o

$(obj)/utf8-norm.o: $(obj)/utf8data.h
$(obj)/utf8mod.o: $(obj)/utf8-norm.o

# In the normal build, the checked-in utf8data.h is just shipped.
#
Expand Down
201 changes: 39 additions & 162 deletions fs/unicode/unicode-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,70 +7,29 @@
#include <linux/errno.h>
#include <linux/unicode.h>
#include <linux/stringhash.h>
#include <linux/delay.h>

#include "utf8n.h"
struct unicode_ops *utf8_ops;

static int unicode_load_module(void);

int unicode_validate(const struct unicode_map *um, const struct qstr *str)
{
const struct utf8data *data = utf8nfdi(um->version);

if (utf8nlen(data, str->name, str->len) < 0)
return -1;
return 0;
return utf8_ops->validate(um, str);
}
EXPORT_SYMBOL(unicode_validate);

int unicode_strncmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;

if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
return -EINVAL;

if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
return -EINVAL;

do {
c1 = utf8byte(&cur1);
c2 = utf8byte(&cur2);

if (c1 < 0 || c2 < 0)
return -EINVAL;
if (c1 != c2)
return 1;
} while (c1);

return 0;
return utf8_ops->strncmp(um, s1, s2);
}
EXPORT_SYMBOL(unicode_strncmp);

int unicode_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;

if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
return -EINVAL;

if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
return -EINVAL;

do {
c1 = utf8byte(&cur1);
c2 = utf8byte(&cur2);

if (c1 < 0 || c2 < 0)
return -EINVAL;
if (c1 != c2)
return 1;
} while (c1);

return 0;
return utf8_ops->strncasecmp(um, s1, s2);
}
EXPORT_SYMBOL(unicode_strncasecmp);

Expand All @@ -81,155 +40,73 @@ int unicode_strncasecmp_folded(const struct unicode_map *um,
const struct qstr *cf,
const struct qstr *s1)
{
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1;
int c1, c2;
int i = 0;

if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
return -EINVAL;

do {
c1 = utf8byte(&cur1);
c2 = cf->name[i++];
if (c1 < 0)
return -EINVAL;
if (c1 != c2)
return 1;
} while (c1);

return 0;
return utf8_ops->strncasecmp_folded(um, cf, s1);
}
EXPORT_SYMBOL(unicode_strncasecmp_folded);

int unicode_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur;
size_t nlen = 0;

if (utf8ncursor(&cur, data, str->name, str->len) < 0)
return -EINVAL;

for (nlen = 0; nlen < dlen; nlen++) {
int c = utf8byte(&cur);

dest[nlen] = c;
if (!c)
return nlen;
if (c == -1)
break;
}
return -EINVAL;
return utf8_ops->casefold(um, str, dest, dlen);
}
EXPORT_SYMBOL(unicode_casefold);

int unicode_casefold_hash(const struct unicode_map *um, const void *salt,
struct qstr *str)
{
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur;
int c;
unsigned long hash = init_name_hash(salt);

if (utf8ncursor(&cur, data, str->name, str->len) < 0)
return -EINVAL;

while ((c = utf8byte(&cur))) {
if (c < 0)
return -EINVAL;
hash = partial_name_hash((unsigned char)c, hash);
}
str->hash = end_name_hash(hash);
return 0;
return utf8_ops->casefold_hash(um, salt, str);
}
EXPORT_SYMBOL(unicode_casefold_hash);

int unicode_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur;
ssize_t nlen = 0;
return utf8_ops->normalize(um, str, dest, dlen);
}
EXPORT_SYMBOL(unicode_normalize);

if (utf8ncursor(&cur, data, str->name, str->len) < 0)
return -EINVAL;
struct unicode_map *unicode_load(const char *version)
{
int ret = unicode_load_module();

for (nlen = 0; nlen < dlen; nlen++) {
int c = utf8byte(&cur);
if (ret)
return ERR_PTR(ret);

dest[nlen] = c;
if (!c)
return nlen;
if (c == -1)
break;
}
return -EINVAL;
else
return utf8_ops->load(version);
}
EXPORT_SYMBOL(unicode_normalize);
EXPORT_SYMBOL(unicode_load);

static int unicode_parse_version(const char *version, unsigned int *maj,
unsigned int *min, unsigned int *rev)
void unicode_unload(struct unicode_map *um)
{
substring_t args[3];
char version_string[12];
static const struct match_token token[] = {
{1, "%d.%d.%d"},
{0, NULL}
};
kfree(um);
}
EXPORT_SYMBOL(unicode_unload);

strncpy(version_string, version, sizeof(version_string));
static int unicode_load_module(void)
{
int ret = request_module("utf8");

if (match_token(version_string, token, args) != 1)
return -EINVAL;
msleep(100);

if (match_int(&args[0], maj) || match_int(&args[1], min) ||
match_int(&args[2], rev))
return -EINVAL;
if (ret) {
pr_err("Failed to load UTF-8 module\n");
return ret;
}

return 0;
}

struct unicode_map *unicode_load(const char *version)
void unicode_register(struct unicode_ops *ops)
{
struct unicode_map *um = NULL;
int unicode_version;

if (version) {
unsigned int maj, min, rev;

if (unicode_parse_version(version, &maj, &min, &rev) < 0)
return ERR_PTR(-EINVAL);

if (!utf8version_is_supported(maj, min, rev))
return ERR_PTR(-EINVAL);

unicode_version = UNICODE_AGE(maj, min, rev);
} else {
unicode_version = utf8version_latest();
printk(KERN_WARNING"UTF-8 version not specified. "
"Assuming latest supported version (%d.%d.%d).",
(unicode_version >> 16) & 0xff,
(unicode_version >> 8) & 0xff,
(unicode_version & 0xff));
}

um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
if (!um)
return ERR_PTR(-ENOMEM);

um->charset = "UTF-8";
um->version = unicode_version;

return um;
utf8_ops = ops;
}
EXPORT_SYMBOL(unicode_load);
EXPORT_SYMBOL(unicode_register);

void unicode_unload(struct unicode_map *um)
void unicode_unregister(void)
{
kfree(um);
utf8_ops = NULL;
}
EXPORT_SYMBOL(unicode_unload);
EXPORT_SYMBOL(unicode_unregister);

MODULE_LICENSE("GPL v2");
Loading

0 comments on commit 85f4765

Please sign in to comment.