Skip to content

Commit

Permalink
Merge pull request #338 from johnd0e/update-luautf8
Browse files Browse the repository at this point in the history
LuaFAR: update luautf8 library to 0.1.3-2-g5627e17
  • Loading branch information
alabuzhev committed Jan 20, 2021
2 parents e878bac + ca3dc4d commit 6c8f15a
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 145 deletions.
2 changes: 1 addition & 1 deletion plugins/luamacro/_globalinfo.lua
@@ -1,6 +1,6 @@
function export.GetGlobalInfo()
return {
Version = { 3, 0, 0, 760 },
Version = { 3, 0, 0, 761 },
MinFarVersion = { 3, 0, 0, 5171 },
Guid = win.Uuid("4EBBEFC8-2084-4B7F-94C0-692CE136894D"),
Title = "LuaMacro",
Expand Down
4 changes: 4 additions & 0 deletions plugins/luamacro/changelog
@@ -1,3 +1,7 @@
johnd0e 15.01.2021 10:12:00 +0200 - build 761

1. LuaFAR: update luautf8 library to 0.1.3-2-g5627e17.

shmuel 09.09.2020 21:29:21 +0300 - build 760

1. LuaFAR: continued 751 (minor changes).
Expand Down
75 changes: 38 additions & 37 deletions plugins/luamacro/luafar/lutf8lib.c
@@ -1,4 +1,4 @@
/* vim: set ft=c nu et sw=2 fdc=2 fdm=syntax : */
/* vim: set ft=c nu et sw=2 fdc=2 fdm=syntax : */
#define LUA_LIB
#include <lua.h>
#include <lauxlib.h>
Expand All @@ -8,15 +8,14 @@
#include <assert.h>
#include <string.h>

#include "unidata.h"

/* UTF-8 string operations */

typedef unsigned int utfint;

#define UTF8_BUFFSZ 8
#define UTF8_MAX 0x7FFFFFFFu
#define UTF8_MAXCP 0x10FFFFu
#define iscont(p) ((*(p) & 0xC0) == 0x80)
#define UTF8_BUFFSZ 8
#define UTF8_MAX 0x7FFFFFFFu
#define UTF8_MAXCP 0x10FFFFu
#define iscont(p) ((*(p) & 0xC0) == 0x80)
#define CAST(tp,expr) ((tp)(expr))

#ifndef LUA_QL
Expand All @@ -43,7 +42,7 @@ static size_t utf8_encode (char *buff, utfint x) {
return n;
}

static const char *utf8_decode (const char *s, utfint *val) {
static const char *utf8_decode (const char *s, utfint *val, int strict) {
static const utfint limits[] =
{~0u, 0x80u, 0x800u, 0x10000u, 0x200000u, 0x4000000u};
unsigned int c = (unsigned char)s[0];
Expand All @@ -63,6 +62,11 @@ static const char *utf8_decode (const char *s, utfint *val) {
return NULL; /* invalid byte sequence */
s += count; /* skip continuation bytes read */
}
if (strict) {
/* check for invalid code points; too large or surrogates */
if (res > UTF8_MAXCP || (0xD800u <= res && res <= 0xDFFFu))
return NULL;
}
if (val) *val = res;
return s + 1; /* +1 to include first byte */
}
Expand Down Expand Up @@ -114,8 +118,6 @@ static int utf8_range(const char *s, const char *e, lua_Integer *i, lua_Integer

/* Unicode character categories */

#include "unidata.h"

#define table_size(t) (sizeof(t)/sizeof((t)[0]))

#define utf8_categories(X) \
Expand Down Expand Up @@ -235,7 +237,7 @@ static const char *to_utf8 (lua_State *L, int idx, const char **end) {
}

static const char *utf8_safe_decode (lua_State *L, const char *p, utfint *pval) {
p = utf8_decode(p, pval);
p = utf8_decode(p, pval, 0);
if (p == NULL) luaL_error(L, "invalid UTF-8 code");
return p;
}
Expand Down Expand Up @@ -263,13 +265,11 @@ static int Lutf8_len (lua_State *L) {
luaL_argcheck(L, --pose < (lua_Integer)len, 3,
"final position out of string");
for (n = 0, p=s+posi, e=s+pose+1; p < e; ++n) {
if ((*p & 0xFF) < 0xC0)
++p;
else if (lax)
if (lax)
p = utf8_next(p, e);
else {
utfint ch;
const char *np = utf8_decode(p, &ch);
const char *np = utf8_decode(p, &ch, !lax);
if (np == NULL || utf8_invalid(ch)) {
lua_pushnil(L);
lua_pushinteger(L, p - s + 1);
Expand Down Expand Up @@ -305,8 +305,8 @@ static int Lutf8_reverse (lua_State *L) {
luaL_addlstring(&b, prev, e-prev);
}
} else {
utfint code = 0;
for (prev = e; s < prev; prev = pprev) {
utfint code = 0;
ends = utf8_safe_decode(L, pprev = utf8_prev(s, prev), &code);
assert(ends == prev);
if (utf8_invalid(code))
Expand All @@ -327,8 +327,8 @@ static int Lutf8_byte (lua_State *L) {
lua_Integer posi = luaL_optinteger(L, 2, 1);
lua_Integer pose = luaL_optinteger(L, 3, posi);
if (utf8_range(s, e, &posi, &pose)) {
utfint ch = 0;
for (e = s + pose, s = s + posi; s < e; ++n) {
utfint ch = 0;
s = utf8_safe_decode(L, s, &ch);
lua_pushinteger(L, ch);
}
Expand All @@ -344,7 +344,6 @@ static int Lutf8_codepoint (lua_State *L) {
int lax = lua_toboolean(L, 4);
int n;
const char *se;
utfint code = 0;
luaL_argcheck(L, posi >= 1, 2, "out of range");
luaL_argcheck(L, pose <= (lua_Integer)len, 3, "out of range");
if (posi > pose) return 0; /* empty interval; return no values */
Expand All @@ -355,6 +354,7 @@ static int Lutf8_codepoint (lua_State *L) {
n = 0; /* count the number of returns */
se = s + pose; /* string end */
for (n = 0, s += posi - 1; s < se;) {
utfint code = 0;
s = utf8_safe_decode(L, s, &code);
if (!lax && utf8_invalid(code))
return luaL_error(L, "invalid UTF-8 code");
Expand Down Expand Up @@ -383,11 +383,11 @@ static int Lutf8_##name (lua_State *L) { \
if (t == LUA_TNUMBER) \
lua_pushinteger(L, utf8_to##name(CAST(utfint, lua_tointeger(L, 1)))); \
else if (t == LUA_TSTRING) { \
utfint ch = 0; \
luaL_Buffer b; \
const char *e, *s = to_utf8(L, 1, &e); \
luaL_buffinit(L, &b); \
while (s < e) { \
utfint ch = 0; \
s = utf8_safe_decode(L, s, &ch); \
add_utf8char(&b, utf8_to##name(ch)); \
} \
Expand Down Expand Up @@ -423,10 +423,10 @@ static const char *parse_escape (lua_State *L, const char *s, const char *e, int

static int Lutf8_escape (lua_State *L) {
const char *e, *s = check_utf8(L, 1, &e);
utfint ch = 0;
luaL_Buffer b;
luaL_buffinit(L, &b);
while (s < e) {
utfint ch = 0;
s = utf8_safe_decode(L, s, &ch);
if (ch == '%') {
int hex = 0;
Expand All @@ -435,9 +435,9 @@ static int Lutf8_escape (lua_State *L) {
case '4': case '5': case '6': case '7':
case '8': case '9': case '{':
break;
case 'x': case 'X': hex = 1; /* FALLTHOUGH */
case 'x': case 'X': hex = 1; /* fall through */
case 'u': case 'U': if (s+1 < e) { ++s; break; }
/* FALLTHOUGH */
/* fall through */
default:
s = utf8_safe_decode(L, s, &ch);
goto next;
Expand Down Expand Up @@ -475,7 +475,7 @@ static int Lutf8_insert (lua_State *L) {

static int Lutf8_remove (lua_State *L) {
const char *e, *s = check_utf8(L, 1, &e);
lua_Integer posi = luaL_checkinteger(L, 2);
lua_Integer posi = luaL_optinteger(L, 2, -1);
lua_Integer pose = luaL_optinteger(L, 3, -1);
if (!utf8_range(s, e, &posi, &pose))
lua_settop(L, 1);
Expand All @@ -497,7 +497,7 @@ static int push_offset (lua_State *L, const char *s, const char *e, lua_Integer
else if (p = s+offset-1, iscont(p))
p = utf8_prev(s, p);
if (p == NULL || p == e) return 0;
utf8_decode(p, &ch);
utf8_decode(p, &ch, 0);
lua_pushinteger(L, p-s+1);
lua_pushinteger(L, ch);
return 2;
Expand Down Expand Up @@ -603,8 +603,8 @@ static int Lutf8_width (lua_State *L) {
else {
const char *e, *s = to_utf8(L, 1, &e);
int width = 0;
utfint ch = 0;
while (s < e) {
utfint ch = 0;
int chwidth;
s = utf8_safe_decode(L, s, &ch);
chwidth = utf8_width(ch, ambi_is_single);
Expand All @@ -621,8 +621,8 @@ static int Lutf8_widthindex (lua_State *L) {
int ambi_is_single = !lua_toboolean(L, 3);
int default_width = CAST(int, luaL_optinteger(L, 4, 0));
size_t idx = 1;
utfint ch = 0;
while (s < e) {
utfint ch = 0;
size_t chwidth;
s = utf8_safe_decode(L, s, &ch);
chwidth = utf8_width(ch, ambi_is_single);
Expand Down Expand Up @@ -693,11 +693,11 @@ static const char *match (MatchState *ms, const char *s, const char *p);

/* maximum recursion depth for 'match' */
#if !defined(MAXCCALLS)
#define MAXCCALLS 200
#define MAXCCALLS 200
#endif

#define L_ESC '%'
#define SPECIALS "^$*+?.([%-"
#define L_ESC '%'
#define SPECIALS "^$*+?.([%-"

static int check_capture (MatchState *ms, int l) {
l -= '1';
Expand Down Expand Up @@ -754,20 +754,20 @@ static int match_class (utfint c, utfint cl) {

static int matchbracketclass (MatchState *ms, utfint c, const char *p, const char *ec) {
int sig = 1;
utfint ch = 0;
utfint next = 0;
assert(*p == '[');
if (*++p == '^') {
sig = 0;
p++; /* skip the `^' */
}
while (p < ec) {
utfint ch = 0;
p = utf8_safe_decode(ms->L, p, &ch);
if (ch == L_ESC) {
p = utf8_safe_decode(ms->L, p, &ch);
if (match_class(c, ch))
return sig;
} else {
utfint next = 0;
const char *np = utf8_safe_decode(ms->L, p, &next);
if (next == '-' && np < ec) {
p = utf8_safe_decode(ms->L, np, &next);
Expand Down Expand Up @@ -919,9 +919,9 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
LUA_QL("%%f") " in pattern");
ep = classend(ms, p); /* points to what is next */
if (s != ms->src_init)
utf8_decode(utf8_prev(ms->src_init, s), &previous);
utf8_decode(utf8_prev(ms->src_init, s), &previous, 0);
if (s != ms->src_end)
utf8_decode(s, &current);
utf8_decode(s, &current, 0);
if (!matchbracketclass(ms, previous, p, ep - 1) &&
matchbracketclass(ms, current, p, ep - 1)) {
p = ep; goto init; /* return match(ms, s, ep); */
Expand Down Expand Up @@ -963,7 +963,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
}
case '+': /* 1 or more repetitions */
s = next_s; /* 1 match already done */
/* go through */
/* fall through */
case '*': /* 0 or more repetitions */
s = max_expand(ms, s, p, ep);
break;
Expand Down Expand Up @@ -1053,6 +1053,7 @@ static int find_aux (lua_State *L, int find) {
const char *ep, *p = check_utf8(L, 2, &ep);
lua_Integer idx = luaL_optinteger(L, 3, 1);
const char *init;
if (!idx) idx = 1;
init = utf8_relat(s, es, CAST(int, idx));
if (init == NULL) {
if (idx > 0) {
Expand Down Expand Up @@ -1145,8 +1146,8 @@ static int Lutf8_gmatch (lua_State *L) {

static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, const char *e) {
const char *new_end, *news = to_utf8(ms->L, 3, &new_end);
utfint ch = 0;
while (news < new_end) {
utfint ch = 0;
news = utf8_safe_decode(ms->L, news, &ch);
if (ch != L_ESC)
add_utf8char(b, ch);
Expand Down Expand Up @@ -1204,7 +1205,6 @@ static int Lutf8_gsub (lua_State *L) {
lua_Integer n = 0;
MatchState ms;
luaL_Buffer b;
utfint ch = 0;
luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3,
"string/function/table expected");
Expand All @@ -1227,6 +1227,7 @@ static int Lutf8_gsub (lua_State *L) {
if (e && e > s) /* non empty match? */
s = e; /* skip it */
else if (s < es) {
utfint ch = 0;
s = utf8_safe_decode(L, s, &ch);
add_utf8char(&b, ch);
} else break;
Expand Down Expand Up @@ -1292,7 +1293,7 @@ LUALIB_API int luaopen_utf8 (lua_State *L) {
}

/* win32cc: flags+='-Wall -Wextra -s -O2 -mdll -DLUA_BUILD_AS_DLL'
* win32cc: libs+='-llua53.dll' output='lua-utf8.dll'
* win32cc: libs+='-llua54.dll' output='lua-utf8.dll'
* win32cc: run='lua.exe test.lua'
* maccc: run='lua -- test_compat.lua'
* maccc: flags+='-g --coverage -bundle -undefined dynamic_lookup' output='lua-utf8.so' */
Expand Down

0 comments on commit 6c8f15a

Please sign in to comment.