From 3b3f3ac8769855b076dad15b467fcb7394f12f76 Mon Sep 17 00:00:00 2001 From: Georgiy Lebedev Date: Tue, 25 Apr 2023 15:12:54 +0300 Subject: [PATCH] box: support square brackets in procedure resolution for Lua calls Add a lexer for parsing the parts of a Lua procedure call: it is essentially a copy of the JSON path lexer, adapted to the differences between JSON paths and Lua procedure calls. Closes #8604 @TarantoolBot document Title: square brackets in procedure resolution for Lua calls Square brackets are now supported in Lua call procedure resolution. This applies to the `call` method of `net.box` connection objects as well as to `box.schema.func.call`. Examples of function calls with square brackets can be found in the test added by this patch. --- ...s-in-procedure-resolution-for-lua-calls.md | 4 + src/box/identifier.c | 31 +- src/box/identifier.h | 7 + src/box/lua/call.c | 273 +++++++++++++++--- ...rocedure_resolution_for_lua_calls_test.lua | 210 ++++++++++++++ 5 files changed, 467 insertions(+), 58 deletions(-) create mode 100644 changelogs/unreleased/gh-8604-support-square-brackets-in-procedure-resolution-for-lua-calls.md create mode 100644 test/box-luatest/gh_8604_support_square_brackets_in_procedure_resolution_for_lua_calls_test.lua diff --git a/changelogs/unreleased/gh-8604-support-square-brackets-in-procedure-resolution-for-lua-calls.md b/changelogs/unreleased/gh-8604-support-square-brackets-in-procedure-resolution-for-lua-calls.md new file mode 100644 index 000000000000..cf69477bbaf0 --- /dev/null +++ b/changelogs/unreleased/gh-8604-support-square-brackets-in-procedure-resolution-for-lua-calls.md @@ -0,0 +1,4 @@ +## feature/box + +* Added support for square brackets in procedure resolution for Lua calls + (gh-8604). 
diff --git a/src/box/identifier.c b/src/box/identifier.c index b1c56bdc376f..e56b6ac26584 100644 --- a/src/box/identifier.c +++ b/src/box/identifier.c @@ -34,7 +34,20 @@ #include "diag.h" #include "tt_static.h" #include -#include + +bool +identifier_is_valid_symbol(UChar32 c) +{ + int8_t type = u_charType(c); + /** + * The icu library has a function named u_isprint, however, + * this function does not return any errors. + * Here the `c` symbol printability is determined by comparison + * with unicode category types explicitly. + */ + return type != U_UNASSIGNED && type != U_LINE_SEPARATOR && + type != U_CONTROL_CHAR && type != U_PARAGRAPH_SEPARATOR; +} int identifier_check(const char *str, int str_len) @@ -47,20 +60,8 @@ identifier_check(const char *str, int str_len) int offset = 0; while (offset < str_len) { U8_NEXT(str, offset, str_len, c); - if (c == U_SENTINEL || c == 0xFFFD) - goto error; - - int8_t type = u_charType(c); - /** - * The icu library has a function named u_isprint, however, - * this function does not return any errors. - * Here the `c` symbol printability is determined by comparison - * with unicode category types explicitly. - */ - if (type == U_UNASSIGNED || - type == U_LINE_SEPARATOR || - type == U_CONTROL_CHAR || - type == U_PARAGRAPH_SEPARATOR) + if (c == U_SENTINEL || c == 0xFFFD || + !identifier_is_valid_symbol(c)) goto error; } return 0; diff --git a/src/box/identifier.h b/src/box/identifier.h index 0d39793ba322..a6c51816fc53 100644 --- a/src/box/identifier.h +++ b/src/box/identifier.h @@ -32,6 +32,7 @@ */ #include #include "error.h" +#include #if defined(__cplusplus) extern "C" { @@ -48,6 +49,12 @@ extern "C" { int identifier_check(const char *str, int str_len); +/** + * Check that UTF-8 codepoint is a valid identifier symbol. 
+ */ +bool +identifier_is_valid_symbol(UChar32 c); + #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/lua/call.c b/src/box/lua/call.c index 59c30bb73dad..7bdd17757920 100644 --- a/src/box/lua/call.c +++ b/src/box/lua/call.c @@ -49,6 +49,8 @@ #include "mpstream/mpstream.h" #include "box/session.h" #include "box/iproto_features.h" +#include "box/identifier.h" +#include /** * Handlers identifiers to obtain lua_Cfunction reference from @@ -92,61 +94,239 @@ get_call_serializer(void) } } -/** - * A helper to find a Lua function by name and put it - * on top of the stack. - */ +/* Lexer for procedure resolution. */ +struct lexer { + /* Source string. */ + const char *src; + /* Length of string. */ + size_t src_len; + /* Current lexer offset in bytes. */ + size_t offset; + /* Current lexer offset in symbols. */ + size_t symbol_count; +}; + +/* Initialize lexer. */ +static void +lexer_create(struct lexer *lexer, const char *src, size_t src_len) +{ + lexer->src = src; + lexer->src_len = src_len; + lexer->offset = 0; + lexer->symbol_count = 0; +} + +/* Check if lexer has finished parsing. */ +static bool +lexer_is_eof(const struct lexer *lexer) +{ + return lexer->offset == lexer->src_len; +} + +/* Read a single symbol from a string starting from an offset. */ static int -box_lua_find(lua_State *L, const char *name, const char *name_end) +lexer_read_symbol(struct lexer *lexer, UChar32 *out) { - int index = LUA_GLOBALSINDEX; - int objstack = 0, top = lua_gettop(L); - const char *start = name, *end; - - while ((end = (const char *) memchr(start, '.', name_end - start))) { - lua_checkstack(L, 3); - lua_pushlstring(L, start, end - start); - lua_gettable(L, index); - if (! 
lua_istable(L, -1)) { - diag_set(ClientError, ER_NO_SUCH_PROC, - name_end - name, name); - return -1; - } - start = end + 1; /* next piece of a.b.c */ - index = lua_gettop(L); /* top of the stack */ + if (lexer_is_eof(lexer)) { + *out = U_SENTINEL; + return -1; } + U8_NEXT(lexer->src, lexer->offset, lexer->src_len, *out); + if (*out == U_SENTINEL) + return -1; + ++lexer->symbol_count; + return 0; +} - /* box.something:method */ - if ((end = (const char *) memchr(start, ':', name_end - start))) { - lua_checkstack(L, 3); - lua_pushlstring(L, start, end - start); - lua_gettable(L, index); - if (! (lua_istable(L, -1) || - lua_islightuserdata(L, -1) || lua_isuserdata(L, -1) )) { - diag_set(ClientError, ER_NO_SUCH_PROC, - name_end - name, name); +/* Rollback one symbol offset. */ +static void +lexer_revert_symbol(struct lexer *lexer, size_t offset) +{ + lexer->offset = offset; + --lexer->symbol_count; +} + +/* Fast forward when it is known that a symbol is 1-byte char. */ +static inline void +lexer_skip_char(struct lexer *lexer) +{ + ++lexer->offset; + ++lexer->symbol_count; +} + +/* Get a current symbol as a 1-byte char. */ +static char +lexer_current_char(const struct lexer *lexer) +{ + return lexer->src[lexer->offset]; +} + +/* + * Parse string identifier in quotes. Lexer stops right after the + * closing quote. + */ +static inline int +lexer_parse_string(struct lexer *lexer, const char **str, size_t *str_len, + UChar32 quote) +{ + assert(lexer->offset < lexer->src_len); + assert(quote == lexer_current_char(lexer)); + /* The first symbol is always char - ' or ". 
*/ + lexer_skip_char(lexer); + size_t str_offset = lexer->offset; + *str = lexer->src + str_offset; + UChar32 c; + while (lexer_read_symbol(lexer, &c) == 0) { + if (c == quote) { + *str_len = lexer->offset - str_offset - 1; + if (*str_len == 0) return -1; + return 0; } - - start = end + 1; /* next piece of a.b.c */ - index = lua_gettop(L); /* top of the stack */ - objstack = index - top; } + return -1; +} + +/* + * Parse digit sequence into integer until non-digit is met. Lexer stops right + * after the last digit. + */ +static int +lexer_parse_integer(struct lexer *lexer, int *num) +{ + *num = 0; + const char *end = lexer->src + lexer->src_len; + const char *pos = lexer->src + lexer->offset; + assert(pos < end); + int len = 0; + *num = 0; + char c = *pos; + if (!isdigit(c)) + return -1; + do { + *num = *num * 10 + c - (int)'0'; + ++len; + } while (++pos < end && isdigit((c = *pos))); + lexer->offset += len; + lexer->symbol_count += len; + return 0; +} +static bool +is_valid_identifier_symbol(UChar32 c) +{ + return identifier_is_valid_symbol(c) && c != '.' && c != '[' && + c != ']' && c != '"' && c != '\'' && c != ':'; +} - lua_pushlstring(L, start, name_end - start); - lua_gettable(L, index); - if (!lua_isfunction(L, -1) && !lua_istable(L, -1)) { - /* lua_call or lua_gettable would raise a type error - * for us, but our own message is more verbose. */ - diag_set(ClientError, ER_NO_SUCH_PROC, - name_end - name, name); +/* + * Parse identifier out of quotes. Lexer stops right after the last + * identifier symbol. 
+ */ +static int +lexer_parse_identifier(struct lexer *lexer, const char **str, size_t *str_len) +{ + assert(lexer->offset < lexer->src_len); + size_t str_offset = lexer->offset; + *str = lexer->src + str_offset; + UChar32 c; + if (lexer_read_symbol(lexer, &c) != 0) return -1; + size_t last_offset = lexer->offset; + while (lexer_read_symbol(lexer, &c) == 0) { + if (!is_valid_identifier_symbol(c)) { + lexer_revert_symbol(lexer, last_offset); + break; + } + last_offset = lexer->offset; } + *str_len = lexer->offset - str_offset; + return 0; +} - /* setting stack that it would contain only - * the function pointer. */ - if (index != LUA_GLOBALSINDEX) { +/** + * A helper to find a Lua function by name and put it + * on top of the stack. + */ +static int +box_lua_find(lua_State *L, const char *name, const char *name_end) +{ + int top = lua_gettop(L); + int idx = LUA_GLOBALSINDEX; + int objstack = 0; + struct lexer lexer; + lexer_create(&lexer, name, name_end - name); + while (!lexer_is_eof(&lexer)) { + UChar32 c; + if (lexer_read_symbol(&lexer, &c) != 0) + goto err; + switch (c) { + case '[': { + if (lexer_is_eof(&lexer)) + goto err; + c = (unsigned char)lexer_current_char(&lexer); + if (c == '"' || c == '\'') { + const char *str; + size_t str_len; + if (lexer_parse_string(&lexer, &str, &str_len, + c) != 0) + goto err; + lua_pushlstring(L, str, str_len); + } else { + int num; + if (lexer_parse_integer(&lexer, &num) != 0) + goto err; + lua_pushnumber(L, num); + } + if (lexer_is_eof(&lexer) || + lexer_current_char(&lexer) != ']') + goto err; + lexer_skip_char(&lexer); + if (!lexer_is_eof(&lexer)) { + char ch = lexer_current_char(&lexer); + if (ch != '[' && ch != '.' 
&& ch != ':') + goto err; + } + break; + } + case '.': + case ':': { + if (lexer_is_eof(&lexer)) + goto err; + const char *str; + size_t str_len; + if (lexer_parse_identifier(&lexer, &str, + &str_len) != 0 || + (c == ':' && !lexer_is_eof(&lexer))) + goto err; + lua_pushlstring(L, str, str_len); + break; + } + default: { + lexer_revert_symbol(&lexer, 0); + const char *str; + size_t str_len; + if (lexer_parse_identifier(&lexer, &str, &str_len) != 0) + goto err; + lua_pushlstring(L, str, str_len); + } + } + lua_gettable(L, idx); + if (lexer_is_eof(&lexer)) { + if (!lua_isfunction(L, -1) && !lua_istable(L, -1)) + goto err; + if (c == ':') + objstack = idx - top; + break; + } else if (!lua_istable(L, -1) && + (c != ':' || + (!lua_islightuserdata(L, -1) && + !lua_isuserdata(L, -1)))) { + goto err; + } + idx = lua_gettop(L); + } + if (idx != LUA_GLOBALSINDEX) { if (objstack == 0) { /* no object, only a function */ lua_replace(L, top + 1); lua_pop(L, lua_gettop(L) - top - 1); @@ -161,6 +341,13 @@ box_lua_find(lua_State *L, const char *name, const char *name_end) } } return 1 + objstack; +err: + /* + * lua_call or lua_gettable would raise a type error + * for us, but our own message is more verbose. 
+ */ + diag_set(ClientError, ER_NO_SUCH_PROC, name_end - name, name); + return -1; } /** diff --git a/test/box-luatest/gh_8604_support_square_brackets_in_procedure_resolution_for_lua_calls_test.lua b/test/box-luatest/gh_8604_support_square_brackets_in_procedure_resolution_for_lua_calls_test.lua new file mode 100644 index 000000000000..39e04acc6a74 --- /dev/null +++ b/test/box-luatest/gh_8604_support_square_brackets_in_procedure_resolution_for_lua_calls_test.lua @@ -0,0 +1,210 @@ +local server = require('luatest.server') +local t = require('luatest') + +local g = t.group(nil, t.helpers.matrix{proc = { + 'a.b.c', + 'a.b.c', + 'a.b["c"]', + 'a.b[\'c\']', + 'a.b[555]', + 'a[777].d[444]', + 'a[777].d.e', + 'a[777][666]', + 'a[555]', + 'a[333]:ping', + 'a.f', + 'a.g:ping' +}}) + +g.before_all(function(cg) + cg.server = server:new() + cg.server:start() + cg.server:exec(function() + local netbox = require('net.box') + + local a = { + b = { + c = function() return 'c' end, + [555] = function() return 555 end + }, + [777] = { + d = { + [444] = function() return 444 end, + e = function() return 'e' end + }, + [666] = function() return 666 end + }, + [555] = function() return 555 end, + [333] = netbox.self, + f = function() return 'f' end, + g = netbox.self + } + rawset(_G, 'a', a) + end) +end) + +g.after_all(function(cg) + cg.server:drop() +end) + +-- Checks that procedure resolution for Lua calls works correctly. +g.test_procedure_resolution = function(cg) + cg.server:exec(function(proc) + local netbox = require('net.box') + + t.assert_equals(netbox.self:call(proc), + netbox.self:eval('return ' .. proc .. '()')) + end, {cg.params.proc}) +end + +local g = t.group() + +g.before_all(function(cg) + cg.server = server:new() + cg.server:start() +end) + +g.after_all(function(cg) + cg.server:drop() +end) + +-- Checks that error detection in procedure resolution for Lua calls works +-- correctly. 
+g.test_procedure_resolution_errors = function(cg) + cg.server:exec(function() + local netbox = require('net.box') + + t.assert_error(function() + netbox.self:call('') + end) + t.assert_error(function() + netbox.self:call('.') + end) + t.assert_error(function() + netbox.self:call(':') + end) + t.assert_error(function() + netbox.self:call('[') + end) + t.assert_error(function() + netbox.self:call(']') + end) + t.assert_error(function() + netbox.self:call('[]') + end) + t.assert_error(function() + netbox.self:call('a.') + end) + t.assert_error(function() + netbox.self:call('l:') + end) + t.assert_error(function() + netbox.self:call('a.b.') + end) + t.assert_error(function() + netbox.self:call('a[b]') + end) + t.assert_error(function() + netbox.self:call('a[[]') + end) + t.assert_error(function() + netbox.self:call('a[[777]') + end) + t.assert_error(function() + netbox.self:call('a["b]') + end) + t.assert_error(function() + netbox.self:call('a["b\']') + end) + t.assert_error(function() + netbox.self:call('a[\'b]') + end) + t.assert_error(function() + netbox.self:call('a[\'b"]') + end) + t.assert_error(function() + netbox.self:call('a[\'\']') + end) + t.assert_error(function() + netbox.self:call('a[""]') + end) + t.assert_error(function() + netbox.self:call('a[\'\']') + end) + t.assert_error(function() + netbox.self:call('a["b""]') + end) + t.assert_error(function() + netbox.self:call('a["b"\']') + end) + t.assert_error(function() + netbox.self:call('a[\'b"\']') + end) + t.assert_error(function() + netbox.self:call('a["b\'"]') + end) + t.assert_error(function() + netbox.self:call('a[333]:') + end) + t.assert_error(function() + netbox.self:call('a[333]:ping:') + end) + t.assert_error(function() + netbox.self:call('a:[333]:ping:') + end) + t.assert_error(function() + netbox.self:call('a:[333]:') + end) + t.assert_error(function() + netbox.self:call('a[555].') + end) + t.assert_error(function() + netbox.self:call('a[555].') + end) + t.assert_error(function() + 
netbox.self:call('a[777].[666]') + end) + t.assert_error(function() + netbox.self:call('a[777]d[444]') + end) + t.assert_error(function() + netbox.self:call('a[777].d.[444]') + end) + t.assert_error(function() + netbox.self:call('a[777][666]e') + end) + t.assert_error(function() + netbox.self:call('a[555') + end) + t.assert_error(function() + netbox.self:call('a[555]..') + end) + t.assert_error(function() + netbox.self:call('a[555]..') + end) + t.assert_error(function() + netbox.self:call('a[777]..[666]') + end) + t.assert_error(function() + netbox.self:call('a[777].][666]') + end) + t.assert_error(function() + netbox.self:call('a]555[') + end) + t.assert_error(function() + netbox.self:call('a]555]') + end) + t.assert_error(function() + netbox.self:call('a]]') + end) + t.assert_error(function() + netbox.self:call('a[[555]') + end) + t.assert_error(function() + netbox.self:call('a[[555]]') + end) + t.assert_error(function() + netbox.self:call('a.b[c]') + end) + end) +end