Skip to content

Commit

Permalink
box: support square brackets in procedure resolution for Lua calls
Browse files Browse the repository at this point in the history
Add a lexer for parsing the parts of a Lua procedure call. It is essentially
a copy of the JSON path lexer, adjusted for the differences between JSON
paths and Lua procedure calls.

Closes tarantool#8604

@TarantoolBot document
Title: square brackets in procedure resolution for Lua calls

Square brackets are now supported in Lua call procedure resolution. This
applies to the `call` method of `net.box` connection objects as well as to
`box.schema.func.call`.

Examples of function calls with square brackets can be found in the test to
this patch.
  • Loading branch information
CuriousGeorgiy committed May 10, 2023
1 parent 464a3d9 commit eed41df
Show file tree
Hide file tree
Showing 5 changed files with 467 additions and 58 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## feature/box

* Added support for square brackets in procedure resolution for Lua calls
(gh-8604).
31 changes: 16 additions & 15 deletions src/box/identifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,20 @@
#include "diag.h"
#include "tt_static.h"
#include <unicode/utf8.h>
#include <unicode/uchar.h>

/**
 * Tell whether a code point may be used as an identifier symbol.
 *
 * The icu library has a function named u_isprint, however, that
 * function does not return any errors. Printability of `c` is
 * therefore decided by looking at its unicode category type
 * explicitly.
 *
 * @param c Code point to classify.
 * @retval false The category of `c` is unassigned, line separator,
 *               control character or paragraph separator.
 * @retval true  Any other category.
 */
bool
identifier_is_valid_symbol(UChar32 c)
{
	switch (u_charType(c)) {
	case U_UNASSIGNED:
	case U_LINE_SEPARATOR:
	case U_CONTROL_CHAR:
	case U_PARAGRAPH_SEPARATOR:
		return false;
	default:
		return true;
	}
}

int
identifier_check(const char *str, int str_len)
Expand All @@ -47,20 +60,8 @@ identifier_check(const char *str, int str_len)
int offset = 0;
while (offset < str_len) {
U8_NEXT(str, offset, str_len, c);
if (c == U_SENTINEL || c == 0xFFFD)
goto error;

int8_t type = u_charType(c);
/**
* The icu library has a function named u_isprint, however,
* this function does not return any errors.
* Here the `c` symbol printability is determined by comparison
* with unicode category types explicitly.
*/
if (type == U_UNASSIGNED ||
type == U_LINE_SEPARATOR ||
type == U_CONTROL_CHAR ||
type == U_PARAGRAPH_SEPARATOR)
if (c == U_SENTINEL || c == 0xFFFD ||
!identifier_is_valid_symbol(c))
goto error;
}
return 0;
Expand Down
7 changes: 7 additions & 0 deletions src/box/identifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
*/
#include <stdbool.h>
#include "error.h"
#include <unicode/uchar.h>

#if defined(__cplusplus)
extern "C" {
Expand All @@ -48,6 +49,12 @@ extern "C" {
int
identifier_check(const char *str, int str_len);

/**
 * Check whether a Unicode code point is a valid identifier symbol.
 *
 * @param c Code point to check (an already decoded code point, not
 *          a raw UTF-8 byte).
 * @retval true  The symbol may be used in an identifier.
 * @retval false The symbol is not allowed in an identifier.
 */
bool
identifier_is_valid_symbol(UChar32 c);

#if defined(__cplusplus)
} /* extern "C" */

Expand Down
273 changes: 230 additions & 43 deletions src/box/lua/call.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
#include "mpstream/mpstream.h"
#include "box/session.h"
#include "box/iproto_features.h"
#include "box/identifier.h"
#include <ctype.h>
#include <limits.h>

/**
* Handlers identifiers to obtain lua_Cfunction reference from
Expand Down Expand Up @@ -92,61 +94,239 @@ get_call_serializer(void)
}
}

/**
* A helper to find a Lua function by name and put it
* on top of the stack.
*/
/*
 * Lexer for procedure resolution: a cursor over the procedure name
 * string which is consumed symbol by symbol.
 */
struct lexer {
/* Source string. The lexer stores the pointer, the data is not copied. */
const char *src;
/* Length of the source string in bytes. */
size_t src_len;
/* Current lexer offset in bytes, i.e. position of the next unread symbol. */
size_t offset;
/*
 * Current lexer offset in symbols: incremented for every symbol
 * consumed and decremented when a symbol is reverted.
 */
size_t symbol_count;
};

/*
 * Initialize a lexer over the first `src_len` bytes of `src`.
 * Both the byte offset and the symbol counter start from zero.
 * The string is not copied: only the pointer is remembered.
 */
static void
lexer_create(struct lexer *lexer, const char *src, size_t src_len)
{
	/* Position counters first, then the input description. */
	lexer->symbol_count = 0;
	lexer->offset = 0;
	lexer->src_len = src_len;
	lexer->src = src;
}

/*
 * Check if the lexer has consumed the whole source string, i.e.
 * its byte offset has reached the length of the string.
 */
static bool
lexer_is_eof(const struct lexer *lexer)
{
	if (lexer->offset != lexer->src_len)
		return false;
	return true;
}

/* Read a single symbol from a string starting from an offset. */
static int
box_lua_find(lua_State *L, const char *name, const char *name_end)
lexer_read_symbol(struct lexer *lexer, UChar32 *out)
{
int index = LUA_GLOBALSINDEX;
int objstack = 0, top = lua_gettop(L);
const char *start = name, *end;

while ((end = (const char *) memchr(start, '.', name_end - start))) {
lua_checkstack(L, 3);
lua_pushlstring(L, start, end - start);
lua_gettable(L, index);
if (! lua_istable(L, -1)) {
diag_set(ClientError, ER_NO_SUCH_PROC,
name_end - name, name);
return -1;
}
start = end + 1; /* next piece of a.b.c */
index = lua_gettop(L); /* top of the stack */
if (lexer_is_eof(lexer)) {
*out = U_SENTINEL;
return -1;
}
U8_NEXT(lexer->src, lexer->offset, lexer->src_len, *out);
if (*out == U_SENTINEL)
return -1;
++lexer->symbol_count;
return 0;
}

/* box.something:method */
if ((end = (const char *) memchr(start, ':', name_end - start))) {
lua_checkstack(L, 3);
lua_pushlstring(L, start, end - start);
lua_gettable(L, index);
if (! (lua_istable(L, -1) ||
lua_islightuserdata(L, -1) || lua_isuserdata(L, -1) )) {
diag_set(ClientError, ER_NO_SUCH_PROC,
name_end - name, name);
/*
 * Undo reading of one symbol: the symbol counter goes one step back
 * and the byte offset is restored to `offset`, which the caller has
 * to remember before reading the symbol being reverted.
 */
static void
lexer_revert_symbol(struct lexer *lexer, size_t offset)
{
	lexer->symbol_count -= 1;
	lexer->offset = offset;
}

/*
 * Fast path for the case when the current symbol is known to be
 * a 1-byte char: move one byte and one symbol forward without
 * decoding anything.
 */
static inline void
lexer_skip_char(struct lexer *lexer)
{
	lexer->symbol_count += 1;
	lexer->offset += 1;
}

/*
 * Return the byte located at the current lexer offset, treating the
 * current symbol as a 1-byte char. No bounds check is done here, so
 * the lexer must not be at EOF when this is called.
 */
static char
lexer_current_char(const struct lexer *lexer)
{
	const char *cursor = lexer->src + lexer->offset;
	return *cursor;
}

/*
* Parse string identifier in quotes. Lexer stops right after the
* closing quote.
*/
static inline int
lexer_parse_string(struct lexer *lexer, const char **str, size_t *str_len,
UChar32 quote)
{
assert(lexer->offset < lexer->src_len);
assert(quote == lexer_current_char(lexer));
/* The first symbol is always char - ' or ". */
lexer_skip_char(lexer);
size_t str_offset = lexer->offset;
*str = lexer->src + str_offset;
UChar32 c;
while (lexer_read_symbol(lexer, &c) == 0) {
if (c == quote) {
*str_len = lexer->offset - str_offset - 1;
if (*str_len == 0)
return -1;
return 0;
}

start = end + 1; /* next piece of a.b.c */
index = lua_gettop(L); /* top of the stack */
objstack = index - top;
}
return -1;
}

/*
 * Parse a sequence of ASCII decimal digits into an integer until
 * a non-digit or the end of the source string is met.
 *
 * On success the lexer stops right after the last digit: both the
 * byte offset and the symbol counter are advanced by the number of
 * digits consumed (a digit is always a 1-byte symbol).
 *
 * @param lexer Lexer positioned at the first byte of the number.
 *              Must not be at EOF.
 * @param[out] num Parsed value on success, 0 on failure.
 *
 * @retval  0 Success.
 * @retval -1 The current byte is not a digit, or the number does
 *            not fit into int. The lexer position is not changed.
 */
static int
lexer_parse_integer(struct lexer *lexer, int *num)
{
	const char *end = lexer->src + lexer->src_len;
	const char *pos = lexer->src + lexer->offset;
	assert(pos < end);
	*num = 0;
	/*
	 * <ctype.h> classifiers accept only values representable as
	 * unsigned char (or EOF), while a plain char may be negative
	 * for bytes of a multi-byte UTF-8 sequence.
	 */
	if (!isdigit((unsigned char)*pos))
		return -1;
	int value = 0;
	size_t len = 0;
	do {
		int digit = *pos - '0';
		/*
		 * The name comes from the user, so the digit run may be
		 * arbitrarily long. Reject it instead of overflowing:
		 * signed overflow is undefined behavior.
		 */
		if (value > (INT_MAX - digit) / 10)
			return -1;
		value = value * 10 + digit;
		++len;
	} while (++pos < end && isdigit((unsigned char)*pos));
	*num = value;
	lexer->offset += len;
	lexer->symbol_count += len;
	return 0;
}

/*
 * Check that a symbol may be a part of an identifier written without
 * quotes in a procedure name: it has to be a valid identifier symbol
 * and must not be one of the symbols having a special meaning for
 * the procedure resolution: . [ ] " ' :
 */
static bool
is_valid_identifier_symbol(UChar32 c)
{
	if (!identifier_is_valid_symbol(c))
		return false;
	switch (c) {
	case '.':
	case '[':
	case ']':
	case '"':
	case '\'':
	case ':':
		return false;
	default:
		return true;
	}
}

lua_pushlstring(L, start, name_end - start);
lua_gettable(L, index);
if (!lua_isfunction(L, -1) && !lua_istable(L, -1)) {
/* lua_call or lua_gettable would raise a type error
* for us, but our own message is more verbose. */
diag_set(ClientError, ER_NO_SUCH_PROC,
name_end - name, name);
/*
 * Parse an identifier written without quotes.
 *
 * The first symbol is consumed unconditionally, it only has to be
 * readable. Subsequent symbols are consumed while they are valid
 * identifier symbols. The lexer stops right after the last symbol of
 * the identifier: a symbol which is not valid for an identifier is
 * reverted and left unread.
 *
 * @param lexer Lexer positioned at the first symbol of the
 *              identifier. Must not be at EOF.
 * @param[out] str Start of the identifier inside the source string.
 * @param[out] str_len Length of the identifier in bytes.
 *
 * @retval  0 Success.
 * @retval -1 The first symbol could not be read.
 */
static int
lexer_parse_identifier(struct lexer *lexer, const char **str, size_t *str_len)
{
	assert(lexer->offset < lexer->src_len);
	const size_t begin = lexer->offset;
	*str = lexer->src + begin;
	UChar32 sym;
	if (lexer_read_symbol(lexer, &sym) != 0)
		return -1;
	for (;;) {
		/* Where to get back to if the next symbol is a stop one. */
		const size_t checkpoint = lexer->offset;
		if (lexer_read_symbol(lexer, &sym) != 0)
			break;
		if (!is_valid_identifier_symbol(sym)) {
			lexer_revert_symbol(lexer, checkpoint);
			break;
		}
	}
	*str_len = lexer->offset - begin;
	return 0;
}

/* setting stack that it would contain only
* the function pointer. */
if (index != LUA_GLOBALSINDEX) {
/**
* A helper to find a Lua function by name and put it
* on top of the stack.
*/
static int
box_lua_find(lua_State *L, const char *name, const char *name_end)
{
int top = lua_gettop(L);
int idx = LUA_GLOBALSINDEX;
int objstack = 0;
struct lexer lexer;
lexer_create(&lexer, name, name_end - name);
while (!lexer_is_eof(&lexer)) {
UChar32 c;
if (lexer_read_symbol(&lexer, &c) != 0)
goto err;
switch (c) {
case '[': {
if (lexer_is_eof(&lexer))
goto err;
c = (unsigned char)lexer_current_char(&lexer);
if (c == '"' || c == '\'') {
const char *str;
size_t str_len;
if (lexer_parse_string(&lexer, &str, &str_len,
c) != 0)
goto err;
lua_pushlstring(L, str, str_len);
} else {
int num;
if (lexer_parse_integer(&lexer, &num) != 0)
goto err;
lua_pushnumber(L, num);
}
if (lexer_is_eof(&lexer) ||
lexer_current_char(&lexer) != ']')
goto err;
lexer_skip_char(&lexer);
if (!lexer_is_eof(&lexer)) {
char ch = lexer_current_char(&lexer);
if (ch != '[' && ch != '.' && ch != ':')
goto err;
}
break;
}
case '.':
case ':': {
if (lexer_is_eof(&lexer))
goto err;
const char *str;
size_t str_len;
if (lexer_parse_identifier(&lexer, &str,
&str_len) != 0 ||
(c == ':' && !lexer_is_eof(&lexer)))
goto err;
lua_pushlstring(L, str, str_len);
break;
}
default: {
lexer_revert_symbol(&lexer, 0);
const char *str;
size_t str_len;
if (lexer_parse_identifier(&lexer, &str, &str_len) != 0)
goto err;
lua_pushlstring(L, str, str_len);
}
}
lua_gettable(L, idx);
if (lexer_is_eof(&lexer)) {
if (!lua_isfunction(L, -1) && !lua_istable(L, -1))
goto err;
if (c == ':')
objstack = idx - top;
break;
} else if (!lua_istable(L, -1) &&
(c != ':' ||
(!lua_islightuserdata(L, -1) &&
!lua_isuserdata(L, -1)))) {
goto err;
}
idx = lua_gettop(L);
}
if (idx != LUA_GLOBALSINDEX) {
if (objstack == 0) { /* no object, only a function */
lua_replace(L, top + 1);
lua_pop(L, lua_gettop(L) - top - 1);
Expand All @@ -161,6 +341,13 @@ box_lua_find(lua_State *L, const char *name, const char *name_end)
}
}
return 1 + objstack;
err:
/*
* lua_call or lua_gettable would raise a type error
* for us, but our own message is more verbose.
*/
diag_set(ClientError, ER_NO_SUCH_PROC, name_end - name, name);
return -1;
}

/**
Expand Down

0 comments on commit eed41df

Please sign in to comment.