Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions include/sql_parser/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,17 @@ class Tokenizer {
const char* start = cursor_;
while (cursor_ < end_) {
char c = *cursor_;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') || c == '_') {
bool is_cont = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') || c == '_';
// PostgreSQL allows `$` as an identifier continuation char (but
// not as the first char, which is enforced because $ at start
// is handled by the $$ / $N branches in next_token_impl()).
// e.g. `SET search_path = schema$1` — `schema$1` is a single
// identifier, not `schema` followed by the placeholder `$1`.
if (!is_cont && D == Dialect::PostgreSQL && c == '$' && cursor_ > start) {
is_cont = true;
}
if (is_cont) {
++cursor_;
} else {
break;
Expand Down
48 changes: 48 additions & 0 deletions tests/test_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,54 @@ TEST(PgSQLSetP2, NumericPlaceholderStillOk) {
EXPECT_NE(r.status, ParseResult::ERROR);
}

// PostgreSQL identifiers can contain $ after the first character (per
// https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS).
// Earlier versions of the tokenizer stopped at $, so `schema$1` parsed as
// the identifier `schema` followed by the placeholder `$1`, which was then
// truncated/rejected downstream.
TEST(PgSQLSetP2, DollarInUnquotedIdentIsContinuation) {
    // The right-hand side contains a `$` followed by a digit; under the
    // PostgreSQL dialect it must come back as one value, not be split.
    const char* query = "SET search_path = schema$1";
    Parser<Dialect::PostgreSQL> pg_parser;
    auto result = pg_parser.parse(query, strlen(query));
    EXPECT_EQ(result.status, ParseResult::OK);

    // ASSERT (not EXPECT) so the dereferences below never touch a null node.
    const AstNode* value_node = first_value(result.ast);
    ASSERT_NE(value_node, nullptr);

    // The node's text is a (pointer, length) pair; copy it out to compare.
    const std::string value_text(value_node->value_ptr, value_node->value_len);
    EXPECT_EQ(value_text, "schema$1");
}

TEST(PgSQLSetP2, DollarInMiddleOfIdent) {
    // Several `$` characters mixed with letters, a digit and `_`: the whole
    // run must be reported as a single value under the PostgreSQL dialect.
    const char* query = "SET search_path = my$schema$2_name";
    Parser<Dialect::PostgreSQL> pg_parser;
    auto result = pg_parser.parse(query, strlen(query));
    EXPECT_EQ(result.status, ParseResult::OK);

    // ASSERT (not EXPECT) so the dereferences below never touch a null node.
    const AstNode* value_node = first_value(result.ast);
    ASSERT_NE(value_node, nullptr);

    // The node's text is a (pointer, length) pair; copy it out to compare.
    const std::string value_text(value_node->value_ptr, value_node->value_len);
    EXPECT_EQ(value_text, "my$schema$2_name");
}

TEST(PgSQLSetP2, DollarAtStartStillEmitsError) {
    // A leading `$` followed by a non-digit (`$word`) is reserved and has to
    // keep producing TK_ERROR; `$` only counts as a continuation character
    // when it appears after the first character of an identifier.
    const char* query = "SET search_path = $bareword";
    Parser<Dialect::PostgreSQL> pg_parser;
    auto result = pg_parser.parse(query, strlen(query));
    EXPECT_EQ(result.status, ParseResult::ERROR);
}

// The MySQL dialect of this parser still ends an unquoted identifier at $
// (even though MySQL Server itself permits $ in unquoted identifiers) — the
// PG-only continuation rule must not leak into MySQL parsing.
TEST(MySQLSet, DollarStillBreaksUnquotedIdent) {
    const char* query = "SET schema$1 = 1";
    Parser<Dialect::MySQL> mysql_parser;
    auto result = mysql_parser.parse(query, strlen(query));

    // Under the MySQL dialect `$` is expected to stop the identifier, so the
    // parse either fails or yields a partial result. The status is therefore
    // deliberately not checked; only the value text is constrained.
    const AstNode* value_node = first_value(result.ast);
    if (value_node == nullptr) {
        // No value node was produced, so there is nothing further to verify.
        return;
    }

    // If a value node exists, it must not be `schema$1` as a single token.
    const std::string value_text(value_node->value_ptr, value_node->value_len);
    EXPECT_NE(value_text, "schema$1");
}

// ============================================================================
// Post-1.0.4 audit follow-ups: PG non-GUC SET forms and value-preservation.
// ============================================================================
Expand Down
Loading