From b03cb41a3155623ba15bbde582e46f1f55864f70 Mon Sep 17 00:00:00 2001 From: Liviu Chircu Date: Thu, 14 Jan 2021 14:23:41 +0200 Subject: [PATCH] Export is_username_char() to core and reuse it Also remove the duplicate IS_ALPHANUM() module macro while at it. Work in progress for #2367 --- modules/sipmsgops/sipmsgops.c | 145 +--------------------------------- parser/parse_uri.h | 137 ++++++++++++++++++++++++++++++++ parser/test/test_parser.c | 18 +++-- strcommon.c | 57 +++++-------- 4 files changed, 171 insertions(+), 186 deletions(-) diff --git a/modules/sipmsgops/sipmsgops.c b/modules/sipmsgops/sipmsgops.c index f8ff40a2483..d8659711887 100644 --- a/modules/sipmsgops/sipmsgops.c +++ b/modules/sipmsgops/sipmsgops.c @@ -1370,137 +1370,6 @@ static int sip_validate_hdrs(struct sip_msg *msg) } -static char _is_username_char[128] = { - 0 /* 0 NUL */, - 0 /* 1 SOH */, - 0 /* 2 STX */, - 0 /* 3 ETX */, - 0 /* 4 EOT */, - 0 /* 5 ENQ */, - 0 /* 6 ACK */, - 0 /* 7 BEL */, - 0 /* 8 BS */, - 0 /* 9 HT */, - 0 /* 10 LF */, - 0 /* 11 VT */, - 0 /* 12 FF */, - 0 /* 13 CR */, - 0 /* 14 SO */, - 0 /* 15 SI */, - 0 /* 16 DLE */, - 0 /* 17 DC1 */, - 0 /* 18 DC2 */, - 0 /* 19 DC3 */, - 0 /* 20 DC4 */, - 0 /* 21 NAK */, - 0 /* 22 SYN */, - 0 /* 23 ETB */, - 0 /* 24 CAN */, - 0 /* 25 EM */, - 0 /* 26 SUB */, - 0 /* 27 ESC */, - 0 /* 28 FS */, - 0 /* 29 GS */, - 0 /* 30 RS */, - 0 /* 31 US */, - 0 /* 32 */, - 1 /* 33 ! */, - 0 /* 34 " */, - 0 /* 35 # */, - 1 /* 36 $ */, - 0 /* 37 % */, - 1 /* 38 & */, - 1 /* 39 ' */, - 1 /* 40 ( */, - 1 /* 41 ) */, - 1 /* 42 * */, - 1 /* 43 + */, - 1 /* 44 , */, - 1 /* 45 - */, - 1 /* 46 . */, - 1 /* 47 / */, - 1 /* 48 0 */, - 1 /* 49 1 */, - 1 /* 50 2 */, - 1 /* 51 3 */, - 1 /* 52 4 */, - 1 /* 53 5 */, - 1 /* 54 6 */, - 1 /* 55 7 */, - 1 /* 56 8 */, - 1 /* 57 9 */, - 0 /* 58 : */, - 1 /* 59 ; */, - 0 /* 60 < */, - 1 /* 61 = */, - 0 /* 62 > */, - 1 /* 63 ? 
*/, - 0 /* 64 @ */, - 1 /* 65 A */, - 1 /* 66 B */, - 1 /* 67 C */, - 1 /* 68 D */, - 1 /* 69 E */, - 1 /* 70 F */, - 1 /* 71 G */, - 1 /* 72 H */, - 1 /* 73 I */, - 1 /* 74 J */, - 1 /* 75 K */, - 1 /* 76 L */, - 1 /* 77 M */, - 1 /* 78 N */, - 1 /* 79 O */, - 1 /* 80 P */, - 1 /* 81 Q */, - 1 /* 82 R */, - 1 /* 83 S */, - 1 /* 84 T */, - 1 /* 85 U */, - 1 /* 86 V */, - 1 /* 87 W */, - 1 /* 88 X */, - 1 /* 89 Y */, - 1 /* 90 Z */, - 0 /* 91 [ */, - 0 /* 92 \ */, - 0 /* 93 ] */, - 0 /* 94 ^ */, - 1 /* 95 _ */, - 0 /* 96 ` */, - 1 /* 97 a */, - 1 /* 98 b */, - 1 /* 99 c */, - 1 /* 100 d */, - 1 /* 101 e */, - 1 /* 102 f */, - 1 /* 103 g */, - 1 /* 104 h */, - 1 /* 105 i */, - 1 /* 106 j */, - 1 /* 107 k */, - 1 /* 108 l */, - 1 /* 109 m */, - 1 /* 110 n */, - 1 /* 111 o */, - 1 /* 112 p */, - 1 /* 113 q */, - 1 /* 114 r */, - 1 /* 115 s */, - 1 /* 116 t */, - 1 /* 117 u */, - 1 /* 118 v */, - 1 /* 119 w */, - 1 /* 120 x */, - 1 /* 121 y */, - 1 /* 122 z */, - 0 /* 123 { */, - 0 /* 124 | */, - 0 /* 125 } */, - 1 /* 126 ~ */, - 0 /* 127 DEL */ -}; - static inline int check_username(const str *username) { char *p, *end, c; @@ -1515,7 +1384,7 @@ static inline int check_username(const str *username) if ((p + 3) > end || !_isxdigit(*(p + 1)) || !_isxdigit(*(p + 2))) goto err; p += 2; - } else if (!_is_username_char[(int)c]) { + } else if (!is_username_char(c)) { goto err; } } @@ -1529,11 +1398,6 @@ static inline int check_username(const str *username) } -#define IS_ALPHANUM(_c) ( \ - ((_c) >= 'a' && (_c) <= 'z') || \ - ((_c) >= 'A' && (_c) <= 'Z') || \ - ((_c) >= '0' && (_c) <= '9') ) - static int check_hostname(str *domain) { char *p, *end; @@ -1544,7 +1408,7 @@ static int check_hostname(str *domain) } /* always starts with a ALPHANUM */ - if (!IS_ALPHANUM(domain->s[0]) && (domain->s[0] != '[')) { + if (!_isalnum(domain->s[0]) && (domain->s[0] != '[')) { LM_DBG("invalid starting character in domain: %c[%d]\n", domain->s[0], domain->s[0]); return -1; @@ -1554,7 +1418,7 
@@ static int check_hostname(str *domain) end = domain->s + domain->len - 1; for (p = domain->s + 1; p < end; p++) { - if (!IS_ALPHANUM(*p) && (*p != '-') && (*p != ':')) { + if (!_isalnum(*p) && (*p != '-') && (*p != ':')) { if (*p != '.') { LM_DBG("invalid character in hostname: %c[%d]\n", *p, *p); return -1; @@ -1566,7 +1430,7 @@ static int check_hostname(str *domain) } /* check if the last character is a '-' */ - if (!IS_ALPHANUM(*end) && (*end != '.') && (*end != ']')) { + if (!_isalnum(*end) && (*end != '.') && (*end != ']')) { LM_DBG("invalid character at the end of the domain: %c[%d]\n", *end, *end); return -1; @@ -1575,7 +1439,6 @@ static int check_hostname(str *domain) } -#undef IS_ALPHANUM #define CHECK_HEADER(_m, _h) \ do { \ diff --git a/parser/parse_uri.h b/parser/parse_uri.h index 4efc6647214..51ca41aaaac 100644 --- a/parser/parse_uri.h +++ b/parser/parse_uri.h @@ -74,6 +74,9 @@ static inline int get_uri_param_val(const struct sip_uri *uri, static inline int get_uri_param_idx(const str *param, const struct sip_uri *parsed_uri); +/* Test whether a given char is accepted "as is" within a SIP URI 'username' */ +static inline int is_username_char(char c); + char * uri_type2str(const uri_type type, char *result); int uri_typestrlen(const uri_type type); uri_type str2uri_type(char * buf); @@ -230,4 +233,138 @@ static inline int get_uri_param_idx(const str *param, return -1; } +static inline int is_username_char(char c) +{ + return (int[]){ + 0 /* 0 NUL */, + 0 /* 1 SOH */, + 0 /* 2 STX */, + 0 /* 3 ETX */, + 0 /* 4 EOT */, + 0 /* 5 ENQ */, + 0 /* 6 ACK */, + 0 /* 7 BEL */, + 0 /* 8 BS */, + 0 /* 9 HT */, + 0 /* 10 LF */, + 0 /* 11 VT */, + 0 /* 12 FF */, + 0 /* 13 CR */, + 0 /* 14 SO */, + 0 /* 15 SI */, + 0 /* 16 DLE */, + 0 /* 17 DC1 */, + 0 /* 18 DC2 */, + 0 /* 19 DC3 */, + 0 /* 20 DC4 */, + 0 /* 21 NAK */, + 0 /* 22 SYN */, + 0 /* 23 ETB */, + 0 /* 24 CAN */, + 0 /* 25 EM */, + 0 /* 26 SUB */, + 0 /* 27 ESC */, + 0 /* 28 FS */, + 0 /* 29 GS */, + 0 
/* 30 RS */, + 0 /* 31 US */, + 0 /* 32 */, + 1 /* 33 ! */, + 0 /* 34 " */, + 0 /* 35 # */, + 1 /* 36 $ */, + 0 /* 37 % */, + 1 /* 38 & */, + 1 /* 39 ' */, + 1 /* 40 ( */, + 1 /* 41 ) */, + 1 /* 42 * */, + 1 /* 43 + */, + 1 /* 44 , */, + 1 /* 45 - */, + 1 /* 46 . */, + 1 /* 47 / */, + 1 /* 48 0 */, + 1 /* 49 1 */, + 1 /* 50 2 */, + 1 /* 51 3 */, + 1 /* 52 4 */, + 1 /* 53 5 */, + 1 /* 54 6 */, + 1 /* 55 7 */, + 1 /* 56 8 */, + 1 /* 57 9 */, + 0 /* 58 : */, + 1 /* 59 ; */, + 0 /* 60 < */, + 1 /* 61 = */, + 0 /* 62 > */, + 1 /* 63 ? */, + 0 /* 64 @ */, + 1 /* 65 A */, + 1 /* 66 B */, + 1 /* 67 C */, + 1 /* 68 D */, + 1 /* 69 E */, + 1 /* 70 F */, + 1 /* 71 G */, + 1 /* 72 H */, + 1 /* 73 I */, + 1 /* 74 J */, + 1 /* 75 K */, + 1 /* 76 L */, + 1 /* 77 M */, + 1 /* 78 N */, + 1 /* 79 O */, + 1 /* 80 P */, + 1 /* 81 Q */, + 1 /* 82 R */, + 1 /* 83 S */, + 1 /* 84 T */, + 1 /* 85 U */, + 1 /* 86 V */, + 1 /* 87 W */, + 1 /* 88 X */, + 1 /* 89 Y */, + 1 /* 90 Z */, + 0 /* 91 [ */, + 0 /* 92 \ */, + 0 /* 93 ] */, + 0 /* 94 ^ */, + 1 /* 95 _ */, + 0 /* 96 ` */, + 1 /* 97 a */, + 1 /* 98 b */, + 1 /* 99 c */, + 1 /* 100 d */, + 1 /* 101 e */, + 1 /* 102 f */, + 1 /* 103 g */, + 1 /* 104 h */, + 1 /* 105 i */, + 1 /* 106 j */, + 1 /* 107 k */, + 1 /* 108 l */, + 1 /* 109 m */, + 1 /* 110 n */, + 1 /* 111 o */, + 1 /* 112 p */, + 1 /* 113 q */, + 1 /* 114 r */, + 1 /* 115 s */, + 1 /* 116 t */, + 1 /* 117 u */, + 1 /* 118 v */, + 1 /* 119 w */, + 1 /* 120 x */, + 1 /* 121 y */, + 1 /* 122 z */, + 0 /* 123 { */, + 0 /* 124 | */, + 0 /* 125 } */, + 1 /* 126 ~ */, + 0 /* 127 DEL */ + }[(unsigned char)c < 128 ? (int)c : 0]; /* bytes >= 0x80 use slot 0 (rejected) instead of indexing outside the 128-entry table */ +} + #endif /* PARSE_URI_H */ diff --git a/parser/test/test_parser.c b/parser/test/test_parser.c index d0c3dcf472b..b16d9d1847b 100644 --- a/parser/test/test_parser.c +++ b/parser/test/test_parser.c @@ -33,13 +33,19 @@ void test_parse_uri(void) str in; /* Basic URI parsing tests */ - in = *_str("sip:@atlanta.org"); - ok(parse_uri(in.s, in.len, &u) < 0, "puri-0"); - in = 
*_str("sip:atlanta.org"); - ok(parse_uri(in.s, in.len, &u) == 0, "puri-0.1"); - ok(!u.user.s, "puri-0.2"); - ok(u.user.len == 0, "puri-0.3"); + ok(parse_uri(STR_L("sip:@atlanta.org"), &u) < 0, "puri-0"); + + /* Notice how illegal user chars are allowed in these two tests! + * This is by design, since "quick parsing" != "full RFC syntax validation" + */ + ok(parse_uri(STR_L("sip:%@atlanta.org"), &u) == 0, "puri-0.1"); + ok(parse_uri(STR_L("sip:%4`@atlanta.org"), &u) == 0, "puri-0.2"); + + ok(parse_uri(STR_L("sip:%40@atlanta.org"), &u) == 0, "puri-0.3"); + ok(parse_uri(STR_L("sip:atlanta.org"), &u) == 0, "puri-0.4"); + ok(!u.user.s, "puri-0.5"); + ok(u.user.len == 0, "puri-0.6"); in = *_str("sip:alice@atlanta.org;user=phone"); ok(parse_uri(in.s, in.len, &u) == 0, "puri-1"); diff --git a/strcommon.c b/strcommon.c index d35e65dedb2..2f9e39c6f2f 100644 --- a/strcommon.c +++ b/strcommon.c @@ -24,6 +24,8 @@ * \brief Generic string handling functions */ +#include "parser/parse_uri.h" + #include "ut.h" #include "strcommon.h" @@ -280,49 +282,26 @@ int escape_user(str *sin, str *sout) LM_ERR("invalid escaped character <%u>\n", (unsigned int)*p); return -1; } - if (isalnum((int)*p)) - { + + if (is_username_char(*p)) { *at = *p; } else { - switch (*p) { - /* unreserved chars */ - case '-': - case '_': - case '.': - case '!': - case '~': - case '*': - case '\'': - case '(': - case ')': - /* user unreserved chars */ - case '&': - case '=': - case '+': - case '$': - case ',': - case ';': - case '?': - case '/': - *at = *p; - break; - default: - *at++ = '%'; - x = (*p) >> 4; - if (x < 10) - { - *at++ = x + '0'; - } else { - *at++ = x - 10 + 'a'; - } - x = (*p) & 0x0f; - if (x < 10) { - *at = x + '0'; - } else { - *at = x - 10 + 'a'; - } + *at++ = '%'; + x = (*p) >> 4; + if (x < 10) + { + *at++ = x + '0'; + } else { + *at++ = x - 10 + 'a'; + } + x = (*p) & 0x0f; + if (x < 10) { + *at = x + '0'; + } else { + *at = x - 10 + 'a'; } } + at++; p++; }