From 447c059babb4c1676618ea328046a6ee65dad466 Mon Sep 17 00:00:00 2001 From: MisakaVan <2102315149@qq.com> Date: Fri, 13 Dec 2024 23:24:01 +0800 Subject: [PATCH 1/3] feat(parser): implement identifier registration and conflict checking --- readme.md | 2 +- src/lang.c | 81 +++++++++++++++++++++++++++++++++++++++ src/lib.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.h | 47 +++++++++++++++++++++++ src/main.c | 6 +++ 5 files changed, 245 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 4559907..f362dbc 100644 --- a/readme.md +++ b/readme.md @@ -9,7 +9,7 @@ - [x] 有三个移入/规约冲突(关于 `annon-right-type` ) 这些冲突是因为这个标注类型可能为空导致的。 - [ ] 利用 `lib.h` 给的哈希表存放一些类型前置定义 - - [ ] 检查在全局变量/Struct/Union/Enum/Typedef声明中出现的标识符是否已经在全局出现过 + - [x] 检查在全局变量/Struct/Union/Enum/Typedef声明中出现的标识符是否已经在全局出现过 - [ ] 检查Struct/Union/Enum/Typedef被使用时是否已经有前置定义 - [ ] 检查Struct/Union的字段内是否有变量名重复 - [ ] 完善build、test、使用文档 diff --git a/src/lang.c b/src/lang.c index 438b362..d23c748 100644 --- a/src/lang.c +++ b/src/lang.c @@ -1,4 +1,5 @@ #include "lang.h" +#include "lib.h" #include #include #include @@ -78,6 +79,9 @@ struct enum_ele_list* TECons(char* name, struct enum_ele_list* next) { struct enum_ele_list* res = new_enum_ele_list_ptr(); res->name = name; res->next = next; + + register_identifier_enumerator(name); + return res; } @@ -85,6 +89,9 @@ struct left_type* TStructType(char* name) { struct left_type* res = new_left_type_ptr(); res->t = T_STRUCT_TYPE; res->d.STRUCT_TYPE.name = name; + + // TODO: check if the struct name is already declared + return res; } @@ -93,6 +100,8 @@ struct left_type* TNewStructType(char* name, struct type_list* fld) { res->t = T_NEW_STRUCT_TYPE; res->d.NEW_STRUCT_TYPE.name = name; res->d.NEW_STRUCT_TYPE.fld = fld; + + register_identifier_struct(name); return res; } @@ -100,6 +109,9 @@ struct left_type* TUnionType(char* name) { struct left_type* res = new_left_type_ptr(); res->t = T_UNION_TYPE; res->d.UNION_TYPE.name = name; + + // TODO: check if the union name is already declared + return res; } @@ -108,6 +120,9 @@ struct left_type* TNewUnionType(char* name, struct type_list* fld) { res->t = T_NEW_UNION_TYPE; res->d.NEW_UNION_TYPE.name = name; res->d.NEW_UNION_TYPE.fld = fld; + + register_identifier_union(name); + return res; } @@ -115,6 +130,9 @@ struct left_type* TEnumType(char* name) { struct left_type* res = new_left_type_ptr(); res->t = T_ENUM_TYPE; res->d.ENUM_TYPE.name = name; + + // TODO: check if the enum name is already declared + return res; } @@ -123,6 +141,9 @@ struct left_type* TNewEnumType(char* name, struct enum_ele_list* ele) { res->t = T_NEW_ENUM_TYPE; res->d.NEW_ENUM_TYPE.name = name; res->d.NEW_ENUM_TYPE.ele = ele; + + register_identifier_enum(name); + return res; } @@ -142,6 +163,9 @@ struct left_type* TDefinedType(char* name) { struct left_type* res = new_left_type_ptr(); res->t = T_DEFINED_TYPE; res->d.DEFINED_TYPE.name = name; + + // TODO: check if the defined type name is already declared + return res; } @@ -182,6 +206,9 @@ struct glob_item* TStructDef(char* name, struct type_list* fld) { res->t = T_STRUCT_DEF; res->d.STRUCT_DEF.name = name; res->d.STRUCT_DEF.fld = fld; + + register_identifier_struct(name); + return res; } @@ -189,6 +216,9 @@ struct glob_item* TStructDecl(char* name) { struct glob_item* res = new_glob_item_ptr(); res->t = T_STRUCT_DECL; res->d.STRUCT_DECL.name = name; + + register_identifier_struct(name); + return res; } @@ -197,6 +227,9 @@ struct glob_item* TUnionDef(char* name, struct type_list* fld) { res->t = T_UNION_DEF; res->d.UNION_DEF.name = name; res->d.UNION_DEF.fld = fld; + + register_identifier_union(name); + return res; } @@ -204,6 +237,9 @@ struct glob_item* TUnionDecl(char* name) { struct glob_item* res = new_glob_item_ptr(); res->t = T_UNION_DECL; res->d.UNION_DECL.name = name; + + register_identifier_union(name); + return res; } @@ -212,6 +248,9 @@ struct glob_item* TEnumDef(char* name, struct enum_ele_list* ele) { res->t = T_ENUM_DEF; res->d.ENUM_DEF.name = name; res->d.ENUM_DEF.ele = ele; + + register_identifier_enum(name); + return res; } @@ -219,6 +258,9 @@ struct glob_item* TEnumDecl(char* name) { struct glob_item* res = new_glob_item_ptr(); res->t = T_ENUM_DECL; res->d.ENUM_DECL.name = name; + + register_identifier_enum(name); + return res; } @@ -227,6 +269,26 @@ struct glob_item* TTypeDef(struct left_type* t, struct var_decl_expr* e) { res->t = T_TYPE_DEF; res->d.TYPE_DEF.t = t; res->d.TYPE_DEF.e = e; + + // get the core type name and register it + struct var_decl_expr* ptr = e; + while (ptr->t != T_ORIG_TYPE) { + switch (ptr->t) { + case T_PTR_TYPE: + ptr = ptr->d.PTR_TYPE.base; + break; + case T_ARRAY_TYPE: + ptr = ptr->d.ARRAY_TYPE.base; + break; + case T_FUNC_TYPE: + ptr = ptr->d.FUNC_TYPE.ret; + break; + case T_ORIG_TYPE: + break; + } + } + register_identifier_typedef(ptr->d.ORIG_TYPE.name); + return res; } @@ -235,6 +297,25 @@ struct glob_item* TVarDef(struct left_type* t, struct var_decl_expr* e) { res->t = T_VAR_DEF; res->d.VAR_DEF.t = t; res->d.VAR_DEF.e = e; + + // get the core type name and register it + struct var_decl_expr* ptr = e; + while (ptr->t != T_ORIG_TYPE) { + switch (ptr->t) { + case T_PTR_TYPE: + ptr = ptr->d.PTR_TYPE.base; + break; + case T_ARRAY_TYPE: + ptr = ptr->d.ARRAY_TYPE.base; + break; + case T_FUNC_TYPE: + ptr = ptr->d.FUNC_TYPE.ret; + break; + case T_ORIG_TYPE: + break; + } + } + register_identifier_variable(ptr->d.ORIG_TYPE.name); return res; } diff --git a/src/lib.c b/src/lib.c index 1df5755..ea6a0d8 100644 --- a/src/lib.c +++ b/src/lib.c @@ -107,3 +107,113 @@ void SLL_hash_delete(struct SLL_hash_table* t, char* key) { d = &((*d)->tail); } } + +struct SLL_hash_table* identifier_table; + +struct IdentifierInfo* init_identifier_info() { + struct IdentifierInfo* res = + (struct IdentifierInfo*)malloc(sizeof(struct IdentifierInfo)); + if (res == NULL) { + printf("Failure in malloc.\n"); + exit(0); + } + res->flags = 0; + for (int i = 0; i < IDENT_TYPE_COUNT; i++) { + res->lineno[i] = -1; + } + return res; +} + + +/* +struct, union, enum names should not overlap with each other. + +*/ + +int conflict_identifier_register_lut[6][6] = { + {1, 1, 0, 0, 0, 1}, // variable + {1, 1, 0, 0, 0, 1}, // enumerator + {0, 0, 1, 1, 1, 0}, // struct + {0, 0, 1, 1, 1, 0}, // union + {0, 0, 1, 1, 1, 0}, // enum + {1, 1, 0, 0, 0, 1}, // typedef +}; + +char* identifier_type_str[6] = { + "variable", + "enumerator", + "struct", + "union", + "enum", + "typedef", +}; + +// use yylineno to record the line number of the first registration +extern int yylineno; + +void register_identifier(char* name, enum IdentifierType type) { + pdebug("Line %d:\n", yylineno); + pdebug("Registering identifier %s as %s\n", name, identifier_type_str[type]); + + if (name==NULL){ + pdebug("Identifier name is NULL (Anonymous), skip registering\n"); + return; + } + + struct IdentifierInfo *info = NULL; + + info = (struct IdentifierInfo*)SLL_hash_get(identifier_table, name); + if ((long long)info == NONE) { + pdebug("Identifier %s is not in the hashtable\n", name); + info = init_identifier_info(); + SLL_hash_set(identifier_table, name, (long long)info); + } + else{ + pdebug("Identifier %s is in the hashtable\n", name); + } + + pdebug("Identifier %s flags: %lld\n", name, info->flags); + + int conflict = 0; + // check if the identifier is already registered. + for (int i = 0; i < IDENT_TYPE_COUNT; i++) { + if (conflict_identifier_register_lut[type][i] == 0){ + continue; + } + if (info->flags & (1 << i)) { + // identifier is already registered as a conflicting type. + printf("Warning: (Line %d) Identifier %s is already registered as %s at line %d\n", yylineno, name, identifier_type_str[i], info->lineno[i]); + conflict = 1; + } + } + + if (!conflict) { + pdebug("No conflict. Now register %s as %s\n", name, identifier_type_str[type]); + info->flags |= (1 << type); + info->lineno[type] = yylineno; // the line number of the first registration + } +} + +void register_identifier_variable(char* name) { + register_identifier(name, IDENT_TYPE_VARIABLE); +} + +void register_identifier_enumerator(char* name) { + register_identifier(name, IDENT_TYPE_ENUMERATOR); +} + +void register_identifier_struct(char* name) { + register_identifier(name, IDENT_TYPE_STRUCT); +} + +void register_identifier_union(char* name) { + register_identifier(name, IDENT_TYPE_UNION); +} + +void register_identifier_enum(char* name) { + register_identifier(name, IDENT_TYPE_ENUM); +} + +void register_identifier_typedef(char* name) { + register_identifier(name, IDENT_TYPE_TYPEDEF); +} diff --git a/src/lib.h b/src/lib.h index 90ec4fa..fe53a84 100644 --- a/src/lib.h +++ b/src/lib.h @@ -3,6 +3,14 @@ #define NONE 4294967295 +#define VERBOSE 1 + +#ifdef VERBOSE + #define pdebug(fmt, ...) printf("[DEBUG] " fmt, ##__VA_ARGS__) +#else + #define pdebug(fmt, ...) +#endif + // clang-format off unsigned int build_nat(char * c, int len); char * new_str(char * str, int len); @@ -13,4 +21,43 @@ void SLL_hash_set(struct SLL_hash_table * t, char * key, long long value); void SLL_hash_delete(struct SLL_hash_table * t, char * key); // clang-format on + +enum IdentifierType { + IDENT_TYPE_VARIABLE = 0, + IDENT_TYPE_ENUMERATOR, + IDENT_TYPE_STRUCT, + IDENT_TYPE_UNION, + IDENT_TYPE_ENUM, + IDENT_TYPE_TYPEDEF, +}; + +#define IDENT_TYPE_COUNT 6 + +struct IdentifierInfo { + long long flags; + int lineno[IDENT_TYPE_COUNT]; +}; + +// #define IDENT_TYPE_VARIABLE 0 +// #define IDENT_TYPE_ENUMERATOR 1 +// #define IDENT_TYPE_STRUCT 2 +// #define IDENT_TYPE_UNION 3 +// #define IDENT_TYPE_ENUM 4 +// #define IDENT_TYPE_TYPEDEF 5 + + + + +struct IdentifierInfo *init_identifier_info(); + +void register_identifier(char *name, enum IdentifierType type); + +void register_identifier_variable(char *name); +void register_identifier_enumerator(char *name); +void register_identifier_struct(char *name); +void register_identifier_union(char *name); +void register_identifier_enum(char *name); +void register_identifier_typedef(char *name); + + #endif diff --git a/src/main.c b/src/main.c index d18b34c..5f7684a 100644 --- a/src/main.c +++ b/src/main.c @@ -2,6 +2,7 @@ #include "lexer.h" #include "parser.h" #include "astprint.h" +#include "lib.h" #include // use the new printer @@ -9,6 +10,7 @@ // clang-format off extern struct glob_item_list *root; +extern struct SLL_hash_table *identifier_table; int yyparse(); // clang-format on @@ -21,6 +23,10 @@ int main(int argc, char** argv) { printf("Error, too many arguments!\n"); return 0; } + + // initialize the identifier table + identifier_table = init_SLL_hash(); + yyin = fopen(argv[1], "rb"); if (yyin == NULL) { printf("File %s can't be opened.\n", argv[1]); From 96e23570f245d8f8a7187a5ae54ff66c0e5bbcfe Mon Sep 17 00:00:00 2001 From: MisakaVan <2102315149@qq.com> Date: Sat, 14 Dec 2024 00:07:31 +0800 Subject: [PATCH 2/3] feat(parser): add identifier predeclared checking for structs, unions, enums, and typedefs --- readme.md | 2 +- src/lang.c | 8 ++++---- src/lib.c | 40 ++++++++++++++++++++++++++++++++++++++++ src/lib.h | 6 ++++++ 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/readme.md b/readme.md index f362dbc..c6f2905 100644 --- a/readme.md +++ b/readme.md @@ -10,7 +10,7 @@ - [x] 有三个移入/规约冲突(关于 `annon-right-type` ) 这些冲突是因为这个标注类型可能为空导致的。 - [ ] 利用 `lib.h` 给的哈希表存放一些类型前置定义 - [x] 检查在全局变量/Struct/Union/Enum/Typedef声明中出现的标识符是否已经在全局出现过 - - [ ] 检查Struct/Union/Enum/Typedef被使用时是否已经有前置定义 + - [x] 检查Struct/Union/Enum/Typedef被使用时是否已经有前置定义 - [ ] 检查Struct/Union的字段内是否有变量名重复 - [ ] 完善build、test、使用文档 diff --git a/src/lang.c b/src/lang.c index d23c748..38454b0 100644 --- a/src/lang.c +++ b/src/lang.c @@ -90,7 +90,7 @@ struct left_type* TStructType(char* name) { res->t = T_STRUCT_TYPE; res->d.STRUCT_TYPE.name = name; - // TODO: check if the struct name is already declared + check_identifier_struct(name); return res; } @@ -110,7 +110,7 @@ struct left_type* TUnionType(char* name) { res->t = T_UNION_TYPE; res->d.UNION_TYPE.name = name; - // TODO: check if the union name is already declared + check_identifier_union(name); return res; } @@ -131,7 +131,7 @@ struct left_type* TEnumType(char* name) { res->t = T_ENUM_TYPE; res->d.ENUM_TYPE.name = name; - // TODO: check if the enum name is already declared + check_identifier_enum(name); return res; } @@ -164,7 +164,7 @@ struct left_type* TDefinedType(char* name) { res->t = T_DEFINED_TYPE; res->d.DEFINED_TYPE.name = name; - // TODO: check if the defined type name is already declared + check_identifier_typedef(name); return res; } diff --git a/src/lib.c b/src/lib.c index ea6a0d8..74b0e8e 100644 --- a/src/lib.c +++ b/src/lib.c @@ -217,3 +217,43 @@ void register_identifier_enum(char* name) { void register_identifier_typedef(char* name) { register_identifier(name, IDENT_TYPE_TYPEDEF); } + +void check_identifier(char *name, enum IdentifierType using_type){ + pdebug("Line %d:\n", yylineno); + struct IdentifierInfo *info = NULL; + info = (struct IdentifierInfo*)SLL_hash_get(identifier_table, name); + if ((long long)info == NONE) { + printf("Warning: (Line %d) Identifier %s has never been registered\n", yylineno, name); + return; + } + pdebug("Identifier %s flags: %lld\n", name, info->flags); + + if (!(info->flags & (1 << using_type))) { + printf("Warning: (Line %d) Identifier %s is not registered as %s\n", yylineno, name, identifier_type_str[using_type]); + for (int i = 0; i < IDENT_TYPE_COUNT; i++) { + if (info->flags & (1 << i)) { + pdebug(" - Identifier %s is registered as %s at line %d\n", name, identifier_type_str[i], info->lineno[i]); + } + } + } + else { + pdebug("OK: Identifier %s has been registered as %s at line %d\n", name, identifier_type_str[using_type], info->lineno[using_type]); + } +} + +void check_identifier_enum(char *name){ + check_identifier(name, IDENT_TYPE_ENUM); +} + +void check_identifier_struct(char *name){ + check_identifier(name, IDENT_TYPE_STRUCT); +} + +void check_identifier_union(char *name){ + check_identifier(name, IDENT_TYPE_UNION); +} + +void check_identifier_typedef(char *name){ + check_identifier(name, IDENT_TYPE_TYPEDEF); +} + diff --git a/src/lib.h b/src/lib.h index fe53a84..4355499 100644 --- a/src/lib.h +++ b/src/lib.h @@ -59,5 +59,11 @@ void register_identifier_union(char *name); void register_identifier_enum(char *name); void register_identifier_typedef(char *name); +// check if the identifier is already declared when using it +void check_identifier(char *name, enum IdentifierType using_type); +void check_identifier_enum(char *name); +void check_identifier_struct(char *name); +void check_identifier_union(char *name); +void check_identifier_typedef(char *name); #endif From a84e606ddc25173e0964ff972dcf800b8c2a0918 Mon Sep 17 00:00:00 2001 From: MisakaVan <2102315149@qq.com> Date: Sat, 14 Dec 2024 02:39:22 +0800 Subject: [PATCH 3/3] feat(tests): add test cases for identifier predeclared checking and redefinition warnings --- .../testcase1.jtl.h | 13 +++++++++ .../testcase-redefine-var.jtl.h | 27 +++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 resource/test/test-identifier-predeclared/testcase1.jtl.h create mode 100644 resource/test/test-indentifier-register/testcase-redefine-var.jtl.h diff --git a/resource/test/test-identifier-predeclared/testcase1.jtl.h b/resource/test/test-identifier-predeclared/testcase1.jtl.h new file mode 100644 index 0000000..3c92a54 --- /dev/null +++ b/resource/test/test-identifier-predeclared/testcase1.jtl.h @@ -0,0 +1,13 @@ +struct Foo; + +struct Foo a; // OK: Foo is registered as struct at line 1 + +enum Foo b; // Warning: Identifier Foo has been registered as struct, not enum + +union Foo c; // Warning: Identifier Foo has been registered as struct, not union + + +typedef Foo foo_t; +foo_t d; // OK: foo_t has been registered as typedef +bar_t e; // Warning: Identifier bar_t has never been registered + diff --git a/resource/test/test-indentifier-register/testcase-redefine-var.jtl.h b/resource/test/test-indentifier-register/testcase-redefine-var.jtl.h new file mode 100644 index 0000000..2b089ac --- /dev/null +++ b/resource/test/test-indentifier-register/testcase-redefine-var.jtl.h @@ -0,0 +1,27 @@ +// redefine a variable-registered identifier as other types + +// some variables to be used in the test +char var1; +char var2; +char var3; +char var4; +char var5; +char var6; + +// 1. redefine as a variable +int var1; // Warning + +// 2. redefine as an enumerator +enum SomeEnum { var2 }; // Warning + +// 3. redefine as a struct +struct var3 { int a; }; // OK + +// 4. redefine as a union +union var4 { int a; }; // OK + +// 5. redefine as an enum +enum var5 { a, b, c }; // OK + +// 6. redefine as a typedef +typedef int var6; // Warning \ No newline at end of file