2929#include <string.h>
3030#include "parse.h"
3131#include "utils.h"
32+ #include "dynarr/dynarr.h"
33+
34+ // Internal functions
35+ void * p_malloc (size_t size , char * context );
3236
3337const char CHR_COMMENT = ';' ;
3438const char CHR_DIRECTIVE = '#' ;
@@ -177,19 +181,32 @@ static void print_token(struct Token *token) {
177181 putchar ('\n' );
178182}
179183
180- bool parse (char * code ) {
181- if (setjmp (parse_error .jump )) return false ;
184+ char * parse (char * code ) {
185+ if (setjmp (parse_error .jump )) return parse_error . msg ;
182186
183187 struct TokenList token_list = token_get_list (code );
184188 if (!token_list .length ) raise_mem ("generating token list" );
185189 struct TokenListNode * token_list_node = token_list .head ;
186190
191+ puts ("> Printing tokens" );
187192 if (token_list .dirty ) fputs ("!!! WARNING: Unknown token(s) encountered !!!\n" , stderr );
188193 do {
189194 struct Token * token = token_list_node -> token ;
190195 if (token -> type != TOK_WHITESPACE ) print_token (token );
191196 token_list_node = token_list_node -> next ;
192197 } while (token_list_node );
198+ //return true;
199+ puts ("> Printing units" );
200+ struct Token * tokens = token_list_to_array (& token_list , true, true);
201+ if (!tokens ) raise_mem ("flattening token list" );
202+ struct Token * curr_token = tokens + 1 ;
203+ do {
204+ unit_get (curr_token , & curr_token );
205+ } while (curr_token -> type != TOK_EOF );
206+
207+ //expression_get(tokens + 1, token_list.length);
208+
209+ return NULL ;
193210}
194211
195212struct Token token_get (char * code , char * * next ) {
@@ -208,6 +225,7 @@ struct Token token_get(char *code, char **next) {
208225 token .type = TOK_WHITESPACE ;
209226 token .data = code ;
210227 token .data_len = length ;
228+ token .newline = * code == '\n' || * code == '\r' ;
211229 } else if (* code == CHR_COMMENT || * code == CHR_DIRECTIVE ) {
212230 // Comment or Directive
213231 token .type = * code == CHR_COMMENT ? TOK_COMMENT : TOK_DIRECTIVE ;
@@ -402,21 +420,33 @@ struct TokenList token_get_list(char *code) {
402420 end : return list ;
403421};
404422
405- struct Token * token_list_to_array (struct TokenList * list , bool pad ) {
406- struct Token * tokens = malloc (sizeof (struct Token ) * (list -> length + (pad ? 2 : 0 )));
423+ struct Token * token_list_to_array (struct TokenList * list , bool pad , bool strip_ws ) {
424+ size_t token_count = list -> length ;
425+ if (strip_ws ) {
426+ struct TokenListNode * node = list -> head ;
427+ do {
428+ if (node -> token -> type == TOK_WHITESPACE && !node -> token -> newline ) -- token_count ;
429+ } while (node = node -> next );
430+ }
431+
432+ struct Token * tokens = malloc (sizeof (struct Token ) * (token_count + (pad ? 2 : 0 )));
407433 if (!tokens ) return NULL ;
408434 if (pad ) /* Reserve first element for padding */ ++ tokens ;
409435
410436 struct TokenListNode * node = list -> head ;
411- for (size_t i = 0 ; i < list -> length ; ++ i ) {
437+
438+ for (size_t i = 0 ; i < token_count ; ++ i ) {
439+ if (node -> token -> type == TOK_WHITESPACE && !node -> token -> newline ) {
440+ -- i ; // No increment in the next iteration
441+ goto next_node ;
442+ }
412443 tokens [i ] = * node -> token ;
413- node = node -> next ;
444+ next_node : node = node -> next ;
414445 }
415446
416447 if (pad ) {
417448 // Apply padding
418- //struct Token padding = {.type = TOK_EOF};
419- tokens [list -> length ] = (struct Token ){
449+ tokens [token_count ] = (struct Token ){
420450 .type = TOK_EOF ,
421451 .data = list -> tail -> token -> data + list -> tail -> token -> data_len ,
422452 .data_len = 0 ,
@@ -671,6 +701,11 @@ bool kwd_is_declarator(enum Keyword kwd) {
671701struct Expression expression_get (struct Token * tokens , size_t count ) {
672702 struct Expression expression = {.op = OP_NOP };
673703
704+ if (count == 0 ) {
705+ // Assume the expression ends at line end
706+ for (;;++ count ) if (tokens [count ].type == TOK_EOF || tokens [count ].type == TOK_WHITESPACE && tokens [count ].newline ) break ;
707+ }
708+
674709 // Calculate the number of actual tokens (anything not a whitespace)
675710 size_t actual_count = 0 ;
676711 struct Token * actual_tokens = tokens ;
@@ -904,6 +939,161 @@ struct Token *find_token_by_opr(struct Token *tokens, size_t count, enum Operato
904939 return NULL ;
905940}
906941
942+ struct Statement statement_get (struct Token * token , struct Token * * next ) {
943+ struct Statement statement ;
944+ struct Token * next_token = NULL ;
945+
946+ bool function , declaration = false;
947+ if (token -> type == TOK_WORD && kwd_is_declarator (token -> keyword )) {
948+ function = token -> keyword == KWD_FUNC ;
949+ declaration = true;
950+ }
951+
952+ if (declaration ) {
953+ statement .type = SMT_DECLARATION ;
954+ statement .declaration = malloc (sizeof * statement .declaration );
955+ if (statement .declaration == NULL ) raise_mem ("parsing declaration statement" );
956+
957+ statement .declaration -> is_function = function ;
958+ if (function ) {
959+ // Function Declaration
960+ statement .declaration -> scope = SCO_GLOBAL ;
961+ statement .declaration -> is_function = true;
962+ statement .declaration -> name = NULL ;
963+ statement .declaration -> code .block = NULL ;
964+ statement .declaration -> code .size = 0 ;
965+
966+ // Name
967+ ++ token ;
968+ if (token -> type != TOK_WORD ) raise_unexpected_token ("a function name" , token );
969+ statement .declaration -> name = p_malloc (token -> data_len + 1 , "storing function name" );
970+ strncpy (statement .declaration -> name , token -> data , token -> data_len );
971+
972+ // Parameters
973+ // TODO: Implement a dynamic array library
974+ // TODO: make an "expect function"
975+ expect_token (++ token , & (struct Token ){.type = TOK_BRACKET , .data = "(" }, "opening bracket for function parameters" );
976+ for (;;) {
977+ // ...
978+ }
979+
980+ // Code block
981+ dynarr code_block = dynarr_init (sizeof * statement .declaration -> code .block );
982+ do {
983+ struct Statement func_stmt = statement_get (token , & next_token );
984+ dynarr_push (& code_block , & func_stmt );
985+ } while (next_token -> type != TOK_WORD || next_token -> kwd != KWD_END_FUNC )
986+ statement .declaration -> code .block = dynarr_get (& code_block , & statement .declaration -> code .size );
987+ } else {
988+ // Variable Declaration
989+ statement .declaration -> scope = SCO_AUTO ;
990+ statement .declaration -> is_static = false;
991+ statement .declaration -> is_constant = false;
992+ statement .declaration -> name = NULL ;
993+ statement .declaration -> initializer = NULL ;
994+
995+ // Metadata
996+ do {
997+ if (token -> keyword == KWD_NONE ) /* Not a keyword*/ break ;
998+ if (!kwd_is_declarator (token -> keyword )) break ;
999+ switch (token -> keyword ) {
1000+ case KWD_GLOBAL :
1001+ statement .declaration -> scope = SCO_GLOBAL ;
1002+ break ;
1003+ case KWD_LOCAL :
1004+ statement .declaration -> scope = SCO_LOCAL ;
1005+ break ;
1006+ case KWD_STATIC :
1007+ statement .declaration -> is_static = true;
1008+ break ;
1009+ case KWD_CONST :
1010+ statement .declaration -> is_constant = true;
1011+ break ;
1012+ }
1013+ } while (TOK_WORD == (++ token )-> type );
1014+
1015+ // Name
1016+ if (token -> type != TOK_VARIABLE ) raise_unexpected_token ("a variable" , token );
1017+
1018+ statement .declaration -> name = malloc (token -> data_len + 1 );
1019+ if (!statement .declaration -> name ) raise_mem ("storing variable name" );
1020+ strncpy (statement .declaration -> name , token -> data , token -> data_len );
1021+
1022+ // Initializer
1023+ if (token [1 ].type != TOK_OPERATOR ) goto next ;
1024+ if (token [1 ].op_info .sym != OPR_EQU ) raise_unexpected_token ("simple assignment operator (=)" , token );
1025+ statement .declaration -> initializer = malloc (sizeof * statement .declaration -> initializer );
1026+ if (!statement .declaration -> initializer ) raise_mem ("parsing initializer" );
1027+ * statement .declaration -> initializer = expression_get (token + 2 , 0 );
1028+ }
1029+ } else {
1030+ statement .type = SMT_EXPRESSION ;
1031+ statement .expression = malloc (sizeof * statement .expression );
1032+ if (!statement .expression ) raise_mem ("parsing expression statement" );
1033+ size_t token_count = 0 ;
1034+ while (true) {
1035+ if (token [token_count ].type == TOK_WHITESPACE && token [token_count ].newline || token [token_count ].type == TOK_EOF ) break ;
1036+ ++ token_count ;
1037+ }
1038+ * statement .expression = expression_get (token , token_count );
1039+ next_token = token + token_count + 1 ;
1040+ }
1041+
1042+ // Set the next token
1043+ next : * next = next_token ? next_token : token + 1 ;
1044+ return statement ;
1045+ }
1046+
1047+ struct Unit unit_get (struct Token * token , struct Token * * next ) {
1048+ struct Unit unit ;
1049+ struct Token * next_token = NULL ;
1050+
1051+ switch (token -> type ) {
1052+ case TOK_WHITESPACE :
1053+ break ;
1054+ case TOK_COMMENT :
1055+ case TOK_DIRECTIVE :
1056+ unit .type = token -> type == TOK_COMMENT ? UNT_COMMENT : UNT_DIRECTIVE ;
1057+ unit .token = token ;
1058+ puts ("It's a comment/directive" );
1059+ break ;
1060+ default :
1061+ // Statement
1062+ unit .type = UNT_STATEMENT ;
1063+ unit .statement = malloc (sizeof * unit .statement );
1064+ if (!unit .statement ) raise_mem ("parsing statement" );
1065+ * unit .statement = statement_get (token , & next_token );
1066+ puts ("It's a statement" );
1067+ break ;
1068+ }
1069+
1070+ // Set the next token
1071+ * next = next_token ? next_token : token + 1 ;
1072+
1073+ return unit ;
1074+ }
1075+
1076+ //struct Token *token_peek() {}
1077+
1078+ void expect_token (struct Token * token , struct Token * expected , char * description ) {
1079+ bool match = false;
1080+ if (token -> type != expected -> type ) goto unexpected ;
1081+ switch (token -> type ) {
1082+ case TOK_BRACKET :
1083+ match = * token -> data == * expected -> data ;
1084+ break ;
1085+ }
1086+ if (match ) return ;
1087+ unexpected : raise_unexpected_token (description , token );
1088+ };
1089+
/**
 * Parser allocation helper ('p' as in parser).
 *
 * Requests `size` bytes from malloc(). If the request fails, raise_mem() is
 * called with `context`, which names the work that needed the memory.
 *
 * @return The pointer obtained from malloc().
 */
void *p_malloc(size_t size, char *context) {
    void *block = malloc(size);
    if (block == NULL) {
        raise_mem(context);
    }
    return block;
}
1096+
9071097noreturn void raise_error (char * msg , bool free_msg ) {
9081098 if (parse_error .free_msg && parse_error .msg ) free (parse_error .msg );
9091099
0 commit comments