- Simplified parsing of identifiers

- _ is now a valid identifier when lexing as Modelica 3
- It is Absyn.WILD when lexing as MetaModelica

git-svn-id: https://openmodelica.org/svn/OpenModelica/trunk@6030 f25d12d1-65f4-0310-ae8a-bbce733d8d8e
OpenModelica · Sep 6, 2010 · 4bb4943 · 4bb4943
1 parent 2b628e7
commit 4bb4943
Show file tree

Hide file tree

Showing 7 changed files with 124 additions and 62 deletions.
diff --git a/Parser/BaseModelica_Lexer.g b/Parser/BaseModelica_Lexer.g
@@ -139,6 +139,8 @@ POWER_EW;
 COLONCOLON;
 MOD;
 
+IDENT;
+
 }
 
 T_ALGORITHM : 'algorithm';
@@ -242,11 +244,21 @@ CODE : 'Code' | '$Code';
 CODE_EXP : '$Exp';
 CODE_VAR : '$Var';
 
-IDENT :
-       ('_' {  $type = WILD; } | NONDIGIT { $type = IDENT; })
-       (('_' | NONDIGIT | DIGIT) { $type = IDENT; })*
-    | (QIDENT { $type = IDENT; })
-    ;
+
+STRING : '"' STRING_GUTS '"'
+       {SETTEXT($STRING_GUTS.text);};
+
+fragment
+STRING_GUTS: (SCHAR | SESCAPE)*
+       ;
+
+fragment
+SCHAR :  NL | '\t' | ~('\n' | '\t' | '\r' | '\\' | '"');
+
+fragment
+SESCAPE : '\\' ('\\' | '"' | '\'' | '?' | 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v');
+
+IDENT : NONDIGIT (NONDIGIT | DIGIT)* | QIDENT;
 
 fragment
 QIDENT :
@@ -256,7 +268,7 @@ fragment
 QCHAR :  NL  | '\t' | ~('\n' | '\t' | '\r' | '\\' | '\'');
 
 fragment
-NONDIGIT :   ('a'..'z' | 'A'..'Z');
+NONDIGIT :   ('_' | 'a'..'z' | 'A'..'Z');
 
 fragment
 DIGIT :
@@ -282,16 +294,3 @@ UNSIGNED_INTEGER :
           )?
       )
   ;
-
-STRING : '"' STRING_GUTS '"'
-       {SETTEXT($STRING_GUTS.text);};
-
-fragment
-STRING_GUTS: (SCHAR | SESCAPE)*
-       ;
-
-fragment
-SCHAR :  NL | '\t' | ~('\n' | '\t' | '\r' | '\\' | '"');
-
-fragment
-SESCAPE : '\\' ('\\' | '"' | '\'' | '?' | 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v');
diff --git a/Parser/FlatModelica_Lexer.g b/Parser/FlatModelica_Lexer.g
@@ -0,0 +1,60 @@
+/*
+ * This file is part of OpenModelica.
+ *
+ * Copyright (c) 1998-CurrentYear, Linkoping University,
+ * Department of Computer and Information Science,
+ * SE-58183 Linkoping, Sweden.
+ *
+ * All rights reserved.
+ *
+ * THIS PROGRAM IS PROVIDED UNDER THE TERMS OF GPL VERSION 3 
+ * AND THIS OSMC PUBLIC LICENSE (OSMC-PL). 
+ * ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS PROGRAM CONSTITUTES RECIPIENT'S 
+ * ACCEPTANCE OF THE OSMC PUBLIC LICENSE.
+ *
+ * The OpenModelica software and the Open Source Modelica
+ * Consortium (OSMC) Public License (OSMC-PL) are obtained
+ * from Linkoping University, either from the above address,
+ * from the URLs: http://www.ida.liu.se/projects/OpenModelica or 
+ * http://www.openmodelica.org, and in the OpenModelica distribution. 
+ * GNU version 3 is obtained from: http://www.gnu.org/copyleft/gpl.html.
+ *
+ * This program is distributed WITHOUT ANY WARRANTY; without
+ * even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE, EXCEPT AS EXPRESSLY SET FORTH
+ * IN THE BY RECIPIENT SELECTED SUBSIDIARY LICENSE CONDITIONS
+ * OF OSMC-PL.
+ *
+ * See the full OSMC Public License conditions for more details.
+ *
+ */
+lexer grammar FlatModelica_Lexer;
+
+options {
+  language = C;
+}
+
+import BaseModelica_Lexer;
+
+STAR       : '*';
+MINUS      : '-';
+PLUS       : '+';
+LESS       : '<';
+LESSEQ     : '<=';
+LESSGT     : '<>';
+GREATER    : '>';
+GREATEREQ  : '>=';
+EQEQ       : '==';
+POWER      : '^';
+SLASH      : '/';
+
+/* Modelica 3.0 elementwise operators */ 
+PLUS_EW : '.+'; /* Modelica 3.0 */
+MINUS_EW : '.-'; /* Modelica 3.0 */ 
+STAR_EW : '.*'; /* Modelica 3.0 */
+SLASH_EW : './'; /* Modelica 3.0 */ 
+POWER_EW : '.^'; /* Modelica 3.0 */
+
+/* Modelica 3.1 */
+STREAM : 'stream'; /* for Modelica 3.1 stream connectors */
+
diff --git a/Parser/Makefile b/Parser/Makefile
@@ -22,6 +22,7 @@ ModelicaParser.o: ModelicaParserCommon.h
 OBJS = \
 MetaModelica_Lexer_BaseModelica_Lexer.o MetaModelica_Lexer.o \
 Modelica_3_Lexer_BaseModelica_Lexer.o Modelica_3_Lexer.o \
+FlatModelica_Lexer_BaseModelica_Lexer.o FlatModelica_Lexer.o \
 ModelicaParser.o
 # Modelica_2_BaseModelica_Lexer.o Modelica_2_Lexer.o \
 # ModelicaParser.o BaseModelica_Lexer.o
@@ -45,6 +46,9 @@ Modelica_3_Lexer_BaseModelica_Lexer.c Modelica_3_Lexer_BaseModelica_Lexer.h Mode
 MetaModelica_Lexer_BaseModelica_Lexer.c MetaModelica_Lexer_BaseModelica_Lexer.h MetaModelica_Lexer.c MetaModelica_Lexer.h: MetaModelica_Lexer.g BaseModelica_Lexer.g
 	$(ANTLRCMD) $<
 
+FlatModelica_Lexer_BaseModelica_Lexer.c FlatModelica_Lexer_BaseModelica_Lexer.h FlatModelica_Lexer.c FlatModelica_Lexer.h: FlatModelica_Lexer.g BaseModelica_Lexer.g
+	$(ANTLRCMD) $<
+
 ModelicaParser: libantlr3.a libomparse.a main.o
 	gcc -o $@ main.o libomparse.a $(LDFLAGS) libantlr3.a
 omc.exe: libantlr3.a libomparse.a

diff --git a/Parser/MetaModelica_Lexer.g b/Parser/MetaModelica_Lexer.g
@@ -70,18 +70,6 @@ EQEQ    : '==' {METAMODELICA_REAL_STRING_OP()};
 POWER    : '^' {METAMODELICA_REAL_OP()};
 SLASH    : '/' {METAMODELICA_REAL_OP()};
 
-/*STAR    : '*' ('. ')?;
-MINUS    : '-' ('. ')?;
-PLUS    : '+' ('. '|'&')?;
-LESS    : '<' ('. ')?;
-LESSEQ    : '<=' ('. ')?;
-LESSGT    : '<>' ('. ')?;
-GREATER    : '>' ('. ')?;
-GREATEREQ  : '>=' ('. ')?;
-EQEQ    : '==' ('. '|'&')?;
-POWER    : '^' ('. ')?;
-SLASH    : '/' ('. ')?;*/
-
 /* Modelica 3.0 elementwise operators */ 
 PLUS_EW : '.+'; /* Modelica 3.0 */
 MINUS_EW : '.-'; /* Modelica 3.0 */ 

diff --git a/Parser/Modelica.g b/Parser/Modelica.g
@@ -958,8 +958,9 @@ component_reference returns [void* ast] :
     {
       if (cr)
         ast = Absyn__CREF_5fQUAL(token_to_scon(id), or_nil(arr), cr);
-      else
+      else {
         ast = Absyn__CREF_5fIDENT(token_to_scon(id), or_nil(arr));
+      }
     }
   | WILD {ast = Absyn__WILD;}
   ;

diff --git a/Parser/Modelica.tokens b/Parser/Modelica.tokens
@@ -1,10 +1,10 @@
 FUNCTION=34
 PACKAGE=49
 EXTERNAL=29
-EXPONENT=107
+EXPONENT=110
 STAR=112
 WHILE=63
-QIDENT=103
+QIDENT=108
 MOD=96
 CONNECTOR=13
 CASE=69
@@ -32,7 +32,7 @@ EXPANDABLE=26
 ENCAPSULATED=25
 T_TRUE=59
 MATCHCONTINUE=74
-NL=97
+NL=98
 EACH=18
 STREAM=67
 EQEQ=120
@@ -41,7 +41,7 @@ RBRACK=83
 T_INPUT=40
 RECORD=54
 RBRACE=85
-LINE_COMMENT=99
+LINE_COMMENT=100
 INITIAL=38
 ELSE=19
 POWER=121
@@ -55,11 +55,11 @@ T_IN=37
 T_OUTPUT=48
 PLUS_EW=91
 COLONCOLON=78
-WS=98
-QCHAR=105
+WS=99
+QCHAR=109
 FLOW=32
 T_FALSE=30
-NONDIGIT=101
+NONDIGIT=106
 WITHIN=64
 CONSTANT=14
 POWER_EW=95
@@ -69,29 +69,29 @@ CLASS=11
 LBRACK=82
 INNER=39
 DISCRETE=15
-STRING_GUTS=109
+STRING_GUTS=102
 LBRACE=84
 DER=16
 FOR=33
 CODE_EXP=9
 UNSIGNED_REAL=61
 T_ANNOTATION=6
 IF=35
-ML_COMMENT=100
+ML_COMMENT=101
 AS=68
-UNSIGNED_INTEGER=108
+UNSIGNED_INTEGER=111
 SLASH=122
-SCHAR=111
+SCHAR=104
 THEN=58
 LESSEQ=116
 COMMA=88
 FAILURE=71
 SUBTYPEOF=77
-SESCAPE=106
-IDENT=104
+SESCAPE=105
+IDENT=97
 PLUS=114
 MODEL=42
-DIGIT=102
+DIGIT=107
 MINUS_EW=92
 DOT=79
 CONSTRAINEDBY=28
@@ -117,4 +117,4 @@ RPAR=81
 T_NOT=43
 EXTENDS=27
 PUBLIC=53
-STRING=110
+STRING=103
diff --git a/Parser/parse.c b/Parser/parse.c
@@ -171,23 +171,23 @@ static void handleLexerError(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 *
 void handleParseError(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
 {
   pANTLR3_PARSER      parser;
-  pANTLR3_TREE_PARSER  tparser;
   pANTLR3_INT_STREAM  is;
   pANTLR3_STRING      ttext;
   pANTLR3_EXCEPTION      ex;
-  pANTLR3_COMMON_TOKEN   theToken;
+  pANTLR3_COMMON_TOKEN   preToken,nextToken;
   pANTLR3_BASE_TREE      theBaseTree;
   pANTLR3_COMMON_TREE    theCommonTree;
+  pANTLR3_TOKEN_STREAM tokenStream;
   ANTLR3_UINT32 ttype;
   int type;
   const char *error_type = "TRANSLATION";
   const char *token_text[2] = {0,0};
-  int offset, error_id = 0, line;
+  int p_offset, n_offset, error_id = 0, p_line, n_line;
   recognizer->state->error = ANTLR3_TRUE;
+  recognizer->state->failed = ANTLR3_TRUE;
 
   if (lexerFailed)
     return;
-  recognizer->state->failed = ANTLR3_TRUE;
 
   // Retrieve some info for easy reading.
   ex      =    recognizer->state->exception;
@@ -196,9 +196,17 @@ void handleParseError(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenN
   switch  (recognizer->type)
   {
   case  ANTLR3_TYPE_PARSER:
-    offset = ex->charPositionInLine+1;
+    parser = (pANTLR3_PARSER) (recognizer->super);
     token_text[1] = (const char*) ex->message;
     type = ex->type;
+    tokenStream = parser->getTokenStream(parser);
+    preToken = tokenStream->_LT(tokenStream,1);
+    nextToken = tokenStream->_LT(tokenStream,2);
+    if (preToken == NULL) preToken = nextToken;
+    p_line = preToken->line;
+    n_line = nextToken->line;
+    p_offset = preToken->charPosition+1;
+    n_offset = nextToken->charPosition+1;
     break;
 
   default:
@@ -211,26 +219,26 @@ void handleParseError(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenN
   switch (type) {
   case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
     token_text[0] = ex->expecting == ANTLR3_TOKEN_EOF ? "<EOF>" : (const char*) tokenNames[ex->expecting];
-    c_add_source_message(2, "SYNTAX", "Error", "Unwanted token '%s'.", token_text, 1, ex->line, offset, ex->line, offset, false, ModelicaParser_filename_C);
+    c_add_source_message(2, "SYNTAX", "Error", "Unwanted token: %s", token_text, 1, p_line, p_offset, n_line, n_offset, false, ModelicaParser_filename_C);
     break;
   case ANTLR3_MISSING_TOKEN_EXCEPTION:
     token_text[0] = ex->expecting == ANTLR3_TOKEN_EOF ? "<EOF>" : (const char*) tokenNames[ex->expecting];
-    c_add_source_message(2, "SYNTAX", "Error", "Missing token '%s'.", token_text, 1, ex->line, offset, ex->line, offset, false, ModelicaParser_filename_C);
+    c_add_source_message(2, "SYNTAX", "Error", "Missing token: %s", token_text, 1, p_line, p_offset, n_line, n_offset, false, ModelicaParser_filename_C);
     break;
   case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
-    ttype = ((pANTLR3_COMMON_TOKEN)ex->token)->type;
-    token_text[0] = ttype == ANTLR3_TOKEN_EOF ? "<EOF>" : (const char*) tokenNames[ttype];
-    c_add_source_message(2, "SYNTAX", "Error", "No viable alternative near token %s. ", token_text, 1, ex->line, offset, ex->line, offset, false, ModelicaParser_filename_C);
+    token_text[0] = nextToken->getText(nextToken)->chars;
+    c_add_source_message(2, "SYNTAX", "Error", "No viable alternative near token: %s", token_text, 1, p_line, p_offset, n_line, n_offset, false, ModelicaParser_filename_C);
     break;
   case ModelicaParserException:
-    c_add_source_message(2, "SYNTAX", "Error", "%s.", token_text+1, 1, ex->line, offset, ex->line, offset, false, ModelicaParser_filename_C);
+    c_add_source_message(2, "SYNTAX", "Error", "%s.", token_text+1, 1, p_line, p_offset, n_line, n_offset, false, ModelicaParser_filename_C);
     break;
   case ANTLR3_MISMATCHED_SET_EXCEPTION:
   case ANTLR3_EARLY_EXIT_EXCEPTION:
   case ANTLR3_RECOGNITION_EXCEPTION:
   default:
-    token_text[0] = ex->message;
-    c_add_source_message(2, "SYNTAX", "Error", "Parser error: %s", token_text, 1, ex->line, offset, ex->line, offset, false, ModelicaParser_filename_C);
+    token_text[1] = ex->message;
+    token_text[0] = preToken->getText(preToken)->chars;
+    c_add_source_message(2, "SYNTAX", "Error", "Parser error: %s near: %s", token_text, 2, p_line, p_offset, n_line, n_offset, false, ModelicaParser_filename_C);
     break;
   }
 
@@ -312,16 +320,18 @@ void* parseFile(void* fileNameRML, int flags)
 
   pANTLR3_UINT8               fName;
   pANTLR3_INPUT_STREAM        input;
-
   ModelicaParser_filename_C = RML_STRINGDATA(fileNameRML);
+
+  int len = strlen(ModelicaParser_filename_C);
+  if (len > 3 && 0==strcmp(ModelicaParser_filename_C+len-4,".mof"))
+    fprintf(stderr, "Flat Modelica\n");
   /* For some reason we get undefined values if we use the old pointer; but only in rare cases */
   ModelicaParser_filename_RML = mk_scon((char*)ModelicaParser_filename_C);
   ModelicaParser_flags = flags;
 
   fName  = (pANTLR3_UINT8)ModelicaParser_filename_C;
   input  = antlr3AsciiFileStreamNew(fName);
   if ( input == NULL ) {
-    fprintf(stderr, "Unable to open file %s\n", ModelicaParser_filename_C);
     return NULL;
   }
   return parseStream(input);