Adding support for lex files

- Correct handling of C comment start and end tokens as well as Cpp comment start tokens in rules. These tokens can give the warning "Reached end of file while still inside a (nested) comment..." - Correct other warnings with respect to lex files
doxygen · Feb 18, 2021 · 789625c · 789625c
1 parent 0f0b282
commit 789625c
Show file tree

Hide file tree

Showing 15 changed files with 398 additions and 271 deletions.
diff --git a/libxml/xml.h b/libxml/xml.h
@@ -76,7 +76,7 @@ class XMLParser : public XMLLocator
     /*! Parses a file gives the contents of the file as a string.
      *  @param fileName the name of the file, used for error reporting.
      *  @param inputString the contents of the file as a zero terminated UTF-8 string.
-     *  @param debugEnable indicates if debugging via -d lex is enabled or not.
+     *  @param debugEnabled indicates if debugging via -d lex is enabled or not.
      */
     void parse(const char *fileName,const char *inputString,bool debugEnabled);
 

diff --git a/src/code.l b/src/code.l
diff --git a/src/commentcnv.l b/src/commentcnv.l
@@ -171,6 +171,23 @@ FLOAT_NUMBER {FLOAT_DECIMAL}|{FLOAT_HEXADECIMAL}
 NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
   //- end: NUMBER ---------------------------------------------------------------------------
 
+  // C start comment 
+CCS   "/\*"
+  // C end comment
+CCE   "*\/"
+  // Cpp comment 
+CPPC  "/\/"
+
+  // Optional any character
+ANYopt .*
+
+  // Optional white space
+WSopt [ \t\r]*
+  // readline non special
+RLopt [^\\@\n\*\/]*
+  // Optional slash
+SLASHopt [/]*
+
 %%
 
 <Scan>{NUMBER}			    { //Note similar code in code.l
@@ -277,8 +294,8 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 <Scan>\n                           { /* new line */ 
                                      copyToOutput(yyscanner,yytext,(int)yyleng); 
                                    }
-<Scan>"//!"/.*\n[ \t]*"//"[\/!][^\/] | /* start C++ style special comment block */
-<Scan>("///"[/]*)/[^/].*\n[ \t]*"//"[\/!][^\/] { /* start C++ style special comment block */
+<Scan>{CPPC}"!"/.*\n[ \t]*{CPPC}[\/!][^\/] | /* start C++ style special comment block */
+<Scan>({CPPC}"/"[/]*)/[^/].*\n[ \t]*{CPPC}[\/!][^\/] { /* start C++ style special comment block */
   				     if (yyextra->mlBrief) 
 				     {
 				       REJECT; // bail out if we do not need to convert
@@ -299,7 +316,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 				       BEGIN(ReadLine);
 				     }
                                    }
-<Scan>"//##Documentation".*/\n	   { /* Start of Rational Rose ANSI C++ comment block */
+<Scan>{CPPC}"##Documentation"{ANYopt}/\n	   { /* Start of Rational Rose ANSI C++ comment block */
                                      if (yyextra->mlBrief) REJECT;
                                      int i=17; //=strlen("//##Documentation");
 				     yyextra->blockHeadCol=yyextra->col;
@@ -308,22 +325,22 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 				     yyextra->inRoseComment=TRUE;
 				     BEGIN(SComment);
   				   }
-<Scan>"//"[!\/]/.*\n[ \t]*"//"[|\/][ \t]*[@\\]"}" { // next line contains an end marker, see bug 752712
+<Scan>{CPPC}[!\/]/.*\n[ \t]*{CPPC}[|\/][ \t]*[@\\]"}" { // next line contains an end marker, see bug 752712
 				     yyextra->inSpecialComment=yytext[2]=='/' || yytext[2]=='!';
   				     copyToOutput(yyscanner,yytext,(int)yyleng); 
 				     yyextra->readLineCtx=YY_START;
 				     BEGIN(ReadLine);
                                    }
-<Scan>"//"/.*\n	                   { /* one line C++ comment */ 
+<Scan>{CPPC}/.*\n	                   { /* one line C++ comment */ 
 				     yyextra->inSpecialComment=yytext[2]=='/' || yytext[2]=='!';
   				     copyToOutput(yyscanner,yytext,(int)yyleng); 
 				     yyextra->readLineCtx=YY_START;
 				     BEGIN(ReadLine);
 				   }
-<Scan>"/**/"                       { /* avoid matching next rule for empty C comment, see bug 711723 */
+<Scan>{CCS}{CCE}                       { /* avoid matching next rule for empty C comment, see bug 711723 */
                                      copyToOutput(yyscanner,yytext,(int)yyleng);
                                    }
-<Scan>"/*"[*!]?			   { /* start of a C comment */
+<Scan>{CCS}[*!]?			   { /* start of a C comment */
                                      if (yyextra->lang==SrcLangExt_Python)
 				     {
 				       REJECT;
@@ -444,8 +461,14 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 				     yyextra->lastCommentContext = YY_START;
                                      BEGIN(Verbatim);
                                    }
+<Scan>"\\\""                       { /* escaped double quote */
+                                     copyToOutput(yyscanner,yytext,(int)yyleng);
+                                   }
+<Scan>"\\\\"                       { /* escaped backslash */
+                                     copyToOutput(yyscanner,yytext,(int)yyleng);
+                                   }
 <Scan>.                            { /* any other character */
-                                     copyToOutput(yyscanner,yytext,(int)yyleng); 
+                                     copyToOutput(yyscanner,yytext,(int)yyleng);
                                    }
 <Verbatim>[\\@]("endverbatim"|"endlatexonly"|"endhtmlonly"|"endxmlonly"|"enddocbookonly"|"endrtfonly"|"endmanonly"|"f$"|"f]"|"f}") { /* end of verbatim block */
                                      copyToOutput(yyscanner,yytext,(int)yyleng);
@@ -502,7 +525,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 				       BEGIN(yyextra->lastCommentContext);
 				     }
                                    }
-<VerbatimCode>^[ \t]*"//"[\!\/]?   { /* skip leading comments */
+<VerbatimCode>^[ \t]*{CPPC}[\!\/]?   { /* skip leading comments */
   				     if (!yyextra->inSpecialComment)
 				     {
                                        copyToOutput(yyscanner,yytext,(int)yyleng); 
@@ -531,7 +554,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 <Verbatim,VerbatimCode>\n	   { /* new line in verbatim block */
                                      copyToOutput(yyscanner,yytext,(int)yyleng); 
                                    }
-<Verbatim>^[ \t]*"//"[/!]          {
+<Verbatim>^[ \t]*{CPPC}[/!]          {
   				     if (yyextra->blockName=="dot" || yyextra->blockName=="msc" || yyextra->blockName=="uml" || yyextra->blockName.at(0)=='f')
 				     {
 				       // see bug 487871, strip /// from dot images and formulas.
@@ -597,7 +620,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 <CComment>[^ `~<\\!@*\n{\"\/]*     { /* anything that is not a '*' or command */ 
                                      copyToOutput(yyscanner,yytext,(int)yyleng); 
                                    }
-<CComment>"*"+[^*/\\@\n{\"]*       { /* stars without slashes */
+<CComment>"*"+[^*\/\\@\n{\"]*      { /* stars without slashes */
                                      copyToOutput(yyscanner,yytext,(int)yyleng); 
                                    }
 <CComment>"\"\"\""                 { /* end of Python docstring */
@@ -729,45 +752,45 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 <CComment>.			   {
                                      copyToOutput(yyscanner,yytext,(int)yyleng); 
   				   }
-<SComment>^[ \t]*"///"[\/]*/\n     {
+<SComment>^[ \t]*{CPPC}"/"{SLASHopt}/\n     {
   				     replaceComment(yyscanner,0);
   				   }
-<SComment>\n[ \t]*"///"[\/]*/\n    {
+<SComment>\n[ \t]*{CPPC}"/"{SLASHopt}/\n    {
                                      replaceComment(yyscanner,1); 
                                    }
-<SComment>^[ \t]*"///"[^\/\n]/.*\n { 
+<SComment>^[ \t]*{CPPC}"/"[^\/\n]/.*\n { 
   				     replaceComment(yyscanner,0);
 				     yyextra->readLineCtx=YY_START;
 				     BEGIN(ReadLine);
   				   }
-<SComment>\n[ \t]*"//"[\/!]("<")?[ \t]*[\\@]"}".*\n {   
+<SComment>\n[ \t]*{CPPC}[\/!]("<")?[ \t]*[\\@]"}".*\n {   
                                      /* See Bug 752712: end the multiline comment when finding a @} or \} command */
                                      copyToOutput(yyscanner," */",3); 
 				     copyToOutput(yyscanner,yytext,(int)yyleng); 
 				     yyextra->inSpecialComment=FALSE;
 				     yyextra->inRoseComment=FALSE;
 				     BEGIN(Scan); 
                                    }
-<SComment>\n[ \t]*"///"[^\/\n]/.*\n  { 
+<SComment>\n[ \t]*{CPPC}"/"[^\/\n]/.*\n  { 
                                      replaceComment(yyscanner,1); 
 				     yyextra->readLineCtx=YY_START;
 				     BEGIN(ReadLine);
   				   }
-<SComment>^[ \t]*"//!"             |    // just //!
-<SComment>^[ \t]*"//!<"/.*\n       |    // or   //!< something
-<SComment>^[ \t]*"//!"[^<]/.*\n    {    // or   //!something
+<SComment>^[ \t]*{CPPC}"!"             |    // just //!
+<SComment>^[ \t]*{CPPC}"!<"/.*\n       |    // or   //!< something
+<SComment>^[ \t]*{CPPC}"!"[^<]/.*\n    {    // or   //!something
   				     replaceComment(yyscanner,0);
 				     yyextra->readLineCtx=YY_START;
 				     BEGIN(ReadLine);
                                    }
-<SComment>\n[ \t]*"//!"            |
-<SComment>\n[ \t]*"//!<"/.*\n      |
-<SComment>\n[ \t]*"//!"[^<\n]/.*\n { 
+<SComment>\n[ \t]*{CPPC}"!"            |
+<SComment>\n[ \t]*{CPPC}"!<"/.*\n      |
+<SComment>\n[ \t]*{CPPC}"!"[^<\n]/.*\n { 
                                      replaceComment(yyscanner,1); 
 				     yyextra->readLineCtx=YY_START;
 				     BEGIN(ReadLine);
                                    }
-<SComment>^[ \t]*"//##"/.*\n       {
+<SComment>^[ \t]*{CPPC}"##"/.*\n       {
                                      if (!yyextra->inRoseComment)
 				     {
 				       REJECT;
@@ -779,7 +802,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 				       BEGIN(ReadLine);
 				     }
                                    }
-<SComment>\n[ \t]*"//##"/.*\n      {
+<SComment>\n[ \t]*{CPPC}"##"/.*\n      {
                                      if (!yyextra->inRoseComment)
 				     {
 				       REJECT;
@@ -798,19 +821,19 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 				     yyextra->inRoseComment=FALSE;
 				     BEGIN(Scan); 
                                    }
-<ReadLine>"/**"                    {
+<ReadLine>{CCS}"*"                    {
 				     copyToOutput(yyscanner,"/&zwj;**",8);
 				   }
-<ReadLine>"*/"                     {
+<ReadLine>{CCE}                     {
 				     copyToOutput(yyscanner,"*&zwj;/",7);
 				   }
 <ReadLine>"*"                      {
 				     copyToOutput(yyscanner,yytext,(int)yyleng);
 				   }
-<ReadLine>[^\\@\n\*/]*             {
+<ReadLine>{RLopt}                  {
 				     copyToOutput(yyscanner,yytext,(int)yyleng);
 				   }
-<ReadLine>[^\\@\n\*/]*/\n          {
+<ReadLine>{RLopt}/\n               {
 				     copyToOutput(yyscanner,yytext,(int)yyleng);
 				     BEGIN(yyextra->readLineCtx);
 				   }
@@ -844,7 +867,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 <CondLine>[!()&| \ta-z_A-Z0-9.\-]+ {
                                      handleCondSectionId(yyscanner,yytext);
   				   }
-<CComment,ReadLine>[\\@]"cond"[ \t\r]*/\n {
+<CComment,ReadLine>[\\@]"cond"{WSopt}/\n {
   				     yyextra->condCtx=YY_START;
                                      handleCondSectionId(yyscanner," "); // fake section id causing the section to be hidden unconditionally
                                    }
@@ -862,9 +885,9 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
 				     yyextra->lastEscaped=0;
 				     BEGIN( ReadAliasArgs );
   				   }
-<ReadAliasArgs>^[ \t]*"//"[/!]/[^\n]+   { // skip leading special comments (see bug 618079)
+<ReadAliasArgs>^[ \t]*{CPPC}[/!]/[^\n]+   { // skip leading special comments (see bug 618079)
   				   }
-<ReadAliasArgs>"*/"		   { // oops, end of comment in the middle of an alias?
+<ReadAliasArgs>{CCE}		   { // oops, end of comment in the middle of an alias?
                                      if (yyextra->lang==SrcLangExt_Python)
 				     {
 				       REJECT;

diff --git a/src/commentscan.l b/src/commentscan.l
@@ -469,7 +469,8 @@ DETAILEDHTMLOPT {CODE}
 BN        [ \t\n\r]
 BL        [ \t\r]*"\n"
 B         [ \t]
-BS        ^(({B}*"//")?)(({B}*"*"+)?){B}*
+Bopt      {B}*
+BS        ^(({B}*"/""/")?)(({B}*"*"+)?){B}*
 ATTR      ({B}+[^>\n]*)?
 DOCNL     "\n"|"\\ilinebr"
 LC        "\\"{B}*"\n"
@@ -488,6 +489,18 @@ TMPLSPEC  "<"{BN}*[^>]+{BN}*">"
 MAILADDR   [a-z_A-Z0-9.+\-]+"@"[a-z_A-Z0-9\-]+("."[a-z_A-Z0-9\-]+)+[a-z_A-Z0-9\-]+
 RCSTAG    "$"{ID}":"[^\n$]+"$"
 
+  // C start comment 
+CCS   "/\*"
+  // C end comment
+CCE   "*\/"
+  // Cpp comment 
+CPPC  "/\/"
+
+  // end of section title with asterisk
+STAopt [^\n@\\*]*
+  // end of section title without asterisk
+STopt  [^\n@\\]*
+
 %option noyywrap
 
   /* comment parsing states. */
@@ -541,7 +554,7 @@ RCSTAG    "$"{ID}":"[^\n$]+"$"
   /* What can happen in while parsing a comment block:
    *   commands (e.g. @page, or \page)
    *   escaped commands (e.g. @@page or \\page).
-   *   formulas (e.g. \f$ \f[ \f{..)
+   *   formulas (e.g. \f$...\f$ \f[...\f] \f{...\f})
    *   directories (e.g. \doxygen\src\)
    *   autolist end. (e.g. a dot on an otherwise empty line)
    *   newlines.
@@ -770,7 +783,7 @@ RCSTAG    "$"{ID}":"[^\n$]+"$"
 <Comment>[a-z_A-Z]+                     { // normal word
                                           addOutput(yyscanner,yytext);
                                         }
-<Comment>^{B}*"."{B}*/\n                { // explicit end autolist: e.g "  ."
+<Comment>^{B}*"."{Bopt}/\n                { // explicit end autolist: e.g "  ."
                                             addOutput(yyscanner,yytext);
                                         }
 <Comment>^{B}*[1-9][0-9]*"."{B}+        |
@@ -797,7 +810,7 @@ RCSTAG    "$"{ID}":"[^\n$]+"$"
                                           }
                                           addOutput(yyscanner,yytext);
                                         }
-<Comment>^{B}*([\-:|]{B}*)*("--"|"---")({B}*[\-:|])*{B}*/\n { // horizontal line (dashed)
+<Comment>^{B}*([\-:|]{B}*)*("--"|"---")({B}*[\-:|])*{Bopt}/\n { // horizontal line (dashed)
                                             addOutput(yyscanner,yytext);
                                         }
 <Comment>{CMD}"---"                     { // escaped mdash
@@ -1344,12 +1357,12 @@ RCSTAG    "$"{ID}":"[^\n$]+"$"
                                               );
                                           BEGIN(Comment);
                                         }
-<SectionTitle>[^\n@\\*]*/"\n"           { // end of section title
+<SectionTitle>{STAopt}/"\n"             { // end of section title
                                           addSection(yyscanner);
                                           addOutput(yyscanner,yytext);
                                           BEGIN( Comment );
                                         }
-<SectionTitle>[^\n@\\]*/"\\ilinebr"     { // end of section title
+<SectionTitle>{STopt}/"\\ilinebr"       { // end of section title
                                           addSection(yyscanner);
                                           addOutput(yyscanner,yytext);
                                           BEGIN( Comment );
@@ -1451,11 +1464,11 @@ RCSTAG    "$"{ID}":"[^\n$]+"$"
                                           if (*yytext=='\n') yyextra->lineNr++;
                                           addOutput(yyscanner,'\n');
                                         }
-<FormatBlock>"/*"                       { // start of a C-comment
+<FormatBlock>{CCS}                       { // start of a C-comment
                                           if (!(yyextra->blockName=="code" || yyextra->blockName=="verbatim")) yyextra->commentCount++;
                                           addOutput(yyscanner,yytext);
                                         }
-<FormatBlock>"*/"                       { // end of a C-comment
+<FormatBlock>{CCE}                       { // end of a C-comment
                                           addOutput(yyscanner,yytext);
                                           if (!(yyextra->blockName=="code" || yyextra->blockName=="verbatim"))
                                           {

diff --git a/src/declinfo.l b/src/declinfo.l
@@ -83,6 +83,7 @@ static yy_size_t yyread(char *buf,yy_size_t max_size, yyscan_t yyscanner);
 %}
 
 B       [ \t]
+Bopt    {B}*
 ID	"$"?([a-z_A-Z\x80-\xFF][a-z_A-Z0-9\x80-\xFF]*)|(@[0-9]+)
 
 %x      Start
@@ -188,11 +189,11 @@ ID	"$"?([a-z_A-Z\x80-\xFF][a-z_A-Z0-9\x80-\xFF]*)|(@[0-9]+)
 <Template>.			{
   				  yyextra->name+=*yytext;
   				}
-<Operator>{B}*"("{B}*")"{B}*"<>"{B}*/"("	{
+<Operator>{B}*"("{B}*")"{B}*"<>"{Bopt}/"("	{
   				  yyextra->name+="() <>";
 				  BEGIN(ReadArgs);
   				}
-<Operator>{B}*"("{B}*")"{B}*/"("	{
+<Operator>{B}*"("{B}*")"{Bopt}/"("	{
   				  yyextra->name+="()";
 				  BEGIN(ReadArgs);
   				}