Adding support for lex files

Adding support for lex. The lex files consist of a mixture of lex-specific structures, which are handled in lexcode.l and lexscanner.l, and C code, which is handled by the C parsers (hence the rules used are partly copied from scanner.l). Special attention has been paid to memberdef.cpp, as the initial values should not be handled by the lex parsers but by the C parsers.
doxygen · Feb 12, 2021 · 77c6339 · 77c6339
1 parent 4d8e0f3
commit 77c6339
Show file tree

Hide file tree

Showing 20 changed files with 2,459 additions and 146 deletions.
diff --git a/Doxyfile b/Doxyfile
@@ -106,10 +106,12 @@ WARN_LOGFILE           = warnings.log
 # Configuration options related to the input files
 #---------------------------------------------------------------------------
 INPUT                  = src \
-                         vhdlparser
+                         vhdlparser \
+                         xml
 INPUT_ENCODING         = UTF-8
 FILE_PATTERNS          = *.h \
                          *.cpp \
+                         *.l \
                          *.md
 RECURSIVE              = NO
 EXCLUDE                =

diff --git a/doc/features.doc b/doc/features.doc
@@ -28,7 +28,7 @@
     output even from undocumented code.
 <li>Generates structured XML output for parsed sources, which can be 
     used by external tools.
-<li>Supports C/C++, Java, (Corba and Microsoft) Java, Python, VHDL, PHP
+<li>Supports C/C++, Lex, Java, (Corba and Microsoft) Java, Python, VHDL, PHP
     IDL, C#, Fortran, Objective-C 2.0, and to some extent D sources.
 <li>Supports documentation of files, namespaces, packages, classes, 
     structs, unions, templates, variables, functions, typedefs, enums and 

diff --git a/doc/starting.doc b/doc/starting.doc
@@ -37,7 +37,7 @@ tries to be complete):
 \section step0 Step 0: Check if doxygen supports your programming language
 
 First, assure that your programming language has a reasonable chance of being
-recognized by doxygen. These languages are supported by default: C, C++, C#,
+recognized by doxygen. These languages are supported by default: C, C++, Lex, C#,
 Objective-C, IDL, Java, VHDL, PHP, Python, Fortran and D. It
 is possible to configure certain file type extensions to use certain parsers:
 see the \ref cfg_extension_mapping "Configuration/ExtensionMappings" for details.
@@ -120,7 +120,7 @@ Extension | Language | Extension | Language     | Extension | Language
 .ixx      |C / C++   | .php5     |PHP           | .vhdl     |VHDL
 .ipp      |C / C++   | .inc      |PHP           | .ucf      |VHDL
 .i++      |C / C++   | .phtml    |PHP           | .qsf      |VHDL
-.inl      |C / C++   | .m        |Objective-C   | &nbsp;    |&nbsp;
+.inl      |C / C++   | .m        |Objective-C   | .l        |Lex
 .h        |C / C++   | .M        |Objective-C   | .md       |Markdown
 .H        |C / C++   | .py       |Python        | .markdown |Markdown
 .hh       |C / C++   | .pyw      |Python        | .ice      |Slice

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -117,6 +117,8 @@ set(LEX_FILES scanner
     constexp
     xmlcode
     sqlcode
+    lexcode
+    lexscanner
     configimpl)
 
 # unfortunately ${LEX_FILES_H} and ${LEX_FILES_CPP} don't work in older versions of CMake (like 3.6.2) for add_library
@@ -184,6 +186,8 @@ add_library(doxymain STATIC
     ${GENERATED_SRC}/doctokenizer.l.h
     ${GENERATED_SRC}/fortrancode.l.h
     ${GENERATED_SRC}/fortranscanner.l.h
+    ${GENERATED_SRC}/lexcode.l.h
+    ${GENERATED_SRC}/lexscanner.l.h
     ${GENERATED_SRC}/pre.l.h
     ${GENERATED_SRC}/pycode.l.h
     ${GENERATED_SRC}/pyscanner.l.h
@@ -199,6 +203,8 @@ add_library(doxymain STATIC
     ${GENERATED_SRC}/doctokenizer.cpp
     ${GENERATED_SRC}/fortrancode.cpp
     ${GENERATED_SRC}/fortranscanner.cpp
+    ${GENERATED_SRC}/lexcode.cpp
+    ${GENERATED_SRC}/lexscanner.cpp
     ${GENERATED_SRC}/pre.cpp
     ${GENERATED_SRC}/pycode.cpp
     ${GENERATED_SRC}/pyscanner.cpp

diff --git a/src/code.h b/src/code.h
@@ -46,6 +46,7 @@ class CCodeParser : public CodeParserInterface
                    bool collectXRefs=TRUE
                   );
     void resetCodeParserState();
+    void setStartCodeLine(const bool inp);
   private:
     struct Private;
     std::unique_ptr<Private> p;

diff --git a/src/code.l b/src/code.l
@@ -98,6 +98,11 @@ struct codeYY_state
   QCString      parmType;
   QCString      parmName;
 
+  bool          beginCodeLine = true; //!< signals whether or not a start of code line
+                                      //!< should be written for the first line. Essential
+                                      //!< when this code parser is called from another
+                                      //!< code parser.
+
   const char *  inputString = 0;     //!< the code fragment as text
   yy_size_t     inputPosition = 0;   //!< read offset during parsing
   int           inputLines = 0;      //!< number of line in the code fragment
@@ -3788,6 +3793,12 @@ void CCodeParser::resetCodeParserState()
   yyextra->anchorCount = 0;
 }
 
+void CCodeParser::setStartCodeLine(const bool inp) // stores inp in the scanner state's beginCodeLine flag
+{
+  struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner; // presumably required by the yyextra macro below -- TODO confirm
+  yyextra->beginCodeLine = inp; // parseCode() only calls startCodeLine() up front when this flag is true
+}
+
 void CCodeParser::parseCode(CodeOutputInterface &od,const char *className,const QCString &s,
                 SrcLangExt lang,bool exBlock, const char *exName,FileDef *fd,
                 int startLine,int endLine,bool inlineFragment,
@@ -3862,7 +3873,8 @@ void CCodeParser::parseCode(CodeOutputInterface &od,const char *className,const
   }
   yyextra->includeCodeFragment = inlineFragment;
   //printf("** exBlock=%d exName=%s include=%d\n",exBlock,exName,inlineFragment);
-  startCodeLine(yyscanner);
+
+  if (yyextra->beginCodeLine) startCodeLine(yyscanner);
   yyextra->type.resize(0);
   yyextra->name.resize(0);
   yyextra->args.resize(0);

diff --git a/src/config.xml b/src/config.xml
@@ -647,7 +647,7 @@ Go to the <a href="commands.html">next</a> section or return to the
  With this tag you can assign which parser to use for a given extension.
  Doxygen has a built-in mapping, but you can override or extend it using this tag.
  The format is <code>ext=language</code>, where \c ext is a file extension, and language is one of
- the parsers supported by doxygen: IDL, Java, JavaScript, Csharp (C#), C, C++, D, PHP,
+ the parsers supported by doxygen: IDL, Java, JavaScript, Csharp (C#), C, C++, Lex, D, PHP,
  md (Markdown), Objective-C, Python, Slice, VHDL, Fortran (fixed format Fortran: FortranFixed,
  free formatted Fortran: FortranFree, unknown formatted Fortran: Fortran. In
  the later case the parser tries to guess whether the code is fixed or free
@@ -1434,6 +1434,7 @@ FILE_VERSION_FILTER = "cleartool desc -fmt \%Vn"
       <value name='*.hxx'/>
       <value name='*.hpp'/>
       <value name='*.h++'/>
+      <value name='*.l'/>
       <value name='*.cs'/>
       <value name='*.d'/>
       <value name='*.php'/>

diff --git a/src/context.cpp b/src/context.cpp
@@ -1540,6 +1540,7 @@ class DefinitionContext
         case SrcLangExt_SQL:      result="sql";      break;
         case SrcLangExt_Markdown: result="markdown"; break;
         case SrcLangExt_Slice:    result="slice";    break;
+        case SrcLangExt_Lex:      result="lex";      break;
       }
       return result;
     }

diff --git a/src/defargs.l b/src/defargs.l
@@ -27,12 +27,12 @@
  *  The Argument list as a whole can be pure, constant or volatile.
  *
  *  Examples of input strings are:
- *  \code
+ *  \verbatim
  *    "(int a,int b) const"
  *    "(const char *s="hello world",int=5) = 0"
  *    "<class T,class N>"
  *    "(char c,const char)"
- *  \endcode
+ *  \endverbatim
  *
  *  Note: It is not always possible to distinguish between the name and 
  *        type of an argument. In case of doubt the name is added to the

diff --git a/src/docsets.cpp b/src/docsets.cpp
@@ -328,6 +328,7 @@ void DocSets::addIndexItem(const Definition *context,const MemberDef *md,
     case SrcLangExt_SQL:     lang="sql"; break;        // Sql
     case SrcLangExt_Markdown:lang="markdown"; break;   // Markdown
     case SrcLangExt_Slice:   lang="slice"; break;      // Slice
+    case SrcLangExt_Lex:     lang="lex"; break;        // Lex
     case SrcLangExt_Unknown: lang="unknown"; break;    // should not happen!
   }
 

diff --git a/src/doxygen.cpp b/src/doxygen.cpp
@@ -77,6 +77,8 @@
 #include "fortranscanner.h"
 #include "xmlcode.h"
 #include "sqlcode.h"
+#include "lexcode.h"
+#include "lexscanner.h"
 #include "code.h"
 #include "portable.h"
 #include "vhdljjparser.h"
@@ -10058,6 +10060,8 @@ void initDoxygen()
                                                          make_parser_factory<SQLCodeParser>());
   Doxygen::parserManager->registerParser("md",           make_parser_factory<MarkdownOutlineParser>(),
                                                          make_parser_factory<FileCodeParser>());
+  Doxygen::parserManager->registerParser("lex",          make_parser_factory<LexOutlineParser>(),
+                                                         make_parser_factory<LexCodeParser>());
 
   // register any additional parsers here...
 

diff --git a/src/lexcode.h b/src/lexcode.h
@@ -0,0 +1,57 @@
+/******************************************************************************
+ *
+ * Copyright (C) 1997-2021 by Dimitri van Heesch.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation under the terms of the GNU General Public License is hereby
+ * granted. No representations are made about the suitability of this software
+ * for any purpose. It is provided "as is" without express or implied warranty.
+ * See the GNU General Public License for more details.
+ *
+ * Documents produced by Doxygen are derivative works derived from the
+ * input used in their production; they are not affected by this license.
+ *
+ */
+
+
+#ifndef LEXCODE_H
+#define LEXCODE_H
+
+#include "parserintf.h"
+
+class CodeOutputInterface;
+class FileDef;
+class MemberDef;
+class QCString;
+class Definition;
+
+/** LEX code scanner: the CodeParserInterface implementation registered for the "lex" language.
+ */
+class LexCodeParser : public CodeParserInterface
+{
+  public:
+    LexCodeParser();
+    virtual ~LexCodeParser();
+    void parseCode(CodeOutputInterface &codeOutIntf,
+                   const char *scopeName,
+                   const QCString &input,
+                   SrcLangExt,
+                   bool isExampleBlock,
+                   const char *exampleName=0,
+                   FileDef *fileDef=0,
+                   int startLine=-1,
+                   int endLine=-1,
+                   bool inlineFragment=FALSE,
+                   const MemberDef *memberDef=0,
+                   bool showLineNumbers=TRUE,
+                   const Definition *searchCtx=0,
+                   bool collectXRefs=TRUE
+                  ); // NOTE(review): presumably overrides CodeParserInterface::parseCode -- confirm against parserintf.h
+    void resetCodeParserState();
+  private:
+    struct Private; // only forward-declared here; the definition is not part of this header
+    std::unique_ptr<Private> p; // NOTE(review): <memory> is not included here, presumably pulled in via parserintf.h -- confirm
+};
+
+
+#endif