Skip to content

Commit

Permalink
Adding support for lex files
Browse files Browse the repository at this point in the history
- Correct handling of C comment start and end tokens as well as Cpp comment start in rules. These tokens can give "Reached end of file while still inside a (nested) comment..."
- Correct other warnings with respect to lex files
  • Loading branch information
albert-github committed Feb 18, 2021
1 parent 0f0b282 commit 789625c
Show file tree
Hide file tree
Showing 15 changed files with 398 additions and 271 deletions.
2 changes: 1 addition & 1 deletion libxml/xml.h
Expand Up @@ -76,7 +76,7 @@ class XMLParser : public XMLLocator
/*! Parses a file given the contents of the file as a string.
* @param fileName the name of the file, used for error reporting.
* @param inputString the contents of the file as a zero terminated UTF-8 string.
* @param debugEnable indicates if debugging via -d lex is enabled or not.
* @param debugEnabled indicates if debugging via -d lex is enabled or not.
*/
void parse(const char *fileName,const char *inputString,bool debugEnabled);

Expand Down
79 changes: 47 additions & 32 deletions src/code.l

Large diffs are not rendered by default.

85 changes: 54 additions & 31 deletions src/commentcnv.l
Expand Up @@ -171,6 +171,23 @@ FLOAT_NUMBER {FLOAT_DECIMAL}|{FLOAT_HEXADECIMAL}
NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
//- end: NUMBER ---------------------------------------------------------------------------
// C start comment
CCS "/\*"
// C end comment
CCE "*\/"
// Cpp comment
CPPC "/\/"
// Optional any character
ANYopt .*
// Optional white space
WSopt [ \t\r]*
// readline non special
RLopt [^\\@\n\*\/]*
// Optional slash
SLASHopt [/]*
%%
<Scan>{NUMBER} { //Note similar code in code.l
Expand Down Expand Up @@ -277,8 +294,8 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
<Scan>\n { /* new line */
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<Scan>"//!"/.*\n[ \t]*"//"[\/!][^\/] | /* start C++ style special comment block */
<Scan>("///"[/]*)/[^/].*\n[ \t]*"//"[\/!][^\/] { /* start C++ style special comment block */
<Scan>{CPPC}"!"/.*\n[ \t]*{CPPC}[\/!][^\/] | /* start C++ style special comment block */
<Scan>({CPPC}"/"[/]*)/[^/].*\n[ \t]*{CPPC}[\/!][^\/] { /* start C++ style special comment block */
if (yyextra->mlBrief)
{
REJECT; // bail out if we do not need to convert
Expand All @@ -299,7 +316,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
BEGIN(ReadLine);
}
}
<Scan>"//##Documentation".*/\n { /* Start of Rational Rose ANSI C++ comment block */
<Scan>{CPPC}"##Documentation"{ANYopt}/\n { /* Start of Rational Rose ANSI C++ comment block */
if (yyextra->mlBrief) REJECT;
int i=17; //=strlen("//##Documentation");
yyextra->blockHeadCol=yyextra->col;
Expand All @@ -308,22 +325,22 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
yyextra->inRoseComment=TRUE;
BEGIN(SComment);
}
<Scan>"//"[!\/]/.*\n[ \t]*"//"[|\/][ \t]*[@\\]"}" { // next line contains an end marker, see bug 752712
<Scan>{CPPC}[!\/]/.*\n[ \t]*{CPPC}[|\/][ \t]*[@\\]"}" { // next line contains an end marker, see bug 752712
yyextra->inSpecialComment=yytext[2]=='/' || yytext[2]=='!';
copyToOutput(yyscanner,yytext,(int)yyleng);
yyextra->readLineCtx=YY_START;
BEGIN(ReadLine);
}
<Scan>"//"/.*\n { /* one line C++ comment */
<Scan>{CPPC}/.*\n { /* one line C++ comment */
yyextra->inSpecialComment=yytext[2]=='/' || yytext[2]=='!';
copyToOutput(yyscanner,yytext,(int)yyleng);
yyextra->readLineCtx=YY_START;
BEGIN(ReadLine);
}
<Scan>"/**/" { /* avoid matching next rule for empty C comment, see bug 711723 */
<Scan>{CCS}{CCE} { /* avoid matching next rule for empty C comment, see bug 711723 */
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<Scan>"/*"[*!]? { /* start of a C comment */
<Scan>{CCS}[*!]? { /* start of a C comment */
if (yyextra->lang==SrcLangExt_Python)
{
REJECT;
Expand Down Expand Up @@ -444,8 +461,14 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
yyextra->lastCommentContext = YY_START;
BEGIN(Verbatim);
}
<Scan>"\\\"" { /* escaped double quote */
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<Scan>"\\\\" { /* escaped backslash */
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<Scan>. { /* any other character */
copyToOutput(yyscanner,yytext,(int)yyleng);
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<Verbatim>[\\@]("endverbatim"|"endlatexonly"|"endhtmlonly"|"endxmlonly"|"enddocbookonly"|"endrtfonly"|"endmanonly"|"f$"|"f]"|"f}") { /* end of verbatim block */
copyToOutput(yyscanner,yytext,(int)yyleng);
Expand Down Expand Up @@ -502,7 +525,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
BEGIN(yyextra->lastCommentContext);
}
}
<VerbatimCode>^[ \t]*"//"[\!\/]? { /* skip leading comments */
<VerbatimCode>^[ \t]*{CPPC}[\!\/]? { /* skip leading comments */
if (!yyextra->inSpecialComment)
{
copyToOutput(yyscanner,yytext,(int)yyleng);
Expand Down Expand Up @@ -531,7 +554,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
<Verbatim,VerbatimCode>\n { /* new line in verbatim block */
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<Verbatim>^[ \t]*"//"[/!] {
<Verbatim>^[ \t]*{CPPC}[/!] {
if (yyextra->blockName=="dot" || yyextra->blockName=="msc" || yyextra->blockName=="uml" || yyextra->blockName.at(0)=='f')
{
// see bug 487871, strip /// from dot images and formulas.
Expand Down Expand Up @@ -597,7 +620,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
<CComment>[^ `~<\\!@*\n{\"\/]* { /* anything that is not a '*' or command */
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<CComment>"*"+[^*/\\@\n{\"]* { /* stars without slashes */
<CComment>"*"+[^*\/\\@\n{\"]* { /* stars without slashes */
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<CComment>"\"\"\"" { /* end of Python docstring */
Expand Down Expand Up @@ -729,45 +752,45 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
<CComment>. {
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<SComment>^[ \t]*"///"[\/]*/\n {
<SComment>^[ \t]*{CPPC}"/"{SLASHopt}/\n {
replaceComment(yyscanner,0);
}
<SComment>\n[ \t]*"///"[\/]*/\n {
<SComment>\n[ \t]*{CPPC}"/"{SLASHopt}/\n {
replaceComment(yyscanner,1);
}
<SComment>^[ \t]*"///"[^\/\n]/.*\n {
<SComment>^[ \t]*{CPPC}"/"[^\/\n]/.*\n {
replaceComment(yyscanner,0);
yyextra->readLineCtx=YY_START;
BEGIN(ReadLine);
}
<SComment>\n[ \t]*"//"[\/!]("<")?[ \t]*[\\@]"}".*\n {
<SComment>\n[ \t]*{CPPC}[\/!]("<")?[ \t]*[\\@]"}".*\n {
/* See Bug 752712: end the multiline comment when finding a @} or \} command */
copyToOutput(yyscanner," */",3);
copyToOutput(yyscanner,yytext,(int)yyleng);
yyextra->inSpecialComment=FALSE;
yyextra->inRoseComment=FALSE;
BEGIN(Scan);
}
<SComment>\n[ \t]*"///"[^\/\n]/.*\n {
<SComment>\n[ \t]*{CPPC}"/"[^\/\n]/.*\n {
replaceComment(yyscanner,1);
yyextra->readLineCtx=YY_START;
BEGIN(ReadLine);
}
<SComment>^[ \t]*"//!" | // just //!
<SComment>^[ \t]*"//!<"/.*\n | // or //!< something
<SComment>^[ \t]*"//!"[^<]/.*\n { // or //!something
<SComment>^[ \t]*{CPPC}"!" | // just //!
<SComment>^[ \t]*{CPPC}"!<"/.*\n | // or //!< something
<SComment>^[ \t]*{CPPC}"!"[^<]/.*\n { // or //!something
replaceComment(yyscanner,0);
yyextra->readLineCtx=YY_START;
BEGIN(ReadLine);
}
<SComment>\n[ \t]*"//!" |
<SComment>\n[ \t]*"//!<"/.*\n |
<SComment>\n[ \t]*"//!"[^<\n]/.*\n {
<SComment>\n[ \t]*{CPPC}"!" |
<SComment>\n[ \t]*{CPPC}"!<"/.*\n |
<SComment>\n[ \t]*{CPPC}"!"[^<\n]/.*\n {
replaceComment(yyscanner,1);
yyextra->readLineCtx=YY_START;
BEGIN(ReadLine);
}
<SComment>^[ \t]*"//##"/.*\n {
<SComment>^[ \t]*{CPPC}"##"/.*\n {
if (!yyextra->inRoseComment)
{
REJECT;
Expand All @@ -779,7 +802,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
BEGIN(ReadLine);
}
}
<SComment>\n[ \t]*"//##"/.*\n {
<SComment>\n[ \t]*{CPPC}"##"/.*\n {
if (!yyextra->inRoseComment)
{
REJECT;
Expand All @@ -798,19 +821,19 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
yyextra->inRoseComment=FALSE;
BEGIN(Scan);
}
<ReadLine>"/**" {
<ReadLine>{CCS}"*" {
copyToOutput(yyscanner,"/&zwj;**",8);
}
<ReadLine>"*/" {
<ReadLine>{CCE} {
copyToOutput(yyscanner,"*&zwj;/",7);
}
<ReadLine>"*" {
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<ReadLine>[^\\@\n\*/]* {
<ReadLine>{RLopt} {
copyToOutput(yyscanner,yytext,(int)yyleng);
}
<ReadLine>[^\\@\n\*/]*/\n {
<ReadLine>{RLopt}/\n {
copyToOutput(yyscanner,yytext,(int)yyleng);
BEGIN(yyextra->readLineCtx);
}
Expand Down Expand Up @@ -844,7 +867,7 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
<CondLine>[!()&| \ta-z_A-Z0-9.\-]+ {
handleCondSectionId(yyscanner,yytext);
}
<CComment,ReadLine>[\\@]"cond"[ \t\r]*/\n {
<CComment,ReadLine>[\\@]"cond"{WSopt}/\n {
yyextra->condCtx=YY_START;
handleCondSectionId(yyscanner," "); // fake section id causing the section to be hidden unconditionally
}
Expand All @@ -862,9 +885,9 @@ NUMBER {INTEGER_NUMBER}|{FLOAT_NUMBER}
yyextra->lastEscaped=0;
BEGIN( ReadAliasArgs );
}
<ReadAliasArgs>^[ \t]*"//"[/!]/[^\n]+ { // skip leading special comments (see bug 618079)
<ReadAliasArgs>^[ \t]*{CPPC}[/!]/[^\n]+ { // skip leading special comments (see bug 618079)
}
<ReadAliasArgs>"*/" { // oops, end of comment in the middle of an alias?
<ReadAliasArgs>{CCE} { // oops, end of comment in the middle of an alias?
if (yyextra->lang==SrcLangExt_Python)
{
REJECT;
Expand Down
29 changes: 21 additions & 8 deletions src/commentscan.l
Expand Up @@ -469,7 +469,8 @@ DETAILEDHTMLOPT {CODE}
BN [ \t\n\r]
BL [ \t\r]*"\n"
B [ \t]
BS ^(({B}*"//")?)(({B}*"*"+)?){B}*
Bopt {B}*
BS ^(({B}*"/""/")?)(({B}*"*"+)?){B}*
ATTR ({B}+[^>\n]*)?
DOCNL "\n"|"\\ilinebr"
LC "\\"{B}*"\n"
Expand All @@ -488,6 +489,18 @@ TMPLSPEC "<"{BN}*[^>]+{BN}*">"
MAILADDR [a-z_A-Z0-9.+\-]+"@"[a-z_A-Z0-9\-]+("."[a-z_A-Z0-9\-]+)+[a-z_A-Z0-9\-]+
RCSTAG "$"{ID}":"[^\n$]+"$"

// C start comment
CCS "/\*"
// C end comment
CCE "*\/"
// Cpp comment
CPPC "/\/"

// end of section title with asterisk
STAopt [^\n@\\*]*
// end of section title without asterisk
STopt [^\n@\\]*

%option noyywrap

/* comment parsing states. */
Expand Down Expand Up @@ -541,7 +554,7 @@ RCSTAG "$"{ID}":"[^\n$]+"$"
/* What can happen while parsing a comment block:
* commands (e.g. @page, or \page)
* escaped commands (e.g. @@page or \\page).
* formulas (e.g. \f$ \f[ \f{..)
* formulas (e.g. \f$...\f$ \f[...\f] \f{...\f})
* directories (e.g. \doxygen\src\)
* autolist end. (e.g. a dot on an otherwise empty line)
* newlines.
Expand Down Expand Up @@ -770,7 +783,7 @@ RCSTAG "$"{ID}":"[^\n$]+"$"
<Comment>[a-z_A-Z]+ { // normal word
addOutput(yyscanner,yytext);
}
<Comment>^{B}*"."{B}*/\n { // explicit end autolist: e.g " ."
<Comment>^{B}*"."{Bopt}/\n { // explicit end autolist: e.g " ."
addOutput(yyscanner,yytext);
}
<Comment>^{B}*[1-9][0-9]*"."{B}+ |
Expand All @@ -797,7 +810,7 @@ RCSTAG "$"{ID}":"[^\n$]+"$"
}
addOutput(yyscanner,yytext);
}
<Comment>^{B}*([\-:|]{B}*)*("--"|"---")({B}*[\-:|])*{B}*/\n { // horizontal line (dashed)
<Comment>^{B}*([\-:|]{B}*)*("--"|"---")({B}*[\-:|])*{Bopt}/\n { // horizontal line (dashed)
addOutput(yyscanner,yytext);
}
<Comment>{CMD}"---" { // escaped mdash
Expand Down Expand Up @@ -1344,12 +1357,12 @@ RCSTAG "$"{ID}":"[^\n$]+"$"
);
BEGIN(Comment);
}
<SectionTitle>[^\n@\\*]*/"\n" { // end of section title
<SectionTitle>{STAopt}/"\n" { // end of section title
addSection(yyscanner);
addOutput(yyscanner,yytext);
BEGIN( Comment );
}
<SectionTitle>[^\n@\\]*/"\\ilinebr" { // end of section title
<SectionTitle>{STopt}/"\\ilinebr" { // end of section title
addSection(yyscanner);
addOutput(yyscanner,yytext);
BEGIN( Comment );
Expand Down Expand Up @@ -1451,11 +1464,11 @@ RCSTAG "$"{ID}":"[^\n$]+"$"
if (*yytext=='\n') yyextra->lineNr++;
addOutput(yyscanner,'\n');
}
<FormatBlock>"/*" { // start of a C-comment
<FormatBlock>{CCS} { // start of a C-comment
if (!(yyextra->blockName=="code" || yyextra->blockName=="verbatim")) yyextra->commentCount++;
addOutput(yyscanner,yytext);
}
<FormatBlock>"*/" { // end of a C-comment
<FormatBlock>{CCE} { // end of a C-comment
addOutput(yyscanner,yytext);
if (!(yyextra->blockName=="code" || yyextra->blockName=="verbatim"))
{
Expand Down
5 changes: 3 additions & 2 deletions src/declinfo.l
Expand Up @@ -83,6 +83,7 @@ static yy_size_t yyread(char *buf,yy_size_t max_size, yyscan_t yyscanner);
%}

B [ \t]
Bopt {B}*
ID "$"?([a-z_A-Z\x80-\xFF][a-z_A-Z0-9\x80-\xFF]*)|(@[0-9]+)

%x Start
Expand Down Expand Up @@ -188,11 +189,11 @@ ID "$"?([a-z_A-Z\x80-\xFF][a-z_A-Z0-9\x80-\xFF]*)|(@[0-9]+)
<Template>. {
yyextra->name+=*yytext;
}
<Operator>{B}*"("{B}*")"{B}*"<>"{B}*/"(" {
<Operator>{B}*"("{B}*")"{B}*"<>"{Bopt}/"(" {
yyextra->name+="() <>";
BEGIN(ReadArgs);
}
<Operator>{B}*"("{B}*")"{B}*/"(" {
<Operator>{B}*"("{B}*")"{Bopt}/"(" {
yyextra->name+="()";
BEGIN(ReadArgs);
}
Expand Down

0 comments on commit 789625c

Please sign in to comment.