%{ /********************************************************************** markdown_parser.leg - markdown parser in C using a PEG grammar. (c) 2008 John MacFarlane (jgm at berkeley dot edu). This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. ***********************************************************************/ #include #include #include "markdown_peg.h" #include "utility_functions.c" /********************************************************************** PEG grammar and parser actions for markdown syntax. ***********************************************************************/ %} Doc = a:StartList ( Block { a = cons($$, a); } )* { parse_result = reverse(a); } Block = BlankLine* ( BlockQuote | Verbatim | Note | Reference | HorizontalRule | Heading | OrderedList | BulletList | HtmlBlock | StyleBlock | Para | Plain ) Para = NonindentSpace a:Inlines BlankLine+ { $$ = a; $$->key = PARA; } Plain = a:Inlines { $$ = a; $$->key = PLAIN; } AtxInline = !Newline !(Sp '#'* Sp Newline) Inline AtxStart = < ( "######" | "#####" | "####" | "###" | "##" | "#" ) > { $$ = mk_element(H1 + (strlen(yytext) - 1)); } AtxHeading = s:AtxStart Sp a:StartList ( AtxInline { a = cons($$, a); } )+ (Sp '#'* Sp)? Newline { $$ = mk_list(s->key, a); free(s); } SetextHeading = SetextHeading1 | SetextHeading2 SetextHeading1 = a:StartList ( !Endline Inline { a = cons($$, a); } )+ Newline "===" '='* Newline { $$ = mk_list(H1, a); } SetextHeading2 = a:StartList ( !Endline Inline { a = cons($$, a); } )+ Newline "---" '-'* Newline { $$ = mk_list(H2, a); } Heading = AtxHeading | SetextHeading BlockQuote = a:BlockQuoteRaw { $$ = mk_element(BLOCKQUOTE); $$->children = a; } BlockQuoteRaw = a:StartList (( '>' ' '? Line { a = cons($$, a); } ) ( !'>' !BlankLine Line { a = cons($$, a); } )* ( BlankLine { a = cons(mk_str("\n"), a); } )* )+ { $$ = mk_str_from_list(a, true); $$->key = RAW; } NonblankIndentedLine = !BlankLine IndentedLine VerbatimChunk = a:StartList ( BlankLine { a = cons(mk_str("\n"), a); } )* ( NonblankIndentedLine { a = cons($$, a); } )+ { $$ = mk_str_from_list(a, false); } Verbatim = a:StartList ( VerbatimChunk { a = cons($$, a); } )+ { $$ = mk_str_from_list(a, false); $$->key = VERBATIM; } HorizontalRule = NonindentSpace ( '*' Sp '*' Sp '*' (Sp '*')* | '-' Sp '-' Sp '-' (Sp '-')* | '_' Sp '_' Sp '_' (Sp '_')*) Sp Newline BlankLine+ { $$ = mk_element(HRULE); } Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+ BulletList = &Bullet (ListTight | ListLoose) { $$->key = BULLETLIST; } ListTight = a:StartList ( ListItem { a = cons($$, a); } )+ BlankLine* !(Bullet | Enumerator) { $$ = mk_list(LIST, a); } ListLoose = a:StartList ( b:ListItem BlankLine* { element *li; li = b->children; li->contents.str = realloc(li->contents.str, strlen(li->contents.str) + 3); strcat(li->contents.str, "\n\n"); /* In loose list, \n\n added to end of each element */ a = cons(b, a); } )+ { $$ = mk_list(LIST, a); } ListItem = ( Bullet | Enumerator ) a:StartList ListBlock { a = cons($$, a); } ( ListContinuationBlock { a = cons($$, a); } )* { element *raw; raw = mk_str_from_list(a, false); raw->key = RAW; $$ = mk_element(LISTITEM); $$->children = raw; } ListBlock = a:StartList Line { a = cons($$, a); } ( ListBlockLine { a = cons($$, a); } )* { $$ = mk_str_from_list(a, false); } ListContinuationBlock = a:StartList ( < BlankLine* > { if (strlen(yytext) == 0) a = cons(mk_str("\001"), a); /* block separator */ else a = cons(mk_str(yytext), a); } ) ( Indent ListBlock { a = cons($$, a); } )+ { $$ = mk_str_from_list(a, false); } Enumerator = NonindentSpace [0-9]+ '.' Spacechar+ OrderedList = &Enumerator (ListTight | ListLoose) { $$->key = ORDEREDLIST; } ListBlockLine = !( Indent? (Bullet | Enumerator) ) !BlankLine !HorizontalRule OptionallyIndentedLine # Parsers for different kinds of block-level HTML content. # This is repetitive due to constraints of PEG grammar. HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>' HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>' HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>' HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>' HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>' HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>' HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>' HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>' HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>' HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>' HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>' HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>' HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>' HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>' HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>' HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>' HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>' HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>' HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>' HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>' HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>' HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>' HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>' HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>' HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>' HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>' HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>' HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>' HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>' HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>' HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>' HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>' HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>' HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>' HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>' HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>' HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>' HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>' HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>' HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>' HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>' HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>' HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>' HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>' HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>' HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>' HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>' HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>' HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>' HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>' HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>' HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>' HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>' HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>' HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>' HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>' HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>' HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>' HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>' HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>' HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>' HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>' HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>' HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>' HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>' HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>' HtmlBlockInTags = HtmlBlockOpenAddress (HtmlBlockInTags | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress | HtmlBlockOpenBlockquote (HtmlBlockInTags | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote | HtmlBlockOpenCenter (HtmlBlockInTags | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter | HtmlBlockOpenDir (HtmlBlockInTags | !HtmlBlockCloseDir .)* HtmlBlockCloseDir | HtmlBlockOpenDiv (HtmlBlockInTags | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv | HtmlBlockOpenDl (HtmlBlockInTags | !HtmlBlockCloseDl .)* HtmlBlockCloseDl | HtmlBlockOpenFieldset (HtmlBlockInTags | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset | HtmlBlockOpenForm (HtmlBlockInTags | !HtmlBlockCloseForm .)* HtmlBlockCloseForm | HtmlBlockOpenH1 (HtmlBlockInTags | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1 | HtmlBlockOpenH2 (HtmlBlockInTags | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2 | HtmlBlockOpenH3 (HtmlBlockInTags | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3 | HtmlBlockOpenH4 (HtmlBlockInTags | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4 | HtmlBlockOpenH5 (HtmlBlockInTags | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5 | HtmlBlockOpenH6 (HtmlBlockInTags | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6 | HtmlBlockOpenMenu (HtmlBlockInTags | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu | HtmlBlockOpenNoframes (HtmlBlockInTags | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes | HtmlBlockOpenNoscript (HtmlBlockInTags | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript | HtmlBlockOpenOl (HtmlBlockInTags | !HtmlBlockCloseOl .)* HtmlBlockCloseOl | HtmlBlockOpenP (HtmlBlockInTags | !HtmlBlockCloseP .)* HtmlBlockCloseP | HtmlBlockOpenPre (HtmlBlockInTags | !HtmlBlockClosePre .)* HtmlBlockClosePre | HtmlBlockOpenTable (HtmlBlockInTags | !HtmlBlockCloseTable .)* HtmlBlockCloseTable | HtmlBlockOpenUl (HtmlBlockInTags | !HtmlBlockCloseUl .)* HtmlBlockCloseUl | HtmlBlockOpenDd (HtmlBlockInTags | !HtmlBlockCloseDd .)* HtmlBlockCloseDd | HtmlBlockOpenDt (HtmlBlockInTags | !HtmlBlockCloseDt .)* HtmlBlockCloseDt | HtmlBlockOpenFrameset (HtmlBlockInTags | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset | HtmlBlockOpenLi (HtmlBlockInTags | !HtmlBlockCloseLi .)* HtmlBlockCloseLi | HtmlBlockOpenTbody (HtmlBlockInTags | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody | HtmlBlockOpenTd (HtmlBlockInTags | !HtmlBlockCloseTd .)* HtmlBlockCloseTd | HtmlBlockOpenTfoot (HtmlBlockInTags | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot | HtmlBlockOpenTh (HtmlBlockInTags | !HtmlBlockCloseTh .)* HtmlBlockCloseTh | HtmlBlockOpenThead (HtmlBlockInTags | !HtmlBlockCloseThead .)* HtmlBlockCloseThead | HtmlBlockOpenTr (HtmlBlockInTags | !HtmlBlockCloseTr .)* HtmlBlockCloseTr | HtmlBlockOpenScript (HtmlBlockInTags | !HtmlBlockCloseScript .)* HtmlBlockCloseScript HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) > BlankLine+ { if (extension(EXT_FILTER_HTML)) { $$ = mk_list(LIST, NULL); } else { $$ = mk_str(yytext); $$->key = HTMLBLOCK; } } HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>' HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" | "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" | "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" | "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" | "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT" StyleOpen = '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>' StyleClose = '<' Spnl '/' ("style" | "STYLE") Spnl '>' InStyleTags = StyleOpen (!StyleClose .)* StyleClose StyleBlock = < InStyleTags > BlankLine* { if (extension(EXT_FILTER_STYLES)) { $$ = mk_list(LIST, NULL); } else { $$ = mk_str(yytext); $$->key = HTMLBLOCK; } } Inlines = a:StartList ( !Endline Inline { a = cons($$, a); } | c:Endline &Inline { a = cons(c, a); } )+ Endline? { $$ = mk_list(LIST, a); } Inline = Str | Endline | UlOrStarLine | Space | Strong | Emph | Image | Link | NoteReference | InlineNote | Code | RawHtml | Entity | EscapedChar | Smart | Symbol Space = Spacechar+ { $$ = mk_str(" "); $$->key = SPACE; } Str = < NormalChar+ > { $$ = mk_str(yytext); } EscapedChar = '\\' !Newline < . > { $$ = mk_str(yytext); } Entity = ( HexEntity | DecEntity | CharEntity ) { $$ = mk_str(yytext); $$->key = HTML; } Endline = LineBreak | TerminalEndline | NormalEndline NormalEndline = Sp Newline !BlankLine !'>' !AtxStart !(Line ("===" '='* | "---" '-'*) Newline) { $$ = mk_str("\n"); $$->key = SPACE; } TerminalEndline = Sp Newline Eof { $$ = NULL; } LineBreak = " " NormalEndline { $$ = mk_element(LINEBREAK); } Symbol = < SpecialChar > { $$ = mk_str(yytext); } # This keeps the parser from getting bogged down on long strings of '*' or '_', # or strings of '*' or '_' with space on each side: UlOrStarLine = (UlLine | StarLine) { $$ = mk_str(yytext); } StarLine = < "****" '*'* > | < Spacechar '*'+ &Spacechar > UlLine = < "____" '_'* > | < Spacechar '_'+ &Spacechar > Emph = EmphStar | EmphUl OneStarOpen = !StarLine '*' !Spacechar !Newline OneStarClose = !Spacechar !Newline a:Inline !StrongStar '*' { $$ = a; } EmphStar = OneStarOpen a:StartList ( !OneStarClose Inline { a = cons($$, a); } )* OneStarClose { a = cons($$, a); } { $$ = mk_list(EMPH, a); } OneUlOpen = !UlLine '_' !Spacechar !Newline OneUlClose = !Spacechar !Newline a:Inline !StrongUl '_' !Alphanumeric { $$ = a; } EmphUl = OneUlOpen a:StartList ( !OneUlClose Inline { a = cons($$, a); } )* OneUlClose { a = cons($$, a); } { $$ = mk_list(EMPH, a); } Strong = StrongStar | StrongUl TwoStarOpen = !StarLine "**" !Spacechar !Newline TwoStarClose = !Spacechar !Newline a:Inline "**" { $$ = a; } StrongStar = TwoStarOpen a:StartList ( !TwoStarClose Inline { a = cons($$, a); } )* TwoStarClose { a = cons($$, a); } { $$ = mk_list(STRONG, a); } TwoUlOpen = !UlLine "__" !Spacechar !Newline TwoUlClose = !Spacechar !Newline a:Inline "__" !Alphanumeric { $$ = a; } StrongUl = TwoUlOpen a:StartList ( !TwoUlClose Inline { a = cons($$, a); } )* TwoUlClose { a = cons($$, a); } { $$ = mk_list(STRONG, a); } Image = '!' ( ExplicitLink | ReferenceLink ) { $$->key = IMAGE; } Link = ExplicitLink | ReferenceLink | AutoLink ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label { link match; if (find_reference(&match, b->children)) { $$ = mk_link(a->children, match.url, match.title); free(a); free_element_list(b); } else { element *result; result = mk_element(LIST); result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext), cons(mk_str("["), cons(b, mk_str("]"))))))); $$ = result; } } ReferenceLinkSingle = a:Label < (Spnl "[]")? > { link match; if (find_reference(&match, a->children)) { $$ = mk_link(a->children, match.url, match.title); free(a); } else { element *result; result = mk_element(LIST); result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext)))); $$ = result; } } ExplicitLink = l:Label Spnl '(' Sp s:Source Spnl t:Title Sp ')' { $$ = mk_link(l->children, s->contents.str, t->contents.str); free_element(s); free_element(t); free(l); } Source = ( '<' < SourceContents > '>' | < SourceContents > ) { $$ = mk_str(yytext); } SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')* | "" Title = ( TitleSingle | TitleDouble | < "" > ) { $$ = mk_str(yytext); } TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) !Newline . )* > '\'' TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) !Newline . )* > '"' AutoLink = AutoLinkUrl | AutoLinkEmail AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>' { $$ = mk_link(mk_str(yytext), yytext, ""); } AutoLinkEmail = '<' < [-A-Za-z0-9+_]+ '@' ( !Newline !'>' . )+ > '>' { char *mailto = malloc(strlen(yytext) + 8); sprintf(mailto, "mailto:%s", yytext); $$ = mk_link(mk_str(yytext), mailto, ""); free(mailto); } Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc Spnl t:RefTitle BlankLine* { $$ = mk_link(l->children, s->contents.str, t->contents.str); free_element(s); free_element(t); free(l); $$->key = REFERENCE; } Label = '[' ( !'^' &{ extension(EXT_NOTES) } | &. &{ !extension(EXT_NOTES) } ) a:StartList ( !']' Inline { a = cons($$, a); } )* ']' { $$ = mk_list(LIST, a); } RefSrc = < Nonspacechar+ > { $$ = mk_str(yytext); $$->key = HTML; } RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle ) { $$ = mk_str(yytext); } EmptyTitle = < "" > RefTitleSingle = '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\'' RefTitleDouble = '"' < ( !('"' Sp Newline | Newline) . )* > '"' RefTitleParens = '(' < ( !(')' Sp Newline | Newline) . )* > ')' References = a:StartList ( b:Reference { a = cons(b, a); } | SkipBlock )* { references = reverse(a); } Ticks1 = "`" Ticks2 = "``" Ticks3 = "```" Ticks4 = "````" Ticks5 = "`````" Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1 | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2 | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3 | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4 | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5 ) { $$ = mk_str(yytext); $$->key = CODE; } RawHtml = < (HtmlComment | HtmlTag) > { if (extension(EXT_FILTER_HTML)) { $$ = mk_list(LIST, NULL); } else { $$ = mk_str(yytext); $$->key = HTML; } } BlankLine = Sp Newline Quoted = '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\'' HtmlAttribute = (Alphanumeric | '-')+ Spnl ('=' Spnl (Quoted | Nonspacechar+))? Spnl HtmlComment = "" .)* "-->" HtmlTag = '<' Spnl '/'? Alphanumeric+ Spnl HtmlAttribute* '/'? Spnl '>' Eof = !. Spacechar = ' ' | '\t' Nonspacechar = !Spacechar !Newline . Newline = '\n' | '\r' '\n'? Sp = Spacechar* Spnl = Sp (Newline Sp)? SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '<' | '!' | '\\' | ExtendedSpecialChar NormalChar = !( SpecialChar | Spacechar | Newline ) . Alphanumeric = [A-Za-z0-9] Digit = [0-9] HexEntity = < '&' '#' [Xx] [0-9a-fA-F]+ ';' > DecEntity = < '&' '#' [0-9]+ > ';' > CharEntity = < '&' [A-Za-z0-9]+ ';' > NonindentSpace = " " | " " | " " | "" Indent = "\t" | " " IndentedLine = Indent Line OptionallyIndentedLine = Indent? Line # StartList starts a list data structure that can be added to with cons: StartList = &. { $$ = NULL; } Line = RawLine { $$ = mk_str(yytext); } RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof ) SkipBlock = ( !BlankLine RawLine )+ BlankLine* | BlankLine+ # Syntax extensions ExtendedSpecialChar = &{ extension(EXT_SMART) } ('.' | '-' | '\'' | '"') | &{ extension(EXT_NOTES) } ( '^' ) Smart = &{ extension(EXT_SMART) } ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe ) Apostrophe = '\'' { $$ = mk_element(APOSTROPHE); } Ellipsis = ("..." | ". . .") { $$ = mk_element(ELLIPSIS); } Dash = EmDash | EnDash EnDash = '-' &Digit { $$ = mk_element(ENDASH); } EmDash = ("---" | "--") { $$ = mk_element(EMDASH); } SingleQuoteStart = '\'' ![)!\],.;:-? \t\n] !( ( "s" | "t" | "m" | "ve" | "ll" | "re" ) !Alphanumeric ) SingleQuoteEnd = '\'' !Alphanumeric SingleQuoted = SingleQuoteStart a:StartList ( !SingleQuoteEnd b:Inline { a = cons(b, a); } )+ SingleQuoteEnd { $$ = mk_list(SINGLEQUOTED, a); } DoubleQuoteStart = '"' DoubleQuoteEnd = '"' DoubleQuoted = DoubleQuoteStart a:StartList ( !DoubleQuoteEnd b:Inline { a = cons(b, a); } )+ DoubleQuoteEnd { $$ = mk_list(DOUBLEQUOTED, a); } NoteReference = &{ extension(EXT_NOTES) } ref:RawNoteReference { element *match; if (find_note(&match, ref->contents.str)) { $$ = mk_element(NOTE); assert(match->children != NULL); $$->children = match->children; $$->contents.str = 0; } else { char *s; s = malloc(strlen(ref->contents.str) + 4); sprintf(s, "[^%s]", ref->contents.str); $$ = mk_str(s); free(s); } } RawNoteReference = "[^" < ( !Newline !']' . )+ > ']' { $$ = mk_str(yytext); } Note = &{ extension(EXT_NOTES) } NonindentSpace ref:RawNoteReference ':' Sp a:StartList ( RawNoteBlock { a = cons($$, a); } ) ( &Indent RawNoteBlock { a = cons($$, a); } )* { $$ = mk_list(NOTE, a); $$->contents.str = strdup(ref->contents.str); } InlineNote = &{ extension(EXT_NOTES) } "^[" a:StartList ( !']' Inline { a = cons($$, a); } )+ ']' { $$ = mk_list(NOTE, a); $$->contents.str = 0; } Notes = a:StartList ( b:Note { a = cons(b, a); } | SkipBlock )* { notes = reverse(a); } RawNoteBlock = a:StartList ( !BlankLine OptionallyIndentedLine { a = cons($$, a); } )+ ( < BlankLine* > { a = cons(mk_str(yytext), a); } ) { $$ = mk_str_from_list(a, true); $$->key = RAW; } %% #include "parsing_functions.c"