%{
/**********************************************************************
markdown_parser.leg - markdown parser in C using a PEG grammar.
(c) 2008 John MacFarlane (jgm at berkeley dot edu).
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
***********************************************************************/
#include <stdbool.h>
#include <assert.h>
#include "markdown_peg.h"
extern int strcasecmp(const char *string1, const char *string2);
int yyparse(void);
/**********************************************************************
List manipulation functions
***********************************************************************/
/* cons - prepend a copy of 'new' to 'list'.
 * Allocates one list node, copies 'new' into it by value, links it in front
 * of 'list' and returns the new head (the caller owns the node).
 * Running out of memory is reported on stderr and terminates the program;
 * unlike assert(), this check is not compiled out when NDEBUG is defined. */
element *cons(element new, element *list) {
    element *head = malloc(sizeof *head);
    if (head == NULL) {
        fprintf(stderr, "cons: out of memory\n");
        exit(EXIT_FAILURE);
    }
    *head = new;
    head->next = list;   /* overrides whatever 'next' the copied element carried */
    return head;
}
/* pushelt - prepend 'new' to the child list of 'lst'.
 * 'lst' must be an element whose key is LIST (checked with assert).
 * Because elements are added at the head, the children end up in reverse
 * order of insertion. */
static void pushelt(element new, element *lst) {
    element *previous_children;

    assert(lst->key == LIST);
    previous_children = lst->children;
    lst->children = cons(new, previous_children);
}
/* reverse - build a reversed copy of a list.
 * Each node of 'list' is copied (shallowly) into a freshly allocated node by
 * cons(); the nodes of the input list are not freed here.
 * Returns the head of the new list, or NULL when 'list' is empty. */
element *reverse(element *list) {
    element *reversed = NULL;
    element *node;

    for (node = list; node != NULL; node = node->next)
        reversed = cons(*node, reversed);
    return reversed;
}
/* length_of_strings - total length of the strings in a list of STR elements.
 * Every element must have key STR and a non-NULL contents.str (asserted).
 * Returns 0 for an empty list. */
static int length_of_strings(element *list) {
    int total = 0;
    element *node;

    for (node = list; node != NULL; node = node->next) {
        assert(node->key == STR);
        assert(node->contents.str != NULL);
        total += strlen(node->contents.str);
    }
    return total;
}
/* concat_string_list - concatenate the strings of a list of STR elements.
 * Returns a newly malloc'd, NUL-terminated string that the caller must free.
 * The buffer holds one byte more than the text and its terminator, so a
 * caller may append a single '\n' in place.
 * Every element must have key STR and a non-NULL contents.str (asserted).
 * Running out of memory is reported on stderr and terminates the program. */
static char *concat_string_list(element *list) {
    char *result = malloc(length_of_strings(list) + 2); /* leave room for optional \n and \0 */
    char *end;   /* always points at the terminating NUL written so far */

    if (result == NULL) {
        fprintf(stderr, "concat_string_list: out of memory\n");
        exit(EXIT_FAILURE);
    }
    *result = '\0';
    end = result;
    while (list != NULL) {
        assert(list->key == STR);
        assert(list->contents.str != NULL);
        /* Appending at 'end' instead of 'result' avoids rescanning the
         * text already copied on every iteration. */
        strcat(end, list->contents.str);
        end += strlen(end);
        list = list->next;
    }
    return result;
}
/**********************************************************************
Global variables used in parsing
***********************************************************************/
static char *charbuf = ""; /* Buffer of characters to be parsed; YY_INPUT reads it one character at a time, advancing the pointer. */
static element *references; /* List of link references found; searched by find_reference(). */
static element *notes; /* List of footnotes found; searched by find_note(). */
static int output_format; /* NOTE(review): not used in the part of the file reviewed here; presumably the requested output format - confirm. */
static element parse_result; /* Results of parse; assigned by the Doc rule. */
static int syntax_extensions; /* Syntax extensions selected; bit mask tested by extension(). */
/**********************************************************************
Auxiliary functions for parsing actions.
These make it easier to build up data structures (including lists)
in the parsing actions.
***********************************************************************/
/* mk_element - generic constructor for element.
 * Returns (by value) an element with the given key and with children and
 * next set to NULL.  The element is zero-initialised before the key is set,
 * so 'contents' no longer starts out indeterminate when the element is
 * copied around by value. */
static element mk_element(int key) {
    element result = {0};

    result.key = key;
    result.children = NULL;
    result.next = NULL;
    return result;
}
/* mk_str - constructor for STR element.
 * 'string' must not be NULL (asserted).  The element stores its own
 * strdup'd copy, so the caller keeps ownership of 'string'. */
static element mk_str(char *string) {
    assert(string != NULL);
    element str_elt = mk_element(STR);
    str_elt.contents.str = strdup(string);
    return str_elt;
}
/* mk_list - build an element with key 'key' whose children are the children
 * of 'lst' in reversed order.
 * Parser actions accumulate children with pushelt, which adds at the head of
 * the list; reversing here puts them back into the order they were pushed. */
static element mk_list(int key, element lst) {
    element parent = mk_element(key);

    parent.children = reverse(lst.children);
    return parent;
}
/* mk_link - constructor for LINK element.
 * 'label' is stored as given (not copied); 'url' and 'title' are duplicated
 * with strdup, so the caller keeps ownership of both strings. */
static element mk_link(element *label, char *url, char *title) {
    element link_elt = mk_element(LINK);

    link_elt.contents.link.url = strdup(url);
    link_elt.contents.link.title = strdup(title);
    link_elt.contents.link.label = label;
    return link_elt;
}
/* extension - returns true if the syntax extension 'ext' is selected, i.e.
 * if 'ext' has at least one bit in common with syntax_extensions. */
static bool extension(int ext) {
    return (syntax_extensions & ext) != 0;
}
/* match_inlines - returns true if two inline lists match.
 * The lists match when they have the same length and every pair of elements
 * has the same key and, depending on the key:
 *   - SPACE, LINEBREAK, ELLIPSIS, EMDASH, ENDASH, APOSTROPHE: nothing further
 *     is compared;
 *   - CODE, STR, HTML: the strings are equal ignoring case;
 *   - EMPH, STRONG, LIST: the child lists match recursively.
 * A LINK or IMAGE never matches.  Any other key is reported on stderr and
 * terminates the program.
 * The smart-punctuation keys are accepted because the Smart rule is one of
 * the Inline alternatives and can therefore occur inside a link label. */
static bool match_inlines(element *l1, element *l2) {
    while (l1 != NULL && l2 != NULL) {
        if (l1->key != l2->key)
            return false;
        switch (l1->key) {
        case SPACE:
        case LINEBREAK:
        case ELLIPSIS:
        case EMDASH:
        case ENDASH:
        case APOSTROPHE:
            break;
        case CODE:
        case STR:
        case HTML:
            if (strcasecmp(l1->contents.str, l2->contents.str) != 0)
                return false;
            break;
        case EMPH:
        case STRONG:
        case LIST:
            if (!match_inlines(l1->children, l2->children))
                return false;
            break;
        case LINK:
        case IMAGE:
            return false;  /* No links or images within links */
        default:
            fprintf(stderr, "match_inlines encountered unknown key = %d\n", l1->key);
            exit(EXIT_FAILURE);
            break;
        }
        l1 = l1->next;
        l2 = l2->next;
    }
    return (l1 == NULL && l2 == NULL);  /* return true if both lists exhausted */
}
/* find_reference - look up a link reference by label.
 * Walks the global 'references' list and compares 'label' against each
 * entry's label with match_inlines().  On the first match, '*result' receives
 * a copy of that entry's link (label, url, title) and true is returned;
 * otherwise '*result' is left untouched and false is returned. */
static bool find_reference(link *result, element *label) {
    element *ref;

    for (ref = references; ref != NULL; ref = ref->next) {
        if (match_inlines(label, ref->contents.link.label)) {
            *result = ref->contents.link;
            return true;
        }
    }
    return false;
}
/* find_note - look up a footnote by label.
 * Walks the global 'notes' list comparing 'label' (exact, case-sensitive
 * strcmp) with each note's contents.str.  On the first match, '*result' is
 * set to point at that note and true is returned; otherwise '*result' is
 * left untouched and false is returned. */
static bool find_note(element **result, char *label) {
    element *note;

    for (note = notes; note != NULL; note = note->next) {
        if (strcmp(label, note->contents.str) == 0) {
            *result = note;
            return true;
        }
    }
    return false;
}
/**********************************************************************
Definitions for leg parser generator.
YY_INPUT is the function the parser calls to get new input.
We take all new input from (static) charbuf.
***********************************************************************/
# define YYSTYPE element
/* YY_INPUT - give the parser at most one character from charbuf.
 * Stores the next character in *buf and sets 'result' to 1, or sets 'result'
 * to 0 when charbuf is NULL or its terminating NUL has been reached.
 * 'max_size' is ignored.
 * The character is read as unsigned char so that byte 0xFF cannot be
 * mistaken for EOF on platforms where plain char is signed. */
#define YY_INPUT(buf, result, max_size) \
{ \
    int yyc; \
    if (charbuf && *charbuf != '\0') { \
        yyc = (unsigned char) *charbuf++; \
    } else { \
        yyc = EOF; \
    } \
    result = (EOF == yyc) ? 0 : (*(buf) = yyc, 1); \
}
/**********************************************************************
PEG grammar and parser actions for markdown syntax.
***********************************************************************/
%}
# Doc - a whole document: blocks, optional trailing blank lines, end of input.
# The resulting element is stored in the global parse_result.
Doc = a:Blocks BlankLine* Eof
{ parse_result = a; }
# Blocks - zero or more Block elements collected into a LIST.
# pushelt adds each block at the head of a's children; mk_list reverses them.
Blocks = a:StartList ( Block { pushelt($$, &a); } )*
{ $$ = mk_list(LIST, a); }
# Block - one block-level construct, after skipping leading blank lines.
# The alternatives are tried in the order written (PEG ordered choice), so
# Para and Plain are only tried when no earlier alternative matches.
Block = BlankLine*
( BlockQuote
| Verbatim
| Note
| Reference
| Heading
| OrderedList
| BulletList
| HorizontalRule
| HtmlBlock
| Para
| Plain )
# Para - inlines followed by at least one blank line; the Inlines result is
# re-keyed as PARA.
Para = NonindentSpace a:Inlines BlankLine+
{ $$ = a; $$.key = PARA; }
# Plain - inlines not required to end in a blank line; re-keyed as PLAIN.
Plain = a:Inlines
{ $$ = a; $$.key = PLAIN; }
# AtxInline - an inline inside an ATX heading; stops at the newline and at an
# optional run of closing '#' characters that ends the line.
AtxInline = !Newline !(Sp '#'* Sp Newline) Inline
# AtxStart - one to six '#'.  Only the key of the result is set: H1 plus the
# number of '#' minus one (assumes H1, H2, ... are consecutive values - confirm).
AtxStart = < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
{ $$.key = H1 + (strlen(yytext) - 1); }
# AtxHeading - '#'-style heading; the key comes from AtxStart, the children
# are the collected inlines.
AtxHeading = s:AtxStart Sp a:StartList ( AtxInline { pushelt($$, &a); } )+ (Sp '#'* Sp)? Newline
{ $$ = mk_list(s.key, a); }
# SetextHeading - a line of inlines underlined with '=' (H1) or '-' (H2).
SetextHeading = SetextHeading1 | SetextHeading2
SetextHeading1 = a:StartList ( !Endline Inline { pushelt($$, &a); } )+ Newline "===" '='* Newline
{ $$ = mk_list(H1, a); }
SetextHeading2 = a:StartList ( !Endline Inline { pushelt($$, &a) ; } )+ Newline "---" '-'* Newline
{ $$ = mk_list(H2, a); }
Heading = AtxHeading | SetextHeading
# BlockQuote - a BLOCKQUOTE element whose single child is the RAW element
# produced by BlockQuoteRaw (the quoted text with the '>' markers removed).
# The child is copied into a heap-allocated node; allocation failure is
# reported on stderr and terminates the program.
BlockQuote = a:BlockQuoteRaw
             {   element *raw = malloc(sizeof *raw);
                 if (raw == NULL) {
                     fprintf(stderr, "BlockQuote: out of memory\n");
                     exit(EXIT_FAILURE);
                 }
                 *raw = a;
                 $$ = mk_element(BLOCKQUOTE);
                 $$.children = raw;
             }
# BlockQuoteRaw - the text of a block quote as one RAW string element.
# Each group is a line starting with '>', followed by continuation lines that
# are neither blank nor start with '>', followed by blank lines.  The lines
# are concatenated in source order and a final "\n" is appended.
BlockQuoteRaw = a:StartList
                (( '>' ' '? Line { pushelt($$, &a); } )
                 ( !'>' !BlankLine Line { pushelt($$, &a); } )*
                 ( BlankLine { pushelt($$, &a); } )*
                )+
                {   char *c = concat_string_list(reverse(a.children));
                    strcat(c, "\n");  /* Note: an extra byte was allocated for this */
                    $$ = mk_str(c);
                    $$.key = RAW;
                    free(c);          /* mk_str keeps its own copy, so this buffer would otherwise leak */
                }
# NonblankIndentedLine - an indented line that is not blank.
NonblankIndentedLine = !BlankLine IndentedLine
# VerbatimChunk - optional blank lines followed by one or more indented lines,
# concatenated into a single STR element.
VerbatimChunk = a:StartList
                ( BlankLine { pushelt($$, &a); } )*
                ( NonblankIndentedLine { pushelt($$, &a); } )+
                {   char *text = concat_string_list(reverse(a.children));
                    $$ = mk_str(text);
                    free(text);   /* mk_str keeps its own copy */
                }
# Verbatim - one or more chunks concatenated into a single VERBATIM element.
Verbatim = a:StartList ( VerbatimChunk { pushelt($$, &a); } )+
           {   char *text = concat_string_list(reverse(a.children));
               $$ = mk_str(text);
               $$.key = VERBATIM;
               free(text);   /* mk_str keeps its own copy */
           }
# HorizontalRule - three or more '*', '-' or '_' (optionally separated by
# spaces) alone on a line, followed by at least one blank line.
# The result is built with mk_element so that children and next are NULL
# instead of whatever the previously matched rule left in $$.
HorizontalRule = NonindentSpace
                 ( '*' Sp '*' Sp '*' (Sp '*')*
                 | '-' Sp '-' Sp '-' (Sp '-')*
                 | '_' Sp '_' Sp '_' (Sp '_')*)
                 Sp Newline BlankLine+
                 { $$ = mk_element(HRULE); }
# Bullet - a list marker: '+', '*' or '-' followed by at least one space or tab.
Bullet = NonindentSpace ('+' | '*' | '-') Spacechar+
# BulletList - the tight form is tried first.
BulletList = BulletListTight | BulletListLoose
# BulletListTight - one or more items with no blank lines between them,
# optional trailing blank lines, and not followed by text that would parse as
# a loose bullet list.
BulletListTight = a:StartList
( BulletListItem { pushelt($$, &a); } )+
BlankLine* !BulletListLoose
{ $$ = mk_list(BULLETLIST, a); }
# BulletListLoose - one or more items, each optionally followed by blank
# lines.  "\n\n" is appended to the raw text of every item (the first child of
# the item element).  The grown buffer is only stored once realloc has
# succeeded; allocation failure is reported on stderr and terminates the
# program.
BulletListLoose = a:StartList
                  ( b:BulletListItem BlankLine*
                    {   element *li = b.children;
                        char *grown = realloc(li->contents.str, strlen(li->contents.str) + 3);
                        if (grown == NULL) {
                            fprintf(stderr, "BulletListLoose: out of memory\n");
                            exit(EXIT_FAILURE);
                        }
                        strcat(grown, "\n\n");  /* In loose list, \n\n added to end of each element */
                        li->contents.str = grown;
                        pushelt(b, &a);
                    } )+
                  { $$ = mk_list(BULLETLIST, a); }
# BulletListItem - a list item that starts with a Bullet (checked by lookahead,
# consumed by ListItem) and is not a horizontal rule such as "* * *".
BulletListItem = !HorizontalRule &Bullet ListItem
# ListItem - a list marker, a ListBlock and any number of continuation blocks.
# The result is a LISTITEM element whose single child is a RAW element holding
# the concatenated text of those blocks in source order.
# Allocation failure is reported on stderr and terminates the program.
ListItem = ( Bullet | Enumerator )
           a:StartList
           ListBlock { pushelt($$, &a); }
           ( ListContinuationBlock { pushelt($$, &a); } )*
           {   char *text = concat_string_list(reverse(a.children));
               element *raw = malloc(sizeof *raw);
               if (raw == NULL) {
                   fprintf(stderr, "ListItem: out of memory\n");
                   exit(EXIT_FAILURE);
               }
               *raw = mk_str(text);
               raw->key = RAW;
               free(text);   /* mk_str keeps its own copy */
               $$ = mk_element(LISTITEM);
               $$.children = raw;
           }
# ListBlock - a line followed by any number of ListBlockLine lines,
# concatenated in source order into one STR element.
ListBlock = a:StartList
            Line { pushelt($$, &a); }
            ( ListBlockLine { pushelt($$, &a); } )*
            {   char *text = concat_string_list(reverse(a.children));
                $$ = mk_str(text);
                free(text);   /* mk_str keeps its own copy */
            }
# ListContinuationBlock - optional blank lines followed by one or more
# indented ListBlocks, concatenated into one STR element.
# When no blank lines were matched, the empty string from BlankLines is
# replaced by "\001", which serves as a block separator.
ListContinuationBlock = a:StartList
                        ( BlankLines
                          {   if (strlen($$.contents.str) == 0) {
                                  free($$.contents.str);   /* empty string duplicated by mk_str in BlankLines */
                                  $$.contents.str = strdup("\001"); /* block separator */
                              }
                              pushelt($$, &a); } )
                        ( Indent ListBlock { pushelt($$, &a); } )+
                        {   char *text = concat_string_list(reverse(a.children));
                            $$ = mk_str(text);
                            free(text);   /* mk_str keeps its own copy */
                        }
# Enumerator - an ordered-list marker: digits, '.', then at least one space or tab.
Enumerator = NonindentSpace [0-9]+ '.' Spacechar+
# OrderedList - the tight form is tried first.
OrderedList = OrderedListTight | OrderedListLoose
# OrderedListTight - one or more items with no blank lines between them,
# optional trailing blank lines, and not followed by text that would parse as
# a loose ordered list.
OrderedListTight = a:StartList
( OrderedListItem { pushelt($$, &a); } )+
BlankLine* !OrderedListLoose
{ $$ = mk_list(ORDEREDLIST, a); }
# OrderedListLoose - one or more items, each optionally followed by blank
# lines.  "\n\n" is appended to the raw text of every item (the first child of
# the item element).  The grown buffer is only stored once realloc has
# succeeded; allocation failure is reported on stderr and terminates the
# program.
OrderedListLoose = a:StartList
                   ( b:OrderedListItem BlankLine*
                     {   element *li = b.children;
                         char *grown = realloc(li->contents.str, strlen(li->contents.str) + 3);
                         if (grown == NULL) {
                             fprintf(stderr, "OrderedListLoose: out of memory\n");
                             exit(EXIT_FAILURE);
                         }
                         strcat(grown, "\n\n");  /* In loose list, \n\n added to end of each element */
                         li->contents.str = grown;
                         pushelt(b, &a);
                     } )+
                   { $$ = mk_list(ORDEREDLIST, a); }
# OrderedListItem - a list item that starts with an Enumerator (checked by
# lookahead, consumed by ListItem) and is not a horizontal rule.
OrderedListItem = !HorizontalRule &Enumerator ListItem
# BlankLines - zero or more blank lines captured as one STR element (the
# string is empty when there are none).
BlankLines = < BlankLine* >
{ $$ = mk_str(yytext); }
# ListBlockLine - a further line of the current list block: not the start of
# another (optionally indented) list item and not a blank line.
ListBlockLine = !( Indent? ( BulletListItem | OrderedListItem ) )
!BlankLine
OptionallyIndentedLine
# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.
# For every block-level tag there is one rule for the opening tag (attributes
# allowed) and one for the closing tag.  Tag names are matched either entirely
# in lower case or entirely in upper case; mixed case is not matched.
HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'
HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'
HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'
HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'
HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>'
HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>'
HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'
HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>'
HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>'
HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>'
HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>'
HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>'
HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>'
HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>'
HtmlBlockOpenHr = '<' Spnl ("hr" | "HR") Spnl HtmlAttribute* '>'
HtmlBlockCloseHr = '<' Spnl '/' ("hr" | "HR") Spnl '>'
HtmlBlockOpenIsindex = '<' Spnl ("isindex" | "ISINDEX") Spnl HtmlAttribute* '>'
HtmlBlockCloseIsindex = '<' Spnl '/' ("isindex" | "ISINDEX") Spnl '>'
HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>'
HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'
HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'
HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'
HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'
HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'
HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'
HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'
HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'
HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'
HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'
HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'
HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'
HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'
HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'
HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'
HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'
HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'
HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'
# HtmlBlockInTags - a block-level HTML element from its opening tag to the
# matching closing tag.  Between the tags, a nested HtmlBlockInTags is tried
# first; otherwise any single character that does not begin the closing tag
# is consumed.  There is one alternative per supported tag name.
HtmlBlockInTags = HtmlBlockOpenAddress (HtmlBlockInTags | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress
| HtmlBlockOpenBlockquote (HtmlBlockInTags | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote
| HtmlBlockOpenCenter (HtmlBlockInTags | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter
| HtmlBlockOpenDir (HtmlBlockInTags | !HtmlBlockCloseDir .)* HtmlBlockCloseDir
| HtmlBlockOpenDiv (HtmlBlockInTags | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv
| HtmlBlockOpenDl (HtmlBlockInTags | !HtmlBlockCloseDl .)* HtmlBlockCloseDl
| HtmlBlockOpenFieldset (HtmlBlockInTags | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset
| HtmlBlockOpenForm (HtmlBlockInTags | !HtmlBlockCloseForm .)* HtmlBlockCloseForm
| HtmlBlockOpenH1 (HtmlBlockInTags | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1
| HtmlBlockOpenH2 (HtmlBlockInTags | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2
| HtmlBlockOpenH3 (HtmlBlockInTags | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3
| HtmlBlockOpenH4 (HtmlBlockInTags | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4
| HtmlBlockOpenH5 (HtmlBlockInTags | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5
| HtmlBlockOpenH6 (HtmlBlockInTags | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6
| HtmlBlockOpenHr (HtmlBlockInTags | !HtmlBlockCloseHr .)* HtmlBlockCloseHr
| HtmlBlockOpenIsindex (HtmlBlockInTags | !HtmlBlockCloseIsindex .)* HtmlBlockCloseIsindex
| HtmlBlockOpenMenu (HtmlBlockInTags | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu
| HtmlBlockOpenNoframes (HtmlBlockInTags | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes
| HtmlBlockOpenNoscript (HtmlBlockInTags | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript | HtmlBlockOpenOl (HtmlBlockInTags | !HtmlBlockCloseOl .)* HtmlBlockCloseOl
| HtmlBlockOpenP (HtmlBlockInTags | !HtmlBlockCloseP .)* HtmlBlockCloseP
| HtmlBlockOpenPre (HtmlBlockInTags | !HtmlBlockClosePre .)* HtmlBlockClosePre
| HtmlBlockOpenTable (HtmlBlockInTags | !HtmlBlockCloseTable .)* HtmlBlockCloseTable
| HtmlBlockOpenUl (HtmlBlockInTags | !HtmlBlockCloseUl .)* HtmlBlockCloseUl
| HtmlBlockOpenDd (HtmlBlockInTags | !HtmlBlockCloseDd .)* HtmlBlockCloseDd
| HtmlBlockOpenDt (HtmlBlockInTags | !HtmlBlockCloseDt .)* HtmlBlockCloseDt
| HtmlBlockOpenFrameset (HtmlBlockInTags | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset
| HtmlBlockOpenLi (HtmlBlockInTags | !HtmlBlockCloseLi .)* HtmlBlockCloseLi
| HtmlBlockOpenTbody (HtmlBlockInTags | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody
| HtmlBlockOpenTd (HtmlBlockInTags | !HtmlBlockCloseTd .)* HtmlBlockCloseTd
| HtmlBlockOpenTfoot (HtmlBlockInTags | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot
| HtmlBlockOpenTh (HtmlBlockInTags | !HtmlBlockCloseTh .)* HtmlBlockCloseTh
| HtmlBlockOpenThead (HtmlBlockInTags | !HtmlBlockCloseThead .)* HtmlBlockCloseThead
| HtmlBlockOpenTr (HtmlBlockInTags | !HtmlBlockCloseTr .)* HtmlBlockCloseTr
| HtmlBlockOpenScript (HtmlBlockInTags | !HtmlBlockCloseScript .)* HtmlBlockCloseScript
# HtmlBlock - a block-level HTML element, an HTML comment or a self-closing
# block tag, followed by at least one blank line.  The matched markup (without
# the trailing blank lines) becomes an HTMLBLOCK string element.
HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
BlankLine+
{ $$ = mk_str(yytext); $$.key = HTMLBLOCK; }
# HtmlBlockSelfClosing - a block-level tag written in self-closing form.
HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'
# HtmlBlockType - the block-level tag names, all-lowercase or all-uppercase.
HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
"h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
"ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
"ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
"H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
"UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"
# Inlines - one or more inline elements collected into a LIST.
# An Endline is only kept (as element c) when another inline follows it; an
# optional final Endline is consumed without being added to the list.
Inlines = a:StartList ( !Endline Inline { pushelt($$, &a); }
| c:Endline &Inline { pushelt(c, &a); } )+ Endline?
{ $$ = mk_list(LIST, a); }
# Inline - a single inline element.  Alternatives are tried in the order
# written (PEG ordered choice); Symbol is the fallback for a lone special
# character.
Inline = Str
| LineBreak
| Endline
| Space
| Strong
| Emph
| Image
| Link
| NoteReference
| InlineNote
| Code
| RawHtml
| Entity
| EscapedChar
| Smart
| Symbol
# Space - a run of spaces and tabs, represented as a SPACE element whose
# string is a single " ".  The element is built with mk_element so that
# children and next are NULL instead of whatever the previously matched rule
# left in $$.  The string is a literal, not heap-allocated.
Space = Spacechar+
        { $$ = mk_element(SPACE); $$.contents.str = " "; }
# Str - a run of ordinary characters as a STR element.
Str = < NormalChar+ >
{ $$ = mk_str(yytext); }
# EscapedChar - a backslash followed by any character except a newline; only
# the character after the backslash is kept.
EscapedChar = '\\' !Newline < . >
{ $$ = mk_str(yytext); }
# Entity - a hexadecimal, decimal or named character entity, kept verbatim as
# an HTML string element (the text is captured inside the sub-rules).
Entity = ( HexEntity | DecEntity | CharEntity )
{ $$ = mk_str(yytext); $$.key = HTML; }
# Endline - a line ending inside a paragraph.
Endline = TerminalEndline | NormalEndline
# NormalEndline - a newline that does not end the paragraph: it is not
# followed by a blank line, a block quote, an ATX heading start or a setext
# underline.  Represented as a SPACE element whose string is "\n".
# The element is built with mk_element so that children and next are NULL
# instead of whatever the previously matched rule left in $$.
NormalEndline = Sp Newline !BlankLine !BlockQuote !AtxStart
                !(Line ("===" '='* | "---" '-'*) Newline)
                { $$ = mk_element(SPACE); $$.contents.str = "\n"; }
# TerminalEndline - the newline at the very end of the input; a SPACE element
# with an empty string.
TerminalEndline = Sp Newline Eof
                  { $$ = mk_element(SPACE); $$.contents.str = ""; }
# LineBreak - a hard line break: two spaces immediately before the line
# ending (Endline consumes any further spaces).  A single trailing space must
# not produce a break, so the literal is exactly two spaces.
# The element is built with mk_element so that children and next are NULL
# instead of whatever the previously matched rule left in $$.
LineBreak = "  " Endline
            { $$ = mk_element(LINEBREAK); }
# Symbol - a single special character taken literally, as a STR element.
Symbol = < SpecialChar >
{ $$ = mk_str(yytext); }
# Emph - emphasis delimited by single '*' or single '_'.
Emph = EmphStar | EmphUl
# EmphStar - '*', not followed by whitespace, then one or more inlines, then
# a closing '*'.  The inlines become the children of an EMPH element.
EmphStar = OneStar !Spacechar !Newline
a:StartList
( EmphInlineStar { pushelt($$, &a); } )+
OneStar
{ $$ = mk_list(EMPH, a); }
# EmphInlineStar - nested strong text, or any inline that does not start at
# the closing '*' (optionally preceded by whitespace).
EmphInlineStar = StrongStar
| !(Spnl OneStar) Inline
# EmphUl - as EmphStar but with '_'; in addition the closing '_' must not be
# followed by an alphanumeric character.
EmphUl = OneUl !Spacechar !Newline
a:StartList
( EmphInlineUl { pushelt($$, &a); } )+
OneUl !Alphanumeric
{ $$ = mk_list(EMPH, a); }
EmphInlineUl = StrongUl
| !(Spnl OneUl) Inline
# Strong - strong emphasis delimited by '**' or '__'.
Strong = StrongStar | StrongUl
# StrongStar - '**', not followed by whitespace, then one or more inlines,
# then a closing '**'.  The inlines become the children of a STRONG element.
StrongStar = TwoStar !Spacechar !Newline
a:StartList
( StrongInlineStar { pushelt($$, &a); } )+
TwoStar
{ $$ = mk_list(STRONG, a); }
# StrongInlineStar - any inline that does not start at the closing '**'.
StrongInlineStar = !(Spnl TwoStar) Inline
# StrongUl - as StrongStar but with '__'.
StrongUl = TwoUl !Spacechar !Newline
a:StartList
( StrongInlineUl { pushelt($$, &a); } )+
TwoUl
{ $$ = mk_list(STRONG, a); }
StrongInlineUl = !(Spnl TwoUl) Inline
# Image - '!' followed by an explicit or reference link.
# When the link was resolved ($$ is a LINK) it is re-keyed as IMAGE.
# When a reference link could not be resolved, ReferenceLink returns a LIST
# holding the literal source text instead; that list has no link contents, so
# it is kept as a LIST and the consumed '!' is put back in front of it.
Image = '!' ( ExplicitLink | ReferenceLink )
        {   if ($$.key == LINK)
                $$.key = IMAGE;
            else
                $$.children = cons(mk_str("!"), $$.children);
        }
# Link - an inline link "[label](url "title")", a reference link, or an
# autolink in angle brackets; tried in that order.
Link = ExplicitLink | ReferenceLink | AutoLink
# ReferenceLink - the two-label form "[text][ref]" is tried before the
# single-label form "[text]" / "[text][]".
ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle
# ReferenceLinkDouble - "[text] [ref]": label a is the link text, label b is
# looked up with find_reference.  When found, the result is a LINK with a's
# children as label and the reference's url and title.  Otherwise the result
# is a LIST reproducing the source: "[", a, "]", the captured whitespace
# between the labels, "[", b, "]".
ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label
{ link match;
if (find_reference(&match, b.children))
$$ = mk_link(a.children, match.url, match.title);
else {
/* $$.key is already LIST here (set by the Label match), so it is not assigned again */
$$.children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext),
cons(mk_str("["), cons(b, cons(mk_str("]"), NULL)))))));
}
}
# ReferenceLinkSingle - "[text]" optionally followed by "[]": the label itself
# is looked up with find_reference.  When found, the result is a LINK;
# otherwise it is a LIST reproducing the source: "[", a, "]" and the captured
# optional "[]" (with any whitespace before it).
ReferenceLinkSingle = a:Label < (Spnl "[]")? >
{ link match;
if (find_reference(&match, a.children)) {
$$ = mk_link(a.children, match.url, match.title);
}
else {
$$.key = LIST;
$$.children = cons(mk_str("["), cons(a, cons(mk_str("]"),cons(mk_str(yytext),NULL))));
}
}
# ExplicitLink - "[label](source title)"; the result is a LINK built from the
# label's children and copies of the source and title strings.
ExplicitLink = l:Label Spnl '(' Sp s:Source Spnl t:Title Sp ')'
{ $$ = mk_link(l.children, s.contents.str, t.contents.str); }
# Source - the link destination, optionally wrapped in '<' '>' (the brackets
# are not part of the captured text).
Source = ( '<' < SourceContents > '>' | < SourceContents > )
{ $$ = mk_str(yytext); }
# SourceContents - non-space characters other than '(', ')' and '>', with
# balanced parentheses allowed; may be empty.
SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*
| ""
# Title - an optional title in single or double quotes; the quotes are not
# part of the captured text, and the string is empty when no title is given.
Title = ( TitleSingle | TitleDouble | < "" > )
{ $$ = mk_str(yytext); }
# The title ends at the quote that is followed (after optional spaces) by ')'
# or a newline, so quote characters may occur inside the title itself.
TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) !Newline . )* > '\''
TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) !Newline . )* > '"'
# AutoLink - a URL or an e-mail address in angle brackets.
AutoLink = AutoLinkUrl | AutoLinkEmail
# AutoLinkUrl - "<scheme://...>"; the captured text is used both as the link
# label (a single STR element) and as the url, with an empty title.
AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
{ $$ = mk_link(cons(mk_str(yytext), NULL), yytext, ""); }
# AutoLinkEmail - "<user@host>"; the captured address is the link label and
# the url is the address prefixed with "mailto:", with an empty title.
# mk_link stores its own copy of the url, so the temporary buffer is freed.
# Allocation failure is reported on stderr and terminates the program.
AutoLinkEmail = '<' < [-A-Za-z0-9+_]+ '@' ( !Newline !'>' . )+ > '>'
                {   char *mailto = malloc(strlen(yytext) + 8);  /* "mailto:" is 7 characters, plus the NUL */
                    if (mailto == NULL) {
                        fprintf(stderr, "AutoLinkEmail: out of memory\n");
                        exit(EXIT_FAILURE);
                    }
                    sprintf(mailto, "mailto:%s", yytext);
                    $$ = mk_link(cons(mk_str(yytext), NULL), mailto, "");
                    free(mailto);
                }
# Reference - a link reference definition "[label]: source title"; the result
# is built like a link and re-keyed as REFERENCE.
Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc Spnl t:RefTitle BlankLine*
{ $$ = mk_link(l.children, s.contents.str, t.contents.str); $$.key = REFERENCE; }
# Label - inlines in square brackets, collected into a LIST.
# With the notes extension enabled the label must not start with '^';
# without it, any character may follow the '['.
Label = '[' ( !'^' &{ extension(EXT_NOTES) } | &. &{ !extension(EXT_NOTES) } )
a:StartList
( !']' Inline { pushelt($$, &a); } )*
']'
{ $$ = mk_list(LIST, a); }
# RefSrc - the reference destination: a run of non-space characters.
RefSrc = < Nonspacechar+ > { $$ = mk_str(yytext); $$.key = HTML; }
# RefTitle - an optional title in single quotes, double quotes or
# parentheses; the delimiters are not part of the captured text.
RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
{ $$ = mk_str(yytext); }
EmptyTitle = < "" >
RefTitleSingle = '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''
RefTitleDouble = '"' < ( !('"' Sp Newline | Newline) . )* > '"'
RefTitleParens = '(' < ( !(')' Sp Newline | Newline) . )* > ')'
# References - scans the input block by block, collecting every Reference
# into the global 'references' list and skipping everything else.
References = a:StartList
( b:Reference { pushelt(b, &a); } | SkipBlock )*
{ references = a.children; }
# Ticks1..Ticks5 - backtick delimiters of length one to five.
Ticks1 = "`"
Ticks2 = "``"
Ticks3 = "```"
Ticks4 = "````"
Ticks5 = "`````"
# Code - an inline code span delimited by the same number of backticks on
# both sides; there is one alternative per delimiter length.  Spaces directly
# inside the delimiters are not captured.  The content may contain backtick
# runs that do not begin with the delimiter, and newlines that are not
# followed by a blank line.  The result is a CODE string element.
Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
| Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
| Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
| Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
| Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
)
{ $$ = mk_str(yytext); $$.key = CODE; }
# RawHtml - an HTML comment or a single HTML tag, kept verbatim as an HTML
# string element.
RawHtml = < (HtmlComment | HtmlTag) >
{ $$ = mk_str(yytext); $$.key = HTML; }
# BlankLine - optional spaces/tabs followed by a newline; always yields "\n".
BlankLine = Sp Newline
{ $$ = mk_str("\n"); }
# Quoted - a double- or single-quoted string without escape handling.
Quoted = '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
# HtmlAttribute - an attribute name with an optional '=' value, where the
# value is quoted or a run of non-space characters.
HtmlAttribute = (Alphanumeric | '-')+ Spnl ('=' Spnl (Quoted | Nonspacechar+))? Spnl
HtmlComment = "<!--" (!"-->" .)* "-->"
# HtmlTag - any opening, closing or self-closing tag.
HtmlTag = '<' Spnl '/'? Alphanumeric+ Spnl HtmlAttribute* '/'? Spnl '>'
# Eof - succeeds only when no input is left.
Eof = !.
Spacechar = ' ' | '\t'
Nonspacechar = !Spacechar !Newline .
# Newline - "\n", "\r\n" or a lone "\r".
Newline = '\n' | '\r' '\n'?
# Sp - optional spaces/tabs; Spnl - the same, possibly spanning one newline.
Sp = Spacechar*
Spnl = Sp (Newline Sp)?
# SpecialChar - characters with a meaning in inline markup, plus those
# enabled by syntax extensions.
SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '<' | '!' | '\\' | ExtendedSpecialChar
NormalChar = !( SpecialChar | Spacechar | Newline ) .
Alphanumeric = [A-Za-z0-9]
Digit = [0-9]
# HexEntity, DecEntity, CharEntity - character entities such as "&#x41;",
# "&#65;" and "&amp;".  Each rule captures the whole entity, including the
# '&' and the ';', for use by the Entity rule's action.
# DecEntity previously contained a second, unpaired '>' after the digits; the
# capture now has a single end marker after the ';', like its two siblings.
HexEntity =     < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
DecEntity =     < '&' '#' [0-9]+ ';' >
CharEntity =    < '&' [A-Za-z0-9]+ ';' >
# Emphasis delimiters.  The negative lookahead makes each rule match only when
# the delimiter is not immediately followed by another delimiter of the same
# kind.
OneStar = '*' !OneStar
OneUl = '_' !OneUl
TwoStar = "**" !TwoStar
TwoUl = "__" !TwoUl
# NonindentSpace - zero to three leading spaces, i.e. less than one indent.
# The longest alternative comes first because PEG choice is ordered.
NonindentSpace =    "   " | "  " | " " | ""
# Indent - one level of indentation: a tab or four spaces.
Indent =            "\t" | "    "
IndentedLine =      Indent Line
OptionallyIndentedLine = Indent? Line
# StartList starts a list data structure that can be added to with pushelt:
# it consumes no input (but requires that some input is left) and yields an
# element with key LIST and no children.
StartList = &.
{ $$.key = LIST; $$.children = NULL; }
# Line - the rest of the current line including its newline, or all remaining
# input when the last line has no newline; returned as a STR element.
Line = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )
{ $$ = mk_str(yytext); }
# SkipBlock - a run of non-blank lines with trailing blank lines, or a run of
# blank lines; no element is built.
SkipBlock = ( !BlankLine Line )+ BlankLine*
| BlankLine+
# Syntax extensions
# ExtendedSpecialChar - additional special characters: '.', '-', quotes when
# EXT_SMART is enabled, and '^' when EXT_NOTES is enabled.
ExtendedSpecialChar = &{ extension(EXT_SMART) } ('.' | '-' | '\'' | '"')
| &{ extension(EXT_NOTES) } ( '^' )
# Smart - smart punctuation; only active when EXT_SMART is enabled.
Smart = &{ extension(EXT_SMART) }
( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )
Apostrophe = '\''
{ $$ = mk_element(APOSTROPHE); }
# Ellipsis - three dots, with or without single spaces between them.
Ellipsis = ("..." | ". . .")
{ $$ = mk_element(ELLIPSIS); }
Dash = EmDash | EnDash
# EnDash - a hyphen directly followed by a digit (the digit is not consumed).
EnDash = '-' &Digit
{ $$ = mk_element(ENDASH); }
# EmDash - two or three hyphens; surrounding spaces/tabs are consumed too.
EmDash = Sp ("---" | "--") Sp
{ $$ = mk_element(EMDASH); }
# SingleQuoteStart - an opening single quote: a "'" that is not followed by
# closing punctuation, a hyphen or whitespace, and that does not begin a
# contraction suffix such as 's, 't, 'm, 've, 'll or 're.
# The hyphen is written first in the character class so that it is a literal
# '-'; placed between ':' and '?' it formed the range ':'-'?', which left the
# hyphen out and excluded ';', '<', '=' and '>' instead.
SingleQuoteStart = '\'' ![-)!\],.;:? \t\n] !( ( "s" | "t" | "m" | "ve" | "ll" | "re" ) !Alphanumeric )
# SingleQuoteEnd - a closing single quote: a "'" not followed by a letter or
# digit.
SingleQuoteEnd = '\'' !Alphanumeric
SingleQuoted = SingleQuoteStart
a:StartList
( !SingleQuoteEnd b:Inline { pushelt(b, &a); } )+