Skip to content

Commit

Permalink
Merge pull request #4 from KristopherGBaker/update-0.29.0
Browse files Browse the repository at this point in the history
Update to cmark-gfm 0.29.0
  • Loading branch information
KristopherGBaker committed Apr 20, 2019
2 parents d37611b + 047a148 commit 2abfe8c
Show file tree
Hide file tree
Showing 18 changed files with 958 additions and 1,046 deletions.
118 changes: 89 additions & 29 deletions Sources/libcmark_gfm/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ static bool S_last_line_blank(const cmark_node *node) {
return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
}

// Reports whether the CMARK_NODE__LAST_LINE_CHECKED bit is set in
// node->flags.
static bool S_last_line_checked(const cmark_node *node) {
  const bool already_checked =
      (node->flags & CMARK_NODE__LAST_LINE_CHECKED) ? true : false;
  return already_checked;
}

// Returns the node's stored type field cast to cmark_node_type.
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
  const cmark_node_type kind = (cmark_node_type)node->type;
  return kind;
}
Expand All @@ -47,6 +51,10 @@ static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
}

// Turns on the CMARK_NODE__LAST_LINE_CHECKED bit in node->flags; all other
// bits are left as they were.
static void S_set_last_line_checked(cmark_node *node) {
  node->flags = node->flags | CMARK_NODE__LAST_LINE_CHECKED;
}

// True when c is a line-ending character: line feed or carriage return.
static CMARK_INLINE bool S_is_line_end_char(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
Expand Down Expand Up @@ -121,8 +129,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
parser->root = document;
parser->current = document;

parser->last_buffer_ended_with_cr = false;

parser->syntax_extensions = saved_exts;
parser->inline_syntax_extensions = saved_inline_exts;
parser->options = saved_options;
Expand Down Expand Up @@ -234,26 +240,43 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {

// Check to see if a node ends with a blank line, descending
// if needed into lists and sublists.
static bool ends_with_blank_line(cmark_node *node) {
cmark_node *cur = node;
while (cur != NULL) {
if (S_last_line_blank(cur)) {
return true;
}
if (S_type(cur) == CMARK_NODE_LIST || S_type(cur) == CMARK_NODE_ITEM) {
cur = cur->last_child;
} else {
cur = NULL;
}
static bool S_ends_with_blank_line(cmark_node *node) {
if (S_last_line_checked(node)) {
return(S_last_line_blank(node));
} else if ((S_type(node) == CMARK_NODE_LIST ||
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
S_set_last_line_checked(node);
return(S_ends_with_blank_line(node->last_child));
} else {
S_set_last_line_checked(node);
return (S_last_line_blank(node));
}
return false;
}

// Consumes leading reference link definitions from the content buffer of b.
// While the unconsumed text starts with '[', it is handed to
// cmark_parse_reference_inline (together with parser->refmap); each non-zero
// result is the number of bytes to skip. The skipped prefix is then dropped
// from b->content.
// Returns true if the remaining content is not blank.
static bool resolve_reference_link_definitions(
    cmark_parser *parser,
    cmark_node *b) {
  cmark_strbuf *content = &b->content;
  cmark_chunk rest = {content->ptr, content->size, 0};
  bufsize_t consumed;

  for (;;) {
    // Only text that begins with '[' can be a reference definition.
    if (rest.len == 0 || rest.data[0] != '[') {
      break;
    }
    consumed = cmark_parse_reference_inline(parser->mem, &rest,
                                            parser->refmap);
    if (!consumed) {
      break;
    }
    rest.data += consumed;
    rest.len -= consumed;
  }

  // Everything before `rest` was consumed; remove it from the buffer.
  cmark_strbuf_drop(content, (content->size - rest.len));
  return !is_blank(content, 0);
}

static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
bufsize_t pos;
cmark_node *item;
cmark_node *subitem;
cmark_node *parent;
bool has_content;

parent = b->parent;
assert(b->flags &
Expand Down Expand Up @@ -283,15 +306,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
switch (S_type(b)) {
case CMARK_NODE_PARAGRAPH:
{
cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
while (chunk.len && chunk.data[0] == '[' &&
(pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {

chunk.data += pos;
chunk.len -= pos;
}
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
if (is_blank(node_content, 0)) {
has_content = resolve_reference_link_definitions(parser, b);
if (!has_content) {
// remove blank node (former reference def)
cmark_node_free(b);
}
Expand Down Expand Up @@ -343,7 +359,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
// spaces between them:
subitem = item->first_child;
while (subitem) {
if (ends_with_blank_line(subitem) && (item->next || subitem->next)) {
if ((item->next || subitem->next) &&
S_ends_with_blank_line(subitem)) {
b->as.list.tight = false;
break;
}
Expand Down Expand Up @@ -748,6 +765,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
}
}

// Try to match a thematic break in `input` starting at `offset`.
// "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
// A match needs an initial '*', '_' or '-' followed only by repeats of that
// same character, spaces and tabs, with at least three marker characters in
// total and a '\r' or '\n' ending the run.
// Returns the match length (line-ending character included) on success.
// Returns 0 on failure, after storing in parser->thematic_break_kill_pos
// the index at which the scan gave up.
static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
                                 bufsize_t offset) {
  bufsize_t pos = offset;
  char marker = peek_at(input, pos);
  char cur = '\0';
  int run = 1;

  if (marker != '*' && marker != '_' && marker != '-') {
    parser->thematic_break_kill_pos = pos;
    return 0;
  }

  for (;;) {
    cur = peek_at(input, ++pos);
    if (!cur) {
      break;
    }
    if (cur == marker) {
      run++;
      continue;
    }
    if (cur == ' ' || cur == '\t') {
      continue;
    }
    // Any other character ends the scan; it is judged below.
    break;
  }

  if (run < 3 || (cur != '\r' && cur != '\n')) {
    parser->thematic_break_kill_pos = pos;
    return 0;
  }
  return (pos - offset) + 1;
}

// Find first nonspace character from current offset, setting
// parser->first_nonspace, parser->first_nonspace_column,
// parser->indent, and parser->blank. Does not advance parser->offset.
Expand Down Expand Up @@ -1040,6 +1091,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
bufsize_t matched = 0;
int lev = 0;
bool save_partially_consumed_tab;
bool has_content;
int save_offset;
int save_column;

Expand Down Expand Up @@ -1112,13 +1164,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
} else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
(lev =
scan_setext_heading_line(input, parser->first_nonspace))) {
(*container)->type = (uint16_t)CMARK_NODE_HEADING;
(*container)->as.heading.level = lev;
(*container)->as.heading.setext = true;
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
// finalize paragraph, resolving reference links
has_content = resolve_reference_link_definitions(parser, *container);

if (has_content) {

(*container)->type = (uint16_t)CMARK_NODE_HEADING;
(*container)->as.heading.level = lev;
(*container)->as.heading.setext = true;
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
}
} else if (!indented &&
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
(matched = scan_thematic_break(input, parser->first_nonspace))) {
(parser->thematic_break_kill_pos <= parser->first_nonspace) &&
(matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
// it's only now that we know the line is not part of a setext heading:
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
parser->first_nonspace + 1);
Expand Down Expand Up @@ -1377,6 +1436,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
parser->column = 0;
parser->first_nonspace = 0;
parser->first_nonspace_column = 0;
parser->thematic_break_kill_pos = 0;
parser->indent = 0;
parser->blank = false;
parser->partially_consumed_tab = false;
Expand Down
1 change: 0 additions & 1 deletion Sources/libcmark_gfm/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "config.h"
#include "cmark_ctype.h"
#include "buffer.h"
#include "memory.h"

/* Used as default value for cmark_strbuf->ptr so that people can always
* assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
Expand Down
13 changes: 9 additions & 4 deletions Sources/libcmark_gfm/commonmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
needs_escaping =
c < 0x80 && escape != LITERAL &&
((escape == NORMAL &&
(c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
(c < 0x20 ||
c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
(c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
(renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
Expand All @@ -50,14 +51,18 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
(c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

if (needs_escaping) {
if (cmark_isspace((char)c)) {
if (escape == URL && cmark_isspace((char)c)) {
// use percent encoding for spaces
snprintf(encoded, ENCODED_SIZE, "%%%2x", c);
snprintf(encoded, ENCODED_SIZE, "%%%2X", c);
cmark_strbuf_puts(renderer->buffer, encoded);
renderer->column += 3;
} else {
} else if (cmark_ispunct((char)c)) {
cmark_render_ascii(renderer, "\\");
cmark_render_code_point(renderer, c);
} else { // render as entity
snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
cmark_strbuf_puts(renderer->buffer, encoded);
renderer->column += (int)strlen(encoded);
}
} else {
cmark_render_code_point(renderer, c);
Expand Down
8 changes: 4 additions & 4 deletions Sources/libcmark_gfm/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
if (!(!(options & CMARK_OPT_UNSAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
if ((options & CMARK_OPT_UNSAFE) ||
!(scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
Expand All @@ -372,8 +372,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
if (!(!(options & CMARK_OPT_UNSAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
if ((options & CMARK_OPT_UNSAFE) ||
!(scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
Expand Down
1 change: 0 additions & 1 deletion Sources/libcmark_gfm/include/chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <assert.h>
#include "cmark-gfm.h"
#include "buffer.h"
//#include "memory.h"
#include "cmark_ctype.h"

#define CMARK_CHUNK_EMPTY \
Expand Down
3 changes: 3 additions & 0 deletions Sources/libcmark_gfm/include/cmark-gfm-core-extensions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
CMARK_GFM_EXTENSIONS_EXPORT
int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);

CMARK_GFM_EXTENSIONS_EXPORT
char *cmark_gfm_extensions_get_tasklist_state(cmark_node *node);

#ifdef __cplusplus
}
#endif
Expand Down
22 changes: 14 additions & 8 deletions Sources/libcmark_gfm/include/cmark-gfm.h
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,20 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
*/
#define CMARK_OPT_HARDBREAKS (1 << 2)

/** `CMARK_OPT_SAFE` is defined here for API compatibility,
but it no longer has any effect. "Safe" mode is now the default:
set `CMARK_OPT_UNSAFE` to disable it.
*/
#define CMARK_OPT_SAFE (1 << 3)

/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
* `file:`, and `data:`, except for `image/png`, `image/gif`,
* `image/jpeg`, or `image/webp` mime types). By default,
* raw HTML is replaced by a placeholder HTML comment. Unsafe
* links are replaced by empty strings.
*/
#define CMARK_OPT_UNSAFE (1 << 17)

/** Render `softbreak` elements as spaces.
*/
#define CMARK_OPT_NOBREAKS (1 << 4)
Expand Down Expand Up @@ -738,14 +752,6 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
*/
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)

/** Allow raw HTML and unsafe links, `javascript:`, `vbscript:`, `file:`, and
* all `data:` URLs -- by default, only `image/png`, `image/gif`, `image/jpeg`,
* or `image/webp` mime types are allowed. Without this option, raw HTML is
* replaced by a placeholder HTML comment, and unsafe links are replaced by
* empty strings.
*/
#define CMARK_OPT_UNSAFE (1 << 17)

/**
* ## Version information
*/
Expand Down
1 change: 0 additions & 1 deletion Sources/libcmark_gfm/include/iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ extern "C" {
#endif

#include "cmark-gfm.h"
//#include "memory.h"

typedef struct {
cmark_event_type ev_type;
Expand Down
1 change: 0 additions & 1 deletion Sources/libcmark_gfm/include/map.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#ifndef CMARK_MAP_H
#define CMARK_MAP_H

//#include "memory.h"
#include "chunk.h"

#ifdef __cplusplus
Expand Down
1 change: 1 addition & 0 deletions Sources/libcmark_gfm/include/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ typedef struct {
/* Internal state bits kept in a node's `flags` field. Each constant is a
 * distinct single bit so that they can be OR-ed together. */
enum cmark_node__internal_flags {
  /* NOTE(review): presumably marks a block that is still open while
   * parsing -- confirm against blocks.c. */
  CMARK_NODE__OPEN = 0x01,
  /* Bit queried by S_last_line_blank() in blocks.c. */
  CMARK_NODE__LAST_LINE_BLANK = 0x02,
  /* Bit set by S_ends_with_blank_line() in blocks.c on nodes it has
   * already examined. */
  CMARK_NODE__LAST_LINE_CHECKED = 0x04,
};

struct cmark_node {
Expand Down
3 changes: 2 additions & 1 deletion Sources/libcmark_gfm/include/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#define CMARK_PARSER_H

#include <stdio.h>
#include "references.h"
#include "node.h"
#include "buffer.h"
//#include "memory.h"

#ifdef __cplusplus
extern "C" {
Expand All @@ -30,6 +30,7 @@ struct cmark_parser {
bufsize_t first_nonspace;
/* See the documentation for cmark_parser_get_first_nonspace_column() in cmark.h */
bufsize_t first_nonspace_column;
bufsize_t thematic_break_kill_pos;
/* See the documentation for cmark_parser_get_indent() in cmark.h */
int indent;
/* See the documentation for cmark_parser_is_blank() in cmark.h */
Expand Down
1 change: 0 additions & 1 deletion Sources/libcmark_gfm/include/render.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ extern "C" {
#include <stdlib.h>
#include "buffer.h"
#include "chunk.h"
//#include "memory.h"

typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping;

Expand Down
2 changes: 0 additions & 2 deletions Sources/libcmark_gfm/include/scanners.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ bufsize_t _scan_link_title(const unsigned char *p);
bufsize_t _scan_spacechars(const unsigned char *p);
bufsize_t _scan_atx_heading_start(const unsigned char *p);
bufsize_t _scan_setext_heading_line(const unsigned char *p);
bufsize_t _scan_thematic_break(const unsigned char *p);
bufsize_t _scan_open_code_fence(const unsigned char *p);
bufsize_t _scan_close_code_fence(const unsigned char *p);
bufsize_t _scan_entity(const unsigned char *p);
Expand All @@ -50,7 +49,6 @@ bufsize_t _scan_footnote_definition(const unsigned char *p);
#define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
#define scan_setext_heading_line(c, n) \
_scan_at(&_scan_setext_heading_line, c, n)
#define scan_thematic_break(c, n) _scan_at(&_scan_thematic_break, c, n)
#define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
#define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
#define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
Expand Down

0 comments on commit 2abfe8c

Please sign in to comment.