Skip to content

Commit

Permalink
Detect normalized-out boldness in headings
Browse files Browse the repository at this point in the history
Fixes #113 by looking for headings with a mix of children with normal/400 weight and no set weight, then updating the children with no set weight to be bold.
  • Loading branch information
Mr0grog committed Feb 3, 2024
1 parent d0bda0e commit f899339
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
16 changes: 16 additions & 0 deletions lib/css.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,18 @@ function getResolvedStyleProperty(propertyName, node, ancestors) {
return getResolvedStyleProperty(propertyName, parent, parentAncestors);
}

/**
 * Write a single local style property onto a node.
 *
 * The value is stored on the style object that `getNodeStyle(node)` returns,
 * so subsequent lookups of the node's own or resolved styles will use the new
 * value.
 *
 * @param {string} propertyName Name of the style property, e.g. `font-weight`.
 * @param {string} propertyValue Value to store for that property.
 * @param {RehypeNode} node Node whose local style should be updated.
 */
function setStyleProperty(propertyName, propertyValue, node) {
  getNodeStyle(node)[propertyName] = propertyValue;
}

/**
* Get an object with properties representing a node's fully resolved styles,
* including anything inherited from ancestors.
Expand All @@ -120,5 +132,9 @@ export function resolveNodeStyle(node, ancestors) {
}
return target[property];
},
set(_target, property, value, _receiver) {
setStyleProperty(property, value, node);
return value;
},
});
}
50 changes: 50 additions & 0 deletions lib/fix-google-html.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ const blockElements = new Set([
'ul',
]);

// Tag names for the six HTML heading levels (`h1` through `h6`).
const headingElements = new Set(
  [1, 2, 3, 4, 5, 6].map((level) => `h${level}`),
);

// Elements whose Markdown equivalents may not begin or end with whitespace.
// Emphasis, for instance, cannot open with a space: `This * is emphasized*`.
const spaceSensitiveElements = new Set().add('em').add('strong');
Expand All @@ -72,6 +74,7 @@ const isSpaceSensitive = (node) =>
node && spaceSensitiveElements.has(node.tagName);
// True when the node is a table cell, i.e. a `th` or `td` element.
const isCell = (node) => ['th', 'td'].includes(node.tagName);
// True when the node is an `a` element.
const isAnchor = ({ tagName }) => tagName === 'a';
// True when the node is one of the heading elements. A missing node is
// tolerated and simply yields `false`.
const isHeading = (node) => {
  const tagName = node?.tagName;
  return headingElements.has(tagName);
};

// Matches a run of one or more whitespace characters at the very start of a
// string and captures that run in group 1.
const spaceAtStartPattern = /^(\s+)/;
// Matches a run of one or more whitespace characters at the very end of a
// string and captures that run in group 1.
const spaceAtEndPattern = /(\s+)$/;
Expand Down Expand Up @@ -667,13 +670,60 @@ function fixInternalLinks(node, sliceClip) {
);
}

/**
 * Restore bold formatting on heading text whose boldness was normalized away.
 *
 * Some browsers drop the explicit bold `font-weight` from text inside a
 * heading, because the heading is already bold and the text can inherit it.
 * Not every browser styles headings as bold or normalizes the same way, so a
 * missing `font-weight` cannot simply be read as "bold".
 *
 * The heuristic used here works per heading, on its immediate children only:
 * if at least one child resolves to an explicitly normal weight (`normal` or
 * `400`), and no child resolves to an explicitly bold weight (`bold` or
 * `700`), then every child with no resolved `font-weight` is given a weight
 * of `700`. Headings that do not match this pattern are left untouched, so
 * not every bold run in a heading ends up marked as bold.
 *
 * @param {RehypeNode} node Fix the tree below this node
 */
function fixBoldInHeadings(node) {
  visitParents(node, isHeading, (heading, ancestors) => {
    // Ancestor chain as seen from the heading's children.
    const lineage = ancestors.concat([heading]);
    const unweightedStyles = [];
    let sawNormal = false;
    let sawBold = false;

    // For Google Docs, only the heading's immediate children matter.
    for (const child of heading.children) {
      const childStyle = resolveNodeStyle(child, lineage);
      const fontWeight = childStyle['font-weight'];

      switch (fontWeight) {
        case 'bold':
        case '700':
          sawBold = true;
          break;
        case 'normal':
        case '400':
          sawNormal = true;
          break;
        default:
          // Other explicit weights are ignored; only a missing weight counts
          // as "unknown".
          if (!fontWeight) {
            unweightedStyles.push(childStyle);
          }
      }
    }

    // Only act when explicit normal text exists and explicit bold does not.
    if (!sawNormal || sawBold) {
      return;
    }

    for (const childStyle of unweightedStyles) {
      childStyle['font-weight'] = '700';
    }
  });
}

/**
* A rehype plugin to clean up the HTML of a Google Doc. This applies to the
* live HTML of a Doc, as when you copy and paste it; not *exported* HTML (it
* might apply there, too; I haven’t looked into it).
*/
export default function fixGoogleHtml() {
return (tree, _file) => {
fixBoldInHeadings(tree);
unInlineStyles(tree);
createCodeBlocks(tree);
moveSpaceOutsideSensitiveChildren(tree);
Expand Down

0 comments on commit f899339

Please sign in to comment.