Commit

fix(frontend): replace markdown with html when update ES (#179)

lionelB committed Nov 17, 2020
1 parent ded694f commit 25f3143
Showing 6 changed files with 456 additions and 10 deletions.
8 changes: 8 additions & 0 deletions targets/frontend/package.json
@@ -30,6 +30,7 @@
"http-proxy-middleware": "^1.0.6",
"isomorphic-unfetch": "^3.1.0",
"jsonwebtoken": "^8.5.1",
"memoizee": "^0.4.14",
"next": "^10.0.1",
"next-transpile-modules": "^4.1.0",
"next-urql": "^2.1.1",
@@ -43,10 +44,17 @@
"react-icons": "^3.11.0",
"react-is": "^16.13.1",
"react-sortable-hoc": "^1.11.0",
"rehype-raw": "^5.0.0",
"rehype-stringify": "^8.0.0",
"remark-parse": "^9.0.0",
"remark-rehype": "^8.0.0",
"remark-stringify": "^9.0.0",
"semver": "^7.3.2",
"sentry-testkit": "^3.2.1",
"strip-markdown": "^4.0.0",
"styled-components": "^5.2.1",
"theme-ui": "^0.3.1",
"unified": "^9.2.0",
"unist-util-parents": "^1.0.3",
"unist-util-select": "^3.0.2",
"urql": "^1.11.1",
89 changes: 89 additions & 0 deletions targets/frontend/src/lib/preview/glossary.js
@@ -0,0 +1,89 @@
const conventionMatchers = [
"convention collective",
"conventions collectives",
"accords de branches",
"accord de branche",
"disposition conventionnelle",
"dispositions conventionnelles",
];

// We cannot use the \b word boundary because \w does not match diacritics,
// so we build a kind of \b equivalent.
// The main difference is that a matched pattern can start with a whitespace character.
const frDiacritics = "àâäçéèêëïîôöùûüÿœæÀÂÄÇÉÈÊËÎÏÔÖÙÛÜŸŒÆ";
const wordBoundaryStart = `(?:^|[^_/\\w${frDiacritics}-])`;
const wordBoundaryEnd = `(?![\\w${frDiacritics}])`;

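// Only match text located inside element content (after a closing ">" and
// before the next opening "<"), never inside a tag or attribute.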
const startTag = `(?<=>[^><]*)`;
const endTag = `(?=[^<]*</)`;

export function addGlossary(entries, htmlContent) {
if (!htmlContent) return "";

let idHtmlContent = htmlContent;

let glossary = [];
entries.forEach(({ abbrs, definition, title, variants }) => {
glossary = glossary.concat(
[title, ...variants].map((term) => ({
definition,
pattern: new RegExp(
`${startTag}${wordBoundaryStart}(${term})${wordBoundaryEnd}${endTag}`,
"gi"
),
term,
}))
);
if (abbrs) {
glossary.push({
definition,
pattern: new RegExp(`${startTag}\\b(${abbrs})\\b${endTag}`, "g"),
term: abbrs,
});
}
});

// we make sure that longer terms are replaced first
glossary.sort((previous, next) => {
return next.term.length - previous.term.length;
});

// we also make sure that the convention collective (cc) matchers are replaced first
conventionMatchers.forEach((matcher) => {
glossary.unshift({
definition: false,
pattern: new RegExp(`${startTag}(${matcher})${endTag}`, "gi"),
term: matcher,
});
});

const idToWebComponent = new Map();

glossary.forEach(({ definition, pattern, term }, index) => {
// while we loop, we replace the matches with an id to prevent nested matches
idHtmlContent = idHtmlContent.replace(pattern, function (
match // contains the matching term with the word boundaries
) {
const id = "__tt__" + index;
const webComponent = definition
? `<webcomponent-tooltip content="${encodeURIComponent(
definition.replace(/'/g, "’").replace("<p>", "").replace("</p>", "")
)}">${term}</webcomponent-tooltip>`
: `<webcomponent-tooltip-cc>${term}</webcomponent-tooltip-cc>`;
idToWebComponent.set(id, webComponent);
return match.replace(new RegExp(term), id);
});
});

// In the end, we replace the id with its related component
let finalContent = idHtmlContent;
idToWebComponent.forEach((webComponent, id) => {
// make sure a shorter id (e.g. __tt__1) does not match inside a longer one (e.g. __tt__12)
finalContent = finalContent.replace(
new RegExp(`${id}([^1-9])`, "g"),
`${webComponent}$1`
);
});

return finalContent;
}
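A minimal usage sketch of addGlossary, not part of this commit: the entry shape (title, variants, abbrs, definition) mirrors the destructuring above, while the HTML input, the entry values and the import path are hypothetical.

```js
// Illustrative sketch only: the entry and the HTML below are made up.
import { addGlossary } from "./glossary";

const entries = [
  {
    abbrs: null,
    definition: "<p>Délai entre la notification de la rupture et la fin du contrat.</p>",
    title: "préavis",
    variants: [],
  },
];

const html = "<p>Le préavis doit être respecté.</p>";

console.log(addGlossary(entries, html));
// Roughly:
// <p>Le <webcomponent-tooltip content="D%C3%A9lai%20…">préavis</webcomponent-tooltip> doit être respecté.</p>
```

Because of the lookbehind/lookahead pair, a term occurring inside a tag or an attribute value is left untouched; only element text content is wrapped.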
45 changes: 45 additions & 0 deletions targets/frontend/src/lib/preview/markdown.js
@@ -0,0 +1,45 @@
import htmlAstToAnotherHtmlAst from "rehype-raw";
import htmlAstStringify from "rehype-stringify";
import markdownToMarkdownAst from "remark-parse";
import markdownAstToHtmlAst from "remark-rehype";
import markdownAstStringify from "remark-stringify";
import markdownAstStrip from "strip-markdown";
import unified from "unified";

import { addGlossary } from "./glossary";

const textProcessor = unified()
.use(markdownToMarkdownAst)
.use(markdownAstStrip)
.use(markdownAstStringify);

const htmlProcessor = unified()
.use(markdownToMarkdownAst)
.use(markdownAstToHtmlAst, { allowDangerousHtml: true })
.use(htmlAstToAnotherHtmlAst)
.use(htmlAstStringify);

export function markdownTransform(glossary, document) {
document.intro = addGlossary(
glossary,
htmlProcessor.processSync(document.intro).contents
);

document.contents.forEach((content) => {
content.html = addGlossary(
glossary,
htmlProcessor.processSync(content.markdown).contents
);
delete content.markdown;
});

document.text =
textProcessor.processSync(document.intro) +
document.contents
.map(({ markdown }) =>
textProcessor.processSync(markdown).contents.replace(/\s\s+/g, " ")
)
.join("");

return document;
}
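A sketch of what markdownTransform produces, again not part of this commit; the input document shape (intro, contents[].markdown) is assumed from the code above and the outputs shown are approximate.

```js
// Illustrative sketch only: the input document is made up.
import { markdownTransform } from "./markdown";

const glossary = []; // no glossary entries, so addGlossary leaves the HTML untouched
const document = {
  contents: [{ markdown: "## Démission\n\nLe salarié peut **démissionner**." }],
  intro: "Un _aperçu_ de la rupture du contrat.",
};

const result = markdownTransform(glossary, document);
// result.intro            → "<p>Un <em>aperçu</em> de la rupture du contrat.</p>"
// result.contents[0].html → "<h2>Démission</h2>\n<p>Le salarié peut <strong>démissionner</strong>.</p>"
// result.contents[0].markdown is deleted, and result.text holds the stripped
// plain-text concatenation of the intro and the contents.
```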
46 changes: 44 additions & 2 deletions targets/frontend/src/pages/api/preview.js
@@ -1,22 +1,55 @@
import { Client } from "@elastic/elasticsearch";
import { client as gqlClient } from "@shared/graphql-client";
import { SOURCES } from "@socialgouv/cdtn-sources";
import memoizee from "memoizee";
import { markdownTransform } from "src/lib/preview/markdown";

const getGlossary = `
query getGlossary {
glossary(order_by: {term: asc}) {
abbreviations
definition
id
references
term
variants
}
}
`;

async function _fetchGlossary() {
const result = await gqlClient.query(getGlossary).toPromise();
if (result.error) {
console.error("[fetchGlossary]", result.error);
throw result.error;
}
return result.data.glossary;
}
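// Cache the glossary for five minutes. promise: true caches the resolved value
// of the promise, and preFetch: true refreshes it in the background when the
// cache is read near the end of its maxAge.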
const fetchGlossary = memoizee(_fetchGlossary, {
maxAge: 1000 * 5 * 60,
preFetch: true,
promise: true,
});

export default async function (req, res) {
if (!process.env.ELASTICSEARCH_API_KEY || !process.env.ELASTICSEARCH_URL) {
res.status(304).json({ message: "not modified" });
}

const glossary = await fetchGlossary();

const client = new Client({
auth: {
apiKey: process.env.ELASTICSEARCH_API_KEY,
},
node: `${process.env.ELASTICSEARCH_URL}`,
});

const { cdtnId, document } = req.body;
const { cdtnId, source, document } = req.body;
try {
await client.update({
body: {
doc: document,
doc: await transform(source, document, glossary),
},
id: cdtnId,
index: `cdtn-master_documents`,
@@ -27,3 +60,12 @@ export default async function (req, res) {
res.status(response.statusCode).json({ message: response.body.error });
}
}

async function transform(source, document, glossary) {
switch (source) {
case SOURCES.EDITORIAL_CONTENT:
return markdownTransform(glossary, document);
default:
return document;
}
}
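A sketch of the call the content page below now makes against this endpoint, not part of this commit: it uses a plain fetch instead of the project's request helper, and the ids and field values are invented. Only SOURCES.EDITORIAL_CONTENT goes through markdownTransform; every other source is indexed as-is.

```js
// Illustrative sketch only: a plain-fetch equivalent of the call made by the
// content page, with made-up ids and values.
import { SOURCES } from "@socialgouv/cdtn-sources";

await fetch("/api/preview", {
  body: JSON.stringify({
    cdtnId: "0b2c6e78",
    document: {
      contents: [{ markdown: "## Titre\n\nDu contenu en markdown." }],
      intro: "Une introduction.",
      metaDescription: "Description.",
      title: "Un contenu éditorial",
    },
    // Only SOURCES.EDITORIAL_CONTENT triggers markdownTransform in transform();
    // any other source leaves the document unchanged before the ES update.
    source: SOURCES.EDITORIAL_CONTENT,
  }),
  headers: { "Content-Type": "application/json" },
  method: "POST",
});
```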
16 changes: 13 additions & 3 deletions targets/frontend/src/pages/contenus/[id].js
@@ -48,7 +48,7 @@ mutation updateDocument($cdtnId: String!, $metaDescription: String!, $title: Str
cdtn_id: $cdtnId
}
){
cdtnId:cdtn_id, title, metaDescription: meta_description, document
cdtnId:cdtn_id, title, source, metaDescription: meta_description, document
}
}`;

@@ -81,9 +81,19 @@ export function DocumentsPage() {
metaDescription: jsonDoc.current.meta_description,
title: jsonDoc.current.title,
}).then(({ data }) => {
const { cdtnId, title, metaDescription, document } = data.document;
const {
cdtnId,
title,
source,
metaDescription,
document,
} = data.document;
request("/api/preview", {
body: { cdtnId, document: { ...document, metaDescription, title } },
body: {
cdtnId,
document: { ...document, metaDescription, title },
source,
},
});
if (result.error) {
console.error(result.error);
