Commit

fix(frontend): replace markdown with html when update ES (#179)

lionelB committed Nov 17, 2020
1 parent ded694f commit 25f3143
Showing 6 changed files with 456 additions and 10 deletions.
8 changes: 8 additions & 0 deletions targets/frontend/package.json
@@ -30,6 +30,7 @@
"http-proxy-middleware": "^1.0.6",
"isomorphic-unfetch": "^3.1.0",
"jsonwebtoken": "^8.5.1",
"memoizee": "^0.4.14",
"next": "^10.0.1",
"next-transpile-modules": "^4.1.0",
"next-urql": "^2.1.1",
@@ -43,10 +44,17 @@
"react-icons": "^3.11.0",
"react-is": "^16.13.1",
"react-sortable-hoc": "^1.11.0",
"rehype-raw": "^5.0.0",
"rehype-stringify": "^8.0.0",
"remark-parse": "^9.0.0",
"remark-rehype": "^8.0.0",
"remark-stringify": "^9.0.0",
"semver": "^7.3.2",
"sentry-testkit": "^3.2.1",
"strip-markdown": "^4.0.0",
"styled-components": "^5.2.1",
"theme-ui": "^0.3.1",
"unified": "^9.2.0",
"unist-util-parents": "^1.0.3",
"unist-util-select": "^3.0.2",
"urql": "^1.11.1",
89 changes: 89 additions & 0 deletions targets/frontend/src/lib/preview/glossary.js
@@ -0,0 +1,89 @@
const conventionMatchers = [
"convention collective",
"conventions collectives",
"accords de branches",
"accord de branche",
"disposition conventionnelle",
"dispositions conventionnelles",
];

// We cannot use the \b word boundary because \w does not match diacritics,
// so we build a kind of \b equivalent.
// The main difference is that a matched pattern can start with a whitespace character.
const frDiacritics = "àâäçéèêëïîôöùûüÿœæÀÂÄÇÉÈÊËÎÏÔÖÙÛÜŸŒÆ";
const wordBoundaryStart = `(?:^|[^_/\\w${frDiacritics}-])`;
const wordBoundaryEnd = `(?![\\w${frDiacritics}])`;

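// Only match text located inside element content (after a closing ">" and
// before the next opening "<"), never inside a tag or attribute.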
const startTag = `(?<=>[^><]*)`;
const endTag = `(?=[^<]*</)`;

export function addGlossary(entries, htmlContent) {
if (!htmlContent) return "";

let idHtmlContent = htmlContent;

let glossary = [];
entries.forEach(({ abbrs, definition, title, variants }) => {
glossary = glossary.concat(
[title, ...variants].map((term) => ({
definition,
pattern: new RegExp(
`${startTag}${wordBoundaryStart}(${term})${wordBoundaryEnd}${endTag}`,
"gi"
),
term,
}))
);
if (abbrs) {
glossary.push({
definition,
pattern: new RegExp(`${startTag}\\b(${abbrs})\\b${endTag}`, "g"),
term: abbrs,
});
}
});

// we make sure that longer terms are replaced first
glossary.sort((previous, next) => {
return next.term.length - previous.term.length;
});

// we also make sure that the convention collective (cc) matchers are replaced first
conventionMatchers.forEach((matcher) => {
glossary.unshift({
definition: false,
pattern: new RegExp(`${startTag}(${matcher})${endTag}`, "gi"),
term: matcher,
});
});

const idToWebComponent = new Map();

glossary.forEach(({ definition, pattern, term }, index) => {
// while we loop, we replace the matches with an id to prevent nested matches
idHtmlContent = idHtmlContent.replace(pattern, function (
match // contains the matching term with the word boundaries
) {
const id = "__tt__" + index;
const webComponent = definition
? `<webcomponent-tooltip content="${encodeURIComponent(
definition.replace(/'/g, "’").replace("<p>", "").replace("</p>", "")
)}">${term}</webcomponent-tooltip>`
: `<webcomponent-tooltip-cc>${term}</webcomponent-tooltip-cc>`;
idToWebComponent.set(id, webComponent);
return match.replace(new RegExp(term), id);
});
});

// In the end, we replace the id with its related component
let finalContent = idHtmlContent;
idToWebComponent.forEach((webComponent, id) => {
// make sure a shorter id (e.g. __tt__1) does not match inside a longer one (e.g. __tt__12)
finalContent = finalContent.replace(
new RegExp(`${id}([^1-9])`, "g"),
`${webComponent}$1`
);
});

return finalContent;
}
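A minimal usage sketch of addGlossary, not part of this commit: the entry shape (title, variants, abbrs, definition) mirrors the destructuring above, while the HTML input, the entry values and the import path are hypothetical.

```js
// Illustrative sketch only: the entry and the HTML below are made up.
import { addGlossary } from "./glossary";

const entries = [
  {
    abbrs: null,
    definition: "<p>Délai entre la notification de la rupture et la fin du contrat.</p>",
    title: "préavis",
    variants: [],
  },
];

const html = "<p>Le préavis doit être respecté.</p>";

console.log(addGlossary(entries, html));
// Roughly:
// <p>Le <webcomponent-tooltip content="D%C3%A9lai%20…">préavis</webcomponent-tooltip> doit être respecté.</p>
```

Because of the lookbehind/lookahead pair, a term occurring inside a tag or an attribute value is left untouched; only element text content is wrapped.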
45 changes: 45 additions & 0 deletions targets/frontend/src/lib/preview/markdown.js
@@ -0,0 +1,45 @@
import htmlAstToAnotherHtmlAst from "rehype-raw";
import htmlAstStringify from "rehype-stringify";
import markdownToMarkdownAst from "remark-parse";
import markdownAstToHtmlAst from "remark-rehype";
import markdownAstStringify from "remark-stringify";
import markdownAstStrip from "strip-markdown";
import unified from "unified";

import { addGlossary } from "./glossary";

const textProcessor = unified()
.use(markdownToMarkdownAst)
.use(markdownAstStrip)
.use(markdownAstStringify);

const htmlProcessor = unified()
.use(markdownToMarkdownAst)
.use(markdownAstToHtmlAst, { allowDangerousHtml: true })
.use(htmlAstToAnotherHtmlAst)
.use(htmlAstStringify);

export function markdownTransform(glossary, document) {
document.intro = addGlossary(
glossary,
htmlProcessor.processSync(document.intro).contents
);

document.contents.forEach((content) => {
content.html = addGlossary(
glossary,
htmlProcessor.processSync(content.markdown).contents
);
delete content.markdown;
});

document.text =
textProcessor.processSync(document.intro) +
document.contents
.map(({ markdown }) =>
textProcessor.processSync(markdown).contents.replace(/\s\s+/g, " ")
)
.join("");

return document;
}
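A sketch of what markdownTransform produces, again not part of this commit; the input document shape (intro, contents[].markdown) is assumed from the code above and the outputs shown are approximate.

```js
// Illustrative sketch only: the input document is made up.
import { markdownTransform } from "./markdown";

const glossary = []; // no glossary entries, so addGlossary leaves the HTML untouched
const document = {
  contents: [{ markdown: "## Démission\n\nLe salarié peut **démissionner**." }],
  intro: "Un _aperçu_ de la rupture du contrat.",
};

const result = markdownTransform(glossary, document);
// result.intro            → "<p>Un <em>aperçu</em> de la rupture du contrat.</p>"
// result.contents[0].html → "<h2>Démission</h2>\n<p>Le salarié peut <strong>démissionner</strong>.</p>"
// result.contents[0].markdown is deleted, and result.text holds the stripped
// plain-text concatenation of the intro and the contents.
```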
46 changes: 44 additions & 2 deletions targets/frontend/src/pages/api/preview.js
@@ -1,22 +1,55 @@
import { Client } from "@elastic/elasticsearch";
import { client as gqlClient } from "@shared/graphql-client";
import { SOURCES } from "@socialgouv/cdtn-sources";
import memoizee from "memoizee";
import { markdownTransform } from "src/lib/preview/markdown";

const getGlossary = `
query getGlossary {
glossary(order_by: {term: asc}) {
abbreviations
definition
id
references
term
variants
}
}
`;

async function _fetchGlossary() {
const result = await gqlClient.query(getGlossary).toPromise();
if (result.error) {
console.error("[fetchGlossary]", result.error);
throw result.error;
}
return result.data.glossary;
}
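// Cache the glossary for five minutes. promise: true caches the resolved value
// of the promise, and preFetch: true refreshes it in the background when the
// cache is read near the end of its maxAge.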
const fetchGlossary = memoizee(_fetchGlossary, {
maxAge: 1000 * 5 * 60,
preFetch: true,
promise: true,
});

export default async function (req, res) {
if (!process.env.ELASTICSEARCH_API_KEY || !process.env.ELASTICSEARCH_URL) {
res.status(304).json({ message: "not modified" });
}

const glossary = await fetchGlossary();

const client = new Client({
auth: {
apiKey: process.env.ELASTICSEARCH_API_KEY,
},
node: `${process.env.ELASTICSEARCH_URL}`,
});

const { cdtnId, document } = req.body;
const { cdtnId, source, document } = req.body;
try {
await client.update({
body: {
doc: document,
doc: await transform(source, document, glossary),
},
id: cdtnId,
index: `cdtn-master_documents`,
@@ -27,3 +60,12 @@ export default async function (req, res) {
res.status(response.statusCode).json({ message: response.body.error });
}
}

async function transform(source, document, glossary) {
switch (source) {
case SOURCES.EDITORIAL_CONTENT:
return markdownTransform(glossary, document);
default:
return document;
}
}
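A sketch of the call the content page below now makes against this endpoint, not part of this commit: it uses a plain fetch instead of the project's request helper, and the ids and field values are invented. Only SOURCES.EDITORIAL_CONTENT goes through markdownTransform; every other source is indexed as-is.

```js
// Illustrative sketch only: a plain-fetch equivalent of the call made by the
// content page, with made-up ids and values.
import { SOURCES } from "@socialgouv/cdtn-sources";

await fetch("/api/preview", {
  body: JSON.stringify({
    cdtnId: "0b2c6e78",
    document: {
      contents: [{ markdown: "## Titre\n\nDu contenu en markdown." }],
      intro: "Une introduction.",
      metaDescription: "Description.",
      title: "Un contenu éditorial",
    },
    // Only SOURCES.EDITORIAL_CONTENT triggers markdownTransform in transform();
    // any other source leaves the document unchanged before the ES update.
    source: SOURCES.EDITORIAL_CONTENT,
  }),
  headers: { "Content-Type": "application/json" },
  method: "POST",
});
```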
16 changes: 13 additions & 3 deletions targets/frontend/src/pages/contenus/[id].js
@@ -48,7 +48,7 @@ mutation updateDocument($cdtnId: String!, $metaDescription: String!, $title: Str
cdtn_id: $cdtnId
}
){
cdtnId:cdtn_id, title, metaDescription: meta_description, document
cdtnId:cdtn_id, title, source, metaDescription: meta_description, document
}
}`;

@@ -81,9 +81,19 @@ export function DocumentsPage() {
metaDescription: jsonDoc.current.meta_description,
title: jsonDoc.current.title,
}).then(({ data }) => {
const { cdtnId, title, metaDescription, document } = data.document;
const {
cdtnId,
title,
source,
metaDescription,
document,
} = data.document;
request("/api/preview", {
body: { cdtnId, document: { ...document, metaDescription, title } },
body: {
cdtnId,
document: { ...document, metaDescription, title },
source,
},
});
if (result.error) {
console.error(result.error);
