Skip to content

Commit

Permalink
normalize links (#1037)
Browse files Browse the repository at this point in the history
* centralize md instance as part of the normalized configuration

* move test

* normalize links with .html if cleanUrls is false (defaults to true)

* document cleanUrls

* makeLinkNormalizer

* simpler link normalization

* server-side normalization

* parse instead of regex

* fix directory link normalization

* docs; version badge

* shorter

---------

Co-authored-by: Mike Bostock <mbostock@gmail.com>
  • Loading branch information
Fil and mbostock committed Mar 12, 2024
1 parent a94ce04 commit 106c0f5
Show file tree
Hide file tree
Showing 8 changed files with 240 additions and 91 deletions.
4 changes: 4 additions & 0 deletions docs/config.md
Expand Up @@ -167,6 +167,10 @@ export default {

The base path when serving the site. Currently this only affects the custom 404 page, if any.

## cleanUrls <a href="https://github.com/observablehq/framework/pull/1037" target="_blank" class="observablehq-version-badge" data-version="prerelease" title="Added in #1037"></a>

Whether page links should be “clean”, _i.e._, formatted without a `.html` extension. Defaults to true. If true, a link to `config.html` will be formatted as `config`; if false, a link to `config` will be formatted as `config.html`. Regardless of this setting, a link to an index page will drop the implied `index.html`; for example `foo/index.html` will be formatted as `foo/`.

## toc

The table of contents configuration.
Expand Down
38 changes: 37 additions & 1 deletion src/markdown.ts
@@ -1,5 +1,6 @@
/* eslint-disable import/no-named-as-default-member */
import {createHash} from "node:crypto";
import {extname} from "node:path/posix";
import matter from "gray-matter";
import he from "he";
import MarkdownIt from "markdown-it";
Expand Down Expand Up @@ -275,13 +276,47 @@ function makeSoftbreakRenderer(baseRenderer: RenderRule): RenderRule {
};
}

/**
 * Splits a relative URL into its pathname, search (query string), and hash
 * (fragment) parts without resolving it against any base URL.
 *
 * The hash starts at the first "#" and runs to the end of the string, so a "?"
 * that appears after a "#" belongs to the hash. The search starts at the first
 * "?" that precedes the hash. Non-empty search and hash values keep their
 * leading "?" and "#"; absent parts are returned as the empty string, so
 * concatenating the three parts always reproduces the input.
 */
export function parseRelativeUrl(url: string): {pathname: string; search: string; hash: string} {
  // Peel the fragment off first: everything from the first "#" onwards.
  const hashStart = url.indexOf("#");
  const hash = hashStart < 0 ? "" : url.slice(hashStart);
  const beforeHash = hashStart < 0 ? url : url.slice(0, hashStart);

  // Then peel the query string off what remains.
  const searchStart = beforeHash.indexOf("?");
  const search = searchStart < 0 ? "" : beforeHash.slice(searchStart);
  const pathname = searchStart < 0 ? beforeHash : beforeHash.slice(0, searchStart);

  return {pathname, search, hash};
}

/**
 * Wraps a base link normalizer so that local page links are put into a
 * canonical form before the base normalizer runs.
 *
 * For a local link (no scheme, not protocol-relative), the pathname is
 * rewritten as follows; the search and hash parts are carried over untouched:
 *
 * - a final segment with no extension is treated as a page, i.e. as if it
 *   ended in ".html" ("." and ".." segments are left alone);
 * - "index.html" becomes "." and ".../index.html" becomes ".../", regardless
 *   of the clean setting;
 * - otherwise, if clean is true, a trailing ".html" is dropped; if clean is
 *   false, the ".html" extension is kept (or added).
 *
 * @param baseNormalize the normalizer to delegate to after rewriting
 * @param clean whether page links should be formatted without ".html"
 * @returns a normalizer with the same signature as baseNormalize
 */
export function makeLinkNormalizer(baseNormalize: (url: string) => string, clean: boolean): (url: string) => string {
  return (url) => {
    // Only clean local links. Links with a scheme (e.g. "https:") and
    // protocol-relative links (e.g. "//example.com/page.html") point at other
    // origins, whose URLs must not be rewritten.
    if (!/^\w+:/.test(url) && !url.startsWith("//")) {
      const u = parseRelativeUrl(url);
      let {pathname} = u;
      // A trailing "." or ".." segment names a directory, not a page; extname
      // reports no extension for it, so without this guard "." would turn
      // into "..html" when clean is false.
      const endsWithDotSegment = /(^|\/)\.\.?$/.test(pathname);
      if (pathname && !pathname.endsWith("/") && !endsWithDotSegment && !extname(pathname)) pathname += ".html";
      if (pathname === "index.html") pathname = ".";
      else if (pathname.endsWith("/index.html")) pathname = pathname.slice(0, -"index.html".length);
      else if (clean) pathname = pathname.replace(/\.html$/, "");
      url = pathname + u.search + u.hash;
    }
    return baseNormalize(url);
  };
}

// Options for parsing a Markdown page. NOTE(review): presumably the options
// argument of parseMarkdown; confirm against its signature, which is not
// visible in this hunk.
export interface ParseOptions {
// The markdown-it instance to parse with.
md: MarkdownIt;
// The path of the page being parsed. The callers visible in this commit pass
// a serving path with a leading slash and no ".md" extension; TODO confirm
// that all callers do.
path: string;
// Optional; has the same type as the "style" option of Config.
style?: Config["style"];
}

export function createMarkdownIt({markdownIt}: {markdownIt?: (md: MarkdownIt) => MarkdownIt} = {}): MarkdownIt {
export function createMarkdownIt({
markdownIt,
cleanUrls = true
}: {
markdownIt?: (md: MarkdownIt) => MarkdownIt;
cleanUrls?: boolean;
} = {}): MarkdownIt {
const md = MarkdownIt({html: true, linkify: true});
md.linkify.set({fuzzyLink: false, fuzzyEmail: false});
md.use(MarkdownItAnchor, {permalink: MarkdownItAnchor.permalink.headerLink({class: "observablehq-header-anchor"})});
Expand All @@ -290,6 +325,7 @@ export function createMarkdownIt({markdownIt}: {markdownIt?: (md: MarkdownIt) =>
md.renderer.rules.placeholder = makePlaceholderRenderer();
md.renderer.rules.fence = makeFenceRenderer(md.renderer.rules.fence!);
md.renderer.rules.softbreak = makeSoftbreakRenderer(md.renderer.rules.softbreak!);
md.normalizeLink = makeLinkNormalizer(md.normalizeLink, cleanUrls);
return markdownIt === undefined ? md : markdownIt(md);
}

Expand Down
61 changes: 17 additions & 44 deletions src/preview.ts
@@ -1,10 +1,10 @@
import {createHash} from "node:crypto";
import {watch} from "node:fs";
import type {FSWatcher, WatchEventType} from "node:fs";
import {access, constants, readFile, stat} from "node:fs/promises";
import {access, constants, readFile} from "node:fs/promises";
import {createServer} from "node:http";
import type {IncomingMessage, RequestListener, Server, ServerResponse} from "node:http";
import {basename, dirname, extname, join, normalize} from "node:path/posix";
import {basename, dirname, join, normalize} from "node:path/posix";
import {difference} from "d3-array";
import type {PatchItem} from "fast-array-diff";
import {getPatch} from "fast-array-diff";
Expand Down Expand Up @@ -152,55 +152,28 @@ export class PreviewServer {
throw new HttpError(`Not found: ${pathname}`, 404);
} else {
if ((pathname = normalize(pathname)).startsWith("..")) throw new Error("Invalid path: " + pathname);
let path = join(root, pathname);

// If this path is for /index, redirect to the parent directory for a
// tidy path. (This must be done before implicitly adding /index below!)
// Respect precedence of dir/index.md over dir.md in choosing between
// dir/ and dir!
if (basename(path, ".html") === "index") {
try {
await stat(join(dirname(path), "index.md"));
res.writeHead(302, {Location: join(dirname(pathname), "/") + url.search});
res.end();
return;
} catch (error) {
if (!isEnoent(error)) throw error;
res.writeHead(302, {Location: dirname(pathname) + url.search});
res.end();
return;
}
}

// If this path resolves to a directory, then add an implicit /index to
// the end of the path, assuming that the corresponding index.md exists.
try {
if ((await stat(path)).isDirectory() && (await stat(join(path, "index.md"))).isFile()) {
if (!pathname.endsWith("/")) {
res.writeHead(302, {Location: pathname + "/" + url.search});
res.end();
return;
}
pathname = join(pathname, "index");
path = join(path, "index");
}
} catch (error) {
if (!isEnoent(error)) throw error; // internal error
}

// If this path ends with .html, then redirect to drop the .html. TODO:
// Check for the existence of the .md file first.
if (extname(path) === ".html") {
res.writeHead(302, {Location: join(dirname(pathname), basename(pathname, ".html")) + url.search});
// Normalize the pathname (e.g., dropping ".html").
const normalizedPathname = config.md.normalizeLink(pathname);
if (pathname !== normalizedPathname) {
res.writeHead(302, {Location: normalizedPathname + url.search});
res.end();
return;
}

// Otherwise, serve the corresponding Markdown file, if it exists.
// If this path ends with a slash, then add an implicit /index to the
// end of the path.
let path = join(root, pathname);
if (pathname.endsWith("/")) {
pathname = join(pathname, "index");
path = join(path, "index");
}

// Lastly, serve the corresponding Markdown file, if it exists.
// Anything else should 404; static files should be matched above.
try {
const options = {path: pathname, ...config, preview: true};
const source = await readFile(path + ".md", "utf8");
const source = await readFile(join(dirname(path), basename(path, ".html") + ".md"), "utf8");
const parse = parseMarkdown(source, options);
const html = await renderPage(parse, options);
end(req, res, html, "text/html");
Expand Down Expand Up @@ -365,7 +338,7 @@ function handleWatch(socket: WebSocket, req: IncomingMessage, config: Config) {
path = decodeURIComponent(initialPath);
if (!(path = normalize(path)).startsWith("/")) throw new Error("Invalid path: " + initialPath);
if (path.endsWith("/")) path += "index";
path += ".md";
path = join(dirname(path), basename(path, ".html") + ".md");
const source = await readFile(join(root, path), "utf8");
const page = parseMarkdown(source, {path, ...config});
const resolvers = await getResolvers(page, {root, path});
Expand Down
64 changes: 35 additions & 29 deletions src/render.ts
Expand Up @@ -24,10 +24,12 @@ type RenderInternalOptions =
| {preview: true}; // preview

export async function renderPage(page: MarkdownPage, options: RenderOptions & RenderInternalOptions): Promise<string> {
const {root, base, path, pages, title, preview, search, resolvers = await getResolvers(page, options)} = options;
const sidebar = page.data?.sidebar !== undefined ? Boolean(page.data.sidebar) : options.sidebar;
const toc = mergeToc(page.data?.toc, options.toc);
const draft = Boolean(page.data?.draft);
const {data} = page;
const {root, md, base, path, pages, title, preview, search, resolvers = await getResolvers(page, options)} = options;
const {normalizeLink} = md;
const sidebar = data?.sidebar !== undefined ? Boolean(data.sidebar) : options.sidebar;
const toc = mergeToc(data?.toc, options.toc);
const draft = Boolean(data?.draft);
const {files, resolveFile, resolveImport} = resolvers;
return String(html`<!DOCTYPE html>
<meta charset="utf-8">${path === "/404" ? html`\n<base href="${preview ? "/" : base}">` : ""}
Expand Down Expand Up @@ -55,29 +57,29 @@ if (location.pathname.endsWith("/")) {
import ${preview || page.code.length ? `{${preview ? "open, " : ""}define} from ` : ""}${JSON.stringify(
resolveImport("observablehq:client")
)};${
files.size || page.data?.sql
? `\nimport {registerFile${page.data?.sql ? ", FileAttachment" : ""}} from ${JSON.stringify(
files.size || data?.sql
? `\nimport {registerFile${data?.sql ? ", FileAttachment" : ""}} from ${JSON.stringify(
resolveImport("observablehq:stdlib")
)};`
: ""
}${data?.sql ? `\nimport {registerTable} from ${JSON.stringify(resolveImport("npm:@observablehq/duckdb"))};` : ""}${
files.size ? `\n${renderFiles(files, resolveFile)}` : ""
}${
page.data?.sql ? `\nimport {registerTable} from ${JSON.stringify(resolveImport("npm:@observablehq/duckdb"))};` : ""
}${files.size ? `\n${renderFiles(files, resolveFile)}` : ""}${
page.data?.sql
? `\n${Object.entries<string>(page.data.sql)
data?.sql
? `\n${Object.entries<string>(data.sql)
.map(([name, source]) => `registerTable(${JSON.stringify(name)}, FileAttachment(${JSON.stringify(source)}));`)
.join("\n")}`
: ""
}
${preview ? `\nopen({hash: ${JSON.stringify(resolvers.hash)}, eval: (body) => eval(body)});\n` : ""}${page.code
.map(({node, id}) => `\n${transpileJavaScript(node, {id, resolveImport})}`)
.join("")}`)}
</script>${sidebar ? html`\n${await renderSidebar(title, pages, root, path, search)}` : ""}${
</script>${sidebar ? html`\n${await renderSidebar(title, pages, root, path, search, normalizeLink)}` : ""}${
toc.show ? html`\n${renderToc(findHeaders(page), toc.label)}` : ""
}
<div id="observablehq-center">${renderHeader(options, page.data)}
<div id="observablehq-center">${renderHeader(options, data)}
<main id="observablehq-main" class="observablehq${draft ? " observablehq--draft" : ""}">
${html.unsafe(rewriteHtml(page.html, resolvers.resolveFile))}</main>${renderFooter(path, options, page.data)}
${html.unsafe(rewriteHtml(page.html, resolvers.resolveFile))}</main>${renderFooter(path, options, data, normalizeLink)}
</div>
`);
}
Expand All @@ -102,7 +104,8 @@ async function renderSidebar(
pages: (Page | Section)[],
root: string,
path: string,
search: boolean
search: boolean,
normalizeLink: (href: string) => string
): Promise<Html> {
return html`<input id="observablehq-sidebar-toggle" type="checkbox" title="Toggle sidebar">
<label id="observablehq-sidebar-backdrop" for="observablehq-sidebar-toggle"></label>
Expand All @@ -111,7 +114,7 @@ async function renderSidebar(
<label id="observablehq-sidebar-close" for="observablehq-sidebar-toggle"></label>
<li class="observablehq-link${
normalizePath(path) === "/index" ? " observablehq-link-active" : ""
}"><a href="${relativePath(path, "/")}">${title}</a></li>
}"><a href="${normalizeLink(relativePath(path, "/"))}">${title}</a></li>
</ol>${
search
? html`\n <div id="observablehq-search"><input type="search" placeholder="Search"></div>
Expand All @@ -132,11 +135,15 @@ async function renderSidebar(
: ""
}>
<summary>${p.name}</summary>
<ol>${p.pages.map((p) => renderListItem(p, path))}
<ol>${p.pages.map((p) => renderListItem(p, path, normalizeLink))}
</ol>
</details>`
: "path" in p
? html`${i > 0 && "pages" in pages[i - 1] ? html`\n </ol>\n <ol>` : ""}${renderListItem(p, path)}`
? html`${i > 0 && "pages" in pages[i - 1] ? html`\n </ol>\n <ol>` : ""}${renderListItem(
p,
path,
normalizeLink
)}`
: ""
)}
</ol>
Expand Down Expand Up @@ -175,14 +182,10 @@ function renderToc(headers: Header[], label: string): Html {
</aside>`;
}

function renderListItem(page: Page, path: string): Html {
function renderListItem(page: Page, path: string, normalizeLink: (href: string) => string): Html {
return html`\n <li class="observablehq-link${
normalizePath(page.path) === path ? " observablehq-link-active" : ""
}"><a href="${relativePath(path, prettyPath(page.path))}">${page.name}</a></li>`;
}

function prettyPath(path: string): string {
return path.replace(/\/index$/, "/") || "/";
}"><a href="${normalizeLink(relativePath(path, page.path))}">${page.name}</a></li>`;
}

function renderHead(
Expand Down Expand Up @@ -231,23 +234,26 @@ function renderHeader({header}: Pick<Config, "header">, data: MarkdownPage["data
function renderFooter(
path: string,
options: Pick<Config, "pages" | "pager" | "title" | "footer">,
data: MarkdownPage["data"]
data: MarkdownPage["data"],
normalizeLink: (href: string) => string
): Html | null {
let footer = options.footer;
if (data?.footer !== undefined) footer = data?.footer;
const link = options.pager ? findLink(path, options) : null;
return link || footer
? html`\n<footer id="observablehq-footer">${link ? renderPager(path, link) : ""}${
? html`\n<footer id="observablehq-footer">${link ? renderPager(path, link, normalizeLink) : ""}${
footer ? html`\n<div>${html.unsafe(footer)}</div>` : ""
}
</footer>`
: null;
}

function renderPager(path: string, {prev, next}: PageLink): Html {
return html`\n<nav>${prev ? renderRel(path, prev, "prev") : ""}${next ? renderRel(path, next, "next") : ""}</nav>`;
function renderPager(path: string, {prev, next}: PageLink, normalizeLink: (href: string) => string): Html {
return html`\n<nav>${prev ? renderRel(path, prev, "prev", normalizeLink) : ""}${
next ? renderRel(path, next, "next", normalizeLink) : ""
}</nav>`;
}

function renderRel(path: string, page: Page, rel: "prev" | "next"): Html {
return html`<a rel="${rel}" href="${relativePath(path, prettyPath(page.path))}"><span>${page.name}</span></a>`;
function renderRel(path: string, page: Page, rel: "prev" | "next", normalizeLink: (href: string) => string): Html {
return html`<a rel="${rel}" href="${normalizeLink(relativePath(path, page.path))}"><span>${page.name}</span></a>`;
}
23 changes: 10 additions & 13 deletions src/search.ts
@@ -1,5 +1,5 @@
import {readFile} from "node:fs/promises";
import {basename, join} from "node:path/posix";
import {basename, dirname, join} from "node:path/posix";
import he from "he";
import MiniSearch from "minisearch";
import type {Config} from "./config.js";
Expand Down Expand Up @@ -27,7 +27,7 @@ const indexOptions = {
};

export async function searchIndex(config: Config, effects = defaultEffects): Promise<string> {
const {root, pages, search} = config;
const {root, pages, search, md} = config;
if (!search) return "{}";
if (indexCache.has(config) && indexCache.get(config).freshUntil > +new Date()) return indexCache.get(config).json;

Expand All @@ -41,23 +41,20 @@ export async function searchIndex(config: Config, effects = defaultEffects): Pro
// Index the pages
const index = new MiniSearch(indexOptions);
for await (const file of visitMarkdownFiles(root)) {
const path = join(root, file);
const source = await readFile(path, "utf8");
const {html, title, data} = parseMarkdown(source, {...config, path: "/" + file.slice(0, -3)});
const sourcePath = join(root, file);
const source = await readFile(sourcePath, "utf8");
const path = `/${join(dirname(file), basename(file, ".md"))}`;
const {html, title, data} = parseMarkdown(source, {...config, path});

// Skip pages that opt-out of indexing, and skip unlisted pages unless
// opted-in. We only log the first case.
const listed = pagePaths.has(`/${file.slice(0, -3)}`);
const listed = pagePaths.has(path);
const indexed = data?.index === undefined ? listed : Boolean(data.index);
if (!indexed) {
if (listed) effects.logger.log(`${faint("index")} ${strikethrough(path)} ${faint("(skipped)")}`);
if (listed) effects.logger.log(`${faint("index")} ${strikethrough(sourcePath)} ${faint("(skipped)")}`);
continue;
}

// This is the (top-level) serving path to the indexed page. There’s
// implicitly a leading slash here.
const id = file.slice(0, basename(file) === "index.md" ? -"index.md".length : -3);

// eslint-disable-next-line import/no-named-as-default-member
const text = he
.decode(
Expand All @@ -70,8 +67,8 @@ export async function searchIndex(config: Config, effects = defaultEffects): Pro
.replaceAll(/[\u0300-\u036f]/g, "")
.replace(/[^\p{L}\p{N}]/gu, " "); // keep letters & numbers

effects.logger.log(`${faint("index")} ${path}`);
index.add({id, title, text, keywords: normalizeKeywords(data?.keywords)});
effects.logger.log(`${faint("index")} ${sourcePath}`);
index.add({id: md.normalizeLink(path).slice("/".length), title, text, keywords: normalizeKeywords(data?.keywords)});
}

// Pass the serializable index options to the client.
Expand Down

0 comments on commit 106c0f5

Please sign in to comment.