Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

normalize links #1037

Merged
merged 12 commits into from Mar 12, 2024
4 changes: 4 additions & 0 deletions docs/config.md
Expand Up @@ -167,6 +167,10 @@ export default {

The base path when serving the site. Currently this only affects the custom 404 page, if any.

## cleanUrls <a href="https://github.com/observablehq/framework/pull/1037" target="_blank" class="observablehq-version-badge" data-version="prerelease" title="Added in #1037"></a>

Whether page links should be “clean”, _i.e._, formatted without a `.html` extension. Defaults to true. If true, a link to `config.html` will be formatted as `config`. Regardless of this setting, a link to an index page will drop the implied `index.html`; for example `foo/index.html` will be formatted as `foo/`.

## toc

The table of contents configuration.
Expand Down
38 changes: 37 additions & 1 deletion src/markdown.ts
@@ -1,5 +1,6 @@
/* eslint-disable import/no-named-as-default-member */
import {createHash} from "node:crypto";
import {extname} from "node:path/posix";
import matter from "gray-matter";
import he from "he";
import MarkdownIt from "markdown-it";
Expand Down Expand Up @@ -275,13 +276,47 @@ function makeSoftbreakRenderer(baseRenderer: RenderRule): RenderRule {
};
}

/**
 * Splits a relative URL into its pathname, search (query string), and hash
 * (fragment) components.
 *
 * The hash is split off first, at the first "#", so any "?" appearing after
 * that "#" stays part of the hash. The search is then split off at the first
 * remaining "?". Each returned component keeps its leading delimiter ("?" or
 * "#"); a component that is absent is returned as the empty string.
 *
 * @param url - The relative URL to split.
 * @returns The pathname, search, and hash; concatenating them yields `url`.
 */
export function parseRelativeUrl(url: string): {pathname: string; search: string; hash: string} {
  let pathname = url;

  // Peel the fragment off the end first.
  let hash = "";
  const hashIndex = pathname.indexOf("#");
  if (hashIndex >= 0) {
    hash = pathname.slice(hashIndex);
    pathname = pathname.slice(0, hashIndex);
  }

  // Then peel the query string off what remains.
  let search = "";
  const queryIndex = pathname.indexOf("?");
  if (queryIndex >= 0) {
    search = pathname.slice(queryIndex);
    pathname = pathname.slice(0, queryIndex);
  }

  return {pathname, search, hash};
}

/**
 * Wraps a base link normalizer so that relative page links are rewritten to a
 * canonical form before being passed to `baseNormalize`.
 *
 * For a relative link, the pathname is normalized as follows (the search and
 * hash are preserved as-is):
 *
 * - a pathname with no extension, not ending in "/", is treated as a page and
 *   given an ".html" extension;
 * - "index.html" becomes "." and a trailing "/index.html" becomes "/",
 *   regardless of `clean`;
 * - otherwise, if `clean` is true, a trailing ".html" is dropped.
 *
 * Links that start with a scheme (e.g. "https:") or that are protocol-relative
 * ("//host/…") are passed to `baseNormalize` untouched. A pathname whose last
 * segment is "." or ".." refers to a directory and never gets ".html" appended.
 *
 * @param baseNormalize - The normalizer to apply after rewriting the link.
 * @param clean - Whether to drop the ".html" extension from page links.
 * @returns A function that normalizes a single link.
 */
export function makeLinkNormalizer(baseNormalize: (url: string) => string, clean: boolean): (url: string) => string {
  return (url) => {
    // Only clean relative links; ignore e.g. "https:" links and
    // protocol-relative "//host/path" links, which point off-site.
    if (!/^\w+:/.test(url) && !url.startsWith("//")) {
      const u = parseRelativeUrl(url);
      let {pathname} = u;
      // extname() is empty for "." and "..", but those are directory
      // references rather than extensionless pages, so leave them alone.
      const lastSegment = pathname.slice(pathname.lastIndexOf("/") + 1);
      const isDotSegment = lastSegment === "." || lastSegment === "..";
      if (pathname && !pathname.endsWith("/") && !isDotSegment && !extname(pathname)) pathname += ".html";
      if (pathname === "index.html") pathname = ".";
      else if (pathname.endsWith("/index.html")) pathname = pathname.slice(0, -"index.html".length);
      else if (clean) pathname = pathname.replace(/\.html$/, "");
      url = pathname + u.search + u.hash;
    }
    return baseNormalize(url);
  };
}

/**
 * Options for parsing a Markdown page.
 * NOTE(review): presumably the options accepted by parseMarkdown — confirm
 * against its signature, which is not visible here.
 */
export interface ParseOptions {
/** The markdown-it instance used to parse and render the page. */
md: MarkdownIt;
/**
 * The path of the page being parsed.
 * NOTE(review): callers appear to pass a serving path with a leading slash
 * and no ".md" extension (e.g. "/docs/config") — confirm.
 */
path: string;
/** Optional style setting, typed as the site config's `style` option. */
style?: Config["style"];
}

export function createMarkdownIt({markdownIt}: {markdownIt?: (md: MarkdownIt) => MarkdownIt} = {}): MarkdownIt {
export function createMarkdownIt({
markdownIt,
cleanUrls = true
}: {
markdownIt?: (md: MarkdownIt) => MarkdownIt;
cleanUrls?: boolean;
} = {}): MarkdownIt {
const md = MarkdownIt({html: true, linkify: true});
md.linkify.set({fuzzyLink: false, fuzzyEmail: false});
md.use(MarkdownItAnchor, {permalink: MarkdownItAnchor.permalink.headerLink({class: "observablehq-header-anchor"})});
Expand All @@ -290,6 +325,7 @@ export function createMarkdownIt({markdownIt}: {markdownIt?: (md: MarkdownIt) =>
md.renderer.rules.placeholder = makePlaceholderRenderer();
md.renderer.rules.fence = makeFenceRenderer(md.renderer.rules.fence!);
md.renderer.rules.softbreak = makeSoftbreakRenderer(md.renderer.rules.softbreak!);
md.normalizeLink = makeLinkNormalizer(md.normalizeLink, cleanUrls);
return markdownIt === undefined ? md : markdownIt(md);
}

Expand Down
61 changes: 17 additions & 44 deletions src/preview.ts
@@ -1,10 +1,10 @@
import {createHash} from "node:crypto";
import {watch} from "node:fs";
import type {FSWatcher, WatchEventType} from "node:fs";
import {access, constants, readFile, stat} from "node:fs/promises";
import {access, constants, readFile} from "node:fs/promises";
import {createServer} from "node:http";
import type {IncomingMessage, RequestListener, Server, ServerResponse} from "node:http";
import {basename, dirname, extname, join, normalize} from "node:path/posix";
import {basename, dirname, join, normalize} from "node:path/posix";
import {difference} from "d3-array";
import type {PatchItem} from "fast-array-diff";
import {getPatch} from "fast-array-diff";
Expand Down Expand Up @@ -152,55 +152,28 @@ export class PreviewServer {
throw new HttpError(`Not found: ${pathname}`, 404);
} else {
if ((pathname = normalize(pathname)).startsWith("..")) throw new Error("Invalid path: " + pathname);
let path = join(root, pathname);

// If this path is for /index, redirect to the parent directory for a
// tidy path. (This must be done before implicitly adding /index below!)
// Respect precedence of dir/index.md over dir.md in choosing between
// dir/ and dir!
if (basename(path, ".html") === "index") {
try {
await stat(join(dirname(path), "index.md"));
res.writeHead(302, {Location: join(dirname(pathname), "/") + url.search});
res.end();
return;
} catch (error) {
if (!isEnoent(error)) throw error;
res.writeHead(302, {Location: dirname(pathname) + url.search});
res.end();
return;
}
}

// If this path resolves to a directory, then add an implicit /index to
// the end of the path, assuming that the corresponding index.md exists.
try {
if ((await stat(path)).isDirectory() && (await stat(join(path, "index.md"))).isFile()) {
if (!pathname.endsWith("/")) {
res.writeHead(302, {Location: pathname + "/" + url.search});
res.end();
return;
}
pathname = join(pathname, "index");
path = join(path, "index");
}
} catch (error) {
if (!isEnoent(error)) throw error; // internal error
}

// If this path ends with .html, then redirect to drop the .html. TODO:
// Check for the existence of the .md file first.
if (extname(path) === ".html") {
res.writeHead(302, {Location: join(dirname(pathname), basename(pathname, ".html")) + url.search});
// Normalize the pathname (e.g., dropping ".html").
const normalizedPathname = config.md.normalizeLink(pathname);
if (pathname !== normalizedPathname) {
res.writeHead(302, {Location: normalizedPathname + url.search});
res.end();
return;
}

// Otherwise, serve the corresponding Markdown file, if it exists.
// If this path ends with a slash, then add an implicit /index to the
// end of the path.
let path = join(root, pathname);
if (pathname.endsWith("/")) {
pathname = join(pathname, "index");
path = join(path, "index");
}

// Lastly, serve the corresponding Markdown file, if it exists.
// Anything else should 404; static files should be matched above.
try {
const options = {path: pathname, ...config, preview: true};
const source = await readFile(path + ".md", "utf8");
const source = await readFile(join(dirname(path), basename(path, ".html") + ".md"), "utf8");
const parse = parseMarkdown(source, options);
const html = await renderPage(parse, options);
end(req, res, html, "text/html");
Expand Down Expand Up @@ -365,7 +338,7 @@ function handleWatch(socket: WebSocket, req: IncomingMessage, config: Config) {
path = decodeURIComponent(initialPath);
if (!(path = normalize(path)).startsWith("/")) throw new Error("Invalid path: " + initialPath);
if (path.endsWith("/")) path += "index";
path += ".md";
path = join(dirname(path), basename(path, ".html") + ".md");
const source = await readFile(join(root, path), "utf8");
const page = parseMarkdown(source, {path, ...config});
const resolvers = await getResolvers(page, {root, path});
Expand Down
64 changes: 35 additions & 29 deletions src/render.ts
Expand Up @@ -24,10 +24,12 @@ type RenderInternalOptions =
| {preview: true}; // preview

export async function renderPage(page: MarkdownPage, options: RenderOptions & RenderInternalOptions): Promise<string> {
const {root, base, path, pages, title, preview, search, resolvers = await getResolvers(page, options)} = options;
const sidebar = page.data?.sidebar !== undefined ? Boolean(page.data.sidebar) : options.sidebar;
const toc = mergeToc(page.data?.toc, options.toc);
const draft = Boolean(page.data?.draft);
const {data} = page;
const {root, md, base, path, pages, title, preview, search, resolvers = await getResolvers(page, options)} = options;
const {normalizeLink} = md;
const sidebar = data?.sidebar !== undefined ? Boolean(data.sidebar) : options.sidebar;
const toc = mergeToc(data?.toc, options.toc);
const draft = Boolean(data?.draft);
const {files, resolveFile, resolveImport} = resolvers;
return String(html`<!DOCTYPE html>
<meta charset="utf-8">${path === "/404" ? html`\n<base href="${preview ? "/" : base}">` : ""}
Expand Down Expand Up @@ -55,29 +57,29 @@ if (location.pathname.endsWith("/")) {
import ${preview || page.code.length ? `{${preview ? "open, " : ""}define} from ` : ""}${JSON.stringify(
resolveImport("observablehq:client")
)};${
files.size || page.data?.sql
? `\nimport {registerFile${page.data?.sql ? ", FileAttachment" : ""}} from ${JSON.stringify(
files.size || data?.sql
? `\nimport {registerFile${data?.sql ? ", FileAttachment" : ""}} from ${JSON.stringify(
resolveImport("observablehq:stdlib")
)};`
: ""
}${data?.sql ? `\nimport {registerTable} from ${JSON.stringify(resolveImport("npm:@observablehq/duckdb"))};` : ""}${
files.size ? `\n${renderFiles(files, resolveFile)}` : ""
}${
page.data?.sql ? `\nimport {registerTable} from ${JSON.stringify(resolveImport("npm:@observablehq/duckdb"))};` : ""
}${files.size ? `\n${renderFiles(files, resolveFile)}` : ""}${
page.data?.sql
? `\n${Object.entries<string>(page.data.sql)
data?.sql
? `\n${Object.entries<string>(data.sql)
.map(([name, source]) => `registerTable(${JSON.stringify(name)}, FileAttachment(${JSON.stringify(source)}));`)
.join("\n")}`
: ""
}
${preview ? `\nopen({hash: ${JSON.stringify(resolvers.hash)}, eval: (body) => eval(body)});\n` : ""}${page.code
.map(({node, id}) => `\n${transpileJavaScript(node, {id, resolveImport})}`)
.join("")}`)}
</script>${sidebar ? html`\n${await renderSidebar(title, pages, root, path, search)}` : ""}${
</script>${sidebar ? html`\n${await renderSidebar(title, pages, root, path, search, normalizeLink)}` : ""}${
toc.show ? html`\n${renderToc(findHeaders(page), toc.label)}` : ""
}
<div id="observablehq-center">${renderHeader(options, page.data)}
<div id="observablehq-center">${renderHeader(options, data)}
<main id="observablehq-main" class="observablehq${draft ? " observablehq--draft" : ""}">
${html.unsafe(rewriteHtml(page.html, resolvers.resolveFile))}</main>${renderFooter(path, options, page.data)}
${html.unsafe(rewriteHtml(page.html, resolvers.resolveFile))}</main>${renderFooter(path, options, data, normalizeLink)}
</div>
`);
}
Expand All @@ -102,7 +104,8 @@ async function renderSidebar(
pages: (Page | Section)[],
root: string,
path: string,
search: boolean
search: boolean,
normalizeLink: (href: string) => string
): Promise<Html> {
return html`<input id="observablehq-sidebar-toggle" type="checkbox" title="Toggle sidebar">
<label id="observablehq-sidebar-backdrop" for="observablehq-sidebar-toggle"></label>
Expand All @@ -111,7 +114,7 @@ async function renderSidebar(
<label id="observablehq-sidebar-close" for="observablehq-sidebar-toggle"></label>
<li class="observablehq-link${
normalizePath(path) === "/index" ? " observablehq-link-active" : ""
}"><a href="${relativePath(path, "/")}">${title}</a></li>
}"><a href="${normalizeLink(relativePath(path, "/"))}">${title}</a></li>
</ol>${
search
? html`\n <div id="observablehq-search"><input type="search" placeholder="Search"></div>
Expand All @@ -132,11 +135,15 @@ async function renderSidebar(
: ""
}>
<summary>${p.name}</summary>
<ol>${p.pages.map((p) => renderListItem(p, path))}
<ol>${p.pages.map((p) => renderListItem(p, path, normalizeLink))}
</ol>
</details>`
: "path" in p
? html`${i > 0 && "pages" in pages[i - 1] ? html`\n </ol>\n <ol>` : ""}${renderListItem(p, path)}`
? html`${i > 0 && "pages" in pages[i - 1] ? html`\n </ol>\n <ol>` : ""}${renderListItem(
p,
path,
normalizeLink
)}`
: ""
)}
</ol>
Expand Down Expand Up @@ -175,14 +182,10 @@ function renderToc(headers: Header[], label: string): Html {
</aside>`;
}

function renderListItem(page: Page, path: string): Html {
function renderListItem(page: Page, path: string, normalizeLink: (href: string) => string): Html {
return html`\n <li class="observablehq-link${
normalizePath(page.path) === path ? " observablehq-link-active" : ""
}"><a href="${relativePath(path, prettyPath(page.path))}">${page.name}</a></li>`;
}

function prettyPath(path: string): string {
return path.replace(/\/index$/, "/") || "/";
}"><a href="${normalizeLink(relativePath(path, page.path))}">${page.name}</a></li>`;
}

function renderHead(
Expand Down Expand Up @@ -231,23 +234,26 @@ function renderHeader({header}: Pick<Config, "header">, data: MarkdownPage["data
function renderFooter(
path: string,
options: Pick<Config, "pages" | "pager" | "title" | "footer">,
data: MarkdownPage["data"]
data: MarkdownPage["data"],
normalizeLink: (href: string) => string
): Html | null {
let footer = options.footer;
if (data?.footer !== undefined) footer = data?.footer;
const link = options.pager ? findLink(path, options) : null;
return link || footer
? html`\n<footer id="observablehq-footer">${link ? renderPager(path, link) : ""}${
? html`\n<footer id="observablehq-footer">${link ? renderPager(path, link, normalizeLink) : ""}${
footer ? html`\n<div>${html.unsafe(footer)}</div>` : ""
}
</footer>`
: null;
}

function renderPager(path: string, {prev, next}: PageLink): Html {
return html`\n<nav>${prev ? renderRel(path, prev, "prev") : ""}${next ? renderRel(path, next, "next") : ""}</nav>`;
function renderPager(path: string, {prev, next}: PageLink, normalizeLink: (href: string) => string): Html {
return html`\n<nav>${prev ? renderRel(path, prev, "prev", normalizeLink) : ""}${
next ? renderRel(path, next, "next", normalizeLink) : ""
}</nav>`;
}

function renderRel(path: string, page: Page, rel: "prev" | "next"): Html {
return html`<a rel="${rel}" href="${relativePath(path, prettyPath(page.path))}"><span>${page.name}</span></a>`;
function renderRel(path: string, page: Page, rel: "prev" | "next", normalizeLink: (href: string) => string): Html {
return html`<a rel="${rel}" href="${normalizeLink(relativePath(path, page.path))}"><span>${page.name}</span></a>`;
}
23 changes: 10 additions & 13 deletions src/search.ts
@@ -1,5 +1,5 @@
import {readFile} from "node:fs/promises";
import {basename, join} from "node:path/posix";
import {basename, dirname, join} from "node:path/posix";
import he from "he";
import MiniSearch from "minisearch";
import type {Config} from "./config.js";
Expand Down Expand Up @@ -27,7 +27,7 @@ const indexOptions = {
};

export async function searchIndex(config: Config, effects = defaultEffects): Promise<string> {
const {root, pages, search} = config;
const {root, pages, search, md} = config;
if (!search) return "{}";
if (indexCache.has(config) && indexCache.get(config).freshUntil > +new Date()) return indexCache.get(config).json;

Expand All @@ -41,23 +41,20 @@ export async function searchIndex(config: Config, effects = defaultEffects): Pro
// Index the pages
const index = new MiniSearch(indexOptions);
for await (const file of visitMarkdownFiles(root)) {
const path = join(root, file);
const source = await readFile(path, "utf8");
const {html, title, data} = parseMarkdown(source, {...config, path: "/" + file.slice(0, -3)});
const sourcePath = join(root, file);
const source = await readFile(sourcePath, "utf8");
const path = `/${join(dirname(file), basename(file, ".md"))}`;
const {html, title, data} = parseMarkdown(source, {...config, path});

// Skip pages that opt-out of indexing, and skip unlisted pages unless
// opted-in. We only log the first case.
const listed = pagePaths.has(`/${file.slice(0, -3)}`);
const listed = pagePaths.has(path);
const indexed = data?.index === undefined ? listed : Boolean(data.index);
if (!indexed) {
if (listed) effects.logger.log(`${faint("index")} ${strikethrough(path)} ${faint("(skipped)")}`);
if (listed) effects.logger.log(`${faint("index")} ${strikethrough(sourcePath)} ${faint("(skipped)")}`);
continue;
}

// This is the (top-level) serving path to the indexed page. There’s
// implicitly a leading slash here.
const id = file.slice(0, basename(file) === "index.md" ? -"index.md".length : -3);

// eslint-disable-next-line import/no-named-as-default-member
const text = he
.decode(
Expand All @@ -70,8 +67,8 @@ export async function searchIndex(config: Config, effects = defaultEffects): Pro
.replaceAll(/[\u0300-\u036f]/g, "")
.replace(/[^\p{L}\p{N}]/gu, " "); // keep letters & numbers

effects.logger.log(`${faint("index")} ${path}`);
index.add({id, title, text, keywords: normalizeKeywords(data?.keywords)});
effects.logger.log(`${faint("index")} ${sourcePath}`);
index.add({id: md.normalizeLink(path).slice("/".length), title, text, keywords: normalizeKeywords(data?.keywords)});
}

// Pass the serializable index options to the client.
Expand Down