-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
feat: cache npm metadata #5491
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: cache npm metadata #5491
Changes from 25 commits
d09c0d2
9cd8119
f0aaee3
c577e8b
175f1a1
dba9140
7ec0583
2deff2a
caa8e9c
a5ff389
a1bf470
5df66c1
f3fc013
9e42027
f6fbaa5
5a0e7c5
7f7bde8
65b4138
30a97dd
7cc1f1b
a82c891
47ab924
3efc7a9
ede7a2a
5268b7a
c630d25
d838e3b
dd07bf4
e662c82
6778080
2b1e1ec
07b4ceb
7e09021
600c358
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
releases: | ||
"@yarnpkg/cli": minor | ||
"@yarnpkg/core": minor | ||
"@yarnpkg/fslib": minor | ||
"@yarnpkg/plugin-npm": minor | ||
|
||
declined: | ||
- "@yarnpkg/plugin-compat" | ||
- "@yarnpkg/plugin-constraints" | ||
- "@yarnpkg/plugin-dlx" | ||
- "@yarnpkg/plugin-essentials" | ||
- "@yarnpkg/plugin-exec" | ||
- "@yarnpkg/plugin-file" | ||
- "@yarnpkg/plugin-git" | ||
- "@yarnpkg/plugin-github" | ||
- "@yarnpkg/plugin-http" | ||
- "@yarnpkg/plugin-init" | ||
- "@yarnpkg/plugin-interactive-tools" | ||
- "@yarnpkg/plugin-link" | ||
- "@yarnpkg/plugin-nm" | ||
- "@yarnpkg/plugin-npm-cli" | ||
- "@yarnpkg/plugin-pack" | ||
- "@yarnpkg/plugin-patch" | ||
- "@yarnpkg/plugin-pnp" | ||
- "@yarnpkg/plugin-pnpm" | ||
- "@yarnpkg/plugin-stage" | ||
- "@yarnpkg/plugin-typescript" | ||
- "@yarnpkg/plugin-version" | ||
- "@yarnpkg/plugin-workspace-tools" | ||
- vscode-zipfs | ||
- "@yarnpkg/builder" | ||
- "@yarnpkg/doctor" | ||
- "@yarnpkg/extensions" | ||
- "@yarnpkg/libzip" | ||
- "@yarnpkg/nm" | ||
- "@yarnpkg/pnp" | ||
- "@yarnpkg/pnpify" | ||
- "@yarnpkg/sdks" | ||
- "@yarnpkg/shell" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,13 @@ | ||
import {Configuration, Ident, formatUtils, httpUtils, nodeUtils, StreamReport} from '@yarnpkg/core'; | ||
import {MessageName, ReportError} from '@yarnpkg/core'; | ||
import {prompt} from 'enquirer'; | ||
import {URL} from 'url'; | ||
import {Configuration, Ident, formatUtils, httpUtils, nodeUtils, StreamReport, structUtils, IdentHash, hashUtils, Project} from '@yarnpkg/core'; | ||
import {MessageName, ReportError} from '@yarnpkg/core'; | ||
import {Filename, ppath, toFilename, xfs} from '@yarnpkg/fslib'; | ||
import {prompt} from 'enquirer'; | ||
import pick from 'lodash/pick'; | ||
import {URL} from 'url'; | ||
|
||
import {Hooks} from './index'; | ||
import * as npmConfigUtils from './npmConfigUtils'; | ||
import {MapLike} from './npmConfigUtils'; | ||
import {Hooks} from './index'; | ||
import * as npmConfigUtils from './npmConfigUtils'; | ||
import {MapLike} from './npmConfigUtils'; | ||
|
||
export enum AuthType { | ||
NO_AUTH, | ||
|
@@ -33,7 +35,7 @@ export type Options = httpUtils.Options & RegistryOptions & { | |
* It doesn't handle 403 Forbidden, as the npm registry uses it when the user attempts | ||
* a prohibited action, such as publishing a package with a similar name to an existing package. | ||
*/ | ||
export async function handleInvalidAuthenticationError(error: any, {attemptedAs, registry, headers, configuration}: {attemptedAs?: string, registry: string, headers: {[key: string]: string} | undefined, configuration: Configuration}) { | ||
export async function handleInvalidAuthenticationError(error: any, {attemptedAs, registry, headers, configuration}: {attemptedAs?: string, registry: string, headers: {[key: string]: string | undefined} | undefined, configuration: Configuration}) { | ||
if (isOtpError(error)) | ||
throw new ReportError(MessageName.AUTHENTICATION_INVALID, `Invalid OTP token`); | ||
|
||
|
@@ -64,15 +66,166 @@ export function getIdentUrl(ident: Ident) { | |
} | ||
} | ||
|
||
export type GetPackageMetadataOptions = Omit<Options, 'ident' | 'configuration'> & { | ||
project: Project; | ||
|
||
/** | ||
* Warning: This option will return all cached metadata if the version is found, but the rest of the metadata can be stale. | ||
*/ | ||
version?: string; | ||
}; | ||
|
||
// We use 2 different caches: | ||
// - an in-memory cache, to avoid hitting the disk and the network more than once per process for each package | ||
// - an on-disk cache, for exact version matches and to avoid refetching the metadata if the resource hasn't changed on the server | ||
|
||
const PACKAGE_METADATA_CACHE = new Map<IdentHash, PackageMetadata>(); | ||
|
||
/**
 * Caches and returns the package metadata for the given ident.
 *
 * The lookup goes through, in order:
 * - the in-memory cache, which short-circuits everything else;
 * - the on-disk cache, which is returned as-is only when `version` is provided and listed in it
 *   (it is skipped entirely when the lockfile needs to be refreshed);
 * - the registry, queried with the `If-None-Match` / `If-Modified-Since` validators of the on-disk
 *   entry (if any), so that a `304` response can be answered from the on-disk copy.
 *
 * Metadata confirmed by the registry (fresh body or `304`) is stored in the in-memory cache; fresh
 * bodies are additionally written to the on-disk cache together with their validators.
 *
 * Note: This function only caches and returns specific fields from the metadata.
 * If you need other fields, use the uncached {@link get} or consider whether it would make more sense to extract
 * the fields from the on-disk packages using the linkers or from the fetch results using the fetchers.
 *
 * @param ident The package whose metadata is requested.
 * @param options Request options; `version`, when set, allows a potentially stale on-disk entry
 * to be returned as long as it lists that exact version.
 * @returns The picked metadata (`dist-tags` and the cached fields of each version).
 */
export async function getPackageMetadata(ident: Ident, {project, registry, headers, version, ...rest}: GetPackageMetadataOptions): Promise<PackageMetadata> {
  const {configuration} = project;

  const cachedInMemory = PACKAGE_METADATA_CACHE.get(ident.identHash);
  if (cachedInMemory)
    return cachedInMemory;

  registry = normalizeRegistry(configuration, {ident, registry});

  const registryFolder = getRegistryFolder(configuration, registry);
  const identPath = ppath.join(registryFolder, `${structUtils.slugifyIdent(ident)}.json`);

  let cachedOnDisk: CachedMetadata | null = null;

  // We bypass the on-disk cache for security reasons if the lockfile needs to be refreshed,
  // since most likely the user is trying to validate the metadata using hardened mode.
  if (!project.lockfileNeedsRefresh) {
    try {
      cachedOnDisk = await xfs.readJsonPromise(identPath) as CachedMetadata;

      if (typeof version !== `undefined` && typeof cachedOnDisk.metadata.versions[version] !== `undefined`) {
        return cachedOnDisk.metadata;
      }
    } catch {
      // The entry is missing, unreadable, or doesn't have the expected shape: forget about it so
      // that we neither send its validators nor serve it on a 304 response.
      cachedOnDisk = null;
    }
  }

  return await get(getIdentUrl(ident), {
    ...rest,
    customErrorMessage: customPackageError,
    configuration,
    registry,
    ident,
    headers: {
      ...headers,
      // We set both headers in case a registry doesn't support ETags
      [`If-None-Match`]: cachedOnDisk?.etag,
      [`If-Modified-Since`]: cachedOnDisk?.lastModified,
    },
    wrapNetworkRequest: async executor => async () => {
      const response = await executor();

      if (response.statusCode === 304) {
        if (cachedOnDisk === null)
          throw new Error(`Assertion failed: cachedMetadata should not be null`);

        // The registry just confirmed that the on-disk copy is current, so it can be
        // served from memory for the rest of the process.
        PACKAGE_METADATA_CACHE.set(ident.identHash, cachedOnDisk.metadata);

        return {
          ...response,
          body: cachedOnDisk.metadata,
        };
      }

      const packageMetadata = pickPackageMetadata(JSON.parse(response.body.toString()));

      PACKAGE_METADATA_CACHE.set(ident.identHash, packageMetadata);

      const metadata: CachedMetadata = {
        metadata: packageMetadata,
        etag: response.headers.etag,
        lastModified: response.headers[`last-modified`],
      };

      await xfs.mkdirPromise(registryFolder, {recursive: true});
      await xfs.writeJsonPromise(identPath, metadata, {compact: true});

      return {
        ...response,
        body: packageMetadata,
      };
    },
  });
}
|
||
type CachedMetadata = { | ||
metadata: PackageMetadata; | ||
etag?: string; | ||
lastModified?: string; | ||
}; | ||
|
||
export type PackageMetadata = { | ||
'dist-tags': Record<string, string>; | ||
versions: Record<string, any>; | ||
}; | ||
|
||
const CACHED_FIELDS = [ | ||
`name`, | ||
|
||
`deprecated`, | ||
`dist.tarball`, | ||
|
||
`bin`, | ||
`scripts`, | ||
|
||
`os`, | ||
`cpu`, | ||
`libc`, | ||
|
||
`dependencies`, | ||
`dependenciesMeta`, | ||
`optionalDependencies`, | ||
|
||
`peerDependencies`, | ||
`peerDependenciesMeta`, | ||
]; | ||
|
||
/**
 * Builds the trimmed-down copy of a metadata document that gets cached: the `dist-tags`
 * map is kept as-is, and each entry of `versions` is reduced to the paths listed in `CACHED_FIELDS`.
 */
function pickPackageMetadata(metadata: PackageMetadata): PackageMetadata {
  const pickedEntries: Array<[string, any]> = [];

  for (const [version, manifest] of Object.entries(metadata.versions))
    pickedEntries.push([version, pick(manifest, CACHED_FIELDS)]);

  const distTags = metadata[`dist-tags`];
  const versions = Object.fromEntries(pickedEntries);

  return {'dist-tags': distTags, versions};
}
|
||
/** | ||
* Used to invalidate the on-disk cache when the format changes. | ||
*/ | ||
const CACHE_KEY = hashUtils.makeHash(...CACHED_FIELDS).slice(0, 6); | ||
|
||
/**
 * Returns the folder holding the cached metadata of a given registry:
 * `<metadata folder>/<CACHE_KEY>/<registry hostname>`.
 *
 * Only the hostname of the registry URL takes part in the path.
 */
function getRegistryFolder(configuration: Configuration, registry: string) {
  const root = getMetadataFolder(configuration);
  const {hostname} = new URL(registry);

  return ppath.join(root, CACHE_KEY as Filename, toFilename(hostname));
}
|
||
function getMetadataFolder(configuration: Configuration) { | ||
return ppath.join(configuration.get(`globalFolder`), `npm-metadata`); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm thinking perhaps I'd also suggest moving the […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ping on this comment? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (Sorry for the delay.)
I don't really like the idea, the mirror and the npm metadata cache are 2 separate things, and merging them in a single folder would lead to confusion. We've also got This would just complicate things like I'd prefer all of them to be in separate folders. One thing we could do would be to move the mirror to
I'd rather not. It would just give the illusion of consistency. The cache can do it because it's the sole source that controls that folder. The npm metadata cache is supposed to be specific to the Moving the prefix to the filenames and having the folder called just Edit: The files also wouldn't be in a single folder like the cache, since e.g. for npm we have It would also have to be controlled by the core, which would be tasked with automatically generating the paths to ensure that no 2 resolvers accidentally use the same path (and also to make it possible for us to change the metadata cache path in the future). In addition, we're not even certain that the current kind of cache is better than a monolithic one, that's something I'm open to experimenting with in the future. That's why I think that opening the folder to anything but the npm resolvers is not something I want to do yet, if ever. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🤔 Thought more about it and I'd be open to making it This way, we still have a common metadata folder but we make it clear that each resolver has to manage it manually. What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That seems reasonable. I like the idea of moving the cache into cache/mirror (that said, I think we could also just rename --mirror into -g for the same effect; I think I'd like this even better 🤔). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🤔 I still think I'd prefer to keep the mirror, metadata, and index separate.
We could have a |
||
} | ||
|
||
export async function get(path: string, {configuration, headers, ident, authType, registry, ...rest}: Options) { | ||
if (ident && typeof registry === `undefined`) | ||
registry = npmConfigUtils.getScopeRegistry(ident.scope, {configuration}); | ||
registry = normalizeRegistry(configuration, {ident, registry}); | ||
paul-soporan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if (ident && ident.scope && typeof authType === `undefined`) | ||
authType = AuthType.BEST_EFFORT; | ||
|
||
if (typeof registry !== `string`) | ||
throw new Error(`Assertion failed: The registry should be a string`); | ||
|
||
const auth = await getAuthenticationHeader(registry, {authType, configuration, ident}); | ||
if (auth) | ||
headers = {...headers, authorization: auth}; | ||
|
@@ -87,11 +240,7 @@ export async function get(path: string, {configuration, headers, ident, authType | |
} | ||
|
||
export async function post(path: string, body: httpUtils.Body, {attemptedAs, configuration, headers, ident, authType = AuthType.ALWAYS_AUTH, registry, otp, ...rest}: Options & {attemptedAs?: string}) { | ||
if (ident && typeof registry === `undefined`) | ||
registry = npmConfigUtils.getScopeRegistry(ident.scope, {configuration}); | ||
|
||
if (typeof registry !== `string`) | ||
throw new Error(`Assertion failed: The registry should be a string`); | ||
registry = normalizeRegistry(configuration, {ident, registry}); | ||
|
||
const auth = await getAuthenticationHeader(registry, {authType, configuration, ident}); | ||
if (auth) | ||
|
@@ -123,11 +272,7 @@ export async function post(path: string, body: httpUtils.Body, {attemptedAs, con | |
} | ||
|
||
export async function put(path: string, body: httpUtils.Body, {attemptedAs, configuration, headers, ident, authType = AuthType.ALWAYS_AUTH, registry, otp, ...rest}: Options & {attemptedAs?: string}) { | ||
if (ident && typeof registry === `undefined`) | ||
registry = npmConfigUtils.getScopeRegistry(ident.scope, {configuration}); | ||
|
||
if (typeof registry !== `string`) | ||
throw new Error(`Assertion failed: The registry should be a string`); | ||
registry = normalizeRegistry(configuration, {ident, registry}); | ||
|
||
const auth = await getAuthenticationHeader(registry, {authType, configuration, ident}); | ||
if (auth) | ||
|
@@ -159,11 +304,7 @@ export async function put(path: string, body: httpUtils.Body, {attemptedAs, conf | |
} | ||
|
||
export async function del(path: string, {attemptedAs, configuration, headers, ident, authType = AuthType.ALWAYS_AUTH, registry, otp, ...rest}: Options & {attemptedAs?: string}) { | ||
if (ident && typeof registry === `undefined`) | ||
registry = npmConfigUtils.getScopeRegistry(ident.scope, {configuration}); | ||
|
||
if (typeof registry !== `string`) | ||
throw new Error(`Assertion failed: The registry should be a string`); | ||
registry = normalizeRegistry(configuration, {ident, registry}); | ||
|
||
const auth = await getAuthenticationHeader(registry, {authType, configuration, ident}); | ||
if (auth) | ||
|
@@ -194,6 +335,16 @@ export async function del(path: string, {attemptedAs, configuration, headers, id | |
} | ||
} | ||
|
||
/**
 * Resolves the registry a request should target.
 *
 * An explicit string registry is returned untouched; when no registry is given but an ident is,
 * the registry configured for the ident's scope is used. Anything else is a programming error.
 *
 * @throws Error when the registry is neither a string nor derivable from an ident.
 */
function normalizeRegistry(configuration: Configuration, {ident, registry}: Partial<RegistryOptions>): string {
  if (typeof registry === `string`)
    return registry;

  const canUseScopeRegistry = typeof registry === `undefined` && ident;
  if (canUseScopeRegistry)
    return npmConfigUtils.getScopeRegistry(ident.scope, {configuration});

  throw new Error(`Assertion failed: The registry should be a string`);
}
|
||
async function getAuthenticationHeader(registry: string, {authType = AuthType.CONFIGURATION, configuration, ident}: {authType?: AuthType, configuration: Configuration, ident: RegistryOptions['ident']}) { | ||
const effectiveConfiguration = npmConfigUtils.getAuthConfiguration(registry, {configuration, ident}); | ||
const mustAuthenticate = shouldAuthenticate(effectiveConfiguration, authType); | ||
|
@@ -242,7 +393,7 @@ function shouldAuthenticate(authConfiguration: MapLike, authType: AuthType) { | |
} | ||
} | ||
|
||
async function whoami(registry: string, headers: {[key: string]: string} | undefined, {configuration}: {configuration: Configuration}) { | ||
async function whoami(registry: string, headers: {[key: string]: string | undefined} | undefined, {configuration}: {configuration: Configuration}) { | ||
paul-soporan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (typeof headers === `undefined` || typeof headers.authorization === `undefined`) | ||
return `an anonymous user`; | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.