Skip to content

Commit 4897712

Browse files
feat: cache npm metadata (#5491)
**What's the problem this PR addresses?** <!-- Describe the rationale of your PR. --> <!-- Link all issues that it closes. (Closes/Resolves #xxxx.) --> Resolving package metadata is slower than it has to be because, most times, Yarn has already fetched it in the past, and some things can be cached and reused. This should improve performance in various cases (ranging from creating new projects and cache-only-but-no-lockfile installs to `yarn up` when no new versions are available), since the server can avoid resending the response body if nothing has changed. **How did you fix it?** <!-- A detailed description of your implementation. --> This PR makes Yarn cache npm package metadata inside `<globalFolder>/metadata/npm/<cacheKey>/<registry>/<package>.json` when `getPackageMetadata` is used. If an exact version is requested and that version is already present in the cached metadata, Yarn will return the metadata from disk directly and avoid hitting the network altogether. Otherwise, Yarn will set the `If-None-Match` & `If-Modified-Since` headers using the `etag` & `last-modified` values that were cached during previous requests. This tells the server to skip sending the response body and just respond with `304`, making Yarn reuse the cached metadata. TODO: - [x] Trim the cached metadata of unnecessary fields to decrease cache size - [x] Update benchmark scripts to make sure that they take the metadata cache into account - [x] Run more benchmarks - [ ] Make `yarn cache clean` clean the npm metadata cache (different PR) **Checklist** <!--- Don't worry if you miss something, chores are automatically tested. --> <!--- This checklist exists to help you remember doing the chores when you submit a PR. --> <!--- Put an `x` in all the boxes that apply. --> - [X] I have read the [Contributing Guide](https://yarnpkg.com/advanced/contributing). <!-- See https://yarnpkg.com/advanced/contributing#preparing-your-pr-to-be-released for more details. 
--> <!-- Check with `yarn version check` and fix with `yarn version check -i` --> - [X] I have set the packages that need to be released for my changes to be effective. <!-- The "Testing chores" workflow validates that your PR follows our guidelines. --> <!-- If it doesn't pass, click on it to see details as to what your PR might be missing. --> - [X] I will check that all automated PR checks pass before the PR gets reviewed. --------- Co-authored-by: Maël Nison <nison.mael@gmail.com>
1 parent 3eedcba commit 4897712

File tree

13 files changed

+352
-84
lines changed

13 files changed

+352
-84
lines changed

.pnp.cjs

Lines changed: 44 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.yarn/versions/07d34d58.yml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
releases:
2+
"@yarnpkg/cli": minor
3+
"@yarnpkg/core": minor
4+
"@yarnpkg/fslib": minor
5+
"@yarnpkg/plugin-npm": minor
6+
7+
declined:
8+
- "@yarnpkg/plugin-compat"
9+
- "@yarnpkg/plugin-constraints"
10+
- "@yarnpkg/plugin-dlx"
11+
- "@yarnpkg/plugin-essentials"
12+
- "@yarnpkg/plugin-exec"
13+
- "@yarnpkg/plugin-file"
14+
- "@yarnpkg/plugin-git"
15+
- "@yarnpkg/plugin-github"
16+
- "@yarnpkg/plugin-http"
17+
- "@yarnpkg/plugin-init"
18+
- "@yarnpkg/plugin-interactive-tools"
19+
- "@yarnpkg/plugin-link"
20+
- "@yarnpkg/plugin-nm"
21+
- "@yarnpkg/plugin-npm-cli"
22+
- "@yarnpkg/plugin-pack"
23+
- "@yarnpkg/plugin-patch"
24+
- "@yarnpkg/plugin-pnp"
25+
- "@yarnpkg/plugin-pnpm"
26+
- "@yarnpkg/plugin-stage"
27+
- "@yarnpkg/plugin-typescript"
28+
- "@yarnpkg/plugin-version"
29+
- "@yarnpkg/plugin-workspace-tools"
30+
- vscode-zipfs
31+
- "@yarnpkg/builder"
32+
- "@yarnpkg/doctor"
33+
- "@yarnpkg/extensions"
34+
- "@yarnpkg/libzip"
35+
- "@yarnpkg/nm"
36+
- "@yarnpkg/pnp"
37+
- "@yarnpkg/pnpify"
38+
- "@yarnpkg/sdks"
39+
- "@yarnpkg/shell"

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ The following changes only affect people writing Yarn plugins:
8484

8585
### Installs
8686

87+
- Yarn now caches npm version metadata, leading to faster resolution steps and decreased network data usage.
8788
- The `pnpm` linker avoids creating symlinks that lead to loops on the file system, by moving them higher up in the directory structure.
8889
- The `pnpm` linker no longer reports duplicate "incompatible virtual" warnings.
8990

packages/acceptance-tests/pkg-tests-specs/sources/commands/stage.test.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ describe(`Commands`, () => {
4848
await expect(run(`stage`, `-n`, {cwd: path})).resolves.toMatchObject({
4949
stdout: [
5050
`${npath.fromPortablePath(`${path}/.pnp.cjs`)}\n`,
51+
`${npath.fromPortablePath(`${path}/.yarn/global/metadata/npm/b98544/localhost/no-deps.json`)}\n`,
5152
`${npath.fromPortablePath(`${path}/.yarn/global/cache/no-deps-npm-1.0.0-cf533b267a-0.zip`)}\n`,
5253
`${npath.fromPortablePath(`${path}/.yarn/cache/.gitignore`)}\n`,
5354
`${npath.fromPortablePath(`${path}/.yarn/cache/no-deps-npm-1.0.0-cf533b267a-e0e60294c2.zip`)}\n`,

packages/plugin-npm/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"dependencies": {
1212
"@yarnpkg/fslib": "workspace:^",
1313
"enquirer": "^2.3.6",
14+
"lodash": "^4.17.15",
1415
"semver": "^7.1.2",
1516
"ssri": "^6.0.1",
1617
"tslib": "^2.4.0"
@@ -20,6 +21,7 @@
2021
"@yarnpkg/plugin-pack": "workspace:^"
2122
},
2223
"devDependencies": {
24+
"@types/lodash": "^4.14.136",
2325
"@types/semver": "^7.1.0",
2426
"@types/ssri": "^6.0.1",
2527
"@yarnpkg/core": "workspace:^",

packages/plugin-npm/sources/NpmSemverResolver.ts

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,9 @@ export class NpmSemverResolver implements Resolver {
4747
if (range === null)
4848
throw new Error(`Expected a valid range, got ${descriptor.range.slice(PROTOCOL.length)}`);
4949

50-
const registryData = await npmHttpUtils.get(npmHttpUtils.getIdentUrl(descriptor), {
51-
customErrorMessage: npmHttpUtils.customPackageError,
52-
configuration: opts.project.configuration,
53-
ident: descriptor,
54-
jsonResponse: true,
50+
const registryData = await npmHttpUtils.getPackageMetadata(descriptor, {
51+
project: opts.project,
52+
version: semver.valid(range.raw) ? range.raw : undefined,
5553
});
5654

5755
const candidates = miscUtils.mapAndFilter(Object.keys(registryData.versions), version => {
@@ -127,11 +125,9 @@ export class NpmSemverResolver implements Resolver {
127125
if (version === null)
128126
throw new ReportError(MessageName.RESOLVER_NOT_FOUND, `The npm semver resolver got selected, but the version isn't semver`);
129127

130-
const registryData = await npmHttpUtils.get(npmHttpUtils.getIdentUrl(locator), {
131-
customErrorMessage: npmHttpUtils.customPackageError,
132-
configuration: opts.project.configuration,
133-
ident: locator,
134-
jsonResponse: true,
128+
const registryData = await npmHttpUtils.getPackageMetadata(locator, {
129+
project: opts.project,
130+
version,
135131
});
136132

137133
if (!Object.prototype.hasOwnProperty.call(registryData, `versions`))

packages/plugin-npm/sources/NpmTagResolver.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,8 @@ export class NpmTagResolver implements Resolver {
3939
async getCandidates(descriptor: Descriptor, dependencies: unknown, opts: ResolveOptions) {
4040
const tag = descriptor.range.slice(PROTOCOL.length);
4141

42-
const registryData = await npmHttpUtils.get(npmHttpUtils.getIdentUrl(descriptor), {
43-
configuration: opts.project.configuration,
44-
ident: descriptor,
45-
jsonResponse: true,
42+
const registryData = await npmHttpUtils.getPackageMetadata(descriptor, {
43+
project: opts.project,
4644
});
4745

4846
if (!Object.prototype.hasOwnProperty.call(registryData, `dist-tags`))

packages/plugin-npm/sources/npmHttpUtils.ts

Lines changed: 183 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
import {Configuration, Ident, formatUtils, httpUtils, nodeUtils, StreamReport} from '@yarnpkg/core';
2-
import {MessageName, ReportError} from '@yarnpkg/core';
3-
import {prompt} from 'enquirer';
4-
import {URL} from 'url';
1+
import {Configuration, Ident, formatUtils, httpUtils, nodeUtils, StreamReport, structUtils, IdentHash, hashUtils, Project, miscUtils} from '@yarnpkg/core';
2+
import {MessageName, ReportError} from '@yarnpkg/core';
3+
import {Filename, PortablePath, ppath, toFilename, xfs} from '@yarnpkg/fslib';
4+
import {prompt} from 'enquirer';
5+
import pick from 'lodash/pick';
6+
import {URL} from 'url';
57

6-
import {Hooks} from './index';
7-
import * as npmConfigUtils from './npmConfigUtils';
8-
import {MapLike} from './npmConfigUtils';
8+
import {Hooks} from './index';
9+
import * as npmConfigUtils from './npmConfigUtils';
10+
import {MapLike} from './npmConfigUtils';
911

1012
export enum AuthType {
1113
NO_AUTH,
@@ -33,7 +35,7 @@ export type Options = httpUtils.Options & RegistryOptions & {
3335
* It doesn't handle 403 Forbidden, as the npm registry uses it when the user attempts
3436
* a prohibited action, such as publishing a package with a similar name to an existing package.
3537
*/
36-
export async function handleInvalidAuthenticationError(error: any, {attemptedAs, registry, headers, configuration}: {attemptedAs?: string, registry: string, headers: {[key: string]: string} | undefined, configuration: Configuration}) {
38+
export async function handleInvalidAuthenticationError(error: any, {attemptedAs, registry, headers, configuration}: {attemptedAs?: string, registry: string, headers: {[key: string]: string | undefined} | undefined, configuration: Configuration}) {
3739
if (isOtpError(error))
3840
throw new ReportError(MessageName.AUTHENTICATION_INVALID, `Invalid OTP token`);
3941

@@ -64,15 +66,169 @@ export function getIdentUrl(ident: Ident) {
6466
}
6567
}
6668

69+
/**
 * Options accepted by `getPackageMetadata`: the regular request `Options` without `ident` and
 * `configuration`, plus the fields declared below.
 */
export type GetPackageMetadataOptions = Omit<Options, 'ident' | 'configuration'> & {
70+
// Project whose `configuration` and `lockfileNeedsRefresh` flag are read by `getPackageMetadata`.
project: Project;
71+
72+
/**
73+
* Exact version the caller is interested in.
*
* Warning: This option will return all cached metadata if the version is found, but the rest of the metadata can be stale.
* In that case the registry is not contacted at all, so other versions and dist-tags may be outdated.
74+
*/
75+
version?: string;
76+
};
77+
78+
// We use 2 different caches:
79+
// - an in-memory cache (keyed by ident hash), so that each package's metadata is requested from the registry at most once per process
80+
// - an on-disk cache, for exact version matches and to avoid refetching the metadata if the resource hasn't changed on the server
//
// The in-memory values are either a promise of the metadata or the metadata itself.
81+
82+
const PACKAGE_METADATA_CACHE = new Map<IdentHash, Promise<PackageMetadata> | PackageMetadata>();
83+
84+
/**
 * Caches and returns the package metadata for the given ident.
 *
 * Note: This function only caches and returns specific fields from the metadata.
 * If you need other fields, use the uncached {@link get} or consider whether it would make more sense to extract
 * the fields from the on-disk packages using the linkers or from the fetch results using the fetchers.
 *
 * Lookup order:
 * 1. The in-memory cache, which only ever contains metadata that was fetched from (or revalidated
 *    against) the registry during the current process.
 * 2. The on-disk cache, but only when `version` is provided and already listed in the cached
 *    metadata. This result may be stale, so it is deliberately NOT stored in the in-memory cache;
 *    otherwise later lookups of the same ident (ranges, tags, other exact versions) would silently
 *    reuse it without ever revalidating against the registry.
 * 3. The registry, using `If-None-Match` / `If-Modified-Since` so that an unchanged document is
 *    answered with a `304` and served from the on-disk cache.
 *
 * @param ident Package whose metadata is requested.
 * @param opts.project Project providing the configuration and the `lockfileNeedsRefresh` flag.
 * @param opts.registry Registry to query; derived from the ident scope when omitted.
 * @param opts.headers Extra request headers, merged with the cache validators.
 * @param opts.version Exact version that allows the on-disk shortcut described above.
 * @returns The trimmed package metadata (`dist-tags` and the cached per-version fields).
 * @throws Whatever {@link get} or the file system calls throw; a failed request is not retried here.
 */
export async function getPackageMetadata(ident: Ident, {project, registry, headers, version, ...rest}: GetPackageMetadataOptions): Promise<PackageMetadata> {
  const alreadyFetched = PACKAGE_METADATA_CACHE.get(ident.identHash);
  if (typeof alreadyFetched !== `undefined`)
    return await alreadyFetched;

  const {configuration} = project;

  const effectiveRegistry = normalizeRegistry(configuration, {ident, registry});

  const registryFolder = getRegistryFolder(configuration, effectiveRegistry);
  const identPath = ppath.join(registryFolder, `${structUtils.slugifyIdent(ident)}.json`);

  // We bypass the on-disk cache for security reasons if the lockfile needs to be refreshed,
  // since most likely the user is trying to validate the metadata using hardened mode.
  const cached = project.lockfileNeedsRefresh
    ? null
    : await readCachedMetadataFromDisk(identPath);

  // Exact version already known on disk: no need to hit the network. The result is returned as-is
  // and intentionally not memoized, because everything else in it may be stale.
  if (cached !== null && typeof version !== `undefined` && typeof cached.metadata.versions[version] !== `undefined`)
    return cached.metadata;

  // Another caller may have started the request while we were reading from the disk; there is no
  // `await` between this check and the `set` below, so at most one request is sent per ident.
  const inFlight = PACKAGE_METADATA_CACHE.get(ident.identHash);
  if (typeof inFlight !== `undefined`)
    return await inFlight;

  const request: Promise<PackageMetadata> = get(getIdentUrl(ident), {
    ...rest,
    customErrorMessage: customPackageError,
    configuration,
    registry: effectiveRegistry,
    ident,
    headers: {
      ...headers,
      // We set both headers in case a registry doesn't support ETags
      [`If-None-Match`]: cached?.etag,
      [`If-Modified-Since`]: cached?.lastModified,
    },
    wrapNetworkRequest: async executor => async () => {
      const response = await executor();

      // Nothing changed on the server: the validators we sent came from `cached`, so it must exist
      if (response.statusCode === 304) {
        if (cached === null)
          throw new Error(`Assertion failed: cachedMetadata should not be null`);

        return {
          ...response,
          body: cached.metadata,
        };
      }

      const packageMetadata = pickPackageMetadata(JSON.parse(response.body.toString()));

      PACKAGE_METADATA_CACHE.set(ident.identHash, packageMetadata);

      const metadata: CachedMetadata = {
        metadata: packageMetadata,
        etag: response.headers.etag,
        lastModified: response.headers[`last-modified`],
      };

      // We append the PID because it is guaranteed that this code is only run once per process for a given ident
      const identPathTemp = `${identPath}-${process.pid}.tmp` as PortablePath;

      await xfs.mkdirPromise(registryFolder, {recursive: true});
      await xfs.writeJsonPromise(identPathTemp, metadata, {compact: true});

      // Doing a rename is important to ensure the cache is atomic
      await xfs.renamePromise(identPathTemp, identPath);

      return {
        ...response,
        body: packageMetadata,
      };
    },
  });

  PACKAGE_METADATA_CACHE.set(ident.identHash, request);

  return await request;
}

/**
 * Reads the on-disk cache entry stored at `identPath`.
 *
 * Returns `null` when the file is missing, unreadable, not valid JSON, or doesn't contain a
 * `metadata.versions` object; the cache is best-effort, so none of these cases are errors.
 */
async function readCachedMetadataFromDisk(identPath: PortablePath): Promise<CachedMetadata | null> {
  try {
    const raw: unknown = await xfs.readJsonPromise(identPath);

    const candidate = raw as Partial<CachedMetadata> | null | undefined;
    const versions = candidate?.metadata?.versions;

    if (typeof versions !== `object` || versions === null)
      return null;

    return candidate as CachedMetadata;
  } catch {
    return null;
  }
}
166+
167+
/**
 * Shape of the JSON document written to the on-disk metadata cache by `getPackageMetadata`.
 */
type CachedMetadata = {
168+
// Trimmed registry metadata, as produced by `pickPackageMetadata`.
metadata: PackageMetadata;
169+
// Value of the `etag` response header; sent back as `If-None-Match` on the next request.
etag?: string;
170+
// Value of the `last-modified` response header; sent back as `If-Modified-Since` on the next request.
lastModified?: string;
171+
};
172+
173+
/**
 * Subset of a registry package document that `getPackageMetadata` caches and returns.
 */
export type PackageMetadata = {
174+
// The `dist-tags` field of the registry document, copied as-is by `pickPackageMetadata`.
'dist-tags': Record<string, string>;
175+
// One entry per key of the registry `versions` field, each reduced to the fields listed in `CACHED_FIELDS`.
versions: Record<string, any>;
176+
};
177+
178+
/**
 * Property paths kept from each version entry when the metadata is trimmed by `pickPackageMetadata`.
 *
 * The list is also hashed to build `CACHE_KEY`, so editing it changes the on-disk cache folder.
 */
const CACHED_FIELDS = [
179+
`name`,
180+
181+
// Dotted path: only the `tarball` property of `dist` is listed
`dist.tarball`,
182+
183+
`bin`,
184+
`scripts`,
185+
186+
`os`,
187+
`cpu`,
188+
`libc`,
189+
190+
`dependencies`,
191+
`dependenciesMeta`,
192+
`optionalDependencies`,
193+
194+
`peerDependencies`,
195+
`peerDependenciesMeta`,
196+
];
197+
198+
/**
 * Reduces a registry package document to what gets cached: the `dist-tags` are kept as-is and
 * each version entry is narrowed down to the properties listed in `CACHED_FIELDS`.
 */
function pickPackageMetadata(metadata: PackageMetadata): PackageMetadata {
  const distTags = metadata[`dist-tags`];

  const trimmedEntries: Array<[string, unknown]> = [];
  for (const [versionName, manifest] of Object.entries(metadata.versions))
    trimmedEntries.push([versionName, pick(manifest, CACHED_FIELDS)]);

  return {
    'dist-tags': distTags,
    versions: Object.fromEntries(trimmedEntries),
  };
}
207+
208+
/**
209+
* Used to invalidate the on-disk cache when the format changes.
*
* It is the first 6 characters of a hash of `CACHED_FIELDS`, and is used by `getRegistryFolder`
* as a folder name inside the metadata folder.
210+
*/
211+
const CACHE_KEY = hashUtils.makeHash(...CACHED_FIELDS).slice(0, 6);
212+
213+
/**
 * Returns the folder holding the cached metadata of a given registry:
 * `<metadata folder>/<CACHE_KEY>/<registry hostname>`.
 *
 * NOTE(review): only the hostname of the registry URL is used, so registries that share a host
 * but differ by port or path end up in the same folder. Confirm this is intended.
 */
function getRegistryFolder(configuration: Configuration, registry: string) {
  const cacheRoot = getMetadataFolder(configuration);
  const {hostname} = new URL(registry);

  return ppath.join(cacheRoot, CACHE_KEY as Filename, toFilename(hostname));
}
221+
222+
/**
 * Returns the root of the npm metadata cache, located at `metadata/npm` inside the configured
 * `globalFolder`.
 */
function getMetadataFolder(configuration: Configuration) {
  const globalFolder = configuration.get(`globalFolder`);

  return ppath.join(globalFolder, `metadata/npm`);
}
225+
67226
export async function get(path: string, {configuration, headers, ident, authType, registry, ...rest}: Options) {
68-
if (ident && typeof registry === `undefined`)
69-
registry = npmConfigUtils.getScopeRegistry(ident.scope, {configuration});
227+
registry = normalizeRegistry(configuration, {ident, registry});
228+
70229
if (ident && ident.scope && typeof authType === `undefined`)
71230
authType = AuthType.BEST_EFFORT;
72231

73-
if (typeof registry !== `string`)
74-
throw new Error(`Assertion failed: The registry should be a string`);
75-
76232
const auth = await getAuthenticationHeader(registry, {authType, configuration, ident});
77233
if (auth)
78234
headers = {...headers, authorization: auth};
@@ -87,11 +243,7 @@ export async function get(path: string, {configuration, headers, ident, authType
87243
}
88244

89245
export async function post(path: string, body: httpUtils.Body, {attemptedAs, configuration, headers, ident, authType = AuthType.ALWAYS_AUTH, registry, otp, ...rest}: Options & {attemptedAs?: string}) {
90-
if (ident && typeof registry === `undefined`)
91-
registry = npmConfigUtils.getScopeRegistry(ident.scope, {configuration});
92-
93-
if (typeof registry !== `string`)
94-
throw new Error(`Assertion failed: The registry should be a string`);
246+
registry = normalizeRegistry(configuration, {ident, registry});
95247

96248
const auth = await getAuthenticationHeader(registry, {authType, configuration, ident});
97249
if (auth)
@@ -123,11 +275,7 @@ export async function post(path: string, body: httpUtils.Body, {attemptedAs, con
123275
}
124276

125277
export async function put(path: string, body: httpUtils.Body, {attemptedAs, configuration, headers, ident, authType = AuthType.ALWAYS_AUTH, registry, otp, ...rest}: Options & {attemptedAs?: string}) {
126-
if (ident && typeof registry === `undefined`)
127-
registry = npmConfigUtils.getScopeRegistry(ident.scope, {configuration});
128-
129-
if (typeof registry !== `string`)
130-
throw new Error(`Assertion failed: The registry should be a string`);
278+
registry = normalizeRegistry(configuration, {ident, registry});
131279

132280
const auth = await getAuthenticationHeader(registry, {authType, configuration, ident});
133281
if (auth)
@@ -159,11 +307,7 @@ export async function put(path: string, body: httpUtils.Body, {attemptedAs, conf
159307
}
160308

161309
export async function del(path: string, {attemptedAs, configuration, headers, ident, authType = AuthType.ALWAYS_AUTH, registry, otp, ...rest}: Options & {attemptedAs?: string}) {
162-
if (ident && typeof registry === `undefined`)
163-
registry = npmConfigUtils.getScopeRegistry(ident.scope, {configuration});
164-
165-
if (typeof registry !== `string`)
166-
throw new Error(`Assertion failed: The registry should be a string`);
310+
registry = normalizeRegistry(configuration, {ident, registry});
167311

168312
const auth = await getAuthenticationHeader(registry, {authType, configuration, ident});
169313
if (auth)
@@ -194,6 +338,16 @@ export async function del(path: string, {attemptedAs, configuration, headers, id
194338
}
195339
}
196340

341+
/**
 * Resolves the registry a request must be sent to.
 *
 * An explicit `registry` string is returned as-is. When no registry is given, the one configured
 * for the scope of `ident` is used.
 *
 * @throws If no registry is given and there is no ident to derive it from, or if the registry
 * isn't a string.
 */
function normalizeRegistry(configuration: Configuration, {ident, registry}: Partial<RegistryOptions>): string {
  if (typeof registry === `string`)
    return registry;

  // From here on the registry is either missing or of an unexpected type
  if (typeof registry !== `undefined` || !ident)
    throw new Error(`Assertion failed: The registry should be a string`);

  return npmConfigUtils.getScopeRegistry(ident.scope, {configuration});
}
350+
197351
async function getAuthenticationHeader(registry: string, {authType = AuthType.CONFIGURATION, configuration, ident}: {authType?: AuthType, configuration: Configuration, ident: RegistryOptions['ident']}) {
198352
const effectiveConfiguration = npmConfigUtils.getAuthConfiguration(registry, {configuration, ident});
199353
const mustAuthenticate = shouldAuthenticate(effectiveConfiguration, authType);
@@ -242,7 +396,7 @@ function shouldAuthenticate(authConfiguration: MapLike, authType: AuthType) {
242396
}
243397
}
244398

245-
async function whoami(registry: string, headers: {[key: string]: string} | undefined, {configuration}: {configuration: Configuration}) {
399+
async function whoami(registry: string, headers: {[key: string]: string | undefined} | undefined, {configuration}: {configuration: Configuration}) {
246400
if (typeof headers === `undefined` || typeof headers.authorization === `undefined`)
247401
return `an anonymous user`;
248402

packages/yarnpkg-core/sources/Plugin.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ export interface Hooks {
9191
* add some logging.
9292
*/
9393
wrapNetworkRequest?: (
94-
executor: () => Promise<any>,
94+
executor: () => Promise<httpUtils.Response>,
9595
extra: WrapNetworkRequestInfo
96-
) => Promise<() => Promise<any>>;
96+
) => Promise<() => Promise<httpUtils.Response>>;
9797

9898
/**
9999
* Called before the build, to compute a global hash key that we will use

0 commit comments

Comments
 (0)