Skip to content

Commit 2ae10a7

Browse files
TonyRLCopilotCopilot
authored
feat: replace rand-user-agent with header-generator (#20480)
* Initial plan * Replace @tonyrl/rand-user-agent with header-generator and add sec-ch-* headers Co-authored-by: TonyRL <11386903+TonyRL@users.noreply.github.com> * Extract unwanted UA validation into standalone isValidUserAgent function Co-authored-by: TonyRL <11386903+TonyRL@users.noreply.github.com> * Add headerGeneratorPreset option to got and ofetch utilities * Replace randUserAgent usage in lib/routes with headerGeneratorPreset option Co-authored-by: TonyRL <11386903+TonyRL@users.noreply.github.com> * chore: pin header-generator to exact match * fix: replace randUserAgent with a static user agent string in config * Remove deprecated _randUserAgent function and related tests Co-authored-by: TonyRL <11386903+TonyRL@users.noreply.github.com> * refactor: rename to header-generator * Changes before error encountered Co-authored-by: TonyRL <11386903+TonyRL@users.noreply.github.com> * fix: update generateHeaders parameter type to Partial<HeaderGeneratorOptions> * fix: add ios preset * fix: rename headerGeneratorPreset to headerGeneratorOptions * feat: add MODERN_IOS preset and update header-generator patch hash * fix: use ios preset * fix: update FetchOptions to include headerGeneratorOptions type * refactor: simplify header generation and update header inclusion list * fix: remove header-generator patch * refactor: update user-agent handling in requests and tests * test: remove redundant headerGeneratorOptions test and add new ofetch header preset test * test: add custom user-agent test for ofetch * test: update user-agent expectation in ofetch headers test * Update lib/utils/ofetch.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * refactor: replace hardcoded headers with generatedHeaders in fetch and get request handlers * refactor: rename test suite from rand-user-agent to header-generator and remove unused dependency * Update lib/utils/header-generator.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- 
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: TonyRL <11386903+TonyRL@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent ee55d8f commit 2ae10a7

File tree

19 files changed

+394
-184
lines changed

19 files changed

+394
-184
lines changed

lib/config.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import randUserAgent from '@/utils/rand-user-agent';
21
import 'dotenv/config';
32
import { ofetch } from 'ofetch';
43

@@ -490,7 +489,7 @@ const calculateValue = () => {
490489
listenInaddrAny: toBoolean(envs.LISTEN_INADDR_ANY, true), // 是否允许公网连接,取值 0 1
491490
requestRetry: toInt(envs.REQUEST_RETRY, 2), // 请求失败重试次数
492491
requestTimeout: toInt(envs.REQUEST_TIMEOUT, 30000), // Milliseconds to wait for the server to end the response before aborting the request
493-
ua: envs.UA ?? (toBoolean(envs.NO_RANDOM_UA, false) ? TRUE_UA : randUserAgent({ browser: 'chrome', os: 'mac os', device: 'desktop' })),
492+
ua: envs.UA ?? (toBoolean(envs.NO_RANDOM_UA, false) ? TRUE_UA : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 15_6_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36'),
494493
trueUA: TRUE_UA,
495494
allowOrigin: envs.ALLOW_ORIGIN,
496495
// cache

lib/routes/cctv/utils/news.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@ import got from '@/utils/got';
33
import path from 'node:path';
44
import { parseDate } from '@/utils/parse-date';
55
import timezone from '@/utils/timezone';
6-
import randUserAgent from '@/utils/rand-user-agent';
7-
8-
const UA = randUserAgent({ browser: 'mobile safari', os: 'ios', device: 'mobile' });
6+
import { PRESETS } from '@/utils/header-generator';
97

108
const getNews = async (category) => {
119
const url = `https://news.cctv.com/2019/07/gaiban/cmsdatainterface/page/${category}_1.jsonp`;
@@ -15,8 +13,8 @@ const getNews = async (category) => {
1513
url,
1614
headers: {
1715
Referer: `http://news.cctv.com/${category}`,
18-
'User-Agent': UA,
1916
},
17+
headerGeneratorOptions: PRESETS.MODERN_IOS,
2018
});
2119

2220
const data = JSON.parse(response.data.slice(category.length + 1, -1));
@@ -58,9 +56,7 @@ const getNews = async (category) => {
5856
const { data } = await got({
5957
method: 'get',
6058
url: api,
61-
headers: {
62-
'User-Agent': UA,
63-
},
59+
headerGeneratorOptions: PRESETS.MODERN_IOS,
6460
});
6561

6662
switch (type) {

lib/routes/fortunechina/index.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@ import cache from '@/utils/cache';
33
import ofetch from '@/utils/ofetch';
44
import { load } from 'cheerio';
55
import { parseDate, parseRelativeDate } from '@/utils/parse-date';
6-
import randUserAgent from '@/utils/rand-user-agent';
7-
8-
const UA = randUserAgent({ browser: 'mobile safari', os: 'ios', device: 'mobile' });
6+
import { PRESETS } from '@/utils/header-generator';
97

108
export const route: Route = {
119
path: '/:category?',
@@ -62,9 +60,7 @@ async function handler(ctx) {
6260
items.map((item) =>
6361
cache.tryGet(item.link, async () => {
6462
const detailResponse = await ofetch(item.link, {
65-
headers: {
66-
'User-Agent': UA,
67-
},
63+
headerGeneratorOptions: PRESETS.MODERN_IOS,
6864
});
6965

7066
const content = load(detailResponse);

lib/routes/theatlantic/utils.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,15 @@ import ofetch from '@/utils/ofetch';
44
import { parseDate } from '@/utils/parse-date';
55
import { art } from '@/utils/render';
66
import path from 'node:path';
7-
import randUserAgent from '@/utils/rand-user-agent';
8-
9-
const UA = randUserAgent({ browser: 'chrome', os: 'android', device: 'mobile' });
7+
import { PRESETS } from '@/utils/header-generator';
108

119
const getArticleDetails = async (items) => {
1210
const list = await Promise.all(
1311
items.map((item) =>
1412
cache.tryGet(item.link, async () => {
1513
const url = item.link;
1614
const html = await ofetch(url, {
17-
headers: {
18-
'User-Agent': UA,
19-
},
15+
headerGeneratorOptions: PRESETS.MODERN_ANDROID,
2016
});
2117
const $ = load(html);
2218
let data = JSON.parse($('script#__NEXT_DATA__').text());

lib/routes/toutiao/user.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { Route } from '@/types';
22
import cache from '@/utils/cache';
33
import ofetch from '@/utils/ofetch';
44
import { parseDate } from '@/utils/parse-date';
5-
import randUserAgent from '@/utils/rand-user-agent';
5+
import { PRESETS, generateHeaders } from '@/utils/header-generator';
66
import { generate_a_bogus } from './a-bogus';
77
import { Feed } from './types';
88
import RejectError from '@/errors/types/reject';
@@ -30,17 +30,17 @@ export const route: Route = {
3030

3131
async function handler(ctx) {
3232
const { token } = ctx.req.param();
33-
const ua = randUserAgent({ browser: 'chrome', os: 'windows', device: 'desktop' });
3433

3534
const feed = (await cache.tryGet(
3635
`toutiao:user:${token}`,
3736
async () => {
3837
const query = `category=profile_all&token=${token}&max_behot_time=0&entrance_gid&aid=24&app_name=toutiao_web`;
3938

40-
const data = await ofetch(`https://www.toutiao.com/api/pc/list/feed?${query}&a_bogus=${generate_a_bogus(query, ua)}`, {
41-
headers: {
42-
'User-Agent': ua,
43-
},
39+
const headers = generateHeaders(PRESETS.MODERN_WINDOWS_CHROME);
40+
const userAgent = headers['user-agent'];
41+
42+
const data = await ofetch(`https://www.toutiao.com/api/pc/list/feed?${query}&a_bogus=${generate_a_bogus(query, userAgent)}`, {
43+
headerGeneratorOptions: PRESETS.MODERN_WINDOWS_CHROME,
4444
});
4545

4646
return data.data;

lib/routes/wsj/utils.ts

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,16 @@ import got from '@/utils/got';
44
import { parseDate } from '@/utils/parse-date';
55
import { art } from '@/utils/render';
66
import path from 'node:path';
7-
import randUserAgent from '@/utils/rand-user-agent';
7+
import { PRESETS } from '@/utils/header-generator';
88

9-
const UA = randUserAgent({ browser: 'chrome', os: 'android', device: 'mobile' });
10-
11-
// const chromeMobileUserAgent = 'Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/67.0.3396.87 Mobile Safari/537.36';
129
const parseArticle = (item) =>
1310
cache.tryGet(item.link, async () => {
1411
// Fetch the AMP version
1512
const url = item.link.replace(/(?<=^https:\/\/\w+\.wsj\.com)/, '/amp');
1613
const response = await got({
1714
url,
1815
method: 'get',
19-
headers: {
20-
'User-Agent': UA,
21-
},
16+
headerGeneratorOptions: PRESETS.MODERN_ANDROID,
2217
});
2318
const html = response.data;
2419
const $ = load(html);

lib/routes/xueqiu/snb.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import { Route } from '@/types';
22
import got from '@/utils/got';
33
import { parseDate } from '@/utils/parse-date';
4-
import randUserAgent from '@/utils/rand-user-agent';
5-
6-
const UA = randUserAgent({ browser: 'chrome', os: 'android', device: 'mobile' });
4+
import { PRESETS } from '@/utils/header-generator';
75

86
export const route: Route = {
97
path: '/snb/:id',
@@ -33,9 +31,7 @@ async function handler(ctx) {
3331
const url = 'https://xueqiu.com/p/' + id;
3432

3533
const response = await got(url, {
36-
headers: {
37-
'User-Agent': UA,
38-
},
34+
headerGeneratorOptions: PRESETS.MODERN_ANDROID,
3935
});
4036

4137
const data = response.data;

lib/utils/got.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ import { config } from '@/config';
55
import { Cookie, CookieJar } from 'tough-cookie';
66

77
describe('got', () => {
8-
it('headers', async () => {
8+
it('no ua headers', async () => {
99
const { data } = await got('http://rsshub.test/headers');
10-
expect(data['user-agent']).toBe(config.ua);
10+
expect(data['user-agent']).toBeUndefined();
1111
});
1212

1313
it('retry', async () => {

lib/utils/header-generator.test.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import { describe, expect, it } from 'vitest';
2+
import ofetch from '@/utils/ofetch';
3+
import { generateHeaders, PRESETS } from '@/utils/header-generator';
4+
5+
// Test suite covering user-agent behaviour of the ofetch wrapper and the header sets
// returned by generateHeaders for several presets.
describe('header-generator', () => {
6+
// A request without explicit headers is expected to report no user-agent.
// NOTE(review): 'http://rsshub.test/headers' is presumably a mocked endpoint that echoes request headers — confirm against the test setup.
it('should has no ua', async () => {
7+
const response = await ofetch('http://rsshub.test/headers');
8+
expect(response['user-agent']).toBeUndefined();
9+
});
10+
11+
// A caller-supplied user-agent header must be reported back unchanged.
it('should match ua configurated', async () => {
12+
const testUa = 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1';
13+
const response = await ofetch('http://rsshub.test/headers', {
14+
headers: {
15+
'user-agent': testUa,
16+
},
17+
});
18+
expect(response['user-agent']).toBe(testUa);
19+
});
20+
21+
// The macOS Chrome preset must yield client-hint (sec-ch-*) and fetch-metadata (sec-fetch-*) headers.
it('generateHeaders should include sec-ch and sec-fetch headers', () => {
22+
const headers = generateHeaders(PRESETS.MODERN_MACOS_CHROME);
23+
24+
expect(headers['user-agent']).toBeDefined();
25+
expect(headers['sec-ch-ua']).toBeDefined();
26+
expect(headers['sec-ch-ua-mobile']).toBeDefined();
27+
expect(headers['sec-ch-ua-platform']).toBeDefined();
28+
expect(headers['sec-fetch-site']).toBeDefined();
29+
expect(headers['sec-fetch-mode']).toBeDefined();
30+
expect(headers['sec-fetch-user']).toBeDefined();
31+
expect(headers['sec-fetch-dest']).toBeDefined();
32+
33+
// Platform hint is a quoted string; '?0' marks a non-mobile client.
expect(headers['sec-ch-ua-platform']).toBe('"macOS"');
34+
expect(headers['sec-ch-ua-mobile']).toBe('?0');
35+
});
36+
37+
// The Windows Chrome preset must produce a desktop Windows profile with a Chrome user agent.
it('generateHeaders should work with headerGeneratorOptions', () => {
38+
const headers = generateHeaders(PRESETS.MODERN_WINDOWS_CHROME);
39+
40+
expect(headers['user-agent']).toBeDefined();
41+
expect(headers['sec-ch-ua']).toBeDefined();
42+
expect(headers['sec-ch-ua-mobile']).toBeDefined();
43+
expect(headers['sec-ch-ua-platform']).toBeDefined();
44+
45+
expect(headers['sec-ch-ua-platform']).toBe('"Windows"');
46+
expect(headers['sec-ch-ua-mobile']).toBe('?0');
47+
expect(headers['user-agent']).toMatch(/Chrome/);
48+
});
49+
50+
// Calling generateHeaders() without arguments must behave like the desktop macOS Chrome profile.
it('generateHeaders should use default preset when no preset is provided', () => {
51+
const headers = generateHeaders();
52+
53+
expect(headers['user-agent']).toBeDefined();
54+
expect(headers['sec-ch-ua']).toBeDefined();
55+
expect(headers['sec-ch-ua-mobile']).toBeDefined();
56+
expect(headers['sec-ch-ua-platform']).toBeDefined();
57+
58+
expect(headers['sec-ch-ua-platform']).toBe('"macOS"');
59+
expect(headers['sec-ch-ua-mobile']).toBe('?0');
60+
expect(headers['user-agent']).toMatch(/Chrome/);
61+
});
62+
});

lib/utils/header-generator.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { HeaderGenerator, PRESETS, type HeaderGeneratorOptions } from 'header-generator';
2+
3+
export { PRESETS } from 'header-generator';
4+
5+
/**
 * Substrings that disqualify a generated user agent, keyed by the lower-case
 * browser family they apply to. Families without an entry are never filtered.
 */
const UNWANTED_USER_AGENT_MARKERS = new Map<string, readonly string[]>([
    ['chrome', ['Chrome-Lighthouse', 'Gener8', 'HeadlessChrome', 'SMTBot', 'Electron', 'Code']],
    ['safari', ['Applebot']],
]);

/**
 * Checks whether a generated user agent is usable, i.e. it contains none of the
 * unwanted substrings registered for its browser family.
 *
 * @param userAgent The user agent string to validate
 * @param browser The browser family (case-insensitive) that selects which filters apply
 * @returns `true` when no unwanted substring is present, or when no filter exists for `browser`
 */
const isValidUserAgent = (userAgent: string, browser: string): boolean => {
    // A Map lookup (rather than a plain object) keeps names such as "constructor" from
    // resolving to inherited properties; unknown families simply get an empty filter list.
    const markers = UNWANTED_USER_AGENT_MARKERS.get(browser.toLowerCase()) ?? [];
    return !markers.some((marker) => userAgent.includes(marker));
};
25+
26+
/** Cache of HeaderGenerator instances, keyed by the JSON-serialised preset. */
const generatorCache = new Map<string, HeaderGenerator>();

/** Maximum number of times headers are regenerated after an unwanted user agent is produced. */
const MAX_REGENERATION_ATTEMPTS = 10;

/**
 * Infers the browser family from a user agent string so the matching
 * unwanted-string filters can be applied to it.
 *
 * @param userAgent The user agent string to inspect
 * @returns `'firefox'`, `'safari'` or `'chrome'` (the fallback)
 */
const detectBrowser = (userAgent: string): string => {
    if (userAgent.includes('Firefox')) {
        return 'firefox';
    }
    // Chrome user agents also carry a "Safari" token, so Safari is only reported when "Chrome" is absent.
    if (userAgent.includes('Safari') && !userAgent.includes('Chrome')) {
        return 'safari';
    }
    return 'chrome';
};

/**
 * Generates a set of request headers for the given preset, retrying when the
 * generated user agent contains an unwanted string (see `isValidUserAgent`).
 *
 * The browser family is re-detected for every candidate, so a regenerated user
 * agent is always checked against the filters of its own family.
 *
 * @param preset Options from the header-generator package (defaults to `PRESETS.MODERN_MACOS_CHROME`)
 * @returns Headers object with `user-agent` and additional headers. If every retry
 *          still yields an unwanted user agent, the last candidate is returned as a best effort.
 */
export const generateHeaders = (preset: Partial<HeaderGeneratorOptions> = PRESETS.MODERN_MACOS_CHROME) => {
    // One generator per distinct preset; the serialised preset doubles as the cache key.
    const cacheKey = JSON.stringify(preset);
    let generator = generatorCache.get(cacheKey);
    if (!generator) {
        generator = new HeaderGenerator(preset);
        generatorCache.set(cacheKey, generator);
    }

    // NOTE(review): the retry loop presumes getHeaders() yields a fresh randomised set on each call — confirm against header-generator.
    let headers = generator.getHeaders();
    for (let attempts = 0; attempts < MAX_REGENERATION_ATTEMPTS; attempts++) {
        const userAgent = headers['user-agent'];
        if (isValidUserAgent(userAgent, detectBrowser(userAgent))) {
            break;
        }
        headers = generator.getHeaders();
    }

    return headers;
};
60+
61+
/** Allow-list of header names to take over from header-generator output,
62+
* excluding headers that are typically set manually or by the environment
* (those are kept below as commented-out entries for reference).
* NOTE(review): presumably consumed by the got/ofetch request wrappers — confirm against callers.
63+
*/
64+
export const generatedHeaders = new Set([
65+
// 'content-length',
66+
// 'cache-control',
67+
// sec-ch-ua (chrome client hints)
68+
'sec-ch-ua',
69+
'sec-ch-ua-mobile',
70+
'sec-ch-ua-platform',
71+
// 'origin',
72+
// 'content-type',
73+
'upgrade-insecure-requests',
74+
// 'user-agent', // handle manually
75+
'accept',
76+
// sec-fetch (fetch metadata)
77+
'sec-fetch-site',
78+
'sec-fetch-mode',
79+
'sec-fetch-user',
80+
'sec-fetch-dest',
81+
// 'referer', // handle manually
82+
'accept-encoding',
83+
'accept-language',
84+
// 'cookie',
85+
'priority',
86+
]);

0 commit comments

Comments
 (0)