Skip to content

Commit 8ea0e05

Browse files
committed
fix(route): use McKinsey official insights RSS
1 parent 8ffc526 commit 8ea0e05

1 file changed

Lines changed: 34 additions & 87 deletions

File tree

lib/routes/mckinsey/insights.ts

Lines changed: 34 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -1,106 +1,53 @@
11
import { load } from 'cheerio';
22

33
import type { DataItem, Route } from '@/types';
4-
import cache from '@/utils/cache';
54
import ofetch from '@/utils/ofetch';
65
import { parseDate } from '@/utils/parse-date';
76

8-
// Origin of the McKinsey site; interpolated into `link` below.
const baseUrl = 'https://www.mckinsey.com';
9-
// Featured-insights landing page, built from `baseUrl`.
const link = `${baseUrl}/featured-insights`;
10-
11-
// Header set declaring an English language preference and an RSSHub-identifying user agent.
const headers = {
12-
'accept-language': 'en-US,en;q=0.9',
13-
'user-agent': 'Mozilla/5.0 (compatible; RSSHub; +https://github.com/DIYgod/RSSHub)',
14-
};
15-
16-
// Matches a long-form English date: full month name, whitespace, 1-2 digit day, comma,
// whitespace, 4-digit year (e.g. "January 5, 2024"). Unanchored and case-sensitive.
const datePattern = /(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}/;
17-
18-
/**
 * Collapses each run of whitespace in `value` to a single space and trims both ends.
 *
 * @param value - Text to normalize; may be omitted, `null`, or `undefined`.
 * @returns The normalized text, or `''` when `value` is nullish.
 */
function normalizeText(value?: string | null): string {
19-
return value?.replace(/\s+/g, ' ').trim() ?? '';
20-
}
21-
22-
/**
 * Decides whether a URL is accepted as an insight article link.
 *
 * A URL is accepted when its hostname is exactly `www.mckinsey.com`, its lowercased
 * path starts with one of the section prefixes listed below, and the path does not
 * end with `/our-insights`.
 *
 * @param url - Parsed URL to test.
 * @returns `true` when the URL passes every check, otherwise `false`.
 */
function isInsightUrl(url: URL): boolean {
23-
// Only the www host is accepted; any other hostname is rejected outright.
if (url.hostname !== 'www.mckinsey.com') {
24-
return false;
25-
}
26-
27-
// Lowercase once so the comparisons below are case-insensitive with respect to the path.
const pathname = url.pathname.toLowerCase();
28-
29-
// Explicit rejection of the bare index pages. None of these would match a prefix below
// anyway, because every prefix ends with '/', so this guard is purely defensive.
if (pathname === '/featured-insights' || pathname === '/our-insights' || pathname === '/insights') {
30-
return false;
31-
}
32-
33-
// Accepted section prefixes; each requires at least the trailing slash after the section name.
return [
34-
'/featured-insights/',
35-
'/our-insights/',
36-
'/industries/',
37-
'/capabilities/',
38-
'/quarterly/',
39-
'/mgi/',
40-
'/institute-for-economic-mobility/',
41-
// A path ending in '/our-insights' (no trailing slash) is rejected even under an accepted prefix.
].some((prefix) => pathname.startsWith(prefix) && !pathname.endsWith('/our-insights'));
42-
}
43-
44-
/**
 * Returns text surrounding `element`, taken from one of its ancestors.
 *
 * Every ancestor matching `article, li, div, section` has its text normalized with
 * `normalizeText`; ancestors whose normalized text is shorter than 40 or longer than
 * 1600 characters are discarded, and the first survivor is returned. When none
 * survives, the normalized text of the element's direct parent is returned instead.
 *
 * NOTE(review): "first" presumably means the nearest ancestor — confirm against the
 * ordering that cheerio's `.parents()` produces.
 * NOTE(review): `$` and `element` are untyped here; presumably a cheerio API instance
 * and one of its nodes — verify against the caller.
 */
function getContainerText($, element): string {
45-
const containers = $(element)
46-
.parents('article, li, div, section')
47-
.toArray()
48-
.map((container) => normalizeText($(container).text()))
49-
// Length window: at least 40 and at most 1600 characters of normalized text.
.filter((text) => text.length >= 40 && text.length <= 1600);
50-
51-
return containers[0] ?? normalizeText($(element).parent().text());
52-
}
7+
// URL of the RSS feed that the handler downloads and parses.
const link = 'https://www.mckinsey.com/insights/rss';
8+
// Fallback for the feed-level link when the channel's <link> text is empty.
const siteUrl = 'https://www.mckinsey.com/insights';
539

5410
/**
 * Route handler that builds the McKinsey Insights feed.
 *
 * This span is a rendered diff, so two implementations are interleaved: a line that
 * follows an `N-` marker belongs to the removed HTML-scraping version, a line that
 * follows an `N+` marker belongs to the added RSS-based version, and a line that
 * follows a fused `NNMM` marker is unchanged between the two.
 *
 * Added version: downloads `link`, parses the body with cheerio in XML mode, maps each
 * `<item>` to an object with title, link, description, pubDate, guid and category,
 * drops entries whose title or link is empty, and keeps the first 50. The feed-level
 * title and link come from the `<channel>` element, with literal fallbacks.
 *
 * Removed version: downloaded the landing page, collected `a[href]` anchors, filtered
 * them with `isInsightUrl`, de-duplicated by URL, derived a date and description from
 * surrounding text, kept the first 30, and passed each through `cache.tryGet`.
 */
const handler: Route['handler'] = async () => {
55-
const data = await ofetch(link, { headers });
56-
const $ = load(data);
57-
// Removed version: URLs already emitted, used to skip repeated anchors.
const seen = new Set<string>();
58-
59-
const item = $('a[href]')
11+
// NOTE(review): `data` is handed straight to `load` below, so this presumably relies on
// ofetch returning the feed body as a string — confirm for the content type the feed is
// served with.
const data = await ofetch(link, {
12+
headers: {
13+
accept: 'application/rss+xml, application/xml, text/xml;q=0.9, */*;q=0.8',
14+
'accept-language': 'en-US,en;q=0.9',
15+
'user-agent': 'Mozilla/5.0 (compatible; RSSHub; +https://github.com/DIYgod/RSSHub)',
16+
},
17+
retry: 1,
18+
timeout: 15000,
19+
});
20+
21+
// Added version: parse as XML rather than HTML.
const $ = load(data, { xmlMode: true });
22+
23+
const item = $('item')
6024
.toArray()
61-
.map((element) => {
62-
const $element = $(element);
63-
const rawHref = $element.attr('href');
64-
// Removed version: the `title` attribute wins over the anchor's text when it is non-empty.
const title = normalizeText($element.attr('title') || $element.text());
65-
66-
// Removed version: skip anchors with no href, a title under 12 characters, or a title
// starting with the word "more" or "download" (case-insensitive).
if (!rawHref || title.length < 12 || /^more\b/i.test(title) || /^download\b/i.test(title)) {
67-
return;
68-
}
69-
70-
// Removed version: resolve against `baseUrl` and drop the fragment so that links
// differing only by hash compare equal in `seen`.
const itemUrl = new URL(rawHref, baseUrl);
71-
itemUrl.hash = '';
72-
73-
if (!isInsightUrl(itemUrl) || seen.has(itemUrl.href)) {
74-
return;
75-
}
76-
77-
seen.add(itemUrl.href);
78-
79-
const containerText = getContainerText($, element);
80-
const date = containerText.match(datePattern)?.[0];
81-
// Removed version: description is the container text minus the first occurrence of the
// title and of the date, normalized and capped at 800 characters.
const description = normalizeText(containerText.replace(title, '').replace(date ?? '', '')).slice(0, 800);
25+
.map((entry) => {
26+
const $entry = $(entry);
27+
// Added version: trimmed text of every <category> under the item, with empty strings dropped.
const categories = $entry
28+
.find('category')
29+
.toArray()
30+
.map((category) => $(category).text().trim())
31+
.filter(Boolean);
8232

8333
return {
84-
title,
85-
link: itemUrl.href,
86-
description: description || title,
87-
pubDate: date ? parseDate(date) : undefined,
34+
title: $entry.find('title').first().text().trim(),
35+
link: $entry.find('link').first().text().trim(),
36+
description: $entry.find('description').first().text().trim(),
37+
// NOTE(review): unlike the removed version, `parseDate` is called even when the item has
// no <pubDate> (the argument is then ''). Confirm what parseDate returns for that input.
pubDate: parseDate($entry.find('pubDate').first().text().trim()),
38+
// NOTE(review): this is '' when the item has no <guid>; verify that downstream treats an
// empty guid the same as a missing one.
guid: $entry.find('guid').first().text().trim(),
39+
category: categories,
8840
};
8941
})
90-
.filter(Boolean)
91-
.slice(0, 30) as DataItem[];
92-
93-
const cachedItems = await Promise.all(
94-
item.map((entry) =>
95-
cache.tryGet(`mckinsey:insights:${entry.link}`, async () => entry)
96-
)
97-
);
42+
// Added version: keep only entries with a non-empty title and link, then cap at 50.
// NOTE(review): the `as DataItem[]` assertion bypasses checking of the mapped shape.
.filter((entry) => entry.title && entry.link)
43+
.slice(0, 50) as DataItem[];
9844

9945
return {
100-
title: 'McKinsey Insights',
101-
link,
// Added version: prefer the channel's own title and link; fall back to literals when empty.
46+
title: $('channel > title').first().text().trim() || 'McKinsey Insights',
47+
link: $('channel > link').first().text().trim() || siteUrl,
48+
description: $('channel > description').first().text().trim(),
10249
language: 'en-US',
103-
item: cachedItems,
50+
item,
10451
};
10552
};
10653

0 commit comments

Comments
 (0)