diff --git a/lib/routes/scmp/index.ts b/lib/routes/scmp/index.ts index 6c279811b687..0ca0f0bca603 100644 --- a/lib/routes/scmp/index.ts +++ b/lib/routes/scmp/index.ts @@ -1,9 +1,7 @@ import { Route } from '@/types'; import cache from '@/utils/cache'; -import { load } from 'cheerio'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; import { parseItem } from './utils'; +import parser from '@/utils/rss-parser'; export const route: Route = { path: '/:category_id', @@ -32,56 +30,19 @@ export const route: Route = { async function handler(ctx) { const categoryId = ctx.req.param('category_id'); const rssUrl = `https://www.scmp.com/rss/${categoryId}/feed`; - const { data: response } = await got(rssUrl); - const $ = load(response, { - xmlMode: true, - }); - - const list = $('item') - .toArray() - .map((elem) => { - const item = $(elem); - const enclosure = item.find('enclosure').first(); - const mediaContent = item.find('media\\:content').toArray()[0]; - const thumbnail = item.find('media\\:thumbnail').toArray()[0]; - return { - title: item.find('title').text(), - description: item.find('description').text(), - link: item.find('link').text().split('?utm_source')[0], - author: item.find('author').text(), - pubDate: parseDate(item.find('pubDate').text()), - enclosure_url: enclosure?.attr('url'), - enclosure_length: enclosure?.attr('length'), - enclosure_type: enclosure?.attr('type'), - media: { - content: Object.keys(mediaContent.attribs).reduce((data, key) => { - data[key] = mediaContent.attribs[key]; - return data; - }, {}), - thumbnail: thumbnail?.attribs - ? Object.keys(thumbnail.attribs).reduce((data, attr) => { - data[attr] = thumbnail.attribs[attr]; - return data; - }, {}) - : undefined, - }, - }; - }); + const rss = await parser.parseURL(rssUrl); - const items = await Promise.all(list.map((item) => cache.tryGet(item.link, () => parseItem(item)))); + const items = await Promise.all(rss.items.map((item) => cache.tryGet(item.link, () => parseItem(item)))); ctx.set('json', { items, }); return { - title: $('channel > title').text(), - link: $('channel > link').text(), - description: $('channel > description').text(), + ...rss, item: items, language: 'en-hk', icon: 'https://assets.i-scmp.com/static/img/icons/scmp-icon-256x256.png', logo: 'https://customerservice.scmp.com/img/logo_scmp@2x.png', - image: $('channel > image > url').text(), }; } diff --git a/lib/routes/scmp/utils.ts b/lib/routes/scmp/utils.ts index 4012480bf9d6..8bc7ef840b10 100644 --- a/lib/routes/scmp/utils.ts +++ b/lib/routes/scmp/utils.ts @@ -1,5 +1,5 @@ import { load } from 'cheerio'; -import got from '@/utils/got'; +import ofetch from '@/utils/ofetch'; import { parseDate } from '@/utils/parse-date'; export const renderHTML = (node) => { @@ -60,7 +60,7 @@ export const renderHTML = (node) => { }; export const parseItem = async (item) => { - const { data: response, url } = await got(item.link); + const { _data: response, url } = await ofetch.raw(item.link); if (new URL(url).hostname !== 'www.scmp.com') { // e.g., https://multimedia.scmp.com/ @@ -79,6 +79,10 @@ export const parseItem = async (item) => { item.updated = parseDate(article.updatedDate, 'x'); item.category = [...new Set([...article.topics.map((t) => t.name), ...article.sections.flatMap((t) => t.value.map((v) => v.name)), ...article.keywords.map((k) => k?.split(', '))])]; + item.enclosure_length = item.enclosure.length; + item.enclosure_url = item.enclosure.url; + item.enclosure_type = item.enclosure.type; + // N.B. gallery in article is not rendered // e.g., { type: 'div', attribs: { class: 'scmp-photo-gallery', 'data-gallery-nid': '3239409' }} // from https://www.scmp.com/news/china/politics/article/3239355/li-keqiang-former-premier-china-dead