From 517fa8d1e074e6f22b703ec21b1910be2fa027c9 Mon Sep 17 00:00:00 2001 From: Ethan Shen <42264778+nczitzk@users.noreply.github.com> Date: Mon, 15 Apr 2024 00:03:45 +0800 Subject: [PATCH] fix(route): Cool Papers (#15223) * fix(route): Cool Papers * fix radar --- lib/routes/papers/index.ts | 159 +++++++++++++++++++++------------ lib/routes/papers/namespace.ts | 2 + 2 files changed, 102 insertions(+), 59 deletions(-) diff --git a/lib/routes/papers/index.ts b/lib/routes/papers/index.ts index 80b6e7a56aa5..47cb03656e42 100644 --- a/lib/routes/papers/index.ts +++ b/lib/routes/papers/index.ts @@ -2,86 +2,127 @@ import { Route } from '@/types'; import { getCurrentPath } from '@/utils/helpers'; const __dirname = getCurrentPath(import.meta.url); -import got from '@/utils/got'; -import { load } from 'cheerio'; import { parseDate } from '@/utils/parse-date'; import { art } from '@/utils/render'; import path from 'node:path'; +import parser from '@/utils/rss-parser'; -export const route: Route = { - path: '/:category{.+}?', - name: 'Unknown', - maintainers: [], - handler, +const pdfUrlGenerators = { + arxiv: (id: string) => `https://arxiv.org/pdf/${id}.pdf`, }; -async function handler(ctx) { - const { category = 'arxiv/cs.CL' } = ctx.req.param(); +export const handler = async (ctx) => { + const { category = 'arxiv/cs.AI' } = ctx.req.param(); const limit = ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 150; const rootUrl = 'https://papers.cool'; const currentUrl = new URL(category, rootUrl).href; + const feedUrl = new URL(`${category}/feed`, rootUrl).href; const site = category.split(/\//)[0]; const apiKimiUrl = new URL(`${site}/kimi/`, rootUrl).href; - const { data: response } = await got(currentUrl); - - const $ = load(response); + const feed = await parser.parseURL(feedUrl); - const pubDate = parseDate( - $('p.info') - .first() - .text() - .match(/(\d+\s\w+\s\d{4})/)[1], - ['DD MMM YYYY', 'D MMM YYYY'] - ); + const language = 'en'; - const items = $('div.panel') - .slice(0, limit) - .toArray() - .map((item) => { - item = $(item); + const items = feed.items.slice(0, limit).map((item) => { + const title = item.title; + const guid = item.guid; - const id = item.prop('id'); - const kimiUrl = new URL(id, apiKimiUrl).href; - const enclosureUrl = - item - .find('a.pdf-preview') - .prop('onclick') - .match(/'(http.*?)'/)?.[1] ?? undefined; + const id = item.link?.split(/\//).pop() ?? ''; + const kimiUrl = new URL(id, apiKimiUrl).href; + const pdfUrl = Object.hasOwn(pdfUrlGenerators, site) ? pdfUrlGenerators[site](id) : undefined; - return { - title: item.find('span[id]').first().text(), - link: kimiUrl, - description: art(path.join(__dirname, 'templates/description.art'), { - kimiUrl, - siteUrl: item.find('a').first().prop('href'), - pdfUrl: enclosureUrl, - summary: item.find('p.summary').text(), - }), - author: item - .find('p.authors a') - .toArray() - .map((a) => $(a).text()) - .join('; '), - guid: `${currentUrl}#${id}`, - pubDate, - enclosure_url: enclosureUrl, - enclosure_type: enclosureUrl ? 'application/pdf' : undefined, - }; + const description = art(path.join(__dirname, 'templates/description.art'), { + pdfUrl, + siteUrl: item.link, + kimiUrl, + summary: item.summary, }); - const title = $('title').text(); - const icon = new URL('favicon.ico', rootUrl).href; + return { + title, + description, + pubDate: parseDate(item.pubDate ?? ''), + link: item.link, + category: item.categories, + author: item.creator, + doi: `${site}${id}`, + guid, + id: guid, + content: { + html: description, + text: item.content, + }, + language, + enclosure_url: pdfUrl, + enclosure_type: 'application/pdf', + enclosure_title: title, + }; + }); return { - item: items, - title: title.split(/-/)[0].trim(), + title: feed.title, + description: feed.description, link: currentUrl, - description: title, - icon, - logo: icon, - subtitle: $('h1').first().text(), + item: items, + allowEmpty: true, + image: feed.image?.url, + language: feed.language, }; -} +}; + +export const route: Route = { + path: '/:category{.+}?', + name: 'Topic', + url: 'papers.cool', + maintainers: ['nczitzk'], + handler, + example: '/papers/arxiv/cs.AI', + parameters: { category: 'Category, arXiv Artificial Intelligence (cs.AI) by default' }, + description: `:::tip + If you subscribe to [arXiv Artificial Intelligence (cs.AI)](https://papers.cool/arxiv/cs.AI),where the URL is \`https://papers.cool/arxiv/cs.AI\`, extract the part \`https://papers.cool/\` to the end, and use it as the parameter to fill in. Therefore, the route will be [\`/papers/arxiv/cs.AI\`](https://rsshub.app/papers/arxiv/cs.AI). + ::: + + | Category | id | + | ----------------------------------------------------- | ----------- | + | arXiv Artificial Intelligence (cs.AI) | arxiv/cs.AI | + | arXiv Computation and Language (cs.CL) | arxiv/cs.CL | + | arXiv Computer Vision and Pattern Recognition (cs.CV) | arxiv/cs.CV | + | arXiv Machine Learning (cs.LG) | arxiv/cs.LG | + `, + categories: ['journal'], + + features: { + requireConfig: false, + requirePuppeteer: false, + antiCrawler: false, + supportRadar: true, + supportBT: false, + supportPodcast: false, + supportScihub: true, + }, + radar: [ + { + title: 'arXiv Artificial Intelligence (cs.AI)', + source: ['papers.cool/arxiv/cs.AI'], + target: '/arxiv/cs.AI', + }, + { + title: 'arXiv Computation and Language (cs.CL)', + source: ['papers.cool/arxiv/cs.CL'], + target: '/arxiv/cs.CL', + }, + { + title: 'arXiv Computer Vision and Pattern Recognition (cs.CV)', + source: ['papers.cool/arxiv/cs.CV'], + target: '/arxiv/cs.CV', + }, + { + title: 'arXiv Machine Learning (cs.LG)', + source: ['papers.cool/arxiv/cs.LG'], + target: '/arxiv/cs.LG', + }, + ], +}; diff --git a/lib/routes/papers/namespace.ts b/lib/routes/papers/namespace.ts index 51e9be2ec2a9..6a831b4405fb 100644 --- a/lib/routes/papers/namespace.ts +++ b/lib/routes/papers/namespace.ts @@ -3,4 +3,6 @@ import type { Namespace } from '@/types'; export const namespace: Namespace = { name: 'Cool Papers', url: 'papers.cool', + categories: ['journal'], + description: '', };