From 517fa8d1e074e6f22b703ec21b1910be2fa027c9 Mon Sep 17 00:00:00 2001
From: Ethan Shen <42264778+nczitzk@users.noreply.github.com>
Date: Mon, 15 Apr 2024 00:03:45 +0800
Subject: [PATCH] fix(route): Cool Papers (#15223)

* fix(route): Cool Papers

* fix radar
---
 lib/routes/papers/index.ts     | 159 +++++++++++++++++++++------------
 lib/routes/papers/namespace.ts |   2 +
 2 files changed, 102 insertions(+), 59 deletions(-)

diff --git a/lib/routes/papers/index.ts b/lib/routes/papers/index.ts
index 80b6e7a56aa5..47cb03656e42 100644
--- a/lib/routes/papers/index.ts
+++ b/lib/routes/papers/index.ts
@@ -2,86 +2,127 @@ import { Route } from '@/types';
 import { getCurrentPath } from '@/utils/helpers';
 const __dirname = getCurrentPath(import.meta.url);
 
-import got from '@/utils/got';
-import { load } from 'cheerio';
 import { parseDate } from '@/utils/parse-date';
 import { art } from '@/utils/render';
 import path from 'node:path';
+import parser from '@/utils/rss-parser';
 
-export const route: Route = {
-    path: '/:category{.+}?',
-    name: 'Unknown',
-    maintainers: [],
-    handler,
+const pdfUrlGenerators = { // per-site builders that turn a paper id into a direct PDF URL; sites without an entry get no PDF link
+    arxiv: (id: string) => `https://arxiv.org/pdf/${id}.pdf`,
 };
 
-async function handler(ctx) {
-    const { category = 'arxiv/cs.CL' } = ctx.req.param();
+export const handler = async (ctx) => { // builds the feed for one category by parsing `<category>/feed` on papers.cool with rss-parser
+    const { category = 'arxiv/cs.AI' } = ctx.req.param();
     const limit = ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 150;
 
     const rootUrl = 'https://papers.cool';
     const currentUrl = new URL(category, rootUrl).href;
+    const feedUrl = new URL(`${category}/feed`, rootUrl).href;
 
     const site = category.split(/\//)[0];
     const apiKimiUrl = new URL(`${site}/kimi/`, rootUrl).href;
 
-    const { data: response } = await got(currentUrl);
-
-    const $ = load(response);
+    const feed = await parser.parseURL(feedUrl);
 
-    const pubDate = parseDate(
-        $('p.info')
-            .first()
-            .text()
-            .match(/(\d+\s\w+\s\d{4})/)[1],
-        ['DD MMM YYYY', 'D MMM YYYY']
-    );
+    const language = 'en';
 
-    const items = $('div.panel')
-        .slice(0, limit)
-        .toArray()
-        .map((item) => {
-            item = $(item);
+    const items = feed.items.slice(0, limit).map((item) => {
+        const title = item.title;
+        const guid = item.guid;
 
-            const id = item.prop('id');
-            const kimiUrl = new URL(id, apiKimiUrl).href;
-            const enclosureUrl =
-                item
-                    .find('a.pdf-preview')
-                    .prop('onclick')
-                    .match(/'(http.*?)'/)?.[1] ?? undefined;
+        const id = item.link?.split(/\//).pop() ?? ''; // paper id = last path segment of the item link
+        const kimiUrl = new URL(id, apiKimiUrl).href;
+        const pdfUrl = Object.hasOwn(pdfUrlGenerators, site) ? pdfUrlGenerators[site](id) : undefined; // undefined for sites without a generator
 
-            return {
-                title: item.find('span[id]').first().text(),
-                link: kimiUrl,
-                description: art(path.join(__dirname, 'templates/description.art'), {
-                    kimiUrl,
-                    siteUrl: item.find('a').first().prop('href'),
-                    pdfUrl: enclosureUrl,
-                    summary: item.find('p.summary').text(),
-                }),
-                author: item
-                    .find('p.authors a')
-                    .toArray()
-                    .map((a) => $(a).text())
-                    .join('; '),
-                guid: `${currentUrl}#${id}`,
-                pubDate,
-                enclosure_url: enclosureUrl,
-                enclosure_type: enclosureUrl ? 'application/pdf' : undefined,
-            };
+        const description = art(path.join(__dirname, 'templates/description.art'), {
+            pdfUrl,
+            siteUrl: item.link,
+            kimiUrl,
+            summary: item.summary,
         });
 
-    const title = $('title').text();
-    const icon = new URL('favicon.ico', rootUrl).href;
+        return {
+            title,
+            description,
+            pubDate: item.pubDate ? parseDate(item.pubDate) : undefined, // do not parse an empty string when the feed item has no date
+            link: item.link,
+            category: item.categories,
+            author: item.creator,
+            doi: `${site}${id}`,
+            guid,
+            id: guid,
+            content: {
+                html: description,
+                text: item.content,
+            },
+            language,
+            enclosure_url: pdfUrl,
+            enclosure_type: pdfUrl ? 'application/pdf' : undefined, // only advertise a PDF enclosure when a PDF URL exists
+            enclosure_title: title,
+        };
+    });
 
     return {
-        item: items,
-        title: title.split(/-/)[0].trim(),
+        title: feed.title,
+        description: feed.description,
         link: currentUrl,
-        description: title,
-        icon,
-        logo: icon,
-        subtitle: $('h1').first().text(),
+        item: items,
+        allowEmpty: true,
+        image: feed.image?.url,
+        language: feed.language,
     };
-}
+};
+
+export const route: Route = { // route metadata for the Cool Papers category feed served by `handler`
+    path: '/:category{.+}?', // category may contain slashes (e.g. arxiv/cs.AI) and may be omitted
+    name: 'Topic',
+    url: 'papers.cool',
+    maintainers: ['nczitzk'],
+    handler,
+    example: '/papers/arxiv/cs.AI',
+    parameters: { category: 'Category, arXiv Artificial Intelligence (cs.AI) by default' },
+    description: `:::tip
+  If you subscribe to [arXiv Artificial Intelligence (cs.AI)](https://papers.cool/arxiv/cs.AI), where the URL is \`https://papers.cool/arxiv/cs.AI\`, extract the part after \`https://papers.cool/\` to the end, and use it as the parameter to fill in. Therefore, the route will be [\`/papers/arxiv/cs.AI\`](https://rsshub.app/papers/arxiv/cs.AI).
+  :::
+
+  | Category                                              | id          |
+  | ----------------------------------------------------- | ----------- |
+  | arXiv Artificial Intelligence (cs.AI)                 | arxiv/cs.AI |
+  | arXiv Computation and Language (cs.CL)                | arxiv/cs.CL |
+  | arXiv Computer Vision and Pattern Recognition (cs.CV) | arxiv/cs.CV |
+  | arXiv Machine Learning (cs.LG)                        | arxiv/cs.LG |
+  `,
+    categories: ['journal'],
+
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportRadar: true,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: true,
+    },
+    radar: [ // one entry per category listed in the description table
+        {
+            title: 'arXiv Artificial Intelligence (cs.AI)',
+            source: ['papers.cool/arxiv/cs.AI'],
+            target: '/arxiv/cs.AI',
+        },
+        {
+            title: 'arXiv Computation and Language (cs.CL)',
+            source: ['papers.cool/arxiv/cs.CL'],
+            target: '/arxiv/cs.CL',
+        },
+        {
+            title: 'arXiv Computer Vision and Pattern Recognition (cs.CV)',
+            source: ['papers.cool/arxiv/cs.CV'],
+            target: '/arxiv/cs.CV',
+        },
+        {
+            title: 'arXiv Machine Learning (cs.LG)',
+            source: ['papers.cool/arxiv/cs.LG'],
+            target: '/arxiv/cs.LG',
+        },
+    ],
+};
diff --git a/lib/routes/papers/namespace.ts b/lib/routes/papers/namespace.ts
index 51e9be2ec2a9..6a831b4405fb 100644
--- a/lib/routes/papers/namespace.ts
+++ b/lib/routes/papers/namespace.ts
@@ -3,4 +3,6 @@ import type { Namespace } from '@/types';
 export const namespace: Namespace = {
     name: 'Cool Papers',
     url: 'papers.cool',
+    categories: ['journal'], // same category the route in index.ts declares
+    description: '', // empty string: no namespace-level description text is provided
 };