/
chatgpt.ts
119 lines (101 loc) · 4.6 KB
/
chatgpt.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import { Route } from '@/types';
import cache from '@/utils/cache';
import dayjs from 'dayjs';
import got from '@/utils/got';
import { load } from 'cheerio';
import { config } from '@/config';
import isSameOrBefore from 'dayjs/plugin/isSameOrBefore';
dayjs.extend(isSameOrBefore);
/**
 * Route descriptor for the ChatGPT release-notes feed.
 *
 * Binds the `/chatgpt/release-notes` path to `handler`, which produces the
 * feed contents.
 */
export const route: Route = {
path: '/chatgpt/release-notes',
categories: ['new-media'],
// Sample request path. It carries an `/openai` prefix that `path` does not;
// presumably that is the namespace this route is mounted under — confirm
// against the router configuration.
example: '/openai/chatgpt/release-notes',
// The path declares no parameters.
parameters: {},
// Every capability flag is switched off for this route.
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
name: 'ChatGPT - Release Notes',
// NOTE(review): no maintainers are listed.
maintainers: [],
handler,
};
/** English month abbreviations in calendar order; index + 1 is the month number. */
const MONTH_ABBREVIATIONS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];

/** Captures a trailing "(<month name> <day>)" suffix of a heading, e.g. "(March 5)". */
const HEADING_DATE_PATTERN = /\((\w+\s+\d{1,2})\)$/;

/** Fixed UTC offset (PST) that release dates are expressed in. */
const PACIFIC_STANDARD_OFFSET = '-08:00';

/** The fields of a content block of the help-center article that this route reads. */
interface ReleaseNoteBlock {
    type?: string;
    text?: string;
    idAttribute?: string;
}

/** One feed entry, built from a subheading and the blocks that follow it. */
interface ReleaseNoteItem {
    title: string;
    author: string | undefined;
    category: string;
    link: string;
    guid: string;
    description: string;
    pubDate?: dayjs.Dayjs;
}

/**
 * Parses "<month> <day>" together with an assumed year.
 *
 * NOTE(review): dayjs only honours the format list / strict flag when the
 * `customParseFormat` plugin has been registered; this file does not register
 * it — confirm it is registered elsewhere. Callers must check `isValid()`.
 */
function parseHeadingDate(monthAndDay: string, year: number): dayjs.Dayjs {
    return dayjs(`${monthAndDay}, ${year}`, ['MMMM D, YYYY', 'MMM D, YYYY'], 'en', true);
}

/** Trims the accumulated body text and converts newlines to `<br/>` before the item is emitted. */
function finalizeItem(item: ReleaseNoteItem): ReleaseNoteItem {
    item.description = item.description.trim().replaceAll('\n', '<br/>');
    return item;
}

/**
 * Builds the ChatGPT release-notes feed.
 *
 * Fetches the help-center article, reads the JSON embedded in its
 * `__NEXT_DATA__` script tag, and turns every `subheading` block into one feed
 * item whose description is the text of the blocks that follow it. The result
 * is cached under the article URL for `config.cache.routeExpire`.
 *
 * The headings carry only "(Month Day)", so the year is guessed: the newest
 * entry is assumed to be in the current year (or the previous one if that
 * would place it in the future), and the year is decremented whenever the
 * month number increases while walking down the newest-first list.
 *
 * @returns The feed title, description, link and items.
 * @throws Error when the page has no `__NEXT_DATA__` payload; errors from the
 *   HTTP request or from a payload of unexpected shape propagate unchanged.
 */
async function handler() {
    const articleUrl = 'https://help.openai.com/en/articles/6825453-chatgpt-release-notes';

    const cacheIn = await cache.tryGet(
        articleUrl,
        async () => {
            const pageResponse = await got({
                method: 'get',
                url: articleUrl,
            });
            const $ = load(pageResponse.data);

            // The page embeds its content as JSON in the __NEXT_DATA__ script,
            // so it is parsed directly instead of scraping the rendered HTML.
            const rawPageData = $('script#__NEXT_DATA__').text();
            if (!rawPageData) {
                throw new Error(`No __NEXT_DATA__ payload found at ${articleUrl}`);
            }
            const articleContent = JSON.parse(rawPageData).props.pageProps.articleContent;

            const feedTitle = articleContent.title;
            const feedDesc = articleContent.description;
            const authorName: string | undefined = articleContent.author?.name;
            const blocks: ReleaseNoteBlock[] = articleContent.blocks ?? [];

            const now = dayjs();
            const items: ReleaseNoteItem[] = [];
            let current: ReleaseNoteItem | null = null;
            let year = now.year();
            let prevMonth = -1; // -1 until the first dated heading has been seen

            for (const block of blocks) {
                const text = (block.text || '').trim();
                if (!text) {
                    continue;
                }

                if (block.type !== 'subheading') {
                    // Body text belongs to the most recent heading; anything
                    // before the first heading has no item to attach to.
                    if (current !== null) {
                        current.description += text + '\n\n';
                    }
                    continue;
                }

                // A new heading closes the item that was being collected.
                if (current !== null) {
                    items.push(finalizeItem(current));
                }
                const anchoredUrl = articleUrl + '#' + block.idAttribute;
                current = {
                    title: text,
                    author: authorName,
                    category: 'ChatGPT',
                    link: anchoredUrl,
                    guid: anchoredUrl,
                    description: '',
                };

                const dateMatch = text.match(HEADING_DATE_PATTERN);
                if (dateMatch === null) {
                    continue;
                }
                const monthAndDay = dateMatch[1];
                const month = 1 + MONTH_ABBREVIATIONS.indexOf(monthAndDay.substring(0, 3));
                if (month === 0) {
                    // The parenthesised suffix is not a date (e.g. "(v 12)"):
                    // leave the item undated and keep the year tracking intact.
                    continue;
                }

                if (prevMonth === -1) {
                    // Newest entry: if the current year would put it in the
                    // future (e.g. a December note read in January) it belongs
                    // to the previous year. One day of slack absorbs timezone
                    // differences between the server and the publisher.
                    if (parseHeadingDate(monthAndDay, year).isAfter(now.add(1, 'day'))) {
                        year--;
                    }
                } else if (prevMonth < month) {
                    // Month number went up while walking backwards in time:
                    // a year boundary was crossed.
                    year--;
                }
                prevMonth = month;

                const pubDay = parseHeadingDate(monthAndDay, year);
                if (!pubDay.isValid()) {
                    // e.g. "Feb 31": keep the item but do not attach a date.
                    continue;
                }

                // Express the calendar date as midnight PST. The date is taken
                // from the parsed wall-clock fields, not from a UTC
                // serialisation, so the result does not depend on the server's
                // own timezone.
                current.pubDate = dayjs(`${pubDay.format('YYYY-MM-DD')}T00:00:00${PACIFIC_STANDARD_OFFSET}`);
                current.guid = articleUrl + '#' + pubDay.format('YYYY_MM_DD');
            }

            if (current !== null) {
                items.push(finalizeItem(current));
            }

            return { feedTitle, feedDesc, items };
        },
        config.cache.routeExpire,
        false
    );

    return {
        title: cacheIn.feedTitle,
        description: cacheIn.feedDesc,
        link: articleUrl,
        item: cacheIn.items,
    };
}