Skip to content

Commit 08b2bcc

Browse files
authored
Merge pull request #3 from DIYgod/master
[pull] master from diygod:master
2 parents 1629bdb + 6288f06 commit 08b2bcc

17 files changed

Lines changed: 123 additions & 76 deletions

File tree

lib/routes/bilibili/cache.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ const getCookie = (disableConfig = false) => {
3838
let waitForRequest = new Promise<string>((resolve) => {
3939
resolve('');
4040
});
41-
const { destory } = await getPuppeteerPage('https://space.bilibili.com/1/dynamic', {
41+
const { destroy } = await getPuppeteerPage('https://space.bilibili.com/1/dynamic', {
4242
onBeforeLoad: (page) => {
4343
waitForRequest = new Promise<string>((resolve) => {
4444
page.on('requestfinished', async (request) => {
@@ -54,7 +54,7 @@ const getCookie = (disableConfig = false) => {
5454
});
5555
const cookieString = await waitForRequest;
5656
logger.debug(`Got bilibili cookie: ${cookieString}`);
57-
await destory();
57+
await destroy();
5858
return cookieString;
5959
});
6060
};

lib/routes/cjlu/yjsy/index.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ async function handler(ctx) {
8686
const limit = ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 10;
8787
const url = `${host}index/${cate}.htm`;
8888

89-
const { page, destory, browser } = await getPuppeteerPage(url, {
89+
const { page, destroy, browser } = await getPuppeteerPage(url, {
9090
onBeforeLoad: async (page) => {
9191
await page.setExtraHTTPHeaders(headers);
9292
await page.setUserAgent(headers['User-Agent']);
@@ -102,7 +102,7 @@ async function handler(ctx) {
102102
const cookieString = cookies.map((c) => `${c.name}=${c.value}`).join('; ');
103103

104104
const response = await page.content();
105-
await destory();
105+
await destroy();
106106

107107
const $ = load(response);
108108

lib/routes/dailypush/all.ts

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
import { load } from 'cheerio';
22

33
import type { Route } from '@/types';
4-
import ofetch from '@/utils/ofetch';
4+
import puppeteer from '@/utils/puppeteer';
55

6-
import { BASE_URL, enhanceItemsWithSummaries, parseArticles } from './utils';
6+
import { BASE_URL, enhanceItemsWithSummaries, fetchPageHtml, parseArticles } from './utils';
77

88
export const route: Route = {
99
path: '/:sort?',
@@ -21,7 +21,7 @@ export const route: Route = {
2121
},
2222
features: {
2323
requireConfig: false,
24-
requirePuppeteer: false,
24+
requirePuppeteer: true,
2525
antiCrawler: false,
2626
supportBT: false,
2727
supportPodcast: false,
@@ -42,17 +42,21 @@ async function handler(ctx) {
4242
const { sort = '' } = ctx.req.param();
4343
const url = sort ? `${BASE_URL}/${sort}` : BASE_URL;
4444

45-
const response = await ofetch(url);
46-
const $ = load(response);
47-
48-
const list = parseArticles($, BASE_URL);
49-
const items = await enhanceItemsWithSummaries(list);
50-
51-
const pageTitle = $('title').text() || 'DailyPush - All';
52-
53-
return {
54-
title: pageTitle,
55-
link: url,
56-
item: items,
57-
};
45+
const browser = await puppeteer();
46+
try {
47+
const html = await fetchPageHtml(browser, url, 'article');
48+
const $ = load(html);
49+
const list = parseArticles($, BASE_URL);
50+
const items = await enhanceItemsWithSummaries(browser, list);
51+
52+
const pageTitle = $('title').text() || 'DailyPush - All';
53+
54+
return {
55+
title: pageTitle,
56+
link: url,
57+
item: items,
58+
};
59+
} finally {
60+
await browser.close();
61+
}
5862
}

lib/routes/dailypush/tags.ts

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
import { load } from 'cheerio';
22

33
import type { Route } from '@/types';
4-
import ofetch from '@/utils/ofetch';
4+
import puppeteer from '@/utils/puppeteer';
55

6-
import { BASE_URL, enhanceItemsWithSummaries, parseArticles } from './utils';
6+
import { BASE_URL, enhanceItemsWithSummaries, fetchPageHtml, parseArticles } from './utils';
77

88
export const route: Route = {
99
path: '/tag/:tag/:sort?',
@@ -22,7 +22,7 @@ export const route: Route = {
2222
},
2323
features: {
2424
requireConfig: false,
25-
requirePuppeteer: false,
25+
requirePuppeteer: true,
2626
antiCrawler: false,
2727
supportBT: false,
2828
supportPodcast: false,
@@ -43,17 +43,21 @@ async function handler(ctx) {
4343
const { tag, sort = 'trending' } = ctx.req.param();
4444
const url = `${BASE_URL}/${tag}/${sort}`;
4545

46-
const response = await ofetch(url);
47-
const $ = load(response);
48-
49-
const list = parseArticles($, BASE_URL);
50-
const items = await enhanceItemsWithSummaries(list);
51-
52-
const pageTitle = $('title').text() || `DailyPush - ${tag.charAt(0).toUpperCase() + tag.slice(1)}`;
53-
54-
return {
55-
title: pageTitle,
56-
link: url,
57-
item: items,
58-
};
46+
const browser = await puppeteer();
47+
try {
48+
const html = await fetchPageHtml(browser, url, 'article');
49+
const $ = load(html);
50+
const list = parseArticles($, BASE_URL);
51+
const items = await enhanceItemsWithSummaries(browser, list);
52+
53+
const pageTitle = $('title').text() || `DailyPush - ${tag.charAt(0).toUpperCase() + tag.slice(1)}`;
54+
55+
return {
56+
title: pageTitle,
57+
link: url,
58+
item: items,
59+
};
60+
} finally {
61+
await browser.close();
62+
}
5963
}

lib/routes/dailypush/utils.ts

Lines changed: 44 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
import type { CheerioAPI } from 'cheerio';
22
import { load } from 'cheerio';
3+
import type { Browser, Page } from 'rebrowser-puppeteer';
34

45
import type { DataItem } from '@/types';
56
import cache from '@/utils/cache';
6-
import ofetch from '@/utils/ofetch';
7+
import logger from '@/utils/logger';
78
import { parseRelativeDate } from '@/utils/parse-date';
89

910
export const BASE_URL = 'https://www.dailypush.dev';
@@ -19,6 +20,38 @@ export interface ArticleItem {
1920
dailyPushUrl?: string;
2021
}
2122

23+
const allowedRequestTypes = new Set(['document']);
24+
25+
async function preparePage(page: Page) {
26+
await page.setRequestInterception(true);
27+
page.on('request', (request) => {
28+
if (allowedRequestTypes.has(request.resourceType())) {
29+
request.continue();
30+
return;
31+
}
32+
33+
request.abort();
34+
});
35+
}
36+
37+
export async function fetchPageHtml(browser: Browser, url: string, waitForSelector?: string): Promise<string> {
38+
const page = await browser.newPage();
39+
await preparePage(page);
40+
41+
try {
42+
logger.http(`Requesting ${url}`);
43+
await page.goto(url, { waitUntil: 'domcontentloaded' });
44+
45+
if (waitForSelector) {
46+
await page.waitForSelector(waitForSelector);
47+
}
48+
49+
return await page.content();
50+
} finally {
51+
await page.close();
52+
}
53+
}
54+
2255
/**
2356
* Try to parse text as a date. Returns the Date if parsing succeeds and is valid, undefined otherwise.
2457
*/
@@ -40,14 +73,14 @@ function extractAuthor(article: ReturnType<CheerioAPI>): DataItem['author'] {
4073
return undefined;
4174
}
4275

43-
// Get all content spans (exclude separator spans with '•')
76+
// Get all content spans (exclude separator spans with "•")
4477
const allSpans = container.find('span');
4578
const contentSpans: string[] = [];
4679

4780
for (let i = 0; i < allSpans.length; i++) {
4881
const $span = allSpans.eq(i);
4982
const text = $span.text().trim();
50-
// Skip separator spans (contain only '•' or have separator classes)
83+
// Skip separator spans (contain only "•" or have separator classes)
5184
if (text !== '•' && !$span.hasClass('text-slate-300') && !$span.hasClass('dark:text-slate-600')) {
5285
contentSpans.push(text);
5386
}
@@ -127,14 +160,14 @@ function extractPubDate(article: ReturnType<CheerioAPI>): Date | undefined {
127160
return undefined;
128161
}
129162

130-
// Get all content spans (exclude separator spans with '•')
163+
// Get all content spans (exclude separator spans with "•")
131164
const allSpans = container.find('span');
132165
const contentSpans: string[] = [];
133166

134167
for (let i = 0; i < allSpans.length; i++) {
135168
const $span = allSpans.eq(i);
136169
const text = $span.text().trim();
137-
// Skip separator spans (contain only '•' or have separator classes)
170+
// Skip separator spans (contain only "•" or have separator classes)
138171
if (text !== '•' && !$span.hasClass('text-slate-300') && !$span.hasClass('dark:text-slate-600')) {
139172
contentSpans.push(text);
140173
}
@@ -225,23 +258,20 @@ export function parseArticles($: CheerioAPI, baseUrl: string): ArticleItem[] {
225258
}
226259

227260
/**
228-
* Enhance items with full summaries from dailypush article pages
261+
* Enhance items with full summaries from dailypush article pages.
262+
* Uses the provided browser; opens a new tab per URL (document requests only). Caller must close the browser.
229263
*/
230-
export async function enhanceItemsWithSummaries(items: ArticleItem[]): Promise<DataItem[]> {
264+
export async function enhanceItemsWithSummaries(browser: Browser, items: ArticleItem[]): Promise<DataItem[]> {
231265
const itemsWithUrl = items.filter((item) => item.dailyPushUrl !== undefined);
232266
const itemsWithoutUrl: DataItem[] = items.filter((item) => item.dailyPushUrl === undefined);
233267

234-
const enhancedItems: DataItem[] = await Promise.all(
268+
const enhancedItems = await Promise.all(
235269
itemsWithUrl.map((item) =>
236270
cache.tryGet(item.dailyPushUrl!, async () => {
237-
// If we have a dailypush article URL, fetch it for the longer summary
238271
try {
239-
const articleResponse = await ofetch(item.dailyPushUrl!);
240-
const $ = load(articleResponse);
241-
242-
// Find the longer summary/description on the article page
272+
const html = await fetchPageHtml(browser, item.dailyPushUrl!, 'p.font-ibm-plex-sans.leading-relaxed');
273+
const $ = load(html);
243274
const summary = $('p.font-ibm-plex-sans.leading-relaxed').first();
244-
245275
if (summary.length > 0 && summary.text().trim()) {
246276
item.description = summary.text().trim();
247277
}
@@ -254,6 +284,5 @@ export async function enhanceItemsWithSummaries(items: ArticleItem[]): Promise<D
254284
)
255285
);
256286

257-
// Include items without dailyPushUrl as-is
258287
return [...enhancedItems, ...itemsWithoutUrl];
259288
}

lib/routes/iwara/ranking.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ async function handler(ctx) {
5858
const items = await cache.tryGet(
5959
`iwara:ranking:${type}:${sort}:${rating}`,
6060
async () => {
61-
const { page, destory } = await getPuppeteerPage(url, {
61+
const { page, destroy } = await getPuppeteerPage(url, {
6262
onBeforeLoad: async (page) => {
6363
await page.setRequestInterception(true);
6464
page.on('request', (request) => {
@@ -83,7 +83,7 @@ async function handler(ctx) {
8383
pubDate: parseDate(item.createdAt),
8484
}));
8585
} finally {
86-
await destory();
86+
await destroy();
8787
}
8888
},
8989
config.cache.routeExpire,

lib/routes/iwara/subscriptions.ts

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ async function handler() {
6969
const username = config.iwara.username;
7070
const password = config.iwara.password;
7171

72-
const { page, destory } = await getPuppeteerPage(rootUrl, {
72+
const { page, destroy } = await getPuppeteerPage(rootUrl, {
7373
gotoConfig: {
7474
waitUntil: 'domcontentloaded',
7575
},
@@ -113,15 +113,21 @@ async function handler() {
113113
async () => {
114114
const result = await fetchApi(`${apiqRootUrl}/user/token`, {
115115
method: 'POST',
116-
headers: { ...apiHeaders, Authorization: refreshHeaders.authorization },
116+
headers: {
117+
...apiHeaders,
118+
Authorization: refreshHeaders.authorization,
119+
},
117120
});
118121
return { authorization: 'Bearer ' + result.accessToken };
119122
},
120123
60 * 60,
121124
false
122125
);
123126

124-
const authedHeaders = { ...apiHeaders, Authorization: authHeaders.authorization };
127+
const authedHeaders = {
128+
...apiHeaders,
129+
Authorization: authHeaders.authorization,
130+
};
125131

126132
// fetch subscriptions
127133
const [videoResponse, imageResponse] = await Promise.all([
@@ -177,7 +183,9 @@ async function handler() {
177183
}
178184

179185
const apiUrl = item.link.replace('www.iwara.tv', 'apiq.iwara.tv');
180-
const response = await fetchApi(apiUrl, { headers: authedHeaders });
186+
const response = await fetchApi(apiUrl, {
187+
headers: authedHeaders,
188+
});
181189

182190
description = renderSubscriptionImages(response.files ? response.files.filter((f) => f.type === 'image').map((f) => `${imageRootUrl}/image/original/${f.id}/${f.name}`) : [item.imageUrl]);
183191

@@ -202,6 +210,6 @@ async function handler() {
202210
item: items,
203211
};
204212
} finally {
205-
await destory();
213+
await destroy();
206214
}
207215
}

lib/routes/nhentai/util.tsx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ const fetchPage = async (url: string): Promise<string> => {
7373
} catch (error: unknown) {
7474
const status = (error as { status?: number; statusCode?: number }).status ?? (error as { status?: number; statusCode?: number }).statusCode;
7575
if (status === 403) {
76-
const { page, destory } = await getPuppeteerPage(url, {
76+
const { page, destroy } = await getPuppeteerPage(url, {
7777
onBeforeLoad: async (page) => {
7878
const allowedTypes = new Set(['document', 'script', 'xhr', 'fetch']);
7979
await page.setRequestInterception(true);
@@ -83,7 +83,7 @@ const fetchPage = async (url: string): Promise<string> => {
8383
},
8484
});
8585
const content = await page.content();
86-
await destory();
86+
await destroy();
8787
return content;
8888
}
8989
throw error;

lib/routes/perplexity/blog.ts

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ async function handler(ctx: Context) {
3838
const limit = Number.parseInt(ctx.req.query('limit') ?? '20', 10);
3939
const rootUrl = 'https://www.perplexity.ai/hub';
4040

41-
const { page, destory, browser } = await getPuppeteerPage(rootUrl, {
41+
const { page, destroy, browser } = await getPuppeteerPage(rootUrl, {
4242
onBeforeLoad: async (page) => {
4343
await page.setRequestInterception(true);
4444
page.on('request', (request) => {
@@ -119,7 +119,9 @@ async function handler(ctx: Context) {
119119
request.resourceType() === 'document' ? request.continue() : request.abort();
120120
});
121121

122-
await contentPage.goto(item.link!, { waitUntil: 'domcontentloaded' });
122+
await contentPage.goto(item.link!, {
123+
waitUntil: 'domcontentloaded',
124+
});
123125

124126
const contentHtml = await contentPage.evaluate(() => document.documentElement.innerHTML);
125127
await contentPage.close();
@@ -148,7 +150,7 @@ async function handler(ctx: Context) {
148150
})
149151
);
150152

151-
await destory();
153+
await destroy();
152154

153155
return {
154156
title: 'Perplexity Blog',

0 commit comments

Comments
 (0)