|
| 1 | +import { config } from '@/config'; |
1 | 2 | import { Route, ViewType } from '@/types'; |
2 | | - |
3 | | -import ofetch from '@/utils/ofetch'; |
4 | | -import { load } from 'cheerio'; |
5 | | -import { parseRelativeDate } from '@/utils/parse-date'; |
6 | | -import { puppeteerGet } from './utils'; |
7 | | -import sanitizeHtml from 'sanitize-html'; |
8 | 3 | import cache from '@/utils/cache'; |
| 4 | +import { parseRelativeDate } from '@/utils/parse-date'; |
| 5 | +import { load } from 'cheerio'; |
| 6 | +import { connect, Options } from 'puppeteer-real-browser'; |
| 7 | + |
/**
 * Launch options handed to `connect()` from `puppeteer-real-browser`
 * whenever this route drives a local browser.
 *
 * The Chromium binary is taken from `config.chromiumExecutablePath`.
 * `disableXvfb` and `ignoreAllFlags` are deliberately left unset.
 */
const realBrowserOption: Options = {
    headless: false,
    turnstile: true,
    args: ['--start-maximized'],
    plugins: [],
    connectOption: {
        // `null` leaves the viewport unset rather than forcing a fixed size.
        defaultViewport: null,
    },
    customConfig: {
        chromePath: config.chromiumExecutablePath,
    },
};
| 22 | + |
/**
 * Fetches the rendered HTML of `url`, waiting for `selector` to appear.
 *
 * Two back ends are supported:
 * - When `config.puppeteerRealBrowserService` is set, the work is delegated to
 *   that service with a GET request carrying `url` and `selector` as query
 *   parameters. The first entry of the response's `data` array is returned,
 *   or an empty string when it is missing or empty.
 * - Otherwise a local browser is started through `connect(realBrowserOption)`.
 *   The page is polled about once per second, for up to 50 seconds, until
 *   `selector` matches. The page content is returned whether or not the
 *   selector ever matched.
 *
 * @param url - Address of the page to load.
 * @param selector - CSS selector whose presence signals that the page is ready.
 * @returns The HTML string from the service, or the local page's content.
 * @throws Propagates failures from `fetch`, `res.json()`, `connect`,
 *         `page.goto` and `page.content`.
 */
async function getPageWithPuppeteer(url: string, selector: string): Promise<string> {
    if (config.puppeteerRealBrowserService) {
        const res = await fetch(`${config.puppeteerRealBrowserService}?url=${encodeURIComponent(url)}&selector=${encodeURIComponent(selector)}`);
        const json = await res.json();
        // `?.` keeps a response without `data` from raising a TypeError,
        // matching how the handler reads the same service for post pages.
        return (json.data?.at(0) || '') as string;
    }

    const navigationTimeoutMs = 50000;
    const selectorWaitMs = 50000;
    const pollIntervalMs = 1000;

    const { page, browser } = await connect(realBrowserOption);
    try {
        await page.goto(url, { timeout: navigationTimeoutMs });
        let verify: boolean | null = null;
        const startDate = Date.now();
        while (!verify && Date.now() - startDate < selectorWaitMs) {
            // A failing evaluate (e.g. the page is mid-navigation) counts as "not found yet".
            // eslint-disable-next-line no-await-in-loop, no-restricted-syntax
            verify = await page.evaluate((sel) => (document.querySelector(sel) ? true : null), selector).catch(() => null);
            // The pause also runs after a successful check, before the content is read.
            // eslint-disable-next-line no-await-in-loop
            await new Promise((r) => setTimeout(r, pollIntervalMs));
        }
        return await page.content();
    } finally {
        // Always release the browser, even when navigation or content retrieval fails.
        await browser.close();
    }
}
| 44 | + |
/**
 * Loads a profile listing page through `getPageWithPuppeteer`, using the
 * `.post_box` selector as the readiness signal.
 *
 * @param profileUrl - Full URL of the profile (or tagged) page.
 * @returns The HTML string produced by `getPageWithPuppeteer`.
 */
function getProfilePage(profileUrl: string): Promise<string> {
    const postBoxSelector = '.post_box';
    const pageHtml = getPageWithPuppeteer(profileUrl, postBoxSelector);
    return pageHtml;
}
9 | 48 |
|
10 | 49 | export const route: Route = { |
11 | 50 | path: '/user/:id/:type?', |
@@ -34,76 +73,109 @@ export const route: Route = { |
34 | 73 | }, |
35 | 74 | ], |
36 | 75 | name: 'User Profile - Picnob', |
37 | | - maintainers: ['TonyRL', 'micheal-death', 'AiraNadih', 'DIYgod'], |
| 76 | + maintainers: ['TonyRL', 'micheal-death', 'AiraNadih', 'DIYgod', 'hyoban'], |
38 | 77 | handler, |
39 | 78 | view: ViewType.Pictures, |
40 | 79 | }; |
41 | 80 |
|
42 | 81 | async function handler(ctx) { |
| 82 | + if (!config.puppeteerRealBrowserService && !config.chromiumExecutablePath) { |
| 83 | + throw new Error('PUPPETEER_REAL_BROWSER_SERVICE or CHROMIUM_EXECUTABLE_PATH is required to use this route.'); |
| 84 | + } |
| 85 | + |
43 | 86 | // NOTE: 'picnob' is still available, but all requests to 'picnob' will be redirected to 'pixnoy' eventually |
44 | 87 | const baseUrl = 'https://www.pixnoy.com'; |
45 | 88 | const id = ctx.req.param('id'); |
46 | 89 | const type = ctx.req.param('type') ?? 'profile'; |
47 | 90 | const profileUrl = `${baseUrl}/profile/${id}/${type === 'tagged' ? 'tagged/' : ''}`; |
48 | 91 |
|
49 | | - // TODO: can't bypass cloudflare 403 error without puppeteer |
50 | | - let html; |
51 | | - let usePuppeteer = false; |
52 | | - try { |
53 | | - const data = await ofetch(profileUrl); |
54 | | - html = data; |
55 | | - } catch { |
56 | | - html = await puppeteerGet(profileUrl); |
57 | | - usePuppeteer = true; |
58 | | - } |
| 92 | + const html = await getProfilePage(profileUrl); |
| 93 | + |
59 | 94 | const $ = load(html); |
60 | 95 |
|
61 | 96 | const list = $('.post_box') |
62 | 97 | .toArray() |
63 | 98 | .map((item) => { |
64 | 99 | const $item = $(item); |
65 | | - const sum = $item.find('.sum').text(); |
66 | 100 | const coverLink = $item.find('.cover_link').attr('href'); |
67 | 101 | const shortcode = coverLink?.split('/')?.[2]; |
| 102 | + const image = $item.find('.cover .cover_link img'); |
| 103 | + const title = image.attr('alt') || ''; |
68 | 104 |
|
69 | 105 | return { |
70 | | - title: sanitizeHtml(sum.split('\n')[0], { allowedTags: [], allowedAttributes: {} }), |
71 | | - description: `<img src="${$item.find('.preview_w img').attr('data-src')}" /><br />${sum.replaceAll('\n', '<br>')}`, |
| 106 | + title, |
| 107 | + description: `<img src="${image.attr('data-src')}" /><br />${title}`, |
72 | 108 | link: `${baseUrl}${coverLink}`, |
73 | 109 | guid: shortcode, |
74 | 110 | pubDate: parseRelativeDate($item.find('.time .txt').text()), |
75 | 111 | }; |
76 | 112 | }); |
77 | 113 |
|
78 | | - const newDescription = await Promise.all( |
79 | | - list.map((item) => |
80 | | - cache.tryGet(`picnob:user:${id}:${item.guid}`, async () => { |
81 | | - try { |
82 | | - const html = usePuppeteer |
83 | | - ? await puppeteerGet(item.link) |
84 | | - : await ofetch(item.link, { |
85 | | - headers: { |
86 | | - 'user-agent': 'PostmanRuntime/7.44.0', |
87 | | - }, |
88 | | - }); |
89 | | - const $ = load(html); |
90 | | - if ($('.video_img').length > 0) { |
91 | | - return `<video src="${$('.video_img a').attr('href')}" poster="${$('.video_img img').attr('data-src')}"></video><br />${$('.sum_full').text()}`; |
92 | | - } else { |
93 | | - let description = ''; |
94 | | - for (const slide of $('.swiper-slide').toArray()) { |
95 | | - const $slide = $(slide); |
96 | | - description += `<img src="${$slide.find('.pic img').attr('data-src')}" /><br />`; |
| 114 | + // Fetch all post details concurrently |
| 115 | + // First, get HTML for all posts |
| 116 | + let htmlList: string[]; |
| 117 | + if (config.puppeteerRealBrowserService) { |
| 118 | + // Use puppeteer service for multiple URLs |
| 119 | + htmlList = (await Promise.all( |
| 120 | + list.map((item) => |
| 121 | + cache.tryGet(`picnob:user:${id}:${item.guid}:html`, async () => { |
| 122 | + const selector = '.video_img, .swiper-slide'; |
| 123 | + const res = await fetch(`${config.puppeteerRealBrowserService}?url=${encodeURIComponent(item.link)}&selector=${encodeURIComponent(selector)}`); |
| 124 | + const json = await res.json(); |
| 125 | + return (json.data?.at(0) || '') as string; |
| 126 | + }) |
| 127 | + ) |
| 128 | + )) as string[]; |
| 129 | + } else { |
| 130 | + // Use local puppeteer browser |
| 131 | + const { browser } = await connect(realBrowserOption); |
| 132 | + try { |
| 133 | + htmlList = (await Promise.all( |
| 134 | + list.map((item) => |
| 135 | + cache.tryGet(`picnob:user:${id}:${item.guid}:html`, async () => { |
| 136 | + const page = await browser.newPage(); |
| 137 | + try { |
| 138 | + await page.goto(item.link, { timeout: 50000 }); |
| 139 | + let verify: boolean | null = null; |
| 140 | + const startDate = Date.now(); |
| 141 | + while (!verify && Date.now() - startDate < 50000) { |
| 142 | + // eslint-disable-next-line no-await-in-loop, no-restricted-syntax |
| 143 | + verify = await page.evaluate(() => (document.querySelector('.video_img') || document.querySelector('.swiper-slide') ? true : null)).catch(() => null); |
| 144 | + // eslint-disable-next-line no-await-in-loop |
| 145 | + await new Promise((r) => setTimeout(r, 1000)); |
| 146 | + } |
| 147 | + return await page.content(); |
| 148 | + } catch { |
| 149 | + return ''; |
| 150 | + } finally { |
| 151 | + await page.close(); |
97 | 152 | } |
98 | | - description += $('.sum_full').text(); |
99 | | - return description; |
100 | | - } |
101 | | - } catch { |
102 | | - return ''; |
103 | | - } |
104 | | - }) |
105 | | - ) |
106 | | - ); |
| 153 | + }) |
| 154 | + ) |
| 155 | + )) as string[]; |
| 156 | + } finally { |
| 157 | + await browser.close(); |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + // Process HTML to generate descriptions |
| 162 | + const newDescription = htmlList.map((html) => { |
| 163 | + if (!html) { |
| 164 | + return ''; |
| 165 | + } |
| 166 | + const $ = load(html); |
| 167 | + if ($('.video_img').length > 0) { |
| 168 | + return `<video src="${$('.video_img a').attr('href')}" poster="${$('.video_img img').attr('data-src')}"></video><br />${$('.sum_full').text()}`; |
| 169 | + } else { |
| 170 | + let description = ''; |
| 171 | + for (const slide of $('.swiper-slide').toArray()) { |
| 172 | + const $slide = $(slide); |
| 173 | + description += `<img src="${$slide.find('.pic img').attr('data-src')}" /><br />`; |
| 174 | + } |
| 175 | + description += $('.sum_full').text(); |
| 176 | + return description; |
| 177 | + } |
| 178 | + }); |
107 | 179 |
|
108 | 180 | return { |
109 | 181 | title: `${$('h1.fullname').text()} (@${id}) ${type === 'tagged' ? 'tagged' : 'public'} posts - Picnob`, |
|
0 commit comments