From a523c0f6aa74e4ae3a2954561f17fdd035b45620 Mon Sep 17 00:00:00 2001 From: Bionus Date: Fri, 26 Aug 2022 02:54:17 +0200 Subject: [PATCH] Properly parse all E-H display modes (fix #2792) --- src/sites/E-Hentai/model.ts | 137 ++++++++++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 31 deletions(-) diff --git a/src/sites/E-Hentai/model.ts b/src/sites/E-Hentai/model.ts index b9a548765..fa4d2aefe 100644 --- a/src/sites/E-Hentai/model.ts +++ b/src/sites/E-Hentai/model.ts @@ -1,3 +1,70 @@ +function parseCommonImage(item: any, pageUrl: any, preview: any): IImage { + const parsedUrl = Grabber.regexMatch("/g/(?\\d+)/(?[^/]+)/", pageUrl); + + return { + type: "gallery", + page_url: pageUrl, + id: parsedUrl["id"], + md5: parsedUrl["id"] + "/" + parsedUrl["token"], + tokens: { + token: parsedUrl["token"], + category: item.find(".cn, .cs")[0].innerText(), + }, + preview_url: preview.attr("data-src") || preview.attr("src"), + created_at: item.find("[id^=posted_]")[0].innerText(), + name: item.find(".glink")[0].innerText(), + }; +} + +function parseCompactImage(item: any): IImage | null { + // Skip header + if (item.find("th").length > 0) { + return null; + } + + // Skip ads + if (item.find("script").length > 0) { + return null; + } + + const pageUrl = item.find(".glname a")[0].attr("href"); + const preview = item.find(".glthumb img")[0]; + + return { + ...parseCommonImage(item, pageUrl, preview), + gallery_count: item.find(".glthumb")[0].innerHTML().match(/>(\d+) pages? tag.attr("title")), + }; +} + +function parseThumbnailImage(item: any): IImage { + const pageUrl = item.find("a")[0].attr("href"); + const preview = item.find("img")[0]; + + return { + ...parseCommonImage(item, pageUrl, preview), + gallery_count: item.innerHTML().match(/>(\d+) pages? 0) { + return null; + } + + const pageUrl = item.find(".gl1e a")[0].attr("href"); + const preview = item.find(".gl1e img")[0]; + + return { + ...parseCommonImage(item, pageUrl, preview), + gallery_count: item.find(".gl3e")[0].innerHTML().match(/>(\d+) pages? tag.attr("title")), + }; +} + function cssToObject(css: string): any { const ret: any = {}; css.split(";").map((style: string) => { @@ -95,40 +162,48 @@ export const source: ISource = { return "/?page=" + (query.page - 1) + "&f_cats=" + s.cats + "&f_search=" + encodeURIComponent(s.search); }, parse: (src: string): IParsedSearch | IError => { - const rows = src.match(/]*>(.+?)<\/tr>/g); - if (!rows) { - return { error: "Parse error: no tag found" }; - } - const images = rows.map((row: any) => { - const match: any = {}; - match["type"] = "gallery"; - - const urlName = row.match(new RegExp(']*>([^>]+)<')); - const preview = row.match(new RegExp(']* src="([^"]+)"(?: data-src="([^"]+)")?[^>]*>')); - const date = row.match(/>(\d{4}-\d{2}-\d{2} \d{2}:\d{2})([^>]+)')); - const pages = row.match(/>(\d+) pages IImage | null; + if (mode === "m" || mode === "p" || mode === "l") { + itemQuery = "table.itg > tbody > tr, table.itg > tr"; + parseFunction = parseCompactImage; + } else if (mode === "e") { + itemQuery = "table.itg > tbody > tr, table.itg > tr"; + parseFunction = parseExtendedImage; + } else if (mode === "t") { + itemQuery = "div.itg > div.gl1t"; + parseFunction = parseThumbnailImage; + } + + // Parse all images + const images: IImage[] = []; + for (const item of html.find(itemQuery)) { + try { + const image = parseFunction!(item); + if (image) { + images.push(image); + } + } catch (e) { + console.warn("Error parsing image: " + e + " / " + item.innerHTML()); // tslint:disable-line: no-console } + } - const gallery = Grabber.regexMatches("/g/(?\\d+)/(?[^/]+)/", match["page_url"]); - match["id"] = gallery[0]["id"]; - match["token"] = gallery[0]["token"]; - match["md5"] = match["id"] + "/" + match["token"]; - return match; - }); return { images, pageCount: Grabber.countToInt(Grabber.regexToConst("page", ">(?[0-9,]+)]*>(?:>|]*>>)", src)),