In [67]:
# @title Configuration

coser_id = "92"               # @param {type:"string"}
max_pages = 100               # @param {type:"integer"}
delay_per_page = 0.8          # @param {type:"number"}
resolve_downloads = False      # @param {type:"boolean"}
concurrency = 4               # @param {type:"integer"}
save_csv = True               # @param {type:"boolean"}
save_json = False             # @param {type:"boolean"}
save_html_report = True       # @param {type:"boolean"}
render_thumbnails = True      # @param {type:"boolean"}
custom_filename = ""          # @param {type:"string"}
# Explanation:
# - coser_id: the numeric ID of the coser (e.g., "92"); it is inserted into the
#   listing URL /en/coser/<coser_id>/<page>
# - max_pages: upper bound on listing pages to request (the crawl stops earlier on a
#   non-200 response or a page without item containers)
# - delay_per_page: seconds to sleep after each listing page has been processed
# - resolve_downloads: whether to follow the /en/download/... link to the final file URL
# - concurrency: number of worker threads, used for resolving download URLs only
# - save_csv / save_json / save_html_report: output toggles read by the "Save outputs" cell
# - render_thumbnails: when False the HTML cards show a "No image" placeholder instead of <img>
# - custom_filename: if non-empty, used as the base name of the saved files instead of the
#   cosplayer name / coser ID (characters outside [A-Za-z0-9_-] are replaced with "_")

In [68]:
# @title Helpers & Session Setup
import re, time, random
from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from bs4 import BeautifulSoup
import pandas as pd

# progress bar
from tqdm.auto import tqdm
from IPython.display import display, HTML

# Build resilient requests session with retries
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

def make_session():
    """Return a requests.Session with retrying adapters and a randomly picked User-Agent.

    The same adapter (retry policy + enlarged connection pool) is mounted for both
    http:// and https:// URLs.
    """
    # Retry up to 5 times, with backoff, on throttling (429) and server-side errors.
    retry_policy = Retry(
        total=5,
        backoff_factor=0.6,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=frozenset(['GET', 'HEAD', 'POST']),
    )
    retrying_adapter = HTTPAdapter(
        max_retries=retry_policy,
        pool_connections=100,
        pool_maxsize=100,
    )

    # Candidate browser identities; exactly one is chosen for the lifetime of the session.
    ua_pool = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15",
    ]

    sess = requests.Session()
    for scheme in ("https://", "http://"):
        sess.mount(scheme, retrying_adapter)
    sess.headers["User-Agent"] = random.choice(ua_pool)
    return sess

# Site root and the listing-page URL template; the remaining "{}" takes the page number.
BASE = "https://galleryepic.com"
PAGE_URL = f"{BASE}/en/coser/{coser_id}/{{}}"
# Shared HTTP session used by every request in the notebook.
session = make_session()

In [69]:

# @title 1) Scrape pages and gather item metadata (with views/downloads/date and robust file size)
import re, time
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import pandas as pd
import requests

# Fallbacks if earlier setup wasn't run: each name is only assigned when it is missing
# from the notebook's global namespace, so values from the setup cells always win.
if 'session' not in globals():
    # NOTE: this fallback is a plain Session; it does not get the retry adapter or
    # the User-Agent header that make_session() configures.
    session = requests.Session()
if 'BASE' not in globals():
    BASE = "https://galleryepic.com"
if 'PAGE_URL' not in globals():
    # Default to coser "92" when no coser_id has been configured.
    coser_id = globals().get('coser_id', "92")
    PAGE_URL = BASE + "/en/coser/" + str(coser_id) + "/{}"
if 'max_pages' not in globals():
    max_pages = 100
if 'delay_per_page' not in globals():
    delay_per_page = 0.8

# Accumulators for this run (reset every time the cell is executed).
rows = []              # one dict per scraped item; becomes the DataFrame
seen = set()           # (download_id, download_url) keys already collected, for de-duplication
cosplayer_name = None  # filled from the first listing page that exposes the name heading

def parse_detail_page(detail_url):
    """
    Fetch a detail page and extract views, downloads, creation date and file size.

    Parameters:
        detail_url: absolute URL of the detail page; requested through the
            module-level ``session`` with a 20 second timeout.

    Returns:
        (views:int|None, downloads:int|None, created_date:str|None, file_size:str|None)

    The function never raises for request or parsing failures: on a non-200
    response the tuple is all ``None``; on an exception the error is printed and
    whatever was extracted so far is returned.
    """
    views = downloads = None
    created_date = file_size = None

    def to_int(digits):
        """Turn a digit string with optional thousands separators into int, else None."""
        try:
            return int(digits.replace(",", ""))
        except ValueError:
            return None

    def normalize_size_text(s):
        """Normalize a size like "31 MB", "1.2 GB", "512 KB"; unknown text is returned cleaned."""
        if not s:
            return None
        s = re.sub(r'\s+', ' ', s).strip()
        m = re.search(r'([\d.,]+)\s*(KB|MB|GB)\b', s, re.I)
        if not m:
            return s  # fallback to raw cleaned text
        num = m.group(1).replace(",", "")
        unit = m.group(2).upper()
        try:
            if '.' in num:
                numf = float(num)
                # fractional values >= 1 are shown with two decimals; smaller ones as parsed
                return f"{numf:.2f} {unit}" if numf >= 1 else f"{numf} {unit}"
            return f"{int(float(num))} {unit}"
        except (ValueError, OverflowError):
            # e.g. "1.2.3" cannot be parsed as a number; keep the digits as found
            return f"{num} {unit}"

    # Pattern to detect a size anywhere in a piece of text
    size_re = re.compile(r'[\d.,]+\s*(KB|MB|GB)\b', re.I)
    # Classes that identify the header row expected to hold the size
    header_classes = {"flex", "justify-between", "items-center"}

    try:
        r = session.get(detail_url, timeout=20)
        if r.status_code != 200:
            return views, downloads, created_date, file_size
        soup = BeautifulSoup(r.text, "html.parser")

        # --- 1) Stats block (date, views, downloads) ---
        stats_block = soup.select_one("div.flex.space-x-4.mt-4.mb-6")
        if stats_block:
            ps = stats_block.find_all("p")
            if ps:
                # first <p> is taken as the creation date, the rest are scanned for counters
                created_date = ps[0].get_text(" ", strip=True)
                for p in ps[1:]:
                    txt = p.get_text(" ", strip=True)
                    m_views = re.search(r"([\d,]+)\s*views?", txt, re.I)
                    m_down = re.search(r"([\d,]+)\s*downloads?", txt, re.I)
                    if m_views:
                        views = to_int(m_views.group(1))
                    if m_down:
                        downloads = to_int(m_down.group(1))

        # --- 2) Primary: first <div> carrying all header classes that contains a <p> ---
        header_block = None
        for div in soup.find_all("div"):
            if header_classes.issubset(div.get("class") or []) and div.find("p"):
                header_block = div
                break
        if header_block:
            p_tags = header_block.find_all("p")
            size_text = None
            # prefer the second <p>; otherwise take the first <p> that looks like a size
            if len(p_tags) >= 2:
                cand = p_tags[1].get_text(" ", strip=True)
                if size_re.search(cand):
                    size_text = cand
            if not size_text:
                for p in p_tags:
                    t = p.get_text(" ", strip=True)
                    if size_re.search(t):
                        size_text = t
                        break
            if size_text:
                file_size = normalize_size_text(size_text)

        # --- 3) Fallback A: any <p> whose class contains "leading-7" and matches a size ---
        if not file_size:
            for p in soup.find_all("p"):
                cls = p.get("class") or []
                if any("leading-7" in c for c in cls if isinstance(c, str)):
                    t = p.get_text(" ", strip=True)
                    if size_re.search(t):
                        file_size = normalize_size_text(t)
                        break

        # --- 4) Fallback B: any text node matching the size pattern, via its nearest <p> ---
        if not file_size:
            for match in soup.find_all(string=size_re):
                parent_p = match.find_parent("p")
                if parent_p:
                    t = parent_p.get_text(" ", strip=True)
                    file_size = normalize_size_text(t)
                    break
                # no enclosing <p>: try the immediate parent container
                elif hasattr(match, 'parent'):
                    t = match.parent.get_text(" ", strip=True)
                    if size_re.search(t):
                        file_size = normalize_size_text(t)
                        break

        return views, downloads, created_date, file_size

    except Exception as e:
        # minimal but useful debug info (remove or comment out if noisy)
        print(f"Detail parse error ({detail_url}): {e}")
        return views, downloads, created_date, file_size

# --- main scraping loop: one iteration per listing page, one detail-page fetch per item ---
for page in range(1, int(max_pages) + 1):
    url = PAGE_URL.format(page)
    try:
        r = session.get(url, timeout=20)
    except Exception as e:
        print(f"Request error on page {page}: {e}. Stopping.")
        break

    # Any non-200 status ends the crawl; 404 gets its own message.
    if r.status_code == 404:
        print(f"Page {page} returned 404. Stopping.")
        break
    if r.status_code != 200:
        print(f"Page {page} returned {r.status_code}. Stopping.")
        break

    soup = BeautifulSoup(r.text, "html.parser")

    # Cosplayer name (one per coser page); taken from the first page that exposes it
    if cosplayer_name is None:
        h4 = soup.select_one("h4.scroll-m-20")
        if h4:
            cosplayer_name = h4.get_text(" ", strip=True)

    containers = soup.select("div.space-y-3.relative")
    if not containers:
        print(f"No item containers on page {page}. Stopping early.")
        break

    for c in containers:
        dl = c.select_one('a[href^="/en/download/cosplay/"]')
        main_a = c.select_one('a[href^="/en/cosplay/"]')
        if not dl:
            continue

        href = dl.get("href", "").strip()
        if not href:
            continue
        full_download = urljoin(BASE, href)
        download_id = href.rstrip("/").split("/")[-1]
        # De-duplicate items that show up more than once across pages
        key = (download_id, full_download)
        if key in seen:
            continue
        seen.add(key)

        # Cosplay (title): prefer the <h3> inside the main link, else any <h3> in the container
        cosplay_name = ""
        if main_a:
            h3_in_link = main_a.select_one("h3")
            if h3_in_link:
                cosplay_name = h3_in_link.get_text(" ", strip=True)
        if not cosplay_name:
            h3_any = c.select_one("h3")
            if h3_any:
                cosplay_name = h3_any.get_text(" ", strip=True)

        # Photos & Videos: counters look like "18P" / "2V"; the last matching <p> wins
        photos_val = 0
        videos_val = 0
        for ptag in c.find_all("p"):
            txt = ptag.get_text(" ", strip=True)
            if not txt:
                continue
            m_ph = re.search(r'([\d,]+)\s*[Pp]\b', txt)
            m_vid = re.search(r'([\d,]+)\s*[Vv]\b', txt)
            if m_ph:
                try:
                    photos_val = int(m_ph.group(1).replace(",", ""))
                except ValueError:
                    photos_val = 0
            if m_vid:
                try:
                    videos_val = int(m_vid.group(1).replace(",", ""))
                except ValueError:
                    videos_val = 0

        # Origin
        origin = ""
        p_origin = c.select_one("p.text-muted-foreground")
        if p_origin:
            origin = p_origin.get_text(" ", strip=True)

        # Detail URL (only available through the main link)
        detail_url = ""
        if main_a:
            detail_href = main_a.get("href", "").strip()
            if detail_href:
                detail_url = urljoin(BASE, detail_href)

        # Thumbnail: image inside the main link, else the first image in the container.
        # Both paths honour a lazy-loading "data-src" attribute.
        thumbnail = ""
        img = main_a.find("img") if main_a else c.find("img")
        if img:
            src = img.get("src") or img.get("data-src") or ""
            if src:
                thumbnail = urljoin(BASE, src)

        # Step 2: scrape detail page for stats + file size
        views = downloads = None
        created_date = file_size = None
        if detail_url:
            views, downloads, created_date, file_size = parse_detail_page(detail_url)
            time.sleep(0.4)  # small delay for detail pages

        rows.append({
            "Page": page,
            "Cosplayer": cosplayer_name or "",
            "Cosplay": cosplay_name or "",
            "Origin": origin or "",
            "Photos": photos_val,
            "Videos": videos_val,
            "Views": views or 0,
            "Downloads": downloads or 0,
            "Date Created": created_date or "",
            "File Size": file_size or "",
            "Download ID": download_id,
            "Download URL": full_download,
            "Thumbnail": thumbnail or "",
            "Detail URL": detail_url or ""
        })

    # polite delay
    time.sleep(delay_per_page)

# Final dataframe — includes File Size. Passing `columns` fixes the column order and
# keeps the schema intact even when nothing was scraped (empty frame, all columns present).
cols = ["Page","Cosplayer","Cosplay","Origin","Photos","Videos",
        "Views","Downloads","Date Created","File Size",
        "Download ID","Download URL","Thumbnail","Detail URL"]
df = pd.DataFrame(rows, columns=cols)
print(f"Scraped {len(df)} items for coser (page sample: {PAGE_URL.format(1)}) - cosplayer: {cosplayer_name}")

Page 6 returned 404. Stopping.
Scraped 111 items for coser (page sample: https://galleryepic.com/en/coser/92/1) - cosplayer: Xidaidai


In [70]:

# @title 2) Optionally resolve download links to final URLs (concurrent)
# @markdown This follows redirects to get the final file URL (fast HEAD then GET fallback).
resolved = []  # placeholder kept for compatibility; nothing in this cell populates it

def resolve_target(url, timeout=20):
    """
    Follow redirects for ``url`` and report where it ends up.

    A HEAD request is tried first. If the server rejects it (403/405) or answers
    200 without a Content-Length header, a streamed GET is issued instead; its
    body is never read.

    Parameters:
        url: the download URL to resolve.
        timeout: per-request timeout in seconds.

    Returns:
        (final_url, content_disposition_header) — the raw Content-Disposition
        header value (or None when absent), not a parsed filename.
        (None, None) is returned when any request fails.
    """
    resp = None
    try:
        # The shared session is used as-is: this function runs in worker threads, so it
        # must not mutate session-level state such as headers.
        resp = session.head(url, allow_redirects=True, timeout=timeout)
        head_rejected = resp.status_code in (405, 403)
        head_uninformative = (resp.status_code == 200
                              and resp.headers.get("Content-Length") is None)
        if head_rejected or head_uninformative:
            # release the HEAD response before replacing it with the GET response
            resp.close()
            resp = session.get(url, allow_redirects=True, stream=True, timeout=timeout)
        return resp.url, resp.headers.get("content-disposition")
    except Exception:
        # best effort: an unresolved target is reported as (None, None)
        return None, None
    finally:
        # always hand the connection back, including the streamed GET
        if resp is not None:
            try:
                resp.close()
            except Exception:
                pass

def get_filename_from_cdisp(cdisp):
    """
    Extract a filename from a Content-Disposition header value.

    The extended form ``filename*=charset'lang'percent-encoded`` is preferred and
    percent-decoded with the declared charset (UTF-8 if the charset is missing or
    unknown). Otherwise the plain ``filename=...`` parameter is used.

    Returns the filename string, or None if the header is empty or has no filename.
    """
    if not cdisp:
        return None
    from urllib.parse import unquote  # local import: only needed for the extended form

    extended = re.search(r"filename\*\s*=\s*([^'\s;]*)'[^']*'([^;\r\n]+)", cdisp, re.I)
    if extended:
        charset = extended.group(1) or "utf-8"
        encoded = extended.group(2).strip().strip('"')
        try:
            return unquote(encoded, encoding=charset, errors="replace")
        except LookupError:
            # unknown charset label: fall back to UTF-8 rather than failing
            return unquote(encoded, encoding="utf-8", errors="replace")

    m = re.search(r'filename\*?=(?:UTF-8\'\')?["\']?([^;\r\n"]+)', cdisp)
    if m:
        return m.group(1).strip('"')
    return None


if resolve_downloads and not df.empty:
    download_urls = df["Download URL"].tolist()
    # Results are stored by original index because futures complete out of order.
    final_urls = [None] * len(download_urls)
    cdnames = [None] * len(download_urls)
    with ThreadPoolExecutor(max_workers=max(1, int(concurrency))) as ex:
        futures = {ex.submit(resolve_target, u): idx for idx, u in enumerate(download_urls)}
        for fut in tqdm(as_completed(futures), total=len(futures), desc="Resolving"):
            idx = futures[fut]
            try:
                final, cdisp = fut.result()
            except Exception:
                final, cdisp = None, None
            final_urls[idx] = final
            cdnames[idx] = cdisp
    # add to df
    df["Resolved URL"] = final_urls
    df["Resolved Filename"] = [get_filename_from_cdisp(x) for x in cdnames]
    print("Resolved download targets added to table.")
else:
    # keep the columns present so downstream cells see a stable schema
    df["Resolved URL"] = None
    df["Resolved Filename"] = None

In [78]:

# @title 3) Save outputs — Modern Material-like HTML, coser profile + asc/desc toggle
import re, html, os, json
from urllib.parse import urljoin
from string import Template
from IPython.display import display, HTML

# User toggles (global variables you can set before running)
render_thumbnails = globals().get("render_thumbnails", True)   # False => much smaller HTML
save_csv = globals().get("save_csv", False)
save_json = globals().get("save_json", False)
save_html_report = globals().get("save_html_report", True)

# helper size parsing
def parse_size_to_bytes(s):
    """
    Parse the first size expression in ``s`` (e.g. "31 MB", "1.5 GB") into bytes.

    Units are matched case-insensitively and interpreted as powers of 1024
    (B, KB, MB, GB, TB). Commas in the number are treated as thousands
    separators and removed. The result is truncated to an int.

    Returns None when ``s`` is empty, not a string, contains no size expression,
    or the numeric part cannot be parsed.
    """
    if not s or not isinstance(s, str):
        return None
    m = re.search(r'([\d.,]+)\s*(TB|GB|MB|KB|B)\b', s, re.I)
    if not m:
        return None
    try:
        val = float(m.group(1).replace(",", ""))
    except ValueError:
        # e.g. ".." or "1.2.3" matched the digit/punctuation class but is not a number
        return None
    power = {"B": 0, "KB": 1, "MB": 2, "GB": 3, "TB": 4}[m.group(2).upper()]
    return int(val * 1024 ** power)

def human_readable_bytes(b):
    """
    Format a byte count using 1024-based units.

    Returns "" for None. Values of at least 1 GiB are shown as "X.XX GB" and
    values of at least 1 MiB as "X.X MB" (unchanged from before). Smaller values
    are shown as "X.X KB" or "N B" instead of collapsing to "0.0 MB", so the text
    still parses back to a non-zero size.
    """
    if b is None:
        return ""
    kib = 1024
    mib = kib * 1024
    gib = mib * 1024
    if b >= gib:
        return f"{b / gib:.2f} GB"
    if b >= mib:
        return f"{b / mib:.1f} MB"
    if b >= kib:
        return f"{b / kib:.1f} KB"
    return f"{int(b)} B"

# Fetch coser profile (banner, avatar, name, external links) if possible
def fetch_coser_profile(coser_id):
    """
    Best-effort fetch of the coser's profile data from the first listing page.

    Parameters:
        coser_id: ID inserted into ``/en/coser/<coser_id>/1``.

    Returns:
        dict with keys "name", "avatar", "banner" (strings, "" when not found),
        "links" (list of {"href", "text"} dicts for absolute http(s) links,
        de-duplicated by href, first occurrence kept) and "profile_url".

    Never raises: on any error, or a non-200 response, the fields gathered so
    far are returned. The notebook-level ``session`` is used when it exists;
    otherwise a temporary session is created for this call and closed afterwards.
    """
    profile = {"name": "", "avatar": "", "banner": "", "links": [], "profile_url": ""}
    http = globals().get("session")
    owns_session = http is None
    try:
        site_base = globals().get("BASE", "https://galleryepic.com")
        coser_url = urljoin(site_base, f"/en/coser/{coser_id}/1")
        profile["profile_url"] = coser_url
        if owns_session:
            # only build a throwaway session when no shared one is available
            import requests
            http = requests.Session()
        r = http.get(coser_url, timeout=12)
        if r.status_code != 200:
            return profile
        from bs4 import BeautifulSoup
        s = BeautifulSoup(r.text, "html.parser")
        # name
        h4 = s.select_one("h4.scroll-m-20, h4")
        if h4:
            profile["name"] = h4.get_text(" ", strip=True)
        # avatar
        img_avatar = s.select_one('img[variant="avatar"], img[src*="static.galleryepic"], img[class*="w-20"], img[class*="avatar"]')
        if img_avatar:
            src = img_avatar.get("src") or img_avatar.get("data-src") or ""
            if src:
                profile["avatar"] = urljoin(site_base, src)
        # banner
        img_banner = s.select_one('img[variant="banner"], img[class*="banner"], img[src*="static.galleryepic"]')
        if img_banner:
            src = img_banner.get("src") or img_banner.get("data-src") or ""
            if src:
                profile["banner"] = urljoin(site_base, src)
        # social / external links: anchors inside the avatar's parent element,
        # or in the whole document when no avatar was found
        container = None
        if img_avatar:
            container = img_avatar.find_parent()
        if not container:
            container = s
        unique_links = {}  # href -> link dict; insertion order keeps the first occurrence
        for a in container.find_all("a", href=True):
            href = a.get("href")
            if href and href.startswith("http") and href not in unique_links:
                text = a.get("title") or a.get_text(" ", strip=True) or href
                unique_links[href] = {"href": href, "text": text}
        profile["links"] = list(unique_links.values())
    except Exception:
        # silent failure is fine: the profile header is optional decoration
        pass
    finally:
        if owns_session and http is not None:
            http.close()
    return profile

# ensure df exists
if 'df' not in globals() or df is None or df.empty:
    print("No data to save. Run scraping first.")
else:
    # normalize columns and ensure presence
    desired_cols = ["Page","Cosplayer","Cosplay","Origin","Photos","Videos",
                    "Views","Downloads","Date Created","File Size",
                    "Download ID","Download URL","Thumbnail","Detail URL"]
    # Work on a copy so the scraped `df` itself is left untouched by this cell.
    df_save = df.copy()
    # Accept a frame that uses the column name "Cosplay Name" instead of "Cosplay".
    if "Cosplay Name" in df_save.columns and "Cosplay" not in df_save.columns:
        df_save = df_save.rename(columns={"Cosplay Name":"Cosplay"})
    # Add any missing column with a neutral default (0 for counters, "" for text).
    for c in desired_cols:
        if c not in df_save.columns:
            if c in ("Photos","Videos","Views","Downloads"):
                df_save[c] = 0
            else:
                df_save[c] = ""
    # Force the counter columns to int; unparseable values become 0.
    for ncol in ("Photos","Videos","Views","Downloads"):
        try:
            df_save[ncol] = pd.to_numeric(df_save[ncol], errors="coerce").fillna(0).astype(int)
        except Exception:
            df_save[ncol] = df_save[ncol].apply(lambda v: int(v) if isinstance(v,(int,float)) else 0)

    # normalize file-size display, compute byte totals
    # NOTE: bytes_list is filled as a side effect of the .apply() call below,
    # one entry per row, so it must be created immediately before that call.
    bytes_list = []
    def norm_size_cell(s):
        """Return the display text for one "File Size" cell and record its byte count."""
        if not s or str(s).strip().lower() in ("", "nan"):
            bytes_list.append(None)
            return ""
        b = parse_size_to_bytes(str(s))
        bytes_list.append(b)
        # Unparseable (or zero-byte) sizes keep their original text.
        return human_readable_bytes(b) if b else str(s).strip()
    df_save["File Size"] = df_save["File Size"].apply(norm_size_cell)

    total_bytes = sum([b for b in bytes_list if b])
    total_size_display = human_readable_bytes(total_bytes) if total_bytes else "0.0 MB"

    # reorder
    # Columns not listed in desired_cols (e.g. "Resolved URL" / "Resolved Filename"
    # added by the resolve cell) are dropped from the saved outputs here.
    df_save = df_save[desired_cols]

    # prepare base filename
    # Precedence: custom_filename, then the scraped cosplayer name, then the coser ID.
    name_part = (globals().get("custom_filename","") or (globals().get("cosplayer_name") or str(globals().get("coser_id",""))))
    # Anything outside [A-Za-z0-9_-] becomes "_"; if nothing usable remains,
    # fall back to "galleryepic_coser_<id>".
    safe_base = re.sub(r'[^A-Za-z0-9_\-]', '_', str(name_part)).strip('_') or f"galleryepic_coser_{globals().get('coser_id','')}"
    base = safe_base

    saved_files = []
    # CSV / JSON saving
    if save_csv:
        csv_name = f"{base}.csv"
        df_save.to_csv(csv_name, index=False, encoding="utf-8")
        saved_files.append(csv_name)
        print("Saved CSV:", csv_name)
    if save_json:
        json_name = f"{base}.json"
        df_save.to_json(json_name, orient="records", force_ascii=False, indent=2)
        saved_files.append(json_name)
        print("Saved JSON:", json_name)

    # fetch coser profile for header
    coser_id = globals().get("coser_id", "")
    coser_profile = fetch_coser_profile(coser_id)

    # cards HTML assembly: every dynamic value is HTML-escaped exactly once.
    # data-name / data-origin hold lower-cased keys for the page's search and filter,
    # while the visible title and subtitle keep the original casing.
    base_site = globals().get("BASE", "https://galleryepic.com")
    card_template = (
        '<article class="card" data-name="{name}" data-origin="{origin_key}" data-photos="{photos}" data-videos="{videos}" '
        'data-views="{views}" data-downloads="{downloads}" data-size-mb="{size_mb}">'
        '<div class="thumb">{thumb}<div class="overlay">{counts}</div></div>'
        '<div class="meta"><div class="title">{cosplay}</div>'
        '<div class="subtitle">{origin_display}</div>'
        '<div class="stats">{stats}</div>'
        '<div class="actions"><a class="btn" href="{durl}" target="_blank" rel="noopener">Download</a> '
        '<a class="link" href="{turl}" target="_blank" rel="noopener">Detail</a></div></div></article>'
    )
    cards = []
    for _, row in df_save.iterrows():
        cosplay_raw = str(row.get("Cosplay", ""))
        origin_raw = str(row.get("Origin", ""))
        cosplay = html.escape(cosplay_raw)
        photos = int(row.get("Photos") or 0)
        videos = int(row.get("Videos") or 0)
        views = int(row.get("Views") or 0)
        downloads = int(row.get("Downloads") or 0)
        date_raw = str(row.get("Date Created", "") or "")
        size_raw = str(row.get("File Size", "") or "")

        # numeric size (in MB) drives the "Size" sort in the report
        size_bytes = parse_size_to_bytes(size_raw)
        size_mb_num = round(size_bytes / (1024 * 1024), 3) if size_bytes else 0.0

        thumb_raw = str(row.get("Thumbnail", "") or "")
        if thumb_raw and render_thumbnails:
            thumb_url = html.escape(urljoin(base_site, thumb_raw))
            thumb_html = f'<img src="{thumb_url}" loading="lazy" alt="{cosplay}">'
        else:
            thumb_html = '<div class="no-thumb">No image</div>'

        detail_raw = str(row.get("Detail URL", "") or "")
        detail_url = html.escape(urljoin(base_site, detail_raw)) if detail_raw else ""

        # Built from raw values and escaped once below (escaping the parts and then
        # the whole line would turn "&" into "&amp;amp;").
        stats_line = f"{date_raw} • {views} views • {downloads} dl"
        if size_raw:
            stats_line += f" • {size_raw}"

        cards.append(card_template.format(
            name=html.escape(cosplay_raw.lower()),
            origin_key=html.escape(origin_raw.lower()),
            photos=photos,
            videos=videos,
            views=views,
            downloads=downloads,
            size_mb=size_mb_num,
            thumb=thumb_html,
            counts=html.escape(f"{photos}P / {videos}V"),
            cosplay=cosplay,
            origin_display=html.escape(origin_raw),
            stats=html.escape(stats_line),
            durl=html.escape(str(row.get("Download URL", ""))),
            turl=detail_url
        ))
    cards_html = "\n".join(cards)

    # Build Material-like HTML (no external libs). Uses a compact CSS and JS.
    html_template = Template(r"""<!doctype html>
<html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>$safe_base — GalleryEpic</title>
<style>
:root{
  --bg:#f5f7fb; --surface:#ffffff; --muted:#6b7280; --accent:#6750A4; --accent-2:#03a87c;
  --text:#111827; --shadow: 0 6px 18px rgba(2,6,23,0.12);
  --radius:12px;
}
*{box-sizing:border-box}
html,body{height:100%;margin:0;font-family:Inter,system-ui,-apple-system,Segoe UI,Roboto,Helvetica,Arial;background:var(--bg);color:var(--text);}
.container{max-width:1200px;margin:20px auto;padding:18px}
.header-card{background:linear-gradient(135deg,var(--surface),#fbfbff);border-radius:var(--radius);overflow:hidden;box-shadow:var(--shadow);display:flex;gap:16px;align-items:center;padding:18px}
.banner{height:110px;background-size:cover;background-position:center;border-radius:10px;flex:0 0 24%}
.profile{display:flex;gap:12px;align-items:center;flex:1}
.avatar{width:88px;height:88px;border-radius:14px;overflow:hidden;flex:0 0 88px;box-shadow:0 4px 10px rgba(2,6,23,0.08)}
.avatar img{width:100%;height:100%;object-fit:cover;display:block}
.headline{display:flex;flex-direction:column;gap:6px}
.headline .name{font-weight:700;font-size:18px}
.headline .meta{color:var(--muted);font-size:13px}
.headline .links{display:flex;gap:8px;margin-top:6px}
.icon-link{display:inline-flex;align-items:center;justify-content:center;width:36px;height:36px;border-radius:8px;background:#fff;border:1px solid rgba(0,0,0,0.04);text-decoration:none;color:var(--muted);font-size:14px}
.controls{display:flex;gap:8px;align-items:center}
.input, select{padding:10px;border-radius:10px;border:1px solid rgba(2,6,23,0.06);background:#fff}
.grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(220px,1fr));gap:14px;margin-top:18px}
.card{background:var(--surface);border-radius:10px;overflow:hidden;box-shadow:var(--shadow);display:flex;flex-direction:column;transition:transform .12s;min-height:200px}
.card:hover{transform:translateY(-6px)}
.thumb{position:relative;aspect-ratio:3/4;background:#eee;display:flex;align-items:center;justify-content:center}
.thumb img{width:100%;height:100%;object-fit:cover;display:block}
.no-thumb{padding:18px;color:var(--muted)}
.overlay{position:absolute;right:8px;bottom:8px;background:rgba(0,0,0,0.6);color:#fff;padding:6px 8px;border-radius:8px;font-weight:700;font-size:12px}
.meta{padding:12px;display:flex;flex-direction:column;gap:8px;flex:1}
.title{font-weight:600;font-size:15px;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}
.subtitle{font-size:13px;color:var(--muted)}
.stats{font-size:13px;color:var(--muted)}
.actions{display:flex;gap:8px;flex-wrap:wrap;margin-top:auto}
.btn{background:linear-gradient(90deg,var(--accent),var(--accent-2));color:white;padding:8px 10px;border-radius:10px;text-decoration:none;font-weight:600}
.link{color:var(--muted);text-decoration:none;padding:8px;border-radius:8px;border:1px solid transparent;background:transparent}
.toolbar{display:flex;gap:8px;align-items:center;margin-top:12px}
.small{font-size:12px;color:var(--muted)}
.toggle{padding:8px;border-radius:8px;border:1px solid rgba(2,6,23,0.06);background:#fff;cursor:pointer}
@media (max-width:720px){.header-card{flex-direction:column;align-items:flex-start}.banner{width:100%;height:120px;flex:unset}.profile{width:100%}}
</style>
</head><body>
<div class="container">
  <div class="header-card">
    <div class="banner" style="background-image:url('$banner');"></div>
    <div class="profile">
      <div class="avatar"><img src="$avatar" alt="$coser_name"></div>
      <div class="headline">
        <div class="name">$coser_name</div>
        <div class="meta">Scraped $total_items items • $total_photos photos • $total_videos videos • $total_size</div>
        <div class="links">$links_html</div>
      </div>
      <div style="flex:1"></div>
      <div style="display:flex;flex-direction:column;gap:8px;align-items:flex-end">
        <div class="toolbar">
          <input id="search" class="input" placeholder="Search cosplay or origin" />
          <select id="originFilter" class="input"><option value="">All origins</option>$origin_options</select>
          <select id="sortBy" class="input">
            <option value="default">Sort: Default</option>
            <option value="photos">Photos</option>
            <option value="videos">Videos</option>
            <option value="views">Views</option>
            <option value="downloads">Downloads</option>
            <option value="size">Size</option>
          </select>
          <button id="sortDir" class="toggle" title="Toggle ascending/descending">↓</button>
        </div>
        <div class="small">Coser profile: <a href="$profile_url" target="_blank" rel="noopener">$profile_url</a></div>
      </div>
    </div>
  </div>

  <section class="grid" id="cards">
    $cards
  </section>
</div>

<script>
const cardsRoot = document.getElementById('cards');
const search = document.getElementById('search');
const originFilter = document.getElementById('originFilter');
const sortBy = document.getElementById('sortBy');
const sortDirBtn = document.getElementById('sortDir');

let sortDesc = true; // default desc
function updateSortDirUI(){ sortDirBtn.textContent = sortDesc ? '↓' : '↑' }

function nodesArray(){ return Array.from(cardsRoot.querySelectorAll('.card')) }
function normalize(s){ return (s||'').toString().toLowerCase(); }

function filterAndSort(){
  const q = normalize(search.value);
  const origin = normalize(originFilter.value);
  let nodes = nodesArray();
  nodes.forEach(n=>{
    const name = normalize(n.dataset.name || '');
    const o = normalize(n.dataset.origin || '');
    const matches = (q==='' || name.includes(q) || o.includes(q)) && (origin==='' || o===origin);
    n.style.display = matches ? '' : 'none';
  });
  nodes = nodes.filter(n=> n.style.display !== 'none');
  const key = sortBy.value;
  const desc = sortDesc ? -1 : 1;
  function val(n,k){
    if(k==='photos') return parseFloat(n.dataset.photos||0);
    if(k==='videos') return parseFloat(n.dataset.videos||0);
    if(k==='views') return parseFloat(n.dataset.views||0);
    if(k==='downloads') return parseFloat(n.dataset.downloads||0);
    if(k==='size') return parseFloat(n.dataset.sizeMb||0);
    return 0;
  }
  if(key !== 'default'){
    nodes.sort((a,b)=> desc * (val(b,key) - val(a,key)));
  }
  nodes.forEach(n=> cardsRoot.appendChild(n));
}

search.addEventListener('input', filterAndSort);
originFilter.addEventListener('change', filterAndSort);
sortBy.addEventListener('change', filterAndSort);
sortDirBtn.addEventListener('click', ()=>{
  sortDesc = !sortDesc;
  updateSortDirUI();
  filterAndSort();
});

updateSortDirUI();
filterAndSort();
</script>
</body></html>""")

    # prepare small links html for header
    links = []
    for link in coser_profile.get("links", []):
        href = html.escape(link.get("href", ""))
        # Shorten the raw label before escaping: slicing escaped text could cut an
        # entity such as "&amp;" in half and emit broken markup.
        text = html.escape(str(link.get("text", "link"))[:18])
        links.append(f'<a class="icon-link" href="{href}" target="_blank" rel="noopener">{text}</a>')
    links_html = " ".join(links) if links else '<span class="small">No external links</span>'

    # one <option> per distinct non-blank origin, sorted alphabetically
    origin_values = sorted(o for o in df_save["Origin"].astype(str).unique() if o.strip())
    origin_options = "\n".join(
        f'<option value="{html.escape(o)}">{html.escape(o)}</option>' for o in origin_values
    )

    header_filled = html_template.safe_substitute({
        "safe_base": html.escape(base),
        "banner": html.escape(coser_profile.get("banner") or ""),
        "avatar": html.escape(coser_profile.get("avatar") or ""),
        "coser_name": html.escape(coser_profile.get("name") or (globals().get("cosplayer_name") or "")),
        "total_items": len(df_save),
        "total_photos": int(df_save["Photos"].sum()),
        "total_videos": int(df_save["Videos"].sum()),
        "total_size": total_size_display,
        "links_html": links_html,
        "origin_options": origin_options,
        "cards": cards_html,
        "profile_url": html.escape(coser_profile.get("profile_url") or "")
    })

    # write HTML file (only when the save_html_report toggle is on)
    if save_html_report:
        html_name = f"{base}_modern.html"
        with open(html_name, "w", encoding="utf-8") as fh:
            fh.write(header_filled)
        saved_files.append(html_name)
        print("Saved modern Material-like HTML report:", html_name)
        display(HTML(f'<a href="{html_name}" target="_blank">Open report: {html_name}</a>'))

    # final print
    print("Saved files:", saved_files)

Saved CSV: Xidaidai.csv
Saved modern Material-like HTML report: Xidaidai_modern.html


Saved files: ['Xidaidai.csv', 'Xidaidai_modern.html']


In [77]:

# @title 4) Display the table in Colab UI
if not df.empty:
    # Prefer the hosted-notebook table widget; fall back to the standard rich display
    # when that helper is unavailable or fails.
    try:
        import caas_jupyter_tools
        caas_jupyter_tools.display_dataframe_to_user("GalleryEpic Results", df)
    except Exception:
        display(df)
else:
    print("No data to display.")

Unnamed: 0,Page,Cosplayer,Cosplay,Origin,Photos,Videos,Views,Downloads,Date Created,File Size,Download ID,Download URL,Thumbnail,Detail URL,Resolved URL,Resolved Filename
0,1,Xidaidai,Phoebe,Wuthering Waves,18,0,0,0,9/1/2025,31 MB,8826,https://galleryepic.com/en/download/cosplay/8826,https://static.galleryepic.xyz/image/df326b42-...,https://galleryepic.com/en/cosplay/8826,,
1,1,Xidaidai,Hyacine,Honkai,21,0,509,153,8/30/2025,64 MB,8811,https://galleryepic.com/en/download/cosplay/8811,https://static.galleryepic.xyz/image/a32b79bc-...,https://galleryepic.com/en/cosplay/8811,,
2,1,Xidaidai,Cartethyia,Wuthering Waves,17,0,5778,451,8/17/2025,44 MB,8609,https://galleryepic.com/en/download/cosplay/8609,https://static.galleryepic.xyz/image/ad1b4014-...,https://galleryepic.com/en/cosplay/8609,,
3,1,Xidaidai,Sunohara Kokona,Blue Archive,18,0,7114,511,8/1/2025,47 MB,8471,https://galleryepic.com/en/download/cosplay/8471,https://static.galleryepic.xyz/image/5447bf60-...,https://galleryepic.com/en/cosplay/8471,,
4,1,Xidaidai,Halford,Azur Lane,20,0,4471,289,8/1/2025,82 MB,8459,https://galleryepic.com/en/download/cosplay/8459,https://static.galleryepic.xyz/image/f114fbcd-...,https://galleryepic.com/en/cosplay/8459,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,5,Xidaidai,Tamamo-no-Mae,Fate,29,2,2347,335,5/12/2022,178 MB,1397,https://galleryepic.com/en/download/cosplay/1397,https://static.galleryepic.xyz/image/0b210e1b-...,https://galleryepic.com/en/cosplay/1397,,
107,5,Xidaidai,Nero Claudius,Fate,30,0,3404,403,5/12/2022,31 MB,1396,https://galleryepic.com/en/download/cosplay/1396,https://static.galleryepic.xyz/image/3ed291ab-...,https://galleryepic.com/en/cosplay/1396,,
108,5,Xidaidai,Shuten-Douji,Fate,47,2,3246,669,5/12/2022,249 MB,1395,https://galleryepic.com/en/download/cosplay/1395,https://static.galleryepic.xyz/image/7eeeb2d8-...,https://galleryepic.com/en/cosplay/1395,,
109,5,Xidaidai,Astolfo,Fate,45,0,4000,330,5/12/2022,105 MB,1394,https://galleryepic.com/en/download/cosplay/1394,https://static.galleryepic.xyz/image/5394a4a3-...,https://galleryepic.com/en/cosplay/1394,,
