In [6]:
import os
import csv
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from io import BytesIO
from PIL import Image  # <-- Add this import

URL = "https://www.prydwen.gg/star-rail/light-cones/"
SAVE_DIR = "../images/lc_icons"
csv_path = "../data/light_cones.csv"

# Seconds to wait on any single HTTP request; without a timeout `requests`
# blocks indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Character mapping used to turn a Light Cone name into a file name.
# Space, ":" and "/" keep the mapping the script has always used; the
# remaining entries cover characters that Windows forbids in file names.
_FILENAME_TABLE = str.maketrans({
    " ": "_",
    "/": "_",
    "\\": "_",
    ":": None,
    "?": None,
    "*": None,
    '"': None,
    "<": None,
    ">": None,
    "|": None,
})


def icon_filename(name: str) -> str:
    """Return the PNG file name used to store the icon of Light Cone *name*.

    Spaces and path separators become underscores; other characters that
    are not portable in file names are dropped.

    >>> icon_filename("A: B/C")
    'A_B_C.png'
    """
    return name.translate(_FILENAME_TABLE) + ".png"


def download_icon(session, img_url: str, img_path: str) -> bool:
    """Download *img_url* and store it as a PNG at *img_path*.

    Args:
        session: A ``requests.Session`` used to perform the download.
        img_url: Absolute URL of the source image.
        img_path: Destination path of the converted PNG.

    Returns:
        True if the file was written, False if the download or the
        conversion failed (a warning is printed in that case).
    """
    try:
        img_response = session.get(img_url, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()
        with Image.open(BytesIO(img_response.content)) as image:
            # RGBA keeps the alpha channel of the source image.
            image.convert("RGBA").save(img_path, "PNG")
    except (requests.RequestException, OSError) as exc:
        # One broken icon must not abort the whole scrape; the row is still
        # recorded, just without an icon path.
        print(f"⚠️ Could not save icon from {img_url}: {exc}")
        return False
    return True


def parse_light_cone(cone_div, session):
    """Build the metadata record for one ``div.hsr-cone-data`` element.

    The icon referenced next to the element is downloaded as a side effect.

    Args:
        cone_div: The BeautifulSoup tag of a single Light Cone entry.
        session: A ``requests.Session`` used to download the icon.

    Returns:
        A dict with the keys ``name``, ``rarity``, ``path`` and
        ``path_to_image`` (empty string when no icon was saved), or None
        when the entry has no ``<h4>`` heading to take a name from.
    """
    name_tag = cone_div.select_one("h4")
    if name_tag is None:
        return None
    name = name_tag.get_text(strip=True)

    rarity_tag = cone_div.select_one("strong.rarity-hsr")
    rarity = rarity_tag.get_text(strip=True).replace("★", "") if rarity_tag else None

    path_tag = cone_div.select_one("div.hsr-cone-info p:nth-of-type(2) strong")
    path = path_tag.get_text(strip=True) if path_tag else None

    # The icon is not inside the data div: take the nearest <img> before it
    # in the document, falling back to the nearest one after it.
    img_tag = cone_div.find_previous("img") or cone_div.find_next("img")
    img_url = urljoin(URL, img_tag["src"]) if img_tag and img_tag.get("src") else None

    img_path = ""
    if img_url:
        candidate = os.path.join(SAVE_DIR, icon_filename(name))
        if download_icon(session, img_url, candidate):
            img_path = candidate

    return {
        "name": name,
        "rarity": rarity,
        "path": path,
        "path_to_image": img_path,
    }


def write_light_cones_csv(rows, destination: str) -> None:
    """Write the scraped records in *rows* to the CSV file *destination*.

    The parent directory is created if it does not exist yet. Columns are
    ``name``, ``path``, ``rarity`` and ``icon_path``.
    """
    parent = os.path.dirname(destination)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(destination, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["name", "path", "rarity", "icon_path"])
        writer.writeheader()
        writer.writerows(
            {
                "name": cone["name"],
                "path": cone["path"],
                "rarity": cone["rarity"],
                "icon_path": cone["path_to_image"],
            }
            for cone in rows
        )


if __name__ == "__main__":
    os.makedirs(SAVE_DIR, exist_ok=True)

    # A single session reuses the connection for the page and every icon.
    with requests.Session() as session:
        response = session.get(URL, timeout=REQUEST_TIMEOUT)
        # Fail loudly on an HTTP error instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        light_cones = []
        for cone_div in soup.select("div.hsr-cone-data"):
            cone = parse_light_cone(cone_div, session)
            if cone is not None:
                light_cones.append(cone)

    write_light_cones_csv(light_cones, csv_path)
    print(f"✅ Scraped {len(light_cones)} Light Cones. Metadata saved to {csv_path}")
    print(f"🖼️ Images saved as PNG in {SAVE_DIR}")


✅ Scraped 147 Light Cones. Metadata saved to ../data/light_cones.csv
🖼️ Images saved as PNG in ../images/lc_icons
