In [None]:
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import undetected_chromedriver as uc
import requests
import os
import pyautogui
import time
import subprocess

In [None]:
# Status codes returned by download_artwork().
FAILED, SUCCESS, ANIMATED = range(3)

# HTTP headers sent with every image request made through `requests`.
# NOTE(review): the Referer is presumably required by pixiv's image host — confirm.
HEADERS = {"Referer": "https://www.pixiv.net/", "User-Agent": "Mozilla/5.0"}

In [None]:
def scroll_down(driver, n_times):
    """Scroll the current page down in 1000-pixel steps.

    Args:
        driver: Selenium WebDriver whose page is scrolled.
        n_times: Number of scroll steps to perform; each step is followed by
            a 0.3 second pause.
    """
    chain = ActionChains(driver)

    for _step in range(n_times):
        queued = chain.scroll_by_amount(0, 1000)  # 1000 px downwards
        queued.perform()
        time.sleep(0.3)

def login(driver, email, password):
    """Fill in the login form on the page currently open in ``driver`` and submit it.

    The e-mail and password inputs are located by their placeholder text.
    Each value is typed, followed by a one-second pause, and the form is
    submitted by sending Enter to the password input.

    Args:
        driver: Selenium WebDriver already showing the login page.
        email: Text typed into the e-mail / pixiv ID input.
        password: Text typed into the password input.
    """
    form_fields = (
        ("input[placeholder='E-mail address or pixiv ID']", email),
        ("input[placeholder='Password']", password),
    )

    field = None
    for selector, text in form_fields:
        field = driver.find_element(By.CSS_SELECTOR, selector)
        field.send_keys(text)
        time.sleep(1)

    # `field` is the password input here; Enter submits the form.
    field.send_keys(Keys.ENTER)

def get_artwork_ids(driver):
    """Collect artwork IDs from the page currently loaded in ``driver``.

    Every ``<a>`` whose href contains ``/en/artworks/`` is inspected; the
    value of its ``data-gtm-value`` attribute is taken as the ID. Anchors
    without that attribute are ignored.

    Args:
        driver: Selenium WebDriver showing a page that lists artworks.

    Returns:
        List of ``data-gtm-value`` strings in document order (duplicates are
        kept).
    """
    anchors = driver.find_elements(By.XPATH, "//a[contains(@href, '/en/artworks/')]")
    gtm_values = (anchor.get_attribute("data-gtm-value") for anchor in anchors)
    return [value for value in gtm_values if value is not None]

def read_account_info(path):
    """Parse a ``key = value`` text file into a dict.

    Each non-blank line is split on its FIRST ``=``; the text on either side
    is stripped of surrounding whitespace. Blank lines are ignored. If a key
    appears more than once, the last occurrence wins.

    Args:
        path: Path of the text file to read.

    Returns:
        Dict mapping each key to its value (both ``str``).

    Raises:
        ValueError: If a non-blank line contains no ``=``.
        OSError: If the file cannot be opened.
    """
    info = {}

    with open(path) as file:
        for line_no, line in enumerate(file, start=1):
            if not line.strip():
                # Tolerate empty lines (e.g. a spare newline at the end of the file).
                continue

            # Split on the first "=" only so that values — passwords in
            # particular — may themselves contain "=".
            key, separator, value = line.partition("=")
            if not separator:
                # The line content is deliberately left out of the message:
                # this file holds credentials.
                raise ValueError(f"{path}: line {line_no} is not in 'key = value' form")

            info[key.strip()] = value.strip()

    return info

In [None]:
def download_artwork(driver, artwork_id, save_path, has_gui=False):
    """Download every page of one pixiv artwork into ``save_path``.

    The artwork page is opened in ``driver``. The number of pages and the URL
    of the displayed image are read from the DOM; each page is then fetched
    with ``requests`` from ``<base>_p<i>_master1200.jpg`` and written to
    ``<save_path>/<artwork_id>_p<i>.jpg``.

    Args:
        driver: Selenium WebDriver used to load the artwork page.
        artwork_id: pixiv artwork ID; used in the page URL and the file names.
        save_path: Directory the images are written to (created if missing).
        has_gui: Unused. Kept so that existing callers passing it still work.

    Returns:
        ANIMATED if the page contains a ``<canvas>`` element (nothing is
        downloaded), FAILED as soon as one page answers with a non-200 status
        (pages saved before it are kept), SUCCESS once every page is written.

    Raises:
        selenium.common.exceptions.NoSuchElementException, IndexError,
        ValueError: If an expected element is missing or the page counter
            cannot be parsed.
        requests.exceptions.RequestException: On network errors, including a
            request exceeding the timeout.
    """
    artwork_url = f"https://www.pixiv.net/en/artworks/{artwork_id}"

    # NOTE(review): the CSS class names used below are copied from pixiv's
    # markup and look build-generated, so they presumably change whenever the
    # site is redeployed — re-check them if lookups start failing.
    img_class = "sc-e1dc2ae6-1 fUQgzA"

    # Upper bound (seconds) for each image request, so that one stalled
    # connection cannot hang the whole crawl.
    request_timeout = 30

    driver.get(artwork_url)

    # Fixed wait for the page to render before querying the DOM.
    time.sleep(2)

    # find animated/ugoira. we don't download this
    if driver.find_elements(By.TAG_NAME, "canvas"):
        return ANIMATED

    # This <div> is only looked up for its presence: when it is absent the
    # artwork is treated as a single page.
    has_multiple_pages = bool(
        driver.find_elements(By.XPATH, '//div[@class="sc-9222a8f6-2 kufPoS"]')
    )

    # find number of pages
    if has_multiple_pages:
        n_pages_div = driver.find_elements(By.XPATH, '//div[@class="sc-b5e6ab10-0 krtyqW"]')[0]
        span = n_pages_div.find_element(By.TAG_NAME, 'span')
        # The total is whatever follows the last "/" in the counter text.
        n_pages = int(span.text.strip().split("/")[-1])
    else:
        n_pages = 1

    # get image base URL: everything before the first "_" of the image's src
    img_element = driver.find_element(By.XPATH, f'//img[(@class="{img_class}")]')
    base_img_url = img_element.get_attribute("src").split("_")[0]

    # download
    os.makedirs(save_path, exist_ok=True)

    for i in range(n_pages):
        img_url = base_img_url + f"_p{i}_master1200.jpg"
        file_name = f"{artwork_id}_p{i}.jpg"

        response = requests.get(img_url, headers=HEADERS, timeout=request_timeout)

        if response.status_code != 200:
            print(f"FAILED to download {file_name}")
            return FAILED

        with open(os.path.join(save_path, file_name), "wb") as f:
            f.write(response.content)
        print(f"Downloaded {file_name}")

    return SUCCESS

# Crawl

In [None]:
# Directory (relative to the working directory) that downloaded images go to.
save_path = "temp_artworks"

# Account details are kept out of the notebook in a local key=value file.
account_info = read_account_info("account.txt")
email, password, nickname = [
    account_info[field] for field in ("email", "password", "nickname")
]

In [None]:
# pixiv user ID of the artist to crawl: the number that appears in
# https://www.pixiv.net/en/users/<id>/artworks.
# TODO: replace None with the real ID before running the crawl cell.
artist_id = None

In [None]:
current_page = 1

# Fail fast instead of opening a browser and crawling a meaningless URL.
if not artist_id:
    raise ValueError("artist_id must be set to a pixiv user ID before crawling")

# construct artist page: the two placeholder words in the template are
# substituted with the artist ID (once) and the page number (per page).
artist_page = "https://www.pixiv.net/en/users/artist_id/artworks/?p=current_page"
artist_page = artist_page.replace("artist_id", str(artist_id))
current_artist_page = artist_page.replace("current_page", str(current_page))

artwork_ids = []
failed = []
animated = []

# set up Chrome WebDriver
driver = uc.Chrome(use_subprocess=False)

# try/finally: the browser must be closed even when login, crawling or a
# lookup raises, otherwise a Chrome process is left running.
try:
    driver.maximize_window()

    login_page = "https://accounts.pixiv.net/login"
    driver.get(login_page)

    # log in
    print("Log in ... START")
    # find email and password field
    login(driver, email, password)
    print("Log in ... DONE")

    time.sleep(2)

    # find all artwork's ids
    print("Find all artwork's ids ... START")
    has_next_page = True

    while has_next_page:
        print(f"Page: {current_page}")

        driver.get(current_artist_page)

        scroll_down(driver, n_times=10)
        current_page_ids = get_artwork_ids(driver)

        print(f"Found {len(current_page_ids)} artworks: {current_page_ids}")

        artwork_ids.extend(current_page_ids)

        # check if next page exists
        # find previous and next button; the last match is taken to be "next"
        btns = driver.find_elements(By.XPATH, '//a[contains(@class, "sc-ddbdb82a-2 jnCvtc sc-ddbdb82a-1")]')
        # A next page exists only if pagination links were found at all and
        # the last one carries no `hidden` attribute. Without the emptiness
        # check, a page with no pagination links raised IndexError.
        has_next_page = bool(btns) and btns[-1].get_attribute("hidden") is None

        current_page += 1
        current_artist_page = artist_page.replace("current_page", str(current_page))

        time.sleep(2)

    print("Find all artwork's ids ... DONE")
    print(f"Found a total of {len(artwork_ids)} artworks\n")

    # start downloading
    for artwork_id in artwork_ids:
        try:
            return_val = download_artwork(driver, artwork_id, save_path)
        except Exception as e:
            # Best effort: one broken artwork must not abort the whole batch.
            print(f"Error downloading artwork {artwork_id}: {e}")
            return_val = FAILED

        if return_val == FAILED:
            failed.append(artwork_id)
        elif return_val == ANIMATED:
            animated.append(artwork_id)
finally:
    driver.quit()

# Summary of everything that was not saved.
if failed:
    print(f"Failed: {failed}")
if animated:
    print(f"Animated: {animated}")

# Batch-save

In [None]:
# Open every file of the folder in Microsoft Paint, press Ctrl+S and then
# Alt+F4, one file after another, driven by simulated keystrokes.
# NOTE(review): the purpose of re-saving through Paint is not stated in this
# notebook (presumably to re-encode the images) — confirm.
# NOTE: absolute, machine-specific path; edit before running elsewhere.
folder_path = r"C:\Users\PC MY TU\Desktop\temp_artworks"

# Everything in the folder except *.ini entries
# (presumably Windows' desktop.ini — confirm).
file_names = sorted(
    [f for f in os.listdir(folder_path) if not f.endswith(".ini")],
    key=lambda x: (len(x), x)  # shorter names first, then alphabetically ("x_p2.jpg" before "x_p10.jpg")
)

n_files = len(file_names)
print(f"Total files: {n_files}")

# Progress is reported about every 10% of the files (every file when there are fewer than 10).
interval = max(1, n_files // 10) # 1/10 interval

# minimize all windows, prevent accidentally closing this file
time.sleep(1)
pyautogui.hotkey("winleft", "d")
time.sleep(1)

# NOTE(review): pyautogui keystrokes go to whichever window has focus, so the
# fixed sleeps below assume Paint has opened and taken focus in time.
for i, file_name in enumerate(file_names, start=1):
    file_path = os.path.join(folder_path, file_name)

    # open the image with Paint (the process is started but not waited on)
    subprocess.Popen(["mspaint.exe", file_path])
    time.sleep(0.5)

    # save the file
    pyautogui.hotkey("ctrl", "s")
    time.sleep(0.3)

    # close the active window
    pyautogui.hotkey("alt", "f4")
    time.sleep(0.5)

    # print count at every interval, and always for the last file
    if i % interval == 0 or i == n_files:
        print(f"{i}/{n_files}, {100 * i / n_files:.2f}%")

print("DONE")
# Blocking message box that signals the end of the batch.
pyautogui.alert("Processing complete!", "Done")

In [None]:
import os

def rename_files_in_folder(folder_path, new_base_name="file"):
    """Strip the last ``_``-separated part from file names containing "master".

    For every regular file directly inside ``folder_path`` whose name
    contains ``"master"``, the part of the stem after the final underscore is
    removed and the extension is kept, e.g.
    ``123_p0_master1200.jpg -> 123_p0.jpg``. Each rename is printed.

    A file is left untouched (and a message is printed) when

    * removing the last part would leave an empty stem, or
    * a file with the new name already exists, so nothing is overwritten.

    Args:
        folder_path: Directory whose files are renamed (not recursive).
        new_base_name: Unused. Kept for backward compatibility.
    """
    # Regular files only, in sorted order so the output is deterministic.
    files = sorted(
        entry
        for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    )

    for file_name in files:
        if "master" not in file_name:
            continue

        stem, extension = os.path.splitext(file_name)

        # Everything before the final "_"; empty when the stem has no "_"
        # or starts with its only "_".
        kept_stem = stem.rpartition("_")[0]
        if not kept_stem:
            # Renaming would produce a bare extension such as ".jpg".
            print(f"Skipped {file_name}: no name left after removing the last part")
            continue

        new_name = kept_stem + extension

        old_path = os.path.join(folder_path, file_name)
        new_path = os.path.join(folder_path, new_name)

        if os.path.exists(new_path):
            # os.rename would overwrite silently on POSIX and raise on
            # Windows; neither is wanted in the middle of a batch.
            print(f"Skipped {file_name}: {new_name} already exists")
            continue

        os.rename(old_path, new_path)
        print(f"{file_name} -> {new_name}")

# Example usage: rename the files in the artwork folder, dropping the last
# "_" part from every name that contains "master".
# NOTE: absolute, machine-specific path; edit before running elsewhere.
folder_path = r"C:\Users\PC MY TU\Desktop\temp_artworks"
rename_files_in_folder(folder_path)