In [None]:
import os
import re
import shutil
import time

import requests
from bs4 import BeautifulSoup

In [None]:
# Single local proxy endpoint; the same address is registered for both the
# plain-HTTP and the HTTPS scheme keys of the proxy mapping.
_PROXY_ENDPOINT = 'http://127.0.0.1:12369'
YOUR_PROXIES = {scheme: _PROXY_ENDPOINT for scheme in ('http', 'https')}

# Root folder under which downloaded comics are stored.
PATH_TO_SAVE = "C:/Users/kody/comics/"

In [None]:
# Module-level HTTP session used for the notebook's requests.
# The session's proxy mapping is bound to the YOUR_PROXIES dict itself (no
# copy is made), so the session sends its traffic through those proxies.
session = requests.Session()
session.proxies = YOUR_PROXIES

In [None]:
def _sanitize_name(name):
    """Remove the characters that Windows forbids in file and directory names."""
    return re.sub(r'[<>:"/\\|?*]', "", name)


def _retry(failure_message, max_retries, retry_delay, func, *args):
    """Call ``func(*args)`` until it succeeds and return its result.

    Args:
        failure_message: Callable mapping the caught exception to the text
            printed after each failed attempt.
        max_retries: Number of retries allowed after the first failure, or
            ``None`` to retry without limit.
        retry_delay: Seconds to sleep between attempts.

    Raises:
        Exception: The last error raised by ``func`` once ``max_retries``
            retries have been used up.
    """
    failures = 0
    while True:
        try:
            return func(*args)
        except Exception as e:
            failures += 1
            print(failure_message(e))
            if max_retries is not None and failures > max_retries:
                raise
            # Pause so a persistent failure does not turn into a busy loop
            # that hammers the server.
            if retry_delay > 0:
                time.sleep(retry_delay)


def _fetch_home_page(book_url):
    """Fetch a comic's home page and return ``(parsed_page, title)``."""
    response = session.get(book_url, timeout=3)
    print("获取漫画主页成功")
    print("解析漫画主页")
    page = BeautifulSoup(response.text, "html.parser")
    # Indexing raises IndexError when the title node is missing, which makes
    # the caller's retry loop fetch the page again.
    title = page.select(
        "body > div:nth-child(2) > section > div.banner_detail_form > div.info > h1"
    )[0].text
    return page, title


def _fetch_image_divs(chapter_url):
    """Fetch a chapter page and return the ``div`` nodes of its image list."""
    response = session.get(chapter_url, timeout=3)
    page = BeautifulSoup(response.text, "html.parser")
    return page.select("#content > div.comiclist > div")[0].find_all("div")


def _download_image(img_url, img_save_path):
    """Download one image, replacing ``img_save_path`` only when complete."""
    partial_path = img_save_path + ".part"
    # NOTE(review): the HTTP status is intentionally not checked, as in the
    # original code; a permanent 4xx would otherwise be retried forever under
    # the default unlimited retries. Confirm whether error bodies should be
    # rejected instead.
    response = session.get(img_url, timeout=3, stream=True)
    try:
        with open(partial_path, "wb") as f:
            # iter_content undoes gzip/deflate transfer encodings, which a
            # raw copy of response.raw would have written to disk unchanged.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    finally:
        response.close()
    # Rename last so an interrupted transfer never leaves a truncated file
    # under the final name.
    os.replace(partial_path, img_save_path)


def _completed_chapter_numbers(comic_dir):
    """Return the numbers of the chapters already fully downloaded.

    A chapter folder counts as complete only when it contains ``0000.html``,
    because that file is written after all images of the chapter were saved.
    Entries whose name does not start with digits are ignored.
    """
    completed = set()
    for entry in os.listdir(comic_dir):
        leading_digits = re.match(r"\d+", entry)
        if leading_digits is None:
            continue
        if os.path.isfile(os.path.join(comic_dir, entry, "0000.html")):
            completed.add(int(leading_digits.group()))
    return completed


def run(_book_url, max_retries=None, retry_delay=1.0):
    """Download every chapter of one comic that is not yet complete on disk.

    The comic is stored in ``PATH_TO_SAVE/<title>/`` with one folder per
    chapter named ``NNNN <chapter name> NNNN``. Each chapter folder receives
    its images as ``0001.jpg``, ``0002.jpg``, ... plus a ``0000.html`` index
    that embeds them in order. Chapters whose folder already holds
    ``0000.html`` are skipped; folders left behind by an interrupted run are
    downloaded again.

    Args:
        _book_url: URL of the comic's home page.
        max_retries: Retries allowed per request after its first failure.
            ``None`` (the default) retries without limit.
        retry_delay: Seconds to wait between two attempts of a request.

    Raises:
        Exception: The last request error, only when ``max_retries`` is set
            and has been exhausted.
        OSError: When the target folders or files cannot be created.
    """
    print("获取漫画( {} )的主页".format(_book_url))
    home_page, title = _retry(
        lambda e: "获取漫画主页失败({})，重试中...".format(e),
        max_retries,
        retry_delay,
        _fetch_home_page,
        _book_url,
    )
    print("漫画名: {}".format(title))

    # The title goes through the same filtering as chapter names so that it
    # is usable as a directory name.
    comic_dir = os.path.join(PATH_TO_SAVE, _sanitize_name(title))
    print("尝试创建文件夹: {}".format(comic_dir))
    try:
        os.makedirs(comic_dir)
        print("创建文件夹成功")
    except FileExistsError:
        print("创建文件夹失败，可能已存在")

    print("尝试从文件夹中获取已存在的章节")
    existed_chapter_no_set = _completed_chapter_numbers(comic_dir)
    print("目前已存在 {} 章".format(len(existed_chapter_no_set)))

    print("获取漫画章节列表")
    chapter_list = []
    chapter_url_prefix = "https://ikanwzd.top"
    # One pattern yields both the chapter number and the chapter name.
    chapter_pattern = re.compile(r"^第(\d+)(?:話|话)-(.+)$")
    list_nodes = home_page.select("#detail-list-select")
    list_items = list_nodes[0].find_all("li") if list_nodes else []
    for li in list_items:
        link = li.a
        if link is None or not link.get("href"):
            continue
        parsed = chapter_pattern.match(link.text)
        if parsed is None:
            # Entries that do not follow the "第N话-name" scheme are skipped.
            continue
        chapter_no = int(parsed.group(1))
        if chapter_no in existed_chapter_no_set:
            continue
        chapter_name = _sanitize_name(parsed.group(2))
        # Chapter pages are requested over plain HTTP.
        chapter_url = (chapter_url_prefix + link["href"]).replace(
            "https://", "http://"
        )
        chapter_list.append((chapter_no, chapter_url, chapter_name))
    chapter_list.sort(key=lambda chapter: chapter[0])
    print("漫画章节列表获取成功, 还需下载 {} 章".format(len(chapter_list)))

    for chapter_no, chapter_url, chapter_name in chapter_list:
        print("获取漫画( {} )的第 {} 章( {} )".format(title, chapter_no, chapter_name))

        print("创建章节文件夹")
        chapter_dir_name = "{:0>4d} {} {:0>4d}".format(
            chapter_no, chapter_name, chapter_no
        )
        chapter_dir = os.path.join(comic_dir, chapter_dir_name)
        # The folder may already exist from an interrupted earlier run.
        os.makedirs(chapter_dir, exist_ok=True)

        print("获取漫画第 {} 章的网页".format(chapter_no))
        image_div_list = _retry(
            lambda e: "获取漫画第 {} 章的网页失败({})，重试中...".format(chapter_no, e),
            max_retries,
            retry_delay,
            _fetch_image_divs,
            chapter_url,
        )
        print("获取漫画第 {} 章的网页成功".format(chapter_no))

        img_count = 0
        for div in image_div_list:
            img = div.img
            img_url = img.get("data-original") if img is not None else None
            if not img_url:
                # A div without a lazy-loaded image has nothing to download.
                continue
            img_count += 1
            img_file_name = "{:0>4d}.jpg".format(img_count)
            img_save_path = os.path.join(chapter_dir, img_file_name)
            _retry(
                lambda e: "获取图片失败({})，重试中...".format(e),
                max_retries,
                retry_delay,
                _download_image,
                img_url,
                img_save_path,
            )
            print("保存图片成功: 第{}章 - {}".format(chapter_no, img_file_name))

        # Written last: the presence of this file marks the chapter complete.
        print("创建html文件")
        html_file_path = os.path.join(chapter_dir, "0000.html")
        html_file_content_list = [
            "<img src='{:0>4d}.jpg'>".format(i) for i in range(1, img_count + 1)
        ]
        with open(html_file_path, "w", encoding="utf-8") as f:
            f.write("\n".join(html_file_content_list))
        print("创建html文件成功")

    print("下载完成")

In [None]:
# Ids of the comics to fetch, in download order; each id is expanded into
# the URL of the comic's home page.
_BOOK_IDS = (
    418, 627, 612, 591, 483, 676, 584, 691,
    599, 634, 622, 87, 605, 580, 578, 570,
    528, 500, 479, 81, 619, 679, 6,
)
book_url_list = [
    "http://ikanwzd.top/book/{}".format(book_id) for book_id in _BOOK_IDS
]

# Download the comics one after another.
for book_url in book_url_list:
    run(book_url)