In [1]:
import requests
from bs4 import BeautifulSoup
import time

# 定义基础URL和请求头
base_url = 'https://myanimelist.net/topanime.php'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}

# 存储所有动漫信息的列表
anime_list = []

# 页数控制（每页显示50个动漫）
page = 0

# 循环直到抓取到100个动漫为止
while len(anime_list) < 100:
    # 构建分页URL
    url = f"{base_url}?limit={page * 50}"
    
    # 发送请求
    response = requests.get(url, headers=headers)
    
    # 检查请求是否成功
    if response.status_code != 200:
        print(f"Failed to retrieve page {page + 1}. Status code: {response.status_code}")
        break

    # 使用BeautifulSoup解析页面
    soup = BeautifulSoup(response.text, 'html.parser')

    # 查找每个动漫条目
    anime_entries = soup.find_all('tr', {'class': 'ranking-list'})

    for entry in anime_entries:
        # 抓取标题
        title_tag = entry.find('h3', {'class': 'anime_ranking_h3'})
        title = title_tag.text.strip() if title_tag else 'Title not found'

        # 抓取评分
        score_tag = entry.find('td', {'class': 'score'})
        score = score_tag.text.strip() if score_tag else 'Score not found'

        # 抓取详情页URL
        anime_url_tag = title_tag.find('a') if title_tag else None
        anime_url = anime_url_tag['href'] if anime_url_tag else None

        # 抓取动漫详情页的信息
        if anime_url:
            # 请求动漫详情页
            anime_response = requests.get(anime_url, headers=headers)
            if anime_response.status_code == 200:
                anime_soup = BeautifulSoup(anime_response.text, 'html.parser')

                # 抓取动漫的相关信息
                anime_info = {}

                # 1. 抓取 Anime Information 部分
                info_section = anime_soup.find('div', {'id': 'content'}).find('div', {'class': 'spaceit_pad'})
                if info_section:
                    # 获取类型、集数、状态、播出时间等
                    for row in info_section.find_all('div', class_='spaceit_pad'):
                        key_value = row.get_text(strip=True).split(":")
                        if len(key_value) == 2:
                            key, value = key_value
                            anime_info[key.strip()] = value.strip()

                # 2. 抓取 Genres 部分
                genres = [genre.text.strip() for genre in anime_soup.find_all('span', {'itemprop': 'genre'})]
                anime_info['Genres'] = ', '.join(genres)

                # 3. 抓取 Statistics 部分
                statistics_section = anime_soup.find('div', {'class': 'spaceit_pad'})
                if statistics_section:
                    # 获取评分、排名、流行度等
                    score_tag = anime_soup.find('span', {'itemprop': 'ratingValue'})
                    if score_tag:
                        anime_info['Score'] = score_tag.text.strip()

                    ranked_tag = anime_soup.find(text='Ranked:')
                    if ranked_tag and ranked_tag.parent:
                        anime_info['Ranked'] = ranked_tag.parent.next_sibling.strip()

                    popularity_tag = anime_soup.find(text='Popularity:')
                    if popularity_tag and popularity_tag.parent:
                        anime_info['Popularity'] = popularity_tag.parent.next_sibling.strip()

                    members_tag = anime_soup.find(text='Members:')
                    if members_tag and members_tag.parent:
                        anime_info['Members'] = members_tag.parent.next_sibling.strip()

                    favorites_tag = anime_soup.find(text='Favorites:')
                    if favorites_tag and favorites_tag.parent:
                        anime_info['Favorites'] = favorites_tag.parent.next_sibling.strip()

                # 添加到动漫列表
                anime_list.append({
                    'title': title,
                    'score': score,
                    'anime_info': anime_info
                })
            else:
                print(f"Failed to retrieve anime details from {anime_url}")

        # 如果已经抓取到100个动漫，停止抓取
        if len(anime_list) >= 100:
            break

    # 打印当前进度
    print(f"Page {page + 1} processed. Total anime fetched: {len(anime_list)}")

    # 翻到下一页
    page += 1

    # 为了避免过于频繁的请求，添加一个延迟
    time.sleep(1)

# 打印抓取的动漫信息
for i, anime in enumerate(anime_list, 1):
    print(f"{i}. Title: {anime['title']}, Score: {anime['score']}")
    for key, value in anime['anime_info'].items():
        print(f"   {key}: {value}")
    print("\n")

  ranked_tag = anime_soup.find(text='Ranked:')
  popularity_tag = anime_soup.find(text='Popularity:')
  members_tag = anime_soup.find(text='Members:')
  favorites_tag = anime_soup.find(text='Favorites:')


Page 1 processed. Total anime fetched: 50
Page 2 processed. Total anime fetched: 100
1. Title: Sousou no Frieren, Score: 9.32
   Genres: Adventure, Drama, Fantasy, Shounen
   Score: 9.32
   Ranked: #1
   Popularity: #187
   Members: 941,505
   Favorites: 56,153


2. Title: One Piece Fan Letter, Score: 9.16
   Genres: Action, Adventure, Fantasy, Shounen
   Score: 9.16
   Ranked: #2
   Popularity: #3034
   Members: 57,233
   Favorites: 1,560


3. Title: Fullmetal Alchemist: Brotherhood, Score: 9.09
   Genres: Action, Adventure, Drama, Fantasy, Military, Shounen
   Score: 9.09
   Ranked: #3
   Popularity: #3
   Members: 3,436,962
   Favorites: 229,456


4. Title: Steins;Gate, Score: 9.07
   Genres: Drama, Sci-Fi, Suspense, Psychological, Time Travel
   Score: 9.07
   Ranked: #4
   Popularity: #14
   Members: 2,633,231
   Favorites: 192,228


5. Title: Shingeki no Kyojin Season 3 Part 2, Score: 9.05
   Genres: Action, Drama, Suspense, Gore, Military, Survival, Shounen
   Score: 9.05
   Ran