In [None]:
!pip install beautifulsoup4 pandas lxml Faker requests

In [None]:
import requests
import pandas as pd
import re
from bs4 import BeautifulSoup, Tag
from faker import Faker


class MALScraper:
    """Scrape seasonal anime listings from MyAnimeList into pandas DataFrames.

    A single ``requests.Session`` is reused for every request and is given a
    randomly generated ``User-Agent`` header (via Faker) at construction time.
    """

    # Seconds to wait for the server before aborting; without a timeout
    # ``requests`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Column order of the frames returned by ``get_seasonal_anime``.
    _COLUMNS = ('title', 'score', 'members', 'episodes')

    # Multipliers for the abbreviated member counts (e.g. "12K", "1.5M").
    _MEMBER_SUFFIXES = {'K': 1_000, 'M': 1_000_000}

    # Compiled once: matches "12 eps", "1 ep", "24eps" (case-insensitive).
    _EPISODES_RE = re.compile(r'(\d+)\s*eps?', re.I)

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': Faker().user_agent()})

    def save_dataframe(self, dataframe, filepath):
        """Write *dataframe* to *filepath* as CSV, omitting the index column."""
        dataframe.to_csv(filepath, index=False)

    @staticmethod
    def _element_text(element: 'Tag | None') -> 'str | None':
        """Return the stripped text of *element*, or ``None``.

        ``select_one`` returns ``None`` when nothing matches, so a missing
        element is treated the same as an empty one instead of raising.
        """
        if element is None:
            return None
        return element.text.strip() or None

    @staticmethod
    def _parse_score(text: 'str | None') -> 'float | None':
        """Convert a score string to ``float``; ``None`` if absent or non-numeric.

        Placeholder values such as ``'N/A'`` are not valid floats and therefore
        map to ``None``.
        """
        if not text:
            return None
        try:
            return float(text.strip())
        except ValueError:
            return None

    @classmethod
    def _parse_members(cls, text: 'str | None') -> 'int | None':
        """Convert a member count such as ``'1,234'``, ``'12K'`` or ``'1.5M'``.

        Returns ``None`` when *text* is missing or is not in one of those forms.
        """
        if not text:
            return None
        cleaned = text.strip().replace(',', '')
        multiplier = cls._MEMBER_SUFFIXES.get(cleaned[-1:])
        if multiplier is None:
            # ``isdecimal`` (not ``isdigit``) so every accepted string is
            # guaranteed to be parseable by ``int``.
            return int(cleaned) if cleaned.isdecimal() else None
        try:
            # ``round`` rather than ``int``: binary float error can leave the
            # product just below the intended integer (1.005 * 1000).
            return round(float(cleaned[:-1]) * multiplier)
        except (ValueError, OverflowError):
            return None

    @classmethod
    def _parse_episodes(cls, text: 'str | None') -> 'int | None':
        """Return the first ``<number> ep(s)`` count found in *text*, else ``None``."""
        if not text:
            return None
        match = cls._EPISODES_RE.search(text)
        return int(match.group(1)) if match else None

    @classmethod
    def _build_record(cls, item: 'Tag') -> dict:
        """Extract one row (title, score, members, episodes) from an anime card.

        Any field whose element is missing or unparseable is set to ``None``
        so that a single odd card cannot abort the whole scrape.
        """
        return {
            'title': cls._element_text(item.select_one('.h2_anime_title')),
            'score': cls._parse_score(
                cls._element_text(item.select_one('.score'))),
            'members': cls._parse_members(
                cls._element_text(item.select_one('.member'))),
            # The episode count has no dedicated selector here, so it is
            # searched for in the card's full text.
            'episodes': cls._parse_episodes(item.text),
        }

    def get_seasonal_anime(self, year=2025, season='winter', limit=50):
        """Fetch a seasonal listing, save it as CSV and return it.

        Args:
            year: Year segment of the seasonal URL.
            season: Season segment of the seasonal URL (e.g. ``'winter'``);
                also used in the output filename ``anime_{season}.csv``.
            limit: Maximum number of entries to keep, in page order.
                ``None`` keeps every entry found.

        Returns:
            A DataFrame with columns ``title``, ``score``, ``members`` and
            ``episodes``, indexed from 1. The same data is written to
            ``anime_{season}.csv`` in the current working directory.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an HTTP error status.
        """
        url = f"https://myanimelist.net/anime/season/{year}/{season}"
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page into an
        # empty result.
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        items = soup.find_all('div', class_='seasonal-anime')[:limit]

        records = [self._build_record(item) for item in items]
        # Explicit columns keep the schema (and CSV header) even when the
        # page yields no entries.
        dataframe = pd.DataFrame(
            records,
            index=range(1, len(records) + 1),
            columns=list(self._COLUMNS),
        )

        self.save_dataframe(dataframe, f'anime_{season}.csv')

        return dataframe


# Driver cell: scrape the Fall 2023 listing, report how many entries were
# found, and leave the frame as the cell's final expression so the notebook
# renders it.
scraper = MALScraper()
dataframe = scraper.get_seasonal_anime(year=2023, season='fall')

print(f"Found {len(dataframe)} anime")

dataframe