In [1]:
from src import *

import numpy as np
import pandas as pd

from typing import List

# READ
> Don't run all cells at once: the delay added between requests makes a full run take far too long.<br>
> Run only the sections you need to test.
>
> Reduce `MINIMUM_DELAY` at your own risk: scraping too fast may get you blocked.

> To scrape `Details`, `StatisticsSummary`, or `StatisticsSubmissions`, make sure the season `csv` you need already exists in `Season/`.<br>
> Otherwise, run `AutomateSeason()` in the `Seasons` section first to generate the database of anime links.

In [2]:
# Delay setting used by Scraper (presumably the minimum number of seconds between
# requests — confirm in src). Lowering it risks getting blocked by the site.
Scraper.MINIMUM_DELAY = 5

# Seasons

In [3]:
# Deliberate stop: always raises AssertionError, so "Run All" halts here instead of
# running the scraping cells of this section. Run the cells you need by hand.
assert False

AssertionError: 

In [None]:
def AutomateSeason(YEARS : List[str]):
    """Scrape every season page of each given year and save each one as a CSV.

    For every year in ``YEARS`` and every member of ``Scraper.SEASONS`` the
    season URL is built with ``Scraper.build_URL_Seasons``, scraped, parsed by
    ``SeasonPage.parse`` and written to ``Season/{year}_{season value}.csv``.
    The URL is printed before scraping and the file name after a successful
    write.

    Any exception raised while scraping, parsing or saving one season is
    printed together with its URL, and the loop moves on to the next season.

    Args:
        YEARS: Years to scrape. Each value is only handed to
            ``Scraper.build_URL_Seasons`` and formatted into the file name.
    """
    for year in YEARS:
        for season in Scraper.SEASONS.__members__.values():
            season_url = Scraper.build_URL_Seasons(year, season)
            print(season_url)

            try:
                season_frame = pd.DataFrame(SeasonPage.parse(Scraper.scrape(season_url)))

                csv_path = f'Season/{year}_{season.value}.csv'
                season_frame.to_csv(csv_path)
                print(csv_path)
            except Exception as err:
                # Best effort: report the failing URL and keep going.
                print(season_url, err)

## 1. Scrape All 1995 Season Records

In [None]:
# np.arange(1995, 1996) excludes the stop value, so this scrapes the seasons of 1995 only.
AutomateSeason(np.arange(1995, 1996))

## 2. Scrape Modern Anime (2020-2025)

In [None]:
# Years 2020 through 2025 inclusive (the stop value 2026 is excluded by np.arange).
AutomateSeason(np.arange(2020, 2026))

# Details

In [None]:
# Deliberate stop: always raises AssertionError, so "Run All" halts here instead of
# running the scraping cells of this section. Run the cells you need by hand.
assert False

## 1. Scrape [First 2, 2 Random, and Last 2] Anime from 1995 Fall

In [None]:
# Year and season identifying the Season/ CSV sampled in the cells below.
YEAR = 1995
# The FALL member's value (not the member itself) is what gets formatted into file names.
SEASON = Scraper.SEASONS.FALL.value

In [None]:
df = pd.read_csv(f'Season/{YEAR}_{SEASON}.csv', index_col=0)

# Sample by position: the first two rows, two DISTINCT random rows from the middle,
# and the last two rows. Drawing without replacement avoids picking the same
# middle row twice, which two independent randint calls could do.
n_rows = len(df)
if n_rows <= 6:
    # Too few rows for two distinct middle picks; use the whole season instead.
    df_Target = df.copy()
else:
    middle_picks = np.random.choice(np.arange(2, n_rows - 2), size=2, replace=False)
    df_Target = df.iloc[[0, 1, *middle_picks, -2, -1]].copy()

df_Target

In [None]:
# Scrape the details page of every sampled anime. Parsed rows are collected in a
# list and turned into one DataFrame at the end, rather than re-concatenating a
# growing frame on every iteration.
detail_records = []

for _, row in df_Target.iterrows():
    try:
        soup = Scraper.scrape(row.Link)
        output = DetailsPage.parse(soup)
        output['Title'] = row.Title

        detail_records.append(output)
        print(row.Title)
    except Exception as e:
        # Best effort: report the failing link and continue with the next anime.
        print(f'ERROR: {row.Link} {e}')

# Empty (not None) when every scrape failed, so later cells can still call DataFrame methods.
df_Details = pd.DataFrame(detail_records)

In [None]:
# Save the sampled details. The file name is built from the YEAR and SEASON
# globals, so it follows whatever those currently hold in the kernel.
filename = f'Detail/{YEAR}_{SEASON}_sample.csv'
df_Details.to_csv(filename)

## 2. Scrape All Entries From Specific Year and Season

In [None]:
def AutomateDetails(YEAR : int | str, SEASON : Scraper.SEASONS):
    """Scrape the details page of every anime listed for one season and save them.

    Reads ``Season/{YEAR}_{SEASON.value}.csv``, scrapes each row's ``Link``,
    parses the page with ``DetailsPage.parse``, tags the parsed result with the
    row's ``Title`` and writes all results to ``Detail/{YEAR}_{SEASON.value}.csv``.
    Each successfully scraped title is printed, then the output file name.

    A failure on a single anime is printed and skipped. If no anime could be
    scraped at all, a notice is printed and no file is written.

    Args:
        YEAR: Year part of the input and output file names.
        SEASON: Member of ``Scraper.SEASONS``; its ``value`` is the season part
            of the file names.
    """
    season_csv = f'Season/{YEAR}_{SEASON.value}.csv'

    # Collect parsed rows and build the DataFrame once, instead of
    # re-concatenating a growing frame on every iteration.
    detail_records = []

    for _, row in pd.read_csv(season_csv).iterrows():
        try:
            soup = Scraper.scrape(row.Link)
            output = DetailsPage.parse(soup)
            output['Title'] = row.Title

            detail_records.append(output)
            print(row.Title)
        except Exception as e:
            # Best effort: report the failing link and continue with the next anime.
            print(f'ERROR: {row.Link} {e}')

    if not detail_records:
        # Previously this path crashed with AttributeError on None.to_csv.
        print(f'No details scraped for {season_csv}; nothing written')
        return

    filename = f'Detail/{YEAR}_{SEASON.value}.csv'
    pd.DataFrame(detail_records).to_csv(filename)
    print(filename)

In [None]:
# Needs the 2024 fall season CSV in Season/ to exist already (generated by AutomateSeason).
AutomateDetails(2024, Scraper.SEASONS.FALL)

# Statistics [1] Summary and [2] Submissions

In [None]:
# Deliberate stop: always raises AssertionError, so "Run All" halts here instead of
# running the scraping cells of this section. Run the cells you need by hand.
assert False

## 1. Get the Scores of Every Anime in a Given Season and Year

In [5]:
def AutomateStatSummary(YEAR:int, SEASONS:Scraper.SEASONS) -> pd.DataFrame:
    """Scrape the ``/stats`` summary of every anime in one season and save them.

    Reads ``Season/{YEAR}_{SEASONS.value}.csv``, scrapes ``{Link}/stats`` for each
    row, parses it with ``StatisticsSummaryPage.parse``, tags the parsed result
    with the row's ``Title`` and writes all results to
    ``Statistics/Summary/{YEAR}_{SEASONS.value}.csv``.

    Args:
        YEAR: Year part of the input and output file names.
        SEASONS: A single member of ``Scraper.SEASONS`` (the plural parameter
            name is kept for backward compatibility); its ``value`` is the
            season part of the file names.

    Returns:
        DataFrame with one row per successfully scraped anime. It is empty, and
        no file is written, when nothing could be scraped.
    """
    # Use the argument itself. The body used to read a global named SEASON,
    # which only worked when the calling cell happened to define one.
    season_value = SEASONS.value

    # Collect parsed rows and build the DataFrame once, instead of
    # re-concatenating a growing frame on every iteration.
    stat_records = []

    for _, row in pd.read_csv(f'Season/{YEAR}_{season_value}.csv', index_col=0).iterrows():
        URL = f'{row.Link}/stats'
        print(URL)

        try:
            soup = Scraper.scrape(URL)
            data = StatisticsSummaryPage.parse(soup)
            data['Title'] = row.Title

            stat_records.append(data)
        except Exception as e:
            # Best effort: report the failing URL (previously the literal text
            # "URL" was printed) and continue with the next anime.
            print(f'ERROR {URL} {e}')

    df_StatSummary = pd.DataFrame(stat_records)

    if not stat_records:
        # Previously this path crashed with AttributeError on None.to_csv.
        print(f'No statistics scraped for {YEAR}_{season_value}; nothing written')
        return df_StatSummary

    filename = f'Statistics/Summary/{YEAR}_{season_value}.csv'
    print(filename)
    df_StatSummary.to_csv(filename)

    return df_StatSummary

In [6]:
# To process a single season instead of the full sweep, set these and call
# AutomateStatSummary(YEAR, SEASON) directly:
#YEAR = 2024
#SEASON = Scraper.SEASONS.FALL

# Sweep every season of 2020 through 2024 (np.arange excludes the stop value 2025).
# Each call reads the matching season CSV from Season/, so those must exist first.
for YEAR in np.arange(2020, 2025):
    for SEASON in Scraper.SEASONS.__members__.values():
        AutomateStatSummary(YEAR, SEASON)

https://myanimelist.net/anime/40748/Jujutsu_Kaisen/stats
https://myanimelist.net/anime/40776/Haikyuu_To_the_Top_Part_2/stats
https://myanimelist.net/anime/41389/Tonikaku_Kawaii/stats
https://myanimelist.net/anime/40454/Dungeon_ni_Deai_wo_Motomeru_no_wa_Machigatteiru_Darou_ka_III/stats
https://myanimelist.net/anime/41433/Akudama_Drive/stats
https://myanimelist.net/anime/40911/Yuukoku_no_Moriarty/stats
https://myanimelist.net/anime/40571/Majo_no_Tabitabi/stats
https://myanimelist.net/anime/40497/Mahouka_Koukou_no_Rettousei__Raihousha-hen/stats
https://myanimelist.net/anime/41619/Munou_na_Nana/stats
https://myanimelist.net/anime/40595/Kimi_to_Boku_no_Saigo_no_Senjou_Aruiwa_Sekai_ga_Hajimaru_Seisen/stats
https://myanimelist.net/anime/41380/100-man_no_Inochi_no_Ue_ni_Ore_wa_Tatteiru/stats
https://myanimelist.net/anime/41345/Noblesse/stats
https://myanimelist.net/anime/41006/Higurashi_no_Naku_Koro_ni_Gou/stats
https://myanimelist.net/anime/41930/Kamisama_ni_Natta_Hi/stats
https://myanimelist

KeyboardInterrupt: 

## 2. Scrape the Latest Viewership across <= 100 Pages for a Given Title

In [None]:
def AutomateStatSubmissions(TITLE : str, LINK : str, PAGE_SIZE : int = 75, MAX_PAGES : int = 100):
    """Scrape consecutive ``/stats`` submission pages of one title and save them.

    Requests ``{LINK}/stats?show={offset}#members`` for offsets
    ``0, PAGE_SIZE, 2 * PAGE_SIZE, ...`` (at most ``MAX_PAGES`` requests), parses
    each page with ``StatisticsSubmissionPage.parseAsDf`` and writes the
    concatenated result to ``Statistics/Submissions/{TITLE}.csv``.

    The first exception raised while scraping or parsing a page is printed as
    ``END ...`` and stops the paging; pages collected up to that point are still
    saved. If not even the first page could be parsed, a notice is printed and
    no file is written.

    Args:
        TITLE: Title used for the output file name. Any ``/`` is replaced by
            ``_`` so the title cannot be mistaken for a sub-directory.
        LINK: Base URL of the anime page.
        PAGE_SIZE: Offset increment between consecutive pages (default 75, the
            value that used to be hard-coded).
        MAX_PAGES: Maximum number of pages to request (default 100, as before).
    """
    page_frames = []

    for offset in range(0, PAGE_SIZE * MAX_PAGES, PAGE_SIZE):
        try:
            URL = f'{LINK}/stats?show={offset}#members'
            print(URL)

            soup = Scraper.scrape(URL)
            page_frames.append(StatisticsSubmissionPage.parseAsDf(soup))
        except Exception as e:
            # Any failure is treated as the end of the available pages.
            print(f'END {e}')
            break

    if not page_frames:
        # Previously this path crashed with AttributeError on None.to_csv.
        print(f'No submissions scraped for {TITLE}; nothing written')
        return

    # A "/" in the title would otherwise be read as a path separator and make
    # to_csv fail on a non-existent directory.
    safe_title = TITLE.replace('/', '_')
    filename = f'Statistics/Submissions/{safe_title}.csv'

    # Concatenate once at the end instead of growing a frame page by page.
    pd.concat(page_frames, ignore_index=True).to_csv(filename)
    print(filename)

### Try EVA

In [None]:
# Pick the row labelled 0 of the 1995 fall season file (presumably Evangelion,
# per the heading — confirm against the CSV) and scrape its submissions.
fall_1995 = pd.read_csv('Season/1995_fall.csv', index_col=0)
EVA = fall_1995.loc[0]

AutomateStatSubmissions(TITLE=EVA.Title, LINK=EVA.Link)