In [1]:
import requests
import time
import pandas as pd
import json
from bs4 import BeautifulSoup
import re

In [2]:
class ParseError(Exception):
    """Signals that an error occurred while working inside a ``with Parse(...)`` block."""

class PageError(Exception):
    """Signals that the requested page answered with HTTP 404."""

class Parse:
    """Context manager that downloads one API record and its side resources.

    Entering the context requests ``url`` itself plus ``url/roles``,
    ``url/related``, ``url/similar`` and ``url/external_links`` and merges
    everything into a single flat dict (one row of the final table).

    Usage::

        with Parse(url='https://shikimori.org/api/animes/1') as record:
            ...

    Raises:
        PageError: from ``__enter__`` when the main page answers 404.
        ValueError: from ``__enter__`` when the response is still not valid
            JSON after ``MAX_RETRIES`` retries (JSON decode errors are
            ``ValueError`` subclasses).
        ParseError: from ``__exit__`` when the ``with`` body raised an
            ordinary exception.
    """

    # Per-request timeouts, in seconds.
    PAGE_TIMEOUT = 3
    ROLES_TIMEOUT = 2
    EXTRA_TIMEOUT = 1
    # Retry policy for responses whose body is not valid JSON.
    MAX_RETRIES = 3
    RETRY_DELAY = 3  # seconds to wait between attempts

    def __init__(self, url: str) -> None:
        assert isinstance(url, str), 'url has the type str'
        self.url = url

    @staticmethod
    def _names(entries, role: str, kind: str) -> list:
        """Collect ``entry[kind]['name']`` for every entry tagged with ``role``.

        Entries whose ``kind`` object is missing or empty are skipped instead
        of crashing on ``None.get``.
        """
        names = []
        for entry in entries:
            if role not in (entry.get('roles_russian') or ()):
                continue
            subject = entry.get(kind)
            if subject:
                names.append(subject.get('name'))
        return names

    @property
    def get_roles(self) -> dict:
        """Fetch ``url/roles`` and return main characters, directors and original authors.

        Note that this is a property which performs a network request on
        every access. The raw payload is kept in ``self.page_roles`` and the
        result in ``self.roles``.
        """
        self.page_roles = requests.get(self.url + '/roles', timeout=self.ROLES_TIMEOUT).json()
        self.roles = {
            'characters': self._names(self.page_roles, 'Main', 'character'),
            'director': self._names(self.page_roles, 'Режиссёр', 'person'),
            'original': self._names(self.page_roles, 'Автор оригинала', 'person'),
        }
        return self.roles

    def get_add_page(self, add_url: str) -> dict:
        """Return ``{add_url: <number of items at url/add_url>}``.

        Best effort: any ordinary failure (network error, bad JSON, payload
        without a length) yields an empty dict so the key is simply absent
        from the merged record. ``KeyboardInterrupt`` and ``SystemExit`` are
        deliberately not swallowed.
        """
        try:
            self.add_page = len(
                requests.get(self.url + '/' + add_url, timeout=self.EXTRA_TIMEOUT).json()
            )
        except Exception:
            return {}
        return {add_url: self.add_page}

    def __enter__(self) -> dict:
        """Download and merge all resources, retrying on undecodable JSON.

        Each attempt re-requests the main page. After ``MAX_RETRIES`` failed
        retries the last decode error is re-raised instead of recursing
        without bound.
        """
        for attempt in range(self.MAX_RETRIES + 1):
            self.page = requests.get(self.url, timeout=self.PAGE_TIMEOUT)
            if self.page.status_code == 404:
                raise PageError('не найдена страница', self.url)

            try:
                return {
                    **self.page.json(),
                    **self.get_roles,
                    **self.get_add_page('related'),
                    **self.get_add_page('similar'),
                    **self.get_add_page('external_links'),
                }
            except ValueError:
                # Covers json.JSONDecodeError and the simplejson-backed
                # variant that requests may raise; both subclass ValueError.
                if attempt == self.MAX_RETRIES:
                    raise
                time.sleep(self.RETRY_DELAY)

    def __exit__(self, exc_type, exc_value, exc_trace) -> None:
        """Wrap ordinary exceptions from the ``with`` body in ``ParseError``.

        ``KeyboardInterrupt``/``SystemExit`` are left to propagate unchanged
        so that interrupting the notebook is not reported as a parse failure.
        """
        if exc_type is not None and issubclass(exc_type, Exception):
            raise ParseError(exc_value) from exc_value

In [None]:
%%time
start_page = 1
end_page = 2
urls = 'https://shikimori.org/animes/kind/!music/status/released/page/'
data = pd.DataFrame()
for link in range(start_page, end_page):
    req = requests.get(urls + str(link))
    soup_pages = BeautifulSoup(req.text, 'html5lib')
    list_pages = (url.get('data-href') for url in soup_pages.find_all('div', class_ = 'cover linkeable anime-tooltip'))
    for url in list_pages:
        with Parse(url = 'https://shikimori.org/api/animes/' + url.split('animes/')[1]) as page:
            data = data.append(page, ignore_index = True)

In [51]:
# Write the scraped table to disk (pandas also writes the row index by default).
data.to_csv(path_or_buf='data_shiki.csv')