In [11]:
import requests
import bs4
from bs4 import BeautifulSoup
import pandas as pd
from pathlib import Path

In [3]:
def fetchAnimeContent(anime: bs4.element.Tag, timeout: float = 30):
    """Collect the attributes of one anime from its listing card and detail page.

    The title, poster and detail-page link are read from the listing card
    itself; the description and genres require one extra HTTP request to the
    anime's detail page.

    Parameters
    ----------
    anime : bs4.element.Tag
        One listing card (the notebook passes ``div.flw-item`` elements).
    timeout : float, optional
        Seconds to wait for the detail page before giving up. Without a
        timeout ``requests`` can block forever on a stalled connection.

    Returns
    -------
    tuple
        ``(title, genres, description, poster, forwardLink)`` where ``genres``
        is a list of strings and ``forwardLink`` is the detail-page URL.

    Raises
    ------
    requests.RequestException
        If the detail page cannot be fetched (timeout, connection error,
        non-2xx status).
    AttributeError, TypeError, KeyError
        If the card or the detail page lacks the expected markup
        (a ``find`` returned ``None`` or an attribute is absent).
    """
    # Parse everything available on the card first: a malformed card then
    # fails before any network request is spent on it.
    posterBlock = anime.find('div', class_='film-poster')
    forwardLink = f"https://aniwatch.to{posterBlock.a['href']}"
    poster = posterBlock.find('img')['data-src']
    title = anime.find('div', class_='film-detail').find('h3', class_='film-name').a['title']

    # follow the link to the anime's detail page
    response = requests.get(forwardLink, timeout=timeout)
    response.raise_for_status()  # do not try to parse an error page
    soup = BeautifulSoup(response.text, 'html.parser')
    moreAttr = soup.find('div', class_='ani_detail-stage').find('div', class_='container')

    # attributes that only exist on the detail page
    description = moreAttr.find('div', class_='film-description m-hide').div.text.strip()
    genres = [genre.text for genre in moreAttr.find('div', class_='item item-list').find_all('a')]

    return title, genres, description, poster, forwardLink

## SCRAPE ANIME MOVIES FROM ANIWATCH.TO (ZORO)

In [5]:
# Container for the scraped attributes. It is created before the ``try`` so
# that it exists (possibly only partially filled) even when a request fails,
# and the DataFrame cell that follows never hits a NameError.
moviesContainer = {'title': [], 'genres': [], 'description': [], 'poster': [], 'url': []}

try:
    baseUrl = 'https://aniwatch.to/movie'

    # Listing pages 1..25. Each page is fetched right before it is scraped,
    # so no request is spent on a page that is never read.
    for pageNumber in range(1, 26):
        # the first page is the bare listing URL, later pages use ?page=N
        listingUrl = baseUrl if pageNumber == 1 else f'{baseUrl}?page={pageNumber}'
        response = requests.get(listingUrl, timeout=30)

        # locate the anime cards on the listing page
        soup = BeautifulSoup(response.text, 'html.parser')
        page = soup.find('div', class_='tab-content')
        animes = page.find_all('div', class_='flw-item')

        for anime in animes:
            try:
                title, genres, description, poster, animeUrl = fetchAnimeContent(anime)
            except Exception:
                # Best-effort: skip cards whose detail page cannot be fetched
                # or parsed. ``Exception`` (not a bare ``except``) keeps
                # Ctrl-C / kernel interrupt working during the long scrape.
                continue

            # append all five fields together so the columns stay aligned
            moviesContainer['title'].append(title)
            moviesContainer['genres'].append(genres)
            moviesContainer['description'].append(description)
            moviesContainer['poster'].append(poster)
            moviesContainer['url'].append(animeUrl)
except Exception as error:
    # a failed listing page (network error or missing markup) stops the scrape;
    # whatever was collected so far stays in moviesContainer
    print('Error Occured:', error)

In [6]:
# Build the movies table: one column per key of moviesContainer,
# one row per successfully scraped anime.
anime_movies = pd.DataFrame(moviesContainer)

In [9]:
# (rows, columns) of the scraped movies table.
# NOTE(review): the saved output shows 4 columns although moviesContainer has
# 5 keys (including 'url') — the output looks stale; re-run to confirm.
anime_movies.shape

(850, 4)

In [10]:
# Preview the first two rows as a quick sanity check of the scraped fields.
anime_movies.head(2)

Unnamed: 0,title,genres,description,poster
0,Detective Conan Movie: The Story of Haibara Ai...,"[Adventure, Police]",The movie will focus on Ai Haibara's past and ...,https://img.flawlessfiles.com/_r/300x400/100/c...
1,Tsurune Movie: Hajimari no Issha,"[School, Sports]",Movie adaptation of Tsurune: Kazemai Koukou Ky...,https://img.flawlessfiles.com/_r/300x400/100/f...


In [None]:
# Persist the movies table under ./datasets relative to the working directory.
# to_csv does not create missing folders, so make sure the directory exists.
moviesCsvPath = Path.cwd() / 'datasets' / 'anime_movies.csv'
moviesCsvPath.parent.mkdir(parents=True, exist_ok=True)
# The index is written as the first column (pandas default); read the file
# back with index_col=0 to avoid an extra "Unnamed: 0" column.
anime_movies.to_csv(moviesCsvPath)

## SCRAPE ANIME SERIES (MOST-FAVORITE LIST) FROM ANIWATCH.TO (ZORO)

In [None]:
# Container for the scraped attributes. It is created before the ``try`` so
# that it exists (possibly only partially filled) even when a request fails,
# and the DataFrame cell that follows never hits a NameError.
mostFavorites = {'title': [], 'genres': [], 'description': [], 'poster': [], 'url': []}

try:
    baseUrl = 'https://aniwatch.to/most-favorite'

    # Listing pages 1..150. Each page is fetched right before it is scraped,
    # so no request is spent on a page that is never read.
    for pageNumber in range(1, 151):
        # the first page is the bare listing URL, later pages use ?page=N
        listingUrl = baseUrl if pageNumber == 1 else f'{baseUrl}?page={pageNumber}'
        response = requests.get(listingUrl, timeout=30)

        # locate the anime cards on the listing page
        soup = BeautifulSoup(response.text, 'html.parser')
        page = soup.find('div', class_='tab-content')
        animes = page.find_all('div', class_='flw-item')

        for anime in animes:
            try:
                title, genres, description, poster, animeUrl = fetchAnimeContent(anime)
            except Exception:
                # Best-effort: skip cards whose detail page cannot be fetched
                # or parsed. ``Exception`` (not a bare ``except``) keeps
                # Ctrl-C / kernel interrupt working during the long scrape.
                continue

            # append all five fields together so the columns stay aligned
            mostFavorites['title'].append(title)
            mostFavorites['genres'].append(genres)
            mostFavorites['description'].append(description)
            mostFavorites['poster'].append(poster)
            mostFavorites['url'].append(animeUrl)
except Exception as error:
    # a failed listing page (network error or missing markup) stops the scrape;
    # whatever was collected so far stays in mostFavorites
    print('Error Occured:', error)

In [None]:
# Build the series table (one column per key of mostFavorites, one row per
# successfully scraped anime) and display its (rows, columns).
# NOTE(review): the saved output shows 4 columns although mostFavorites has
# 5 keys (including 'url') — the output looks stale; re-run to confirm.
anime_series = pd.DataFrame(mostFavorites)
anime_series.shape

(5981, 4)

In [None]:
# Persist the series table under ./datasets relative to the working directory.
# to_csv does not create missing folders, so make sure the directory exists.
seriesCsvPath = Path.cwd() / 'datasets' / 'anime_series.csv'
seriesCsvPath.parent.mkdir(parents=True, exist_ok=True)
# The index is written as the first column (pandas default); read the file
# back with index_col=0 to avoid an extra "Unnamed: 0" column.
anime_series.to_csv(seriesCsvPath)