In [5]:
import os
import pandas as pd
from selenium import webdriver
from bs4 import BeautifulSoup

# Add the directory containing the Edge WebDriver to the PATH environment variable.
# PATH entries are directories, so the folder holding the executable is
# registered rather than the path of the executable itself.
webdriver_path = 'C:/msedgedriver.exe'
webdriver_dir = os.path.dirname(webdriver_path)
current_path = os.environ.get("PATH", "")
# Skip the append when the directory is already listed, so re-running this
# cell does not keep growing PATH with duplicates.
if webdriver_dir not in current_path.split(os.pathsep):
    os.environ["PATH"] = os.pathsep.join(
        part for part in (current_path, webdriver_dir) if part
    )

# Function to scrape anime data from a single page
def scrape_anime_data(url):
    """Scrape all anime cards from a single listing page.

    A new Edge WebDriver session is started for the request and is always
    shut down again, even if loading the page fails.

    Args:
        url: Address of the listing page to load.

    Returns:
        A list with one dict per ``<li class="card">`` element found on the
        page. Each dict has the keys ``id``, ``name``, ``rating``,
        ``description``, ``tags`` and ``image_url``; a value that cannot be
        found in the markup is reported as ``'N/A'``.
    """
    driver = webdriver.Edge()  # Initialize WebDriver inside the function to ensure a fresh session
    try:
        driver.get(url)
        page_source = driver.page_source
    finally:
        # Release the browser no matter what happened above; otherwise a
        # failed page load would leave an Edge process running.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    return [_parse_anime_card(card) for card in soup.find_all('li', class_='card')]


def _parse_anime_card(card):
    """Turn one ``<li class="card">`` element into a flat record dict."""
    name_elem = card.find('h3', class_='cardName')
    name = name_elem.text.strip() if name_elem else 'N/A'

    # The first link's ``title`` attribute is itself an HTML fragment that
    # holds the rating, description and tags, so it is parsed a second time.
    # A card without a link or title yields an empty fragment, which makes
    # all three fields fall back to 'N/A' instead of raising.
    link = card.find('a')
    tooltip_html = link.get('title', '') if link else ''
    tooltip = BeautifulSoup(tooltip_html, 'html.parser')

    rating_elem = tooltip.find('div', class_='ttRating')
    rating = rating_elem.text.strip() if rating_elem else 'N/A'

    description_elem = tooltip.find('p')
    description = description_elem.text.strip() if description_elem else 'N/A'

    tags_elem = tooltip.find('div', class_='tags')
    if tags_elem:
        tags = ", ".join(tag.text.strip() for tag in tags_elem.find_all('li'))
    else:
        tags = 'N/A'

    # Prefer ``data-src`` over ``src`` when the image carries both.
    img_tag = card.find('img')
    if img_tag is None:
        image_url = 'N/A'
    elif 'data-src' in img_tag.attrs:
        image_url = img_tag['data-src']
    else:
        image_url = img_tag.get('src', 'N/A')

    return {
        'id': card.get('data-id', 'N/A'),
        'name': name,
        'rating': rating,
        'description': description,
        'tags': tags,
        'image_url': image_url,  # Include image URL in the dictionary
    }

# Define the base URL; the page number is appended to it for every request
base_url = 'https://www.anime-planet.com/anime/all?sort=status_1&order=desc&page='

# Inclusive range of listing pages to scrape
first_page = 1
last_page = 674

# List to store all scraped data
all_anime_data = []

# Scrape data from pages first_page to last_page (one browser session per page)
for page_num in range(first_page, last_page + 1):
    url = base_url + str(page_num)
    anime_data = scrape_anime_data(url)
    all_anime_data.extend(anime_data)

# Convert the list of dictionaries into a DataFrame
anime_df = pd.DataFrame(all_anime_data)

# Save the DataFrame as CSV. The file name has no extension, and the row
# index is written as the first (unnamed) column.
anime_df.to_csv('Anime-Planet')

Unnamed: 0,id,name,rating,description,tags,image_url
23301,16925,Perfect World,4.1,Born into a unique world where villages fight ...,"Action, Adventure, Fantasy, Chinese Animation,...",https://cdn.anime-planet.com/anime/primary/per...
23302,16721,MILGRAM,4.1,Vocaloid composer DECO*27 and The Caligula Eff...,"Drama, Abstract, Crime, Criminals, Psychologic...",https://cdn.anime-planet.com/anime/primary/mil...
23303,15050,Crayon Shin-chan Specials,3.8,This entry currently doesn't have a synopsis. ...,"Comedy, Ecchi, Slice of Life, Crude, Mature Th...",https://cdn.anime-planet.com/anime/primary/cra...
23304,14877,Wu Shen Zhuzai,4.1,"Qin Chen, a legendary figure of the Tianwu con...","Action, Fantasy, Chinese Animation, Short Epis...",https://cdn.anime-planet.com/anime/primary/wu-...
23305,5670,Chibi Maruko-chan (1995),3.5,This entry currently doesn't have a synopsis. ...,"Comedy, Shoujo, Slice of Life, Family Friendly...",https://cdn.anime-planet.com/anime/primary/chi...
