In [2]:
from bs4 import BeautifulSoup
import requests as r
import pandas as pd
import time

# Download the wiki page that lists the animated media and collect, for every
# row of the episode table, the link to the episode page, the episode name and
# the two airdate columns.
url = 'https://naruto.fandom.com/wiki/List_of_Animated_Media'
# A timeout keeps a stalled connection from hanging the whole run.
result = r.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
result.raise_for_status()
content = result.text
soup = BeautifulSoup(content, 'lxml')

#getting hrefs for all episodes of Naruto to be able to scrap data from all pages
# soup.find returns the first table with this exact class string.
eps_table = soup.find('table', {'class': "box table coloured bordered innerbordered style-basic fill-horiz"})
if eps_table is None:
    raise RuntimeError('Episode table not found on ' + url)
ep_links = []
ep_names = []
jap_airdate = []
eng_airdate = []
# The first row is skipped (presumably the header row).
for row in eps_table.select("tr")[1:]:
    link = row.find('a', href=True)
    cells = row.select('td')
    # Skip rows that are not full episode rows (no link or fewer than three
    # data cells) so the four lists always stay the same length.
    if link is None or len(cells) < 3:
        continue
    ep_links.append(link['href'])
    # strip() removes the surrounding whitespace/newline without cutting a
    # real character when the cell text does not end with '\n'.
    ep_names.append(cells[0].get_text().strip())
    jap_airdate.append(cells[1].get_text().strip())
    eng_airdate.append(cells[2].get_text().strip())
#scraping lists of chars that appeared in each episode, arcs, openings and endings
# Each of the four lists receives exactly one entry per episode link, so they
# stay aligned with ep_links regardless of what a page contains.
char_appear_list = []
arcs = []
openings = []
endings = []
for ep_link in ep_links:
    url = 'https://naruto.fandom.com' + ep_link
    # A timeout keeps a stalled connection from hanging the whole run.
    result = r.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of silently recording empty data.
    result.raise_for_status()
    content = result.text
    soup = BeautifulSoup(content, 'lxml')

    # Character names are the bold text of each row after the first two rows
    # of the table (presumably header rows). A page without the table, or a
    # row without bold text, simply contributes no names.
    char_list = []
    char_table = soup.find('table', {'class': "wikitable fill-horiz cell-align-center"})
    if char_table is not None:
        for row in char_table.select("tr")[2:]:
            name_tag = row.find("b")
            if name_tag is not None:
                char_list.append(name_tag.get_text())
    char_appear_list.append(char_list)

    # Infobox labels and values are paired by position, as before, but each
    # collection is queried only once per page.
    labels = soup.find_all('h3', {'class': "pi-data-label pi-secondary-font"})
    values = soup.find_all('div', {'class': "pi-data-value pi-font"})
    infobox = {}
    for label, value in zip(labels, values):
        # Keep the first value seen for a label; a repeated label must not
        # add a second entry for the same episode.
        infobox.setdefault(label.get_text(), value.get_text())

    #we need every list to have the same length, so if there is no arc/ending/opening info we put 'None' in the list
    arcs.append(infobox.get('Arc', 'None'))
    openings.append(infobox.get('Opening', 'None'))
    endings.append(infobox.get('Ending', 'None'))
    # Short pause between requests.
    time.sleep(0.100)

#getting ratings for each episode from MAL website
# The episode list is requested in three pages, selected by the offset query
# string; one float is collected per table row that has a fifth cell.
ratings = []
for offset in ['', '?offset=100', '?offset=200']:
    url = 'https://myanimelist.net/anime/20/Naruto/episode' + offset
    # A timeout keeps a stalled connection from hanging the whole run.
    result = r.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page.
    result.raise_for_status()
    content = result.text
    soup = BeautifulSoup(content, 'lxml')

    ep_table = soup.find('table', {'class': "mt8 episode_list js-watch-episode-list"})
    if ep_table is None:
        raise RuntimeError('Episode rating table not found on ' + url)
    # The first row is skipped (presumably the header row).
    for row in ep_table.select("tr")[1:]:
        cells = row.select('td')
        # Rows without a fifth cell carry no score (presumably not episode
        # rows - verify against the page layout).
        if len(cells) < 5:
            continue
        # The score is read from the last three characters of the fifth
        # cell; stripping first keeps trailing whitespace out of the slice.
        score_text = cells[4].get_text().strip()[-3:]
        try:
            ratings.append(float(score_text))
        except ValueError:
            # Keep one entry per row even when the score is not numeric.
            ratings.append(float('nan'))

# Assemble the scraped lists into one table. Keys are the CSV column headers
# and insertion order is the column order.
data = {}
data['Episode Name'] = ep_names
data['Japanese Airdate'] = jap_airdate
data['English Airdate'] = eng_airdate
data['Char List'] = char_appear_list
data['Opening'] = openings
data['Ending'] = endings
data['Arc'] = arcs
data['Rating'] = ratings

# All lists must have the same length for the DataFrame to be built.
df = pd.DataFrame(data)
# Write the table (including the default integer index) to disk.
df.to_csv('naruto_website_scraper.csv')

['3 October 2002', '10 October 2002', '17 October 2002', '24 October 2002', '31 October 2002', '7 November 2002', '14 November 2002', '21 November 2002', '28 November 2002', '5 December 2002', '12 December 2002', '19 December 2002', '26 December 2002', '9 January 2003', '16 January 2003', '23 January 2003', '30 January 2003', '6 February 2003', '13 February 2003', '20 February 2003', '27 February 2003', '6 March 2003', '13 March 2003', '20 March 2003', '27 March 2003', '2 April 2003', '2 April 2003', '9 April 2003', '16 April 2003', '23 April 2003', '30 April 2003', '7 May 2003', '14 May 2003', '21 May 2003', '28 May 2003', '4 June 2003', '11 June 2003', '18 June 2003', '2 July 2003', '9 July 2003', '16 July 2003', '23 July 2003', '30 July 2003', '6 August 2003', '13 August 2003', '20 August 2003', '27 August 2003', '3 September 2003', '10 September 2003', '17 September 2003', '24 September 2003', '1 October 2003', '8 October 2003', '15 October 2003', '22 October 2003', '29 October 200