In [1]:
# Import necessary packages for web scraping
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [10]:
# Read data from 'one_piece_characters.csv' into a dataframe 
op_names = pd.read_csv('one_piece_characters.csv')

name_list = []

for name in op_names['Name'].tolist():
    formatted_name = name.replace(' ', '_')
    
    URL = "https://onepiece.fandom.com/wiki/" + formatted_name
    try:
        r = requests.get(URL)
        r.raise_for_status()
        
        soup = BeautifulSoup(r.content, 'html.parser')
        table_statistics = soup.find('aside', attrs={'class': 'portable-infobox'}).findAll('section')
        
        # Character Data
        character_data = {}
        for section in table_statistics[:1]:
            for item in section.findAll('div', {'class': 'pi-item'}):
                label = item.find('h3', class_='pi-data-label').text.strip()
                value = item.find('div', class_='pi-data-value').text.strip()
                character_data[label] = value
        
        # Devil Fruit Data
        devil_fruit_data = {}
        for section in table_statistics[1:]:
            for item in section.findAll('div', {'class': 'pi-item'}):
                label = 'Devil Fruit ' + item.find('h3', class_='pi-data-label').text.strip()
                value = item.find('div', class_='pi-data-value').text.strip()
                devil_fruit_data[label] = value
        
        # Merge character_data and devil_fruit_data
        all_data = {**character_data, **devil_fruit_data}
        
        data_details = {name: all_data}
        name_list.append(data_details)
        
    except requests.exceptions.HTTPError as e:
        print("Error " + name)

    except Exception as e:
        print("An unexpected error occurred for {}: {}".format(name, str(e)))
        
print("Finished scrapping") 

Error Jack-in-the-Box
An unexpected error occurred for Minatomo: 'NoneType' object has no attribute 'findAll'
An unexpected error occurred for Minatomo: 'NoneType' object has no attribute 'findAll'
Error Mr. Sacrifice
Error Wall Zombie
Finished scrapping


In [15]:
print(name_list[0])

{'A O': {'Japanese Name:': 'A・O', 'Romanized Name:': 'Ā Ō', 'Official English Name:': 'A.O. (VIZ Media and FUNimation subs);A.O (FUNimation dub)', 'Debut:': 'Chapter 551; Episode 460[1]', 'Affiliations:': 'A O Pirates;[2] Subordinate of the Whitebeard Pirates[1][2]', 'Occupations:': 'Pirate; Captain[1][2]', 'Status:': 'Unknown', 'Birthday:': 'January 15th[3]', 'Japanese VA:': 'Kōhei Fukuhara', 'Funi English VA:': 'Mark Fickert'}}


In [17]:
# Create a dataframe with the combined data.
df = pd.concat([pd.DataFrame(result) for result in name_list], axis=1)
df = df.transpose()

df.head(3)

Unnamed: 0,Affiliations:,Birthday:,Debut:,Funi English VA:,Japanese Name:,Japanese VA:,Occupations:,Official English Name:,Romanized Name:,Status:,...,Features:,Devil Fruit Affiliations:,Devil Fruit Funi English VA:,Devil Fruit Japanese VA:,Homeland:,Captain:,Total Bounty:,Fighting Style Focus:,Literal Meaning:,User(s):
A O,A O Pirates;[2] Subordinate of the Whitebeard ...,January 15th[3],Chapter 551; Episode 460[1],Mark Fickert,A・O,Kōhei Fukuhara,Pirate; Captain[1][2],A.O. (VIZ Media and FUNimation subs);A.O (FUNi...,Ā Ō,Unknown,...,,,,,,,,,,
Abdullah,Ideo Pirates[2][3]; Straw Hat Grand Fleet[4],,Chapter 704; Episode 633[1],Sean O'Connor,アブドーラ,Keiji Hirai,"Pirate[3], Criminal; Bounty Hunter (former)[1]",Abdullah,Abudōra,Alive,...,,,,,,,,,,
Absalom,Thriller Bark Pirates (Mysterious Four)[2] (fo...,December 30th[5],Chapter 444; Episode 339[1],Andrew Chandler,アブサロム,Hiroaki Miura,Reporter[3]Soldier Zombie and General Zombie L...,Absalom,Abusaromu,Deceased,...,,,,,,,,,,


In [20]:
# Display information about the dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1376 entries, A O to Zunesha
Data columns (total 61 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   Affiliations:                       1169 non-null   object
 1   Birthday:                           712 non-null    object
 2   Debut:                              1351 non-null   object
 3   Funi English VA:                    940 non-null    object
 4   Japanese Name:                      1369 non-null   object
 5   Japanese VA:                        1004 non-null   object
 6   Occupations:                        1162 non-null   object
 7   Official English Name:              1366 non-null   object
 8   Romanized Name:                     1367 non-null   object
 9   Status:                             1350 non-null   object
 10  Age at Death:                       39 non-null     object
 11  Age:                                460 non-null    obje

In [21]:
# Save dataframe to csv file
csv_file = "onepiece_character_detail.csv"
df.to_csv(csv_file, index=False)