In [130]:
from bs4 import BeautifulSoup
import requests
import re
import time

In [131]:
url = 'https://onepiece.fandom.com/wiki/Eustass_Kid' # Char's wiki URL

response = requests.get(url)
html = response.content
soup = BeautifulSoup(html,'html.parser')

## Getting the name of the char

In [132]:
#--------GET NAME----------#
def get_name(soup):
    try:
        aside = soup.find_all('aside')
        get_name_text = aside[0].find('h2')
        return get_name_text.get_text()
    except:
        return 'Unknown'

#--------GET NAME----------#

## Getting the affiliation of the char

In [133]:
#-------------GET AFFILIATION---------------------------#
def get_affiliation(soup):
    try:
        affiliation = soup.find(attrs={"data-source": "affiliation"})
        get_affiliation_text = affiliation.find('a')
        return get_affiliation_text.get_text()
    except:
        return 'Unknown'
#-------------GET AFFILIATION---------------------------#

## Getting the Devil Fruit of the char

In [134]:
#------------GET DEVIL FRUIT---------------------------#
def get_devil_fruit(soup):
    try:
        devilfruit = soup.find(attrs={"data-source": "dftype"})
        get_devilfruit_text = devilfruit.find('a')
        if get_devilfruit_text == 'Mythical Zoan':
           get_devilfruit_text = 'Zoan'
        return get_devilfruit_text.get_text()
    except:
        return 'None'
#------------GET DEVIL FRUIT---------------------------#

## Getting the Haki Users ##

In [135]:
##---------------GET HAKI USER------------------##
def get_haki_users():
    haki_user_type = 0  # 0 for all, 1 for sao, 2 for oaa, 3 for oo, 4 for oa 
    all_haki = []
    sao = []
    oaa = []
    oo = []
    oa = []
    
    haki_users = {'All Types': None,
                  'Supreme King And Observation': None,
                  'Observation And Armament': None,
                  'Only Observation': None,
                  'Only Armament': None}

    haki_url = 'https://onepiece.fandom.com/wiki/Haki'
    haki_response = requests.get(haki_url)
    haki_html  = haki_response.content
    haki_soup = BeautifulSoup(haki_html, 'html.parser')

    haki_tbody = haki_soup.find_all('small')

    for current_user in haki_tbody:
        user = current_user.get_text()

        if user == 'John Giant':
            break
        
        if user == 'Marcus Mars':
            haki_user_type = 1
        elif user == 'Boa Marigold':
            haki_user_type = 2
        elif user == 'Satori':
            haki_user_type = 3
        elif user == 'Marguerite':
            haki_user_type = 4

        if user == 'Joy Boy':
            continue
        elif haki_user_type == 0: 
            all_haki.append(user)
        elif haki_user_type == 1:
            sao.append(user)
        elif haki_user_type == 2: 
            oaa.append(user)
        elif haki_user_type == 3: 
            oo.append(user)
        elif haki_user_type == 4: 
            oa.append(user)

    # Haki kullanıcılarını temizleme
    all_haki = [re.sub(r'\s*\(.*?\)|\xa0|\†|≠|\?', '', char).strip() for char in all_haki]
    oaa = [re.sub(r'\s*\(.*?\)|\xa0|\†|≠|\?', '', char).strip() for char in oaa]
    oo = [re.sub(r'\s*\(.*?\)|\xa0|\†|≠|\?', '', char).strip() for char in oo]
    oa = [re.sub(r'\s*\(.*?\)|\xa0|\†|≠|\?', '', char).strip() for char in oa]

    haki_users['All Types'] = all_haki
    haki_users['Supreme King And Observation'] = sao
    haki_users['Observation And Armament'] = oaa
    haki_users['Only Observation'] = oo
    haki_users['Only Armament'] = oa

    return haki_users


##---------------GET HAKI USER------------------##

## Getting the bounty if exists

In [136]:
#-------------------GET BOUNTY---------------------#
def get_bounty(soup):
    try:
        bounty = soup.find(attrs={"data-source": "bounty"})
        get_bounty_text = bounty.find('div')
        bounty_text = get_bounty_text.get_text().split('[')[0].replace(',', '').replace('(', '').replace(')', '')
        return bounty_text
    except:
        return 'Unknown'
#-------------------GET BOUNTY---------------------#

## Getting the height of Char

In [137]:
#----------------GET HEIGHT---------------#
def get_height(soup):
    try:
        height = soup.find(attrs={"data-source": "height"})
        get_height_text = height.find('div')
        height_text = get_height_text.get_text()

        heights = re.findall(r'\d+\s*cm', height_text)
        if heights:
            latest_height = heights[-1]
            latest_height = int(re.sub(r'\s*cm', '', latest_height))
            return latest_height
        else:
            return 'Unknown'
    except:
        return 'Unknown'
#----------------GET HEIGHT---------------#

## Getting the Image of Char

In [138]:
#---------------GET IMAGE-------------------#
def get_image(soup,test):
    image = soup.find('aside')
    image_a = image.find('img')

    source = image_a['srcset']

    filename = test.translate(str.maketrans('ıİçÇşŞğĞüÜöÖ ', 'iIcCsSgGuUoO_')) + ".jpg"

    photo = requests.get(source)

    if photo.status_code != 200:
       print('Something went wrong')
    else:
       with open(filename, 'wb') as f:
           f.write(photo.content)
           print(f"{filename} başarıyla kaydedildi.")

    return filename

#---------------GET IMAGE-------------------#

## Getting where the character is from

In [139]:
#--------------------GET ORIGIN-----------------#
def get_origin(soup):
    try:
        origin = soup.find(attrs={"data-source": "origin"})
        get_origin_text = origin.find('a')
        return get_origin_text.get_text()
    except:
        return 'Unknown'
#--------------------GET ORIGIN-----------------#

In [140]:

haki_users = get_haki_users()


def get_character_data(url, haki_users):
    character_info = {}
    
    
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    

    name_of_char = get_name(soup)
    character_info['name'] = get_name(soup)
    character_info['affiliation'] = get_affiliation(soup)
    character_info['devil_fruit'] = get_devil_fruit(soup)
    character_info['bounty'] = get_bounty(soup)
    character_info['height'] = get_height(soup)
    character_info['origin'] = get_origin(soup)
    character_info['haki'] = 'None'
    character_info['image'] = get_image(soup,test=name_of_char)

    for haki_type, haki_list in haki_users.items():
        if character_info['name'] in haki_list:
            character_info['haki'] = haki_type
            break

    return character_info

    

In [141]:
haki_users = get_haki_users()


character_urls = {
    "Monkey D. Luffy": "https://onepiece.fandom.com/wiki/Monkey_D._Luffy",
    "Roronoa Zoro": "https://onepiece.fandom.com/wiki/Roronoa_Zoro",
    "Nami": "https://onepiece.fandom.com/wiki/Nami",
    "Usopp": "https://onepiece.fandom.com/wiki/Usopp",
    "Sanji": "https://onepiece.fandom.com/wiki/Sanji",
    "Tony Tony Chopper": "https://onepiece.fandom.com/wiki/Tony_Tony_Chopper",
    "Nico Robin": "https://onepiece.fandom.com/wiki/Nico_Robin",
    "Franky": "https://onepiece.fandom.com/wiki/Franky",
    "Brook": "https://onepiece.fandom.com/wiki/Brook",
    "Jinbe": "https://onepiece.fandom.com/wiki/Jinbe",
    "Portgas D. Ace": "https://onepiece.fandom.com/wiki/Portgas_D._Ace",
    "Shanks": "https://onepiece.fandom.com/wiki/Shanks",
    "Edward Newgate (Whitebeard)": "https://onepiece.fandom.com/wiki/Edward_Newgate",
    "Gol D. Roger": "https://onepiece.fandom.com/wiki/Gol_D._Roger",
    "Silvers Rayleigh": "https://onepiece.fandom.com/wiki/Silvers_Rayleigh",
    "Buggy": "https://onepiece.fandom.com/wiki/Buggy",
    "Dracule Mihawk": "https://onepiece.fandom.com/wiki/Dracule_Mihawk",
    "Boa Hancock": "https://onepiece.fandom.com/wiki/Boa_Hancock",
    "Donquixote Doflamingo": "https://onepiece.fandom.com/wiki/Donquixote_Doflamingo",
    "Crocodile": "https://onepiece.fandom.com/wiki/Crocodile",
    "Enel": "https://onepiece.fandom.com/wiki/Enel",
    "Trafalgar D. Water Law": "https://onepiece.fandom.com/wiki/Trafalgar_D._Water_Law",
    "Eustass Kid": "https://onepiece.fandom.com/wiki/Eustass_Kid",
    "Kaido": "https://onepiece.fandom.com/wiki/Kaido",
    "Charlotte Linlin (Big Mom)": "https://onepiece.fandom.com/wiki/Charlotte_Linlin",
    "Marshall D. Teach (Blackbeard)": "https://onepiece.fandom.com/wiki/Marshall_D._Teach",
    "Sabo": "https://onepiece.fandom.com/wiki/Sabo",
    "Kozuki Oden": "https://onepiece.fandom.com/wiki/Kozuki_Oden",
    "Killer": "https://onepiece.fandom.com/wiki/Killer",
    "Kin'emon": "https://onepiece.fandom.com/wiki/Kin%27emon",
    "Smoker": "https://onepiece.fandom.com/wiki/Smoker",
    "Sengoku": "https://onepiece.fandom.com/wiki/Sengoku",
    "Monkey D. Garp": "https://onepiece.fandom.com/wiki/Monkey_D._Garp",
    "Sakazuki (Akainu)": "https://onepiece.fandom.com/wiki/Sakazuki",
    "Borsalino (Kizaru)": "https://onepiece.fandom.com/wiki/Borsalino",
    "Issho (Fujitora)": "https://onepiece.fandom.com/wiki/Issho",
    "Kuzan (Aokiji)": "https://onepiece.fandom.com/wiki/Kuzan",
    "Donquixote Rosinante": "https://onepiece.fandom.com/wiki/Donquixote_Rosinante",
    "Bartolomeo": "https://onepiece.fandom.com/wiki/Bartolomeo",
    "Cavendish": "https://onepiece.fandom.com/wiki/Cavendish",
    "Bellamy": "https://onepiece.fandom.com/wiki/Bellamy",
    "Pedro": "https://onepiece.fandom.com/wiki/Pedro",
    "Carrot": "https://onepiece.fandom.com/wiki/Carrot",
    "Shirahoshi": "https://onepiece.fandom.com/wiki/Shirahoshi",
    "Vinsmoke Reiju": "https://onepiece.fandom.com/wiki/Vinsmoke_Reiju",
    "Vinsmoke Judge": "https://onepiece.fandom.com/wiki/Vinsmoke_Judge",
    "Hody Jones": "https://onepiece.fandom.com/wiki/Hody_Jones",
    "Caesar Clown": "https://onepiece.fandom.com/wiki/Caesar_Clown",
    "Capone Bege": "https://onepiece.fandom.com/wiki/Capone_Bege",
    "Katakuri": "https://onepiece.fandom.com/wiki/Charlotte_Katakuri",
    "Pekoms": "https://onepiece.fandom.com/wiki/Pekoms",
    "Vivi": "https://onepiece.fandom.com/wiki/Nefertari_Vivi",
    "Marco": "https://onepiece.fandom.com/wiki/Marco",
    "Jozu": "https://onepiece.fandom.com/wiki/Jozu",
    "Vista": "https://onepiece.fandom.com/wiki/Vista",
    "King": "https://onepiece.fandom.com/wiki/King",
    "Queen": "https://onepiece.fandom.com/wiki/Queen",
    "Jack": "https://onepiece.fandom.com/wiki/Jack",
    "X Drake": "https://onepiece.fandom.com/wiki/X_Drake",
    "Scratchmen Apoo": "https://onepiece.fandom.com/wiki/Scratchmen_Apoo",
    "Basil Hawkins": "https://onepiece.fandom.com/wiki/Basil_Hawkins",
    "Ulti": "https://onepiece.fandom.com/wiki/Ulti",
    "Page One": "https://onepiece.fandom.com/wiki/Page_One",
    "Black Maria": "https://onepiece.fandom.com/wiki/Black_Maria",
    "Who's-Who": "https://onepiece.fandom.com/wiki/Who's-Who",
    "Sasaki": "https://onepiece.fandom.com/wiki/Sasaki",
    "Kiku": "https://onepiece.fandom.com/wiki/Kikunojo",
    "Denjiro": "https://onepiece.fandom.com/wiki/Denjiro",
    "Kawamatsu": "https://onepiece.fandom.com/wiki/Kawamatsu",
    "Raizo": "https://onepiece.fandom.com/wiki/Raizo",
    "Orochi": "https://onepiece.fandom.com/wiki/Kurozumi_Orochi",
    "Yamato": "https://onepiece.fandom.com/wiki/Yamato",
    "Tama": "https://onepiece.fandom.com/wiki/Tama",
    "Kin'emon": "https://onepiece.fandom.com/wiki/Kin%27emon",
    "Perona": "https://onepiece.fandom.com/wiki/Perona",
    "Shiki": "https://onepiece.fandom.com/wiki/Shiki",
    "Zephyr": "https://onepiece.fandom.com/wiki/Zephyr"
}


character_data = {}


for character, url in character_urls.items():
    print(f"fetching data for {character} ")
    character_data[character] = get_character_data(url, haki_users)
    
    
    time.sleep(2)


print(character_data)

fetching data for Monkey D. Luffy 
Monkey_D._Luffy.jpg başarıyla kaydedildi.
fetching data for Roronoa Zoro 
Roronoa_Zoro.jpg başarıyla kaydedildi.
fetching data for Nami 
Nami.jpg başarıyla kaydedildi.
fetching data for Usopp 
Usopp.jpg başarıyla kaydedildi.
fetching data for Sanji 
Sanji.jpg başarıyla kaydedildi.
fetching data for Tony Tony Chopper 
Tony_Tony_Chopper.jpg başarıyla kaydedildi.
fetching data for Nico Robin 
Nico_Robin.jpg başarıyla kaydedildi.
fetching data for Franky 
Franky.jpg başarıyla kaydedildi.
fetching data for Brook 
Brook.jpg başarıyla kaydedildi.
fetching data for Jinbe 
Jinbe.jpg başarıyla kaydedildi.
fetching data for Portgas D. Ace 
Portgas_D._Ace.jpg başarıyla kaydedildi.
fetching data for Shanks 
Shanks.jpg başarıyla kaydedildi.
fetching data for Edward Newgate (Whitebeard) 
Edward_Newgate.jpg başarıyla kaydedildi.
fetching data for Gol D. Roger 
Gol_D._Roger.jpg başarıyla kaydedildi.
fetching data for Silvers Rayleigh 
Silvers_Rayleigh.jpg başarıyla ka

In [142]:
import pandas as pd

df = pd.DataFrame.from_dict(character_data, orient='index')
df.reset_index(inplace=True)
df.rename(columns={'index': 'character_name'}, inplace=True)
df = df.drop(['character_name'],axis = 1)

In [144]:
df_copy = df

In [147]:
df_copy

Unnamed: 0,name,affiliation,devil_fruit,bounty,height,origin,haki,image
0,Monkey D. Luffy,Straw Hat Pirates,Paramecia,3000000000,174,East Blue,All Types,Monkey_D._Luffy.jpg
1,Roronoa Zoro,Straw Hat Pirates,,1111000000,181,East Blue,All Types,Roronoa_Zoro.jpg
2,Nami,Straw Hat Pirates,,366000000,170,East Blue,,Nami.jpg
3,Usopp,Straw Hat Pirates,,500000000,176,East Blue,Only Observation,Usopp.jpg
4,Sanji,Straw Hat Pirates,,1032000000,180,North Blue,Observation And Armament,Sanji.jpg
...,...,...,...,...,...,...,...,...
71,Yamato,Ninja-Pirate-Mink-Samurai Alliance,Mythical Zoan,Unknown,263,Unknown,All Types,Yamato.jpg
72,Kurozumi Tama,Kurozumi Family,Paramecia,Unknown,108,Unknown,,Kurozumi_Tama.jpg
73,Perona,Dracule Mihawk,Paramecia,Unknown,160,West Blue,,Perona.jpg
74,Shiki,Golden Lion Pirates,Paramecia,Unknown,Unknown,Unknown,,Shiki.jpg


In [148]:
df_copy['devil_fruit'].unique()

array(['Paramecia', 'None', 'Zoan', 'Logia', 'Mythical Zoan',
       'Special Paramecia', 'Ancient Zoan'], dtype=object)

In [149]:
df_copy['origin'].unique()

array(['East Blue', 'North Blue', 'Grand Line', 'West Blue', 'South Blue',
       'Unknown', 'Calm Belt', 'Red Line', 'Sky Islands',
       'Kutsukku Island', 'Wano Country'], dtype=object)

In [151]:
for i in range(0,len(df_copy)-1):

    if df_copy['origin'][i] == 'Wano Country' or df_copy['origin'][i] == 'Sky Islands':
        df_copy['origin'][i] = 'Grand Line' 
    if df_copy['origin'][i] == 'Kutsukku Island':
        df_copy['origin'][i] = 'South Blue'

    

In [152]:
df_copy['origin'].unique()

array(['East Blue', 'North Blue', 'Grand Line', 'West Blue', 'South Blue',
       'Unknown', 'Calm Belt', 'Red Line'], dtype=object)

In [153]:
df_copy['devil_fruit'].unique()

array(['Paramecia', 'None', 'Zoan', 'Logia', 'Mythical Zoan',
       'Special Paramecia', 'Ancient Zoan'], dtype=object)

In [155]:
for i in range(0,len(df_copy)-1):

    if df['devil_fruit'][i] == 'Mythical Zoan' or df['devil_fruit'][i] == 'Ancient Zoan':
        df['devil_fruit'][i] = 'Zoan'
    if df['devil_fruit'][i] == 'Special Paramecia':
        df['devil_fruit'][i] = 'Paramecia'

In [157]:
df_copy['devil_fruit'].unique()

array(['Paramecia', 'None', 'Zoan', 'Logia'], dtype=object)

In [159]:
df_copy.to_csv('character_data.csv', index=True)