In [None]:
import requests
from bs4 import BeautifulSoup
import time
from collections import defaultdict
import csv

## Inisialisasi variabel

In [None]:
# Page URL prefix; the episode number is appended to it to form each request URL.
base_url = "https://onepiece.fandom.com/wiki/Episode_"

# Highest episode number the scrape loop visits.
max_episodes = 1141

# HTTP headers sent with every request: a desktop-browser User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}

# Accumulator filled by the scrape loop: character name -> list of episode numbers.
character_episodes = defaultdict(list)

## Fungsi untuk mendapatkan karakter dari setiap episode


In [18]:
def get_characters_from_episode(episode_number, timeout=10):
    """Return the character names listed on one episode's wiki page.

    Downloads ``f"{base_url}{episode_number}"``, looks for the ``<span>`` whose
    id is ``Characters_in_Order_of_Appearance``, walks up to its enclosing
    ``<h2>`` and reads the ``<li>`` items of the next ``<ul>`` after it.

    Args:
        episode_number: Value appended to ``base_url`` to build the page URL.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A list of names in page order. Each name is the ``title`` of the first
        link in the list item (or the item's text when there is no titled
        link), cut at the first "(" and stripped. Names that are empty after
        cleaning are left out. An empty list is returned, after printing a
        message, when the request fails, the section or list is missing, or
        parsing raises.
    """
    url = f"{base_url}{episode_number}"
    try:
        # Without a timeout a single stalled connection would block the whole
        # scrape; a timeout error is a RequestException and is handled below.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        span = soup.find("span", id="Characters_in_Order_of_Appearance")
        # find() yields None when the span is absent; guard before walking up
        # so the "no section" message below is reached instead of an
        # AttributeError being reported as a parsing error.
        character_section = span.find_parent("h2") if span else None

        if not character_section:
            print(f"No 'Characters in Order of Appearance' section found for Episode {episode_number}")
            return []
        character_list = character_section.find_next('ul')
        if not character_list:
            print(f"No character list found for Episode {episode_number}")
            return []

        characters = []
        for li in character_list.find_all('li'):
            link = li.find('a')
            # Prefer the link's title attribute; fall back to the item's plain text.
            name = link.get('title') if link else None
            if not name:
                name = li.get_text(strip=True)
            if not name:
                continue
            # Clean name (remove annotations like "(flashback)")
            cleaned_name = name.split('(')[0].strip()
            if cleaned_name:
                characters.append(cleaned_name)

        return characters

    except requests.RequestException as e:
        print(f"Error fetching Episode {episode_number}: {e}")
        return []
    except Exception as e:
        print(f"Error parsing Episode {episode_number}: {e}")
        return []

## Scraping dari episode 1 sampai max_episodes

In [None]:
# As an example, only episodes 1 through 10 are scraped here. To scrape
# everything, delete the assignment below; max_episodes is already set in the
# variable-initialisation cell.
max_episodes = 10

for episode in range(1, max_episodes + 1):
    print(f"Scraping Episode {episode}...")
    characters = get_characters_from_episode(episode)

    for character in characters:
        # Skip empty names. Also skip episodes already recorded for this
        # character, so a name repeated on one page, or re-running this cell
        # on a live kernel, does not add duplicate episode numbers.
        if character and episode not in character_episodes[character]:
            character_episodes[character].append(episode)

    # Delay to avoid rate-limiting
    time.sleep(1)
    


Scraping Episode 1...
Scraping Episode 2...
Scraping Episode 3...
Scraping Episode 4...
Scraping Episode 5...
Scraping Episode 6...
Scraping Episode 7...
Scraping Episode 8...
Scraping Episode 9...
Scraping Episode 10...


## Urutkan episodenya

In [20]:
# Put each character's episode numbers into ascending order, in place.
for character, appearances in character_episodes.items():
    appearances.sort()


## Save ke CSV

In [21]:
# Export the mapping as CSV: a header row, then one row per character
# (sorted by name) holding the name and its episodes joined with commas.
csv_file = "onepiece_characters.csv"
with open(csv_file, "w", encoding="utf-8", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Character", "Episodes"])
    writer.writerows(
        [name, ','.join(str(number) for number in numbers)]
        for name, numbers in sorted(character_episodes.items())
    )

print(f"Data saved to {csv_file}")

Data saved to onepiece_characters.csv
