In [None]:
pip install requests beautifulsoup4

In [None]:
import json
import os
import random
import shutil
import time

import requests
from bs4 import BeautifulSoup
from google.colab import files

In [None]:
# Scrape one wiki article (title, infobox, and per-heading text) and save it
# as "<site>.json" in the working directory.
site = "Voice-Changing_Bowtie"
url = "https://www.detectiveconanworld.com/wiki/" + site
headers = {"User-Agent": "Mozilla/5.0"}

# Without a timeout requests can wait indefinitely on a stalled connection;
# raise_for_status() keeps an HTTP error page from being parsed and saved.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Fall back to the page slug if the page has no <h1>.
title_tag = soup.find("h1")
title = title_tag.text.strip() if title_tag else site

# Infobox: every row that has both a <th> (label) and a <td> (value).
infobox = {}
infobox_table = soup.find("table", class_="infobox")
if infobox_table:
    for row in infobox_table.find_all("tr"):
        th = row.find("th")
        td = row.find("td")
        if th and td:
            infobox[th.text.strip()] = td.text.strip()

main_content = soup.find("div", class_="mw-parser-output")
if main_content is None:
    raise RuntimeError(f"No 'mw-parser-output' container found at {url}")

# The table of contents repeats every heading as list items; drop it so it is
# not collected as section text.
toc = main_content.find("div", id="toc")
if toc:
    toc.decompose()

skip_sections = {
    "Gallery",
    "See also",
    "References",
    "In other languages"
}

# Maps heading text -> accumulated paragraph / bullet text under that heading.
sections = {}
current_heading = None

for tag in main_content.find_all(["h2", "h3", "p", "ul"]):
    if tag.name in ("h2", "h3"):
        # Remove a possible "[edit]" link label before stripping so the key
        # matches the plain heading text (and the skip list).
        heading = tag.text.replace("[edit]", "").strip()
        if heading and heading not in skip_sections:
            current_heading = heading
            sections[current_heading] = ""
        else:
            current_heading = None
    elif current_heading is None:
        # Content before the first heading or inside a skipped section.
        continue
    elif tag.name == "p":
        sections[current_heading] += tag.text.strip() + "\n"
    elif tag.name == "ul" and tag.find_parent("ul") is None:
        # find_all() also yields <ul> elements nested inside list items. Their
        # text is already part of the enclosing <li>, so only top-level lists
        # are processed, and only their direct <li> children are walked.
        for li in tag.find_all("li", recursive=False):
            sections[current_heading] += "- " + li.text.strip() + "\n"

data = {
    "title": title,
    "url": url,
    "infobox": infobox,
    "sections": sections
}

with open(site + ".json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)


In [None]:
# Display the scraped record (keys: "title", "url", "infobox", "sections").
data

In [None]:
# Print the text collected under the "Trivia" heading, if the page had one.
# .get() returns None for a missing heading; an empty string is also treated
# as "not found".
trivia = data["sections"].get("Trivia")
if not trivia:
    print("Trivia section not found.")
else:
    print("=== Trivia ===\n")
    print(trivia)


In [None]:
# Scrape one wiki article (title, infobox, and per-heading text including
# tables and collapsible blocks) and save it as "<site>.json".
site = "Drone"
url = "https://www.detectiveconanworld.com/wiki/" + site
headers = {"User-Agent": "Mozilla/5.0"}

# Without a timeout requests can wait indefinitely on a stalled connection;
# raise_for_status() keeps an HTTP error page from being parsed and saved.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Fall back to the page slug if the page has no <h1>.
title_tag = soup.find("h1")
title = title_tag.text.strip() if title_tag else site

# Infobox: every row that has both a <th> (label) and a <td> (value).
infobox = {}
infobox_table = soup.find("table", class_="infobox")
if infobox_table:
    for row in infobox_table.find_all("tr"):
        th = row.find("th")
        td = row.find("td")
        if th and td:
            infobox[th.text.strip()] = td.text.strip()

main_content = soup.find("div", class_="mw-parser-output")
if main_content is None:
    raise RuntimeError(f"No 'mw-parser-output' container found at {url}")

# The table of contents repeats every heading as list items; drop it so it is
# not collected as section text.
toc = main_content.find("div", id="toc")
if toc:
    toc.decompose()

skip_sections = {"Gallery", "See also", "References", "In other languages"}

# Maps heading text -> accumulated text under that heading.
sections = {}
current_heading = None

# id() of every container (list, table, collapsible block) whose full text has
# already been recorded. find_all() walks descendants too, so any matched tag
# that sits inside one of these would otherwise be recorded a second time.
captured_ids = set()

for tag in main_content.find_all(["h2", "h3", "p", "ul", "table", "div"]):
    if tag.name in ("h2", "h3"):
        # Remove a possible "[edit]" link label before stripping so the key
        # matches the plain heading text (and the skip list).
        heading = tag.text.replace("[edit]", "").strip()
        if heading and heading not in skip_sections:
            current_heading = heading
            sections[current_heading] = ""
        else:
            current_heading = None
        continue

    if current_heading is None:
        # Content before the first heading or inside a skipped section.
        continue
    if any(id(parent) in captured_ids for parent in tag.parents):
        continue

    # NOTE: get_text(strip=True) strips every text fragment and joins them with
    # an empty separator, which glues words together around inline tags such as
    # links. Strip only the outer whitespace instead.
    if tag.name == "p":
        sections[current_heading] += tag.get_text().strip() + "\n"
    elif tag.name == "ul":
        captured_ids.add(id(tag))
        # Direct children only: nested items are part of their parent's text.
        for li in tag.find_all("li", recursive=False):
            sections[current_heading] += "- " + li.get_text().strip() + "\n"
    elif tag.name == "table":
        captured_ids.add(id(tag))
        for row in tag.find_all("tr"):
            # Collapse internal whitespace so each cell stays on one line.
            cols = [" ".join(col.get_text().split()) for col in row.find_all(["th", "td"])]
            if cols:
                sections[current_heading] += " | ".join(cols) + "\n"
    elif "mw-collapsible-content" in tag.get("class", []):
        captured_ids.add(id(tag))
        sections[current_heading] += tag.get_text().strip() + "\n"

data = {
    "title": title,
    "url": url,
    "infobox": infobox,
    "sections": sections
}

with open(site + ".json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)


In [None]:
# Build `episode_links`: a de-duplicated, order-preserving list of
# (link text, absolute URL) tuples taken from the wikitables on the Anime page.
BASE_URL = "https://www.detectiveconanworld.com"
EPISODE_LIST_URL = BASE_URL + "/wiki/Anime"
headers = {"User-Agent": "Mozilla/5.0"}

# Without a timeout requests can wait indefinitely on a stalled connection;
# raise_for_status() keeps an HTTP error page from being parsed as the index.
response = requests.get(EPISODE_LIST_URL, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

episode_links = []

for table in soup.find_all("table", class_="wikitable"):
    # [1:] skips the first row of each table (presumably the header row).
    for row in table.find_all("tr")[1:]:
        cols = row.find_all("td")
        if len(cols) < 3:
            continue
        # The link is read from the third cell of the row.
        link = cols[2].find("a", href=True)
        if link and link["href"].startswith("/wiki/"):
            episode_links.append((
                link.text.strip(),
                BASE_URL + link["href"]
            ))

# dict.fromkeys drops duplicate tuples while keeping first-seen order.
episode_links = list(dict.fromkeys(episode_links))
print(f"Found {len(episode_links)} episode links.")
# Show a short preview only; printing every link floods the cell output.
print(episode_links[:5])

✅ Found 1360 episode links.
[('Roller Coaster Murder Case', 'https://www.detectiveconanworld.com/wiki/Roller_Coaster_Murder_Case'), ("Company President's Daughter Kidnapping Case", 'https://www.detectiveconanworld.com/wiki/Company_President%27s_Daughter_Kidnapping_Case'), ("An Idol's Locked Room Murder Case", 'https://www.detectiveconanworld.com/wiki/An_Idol%27s_Locked_Room_Murder_Case'), ('The Coded Map of the City Case', 'https://www.detectiveconanworld.com/wiki/The_Coded_Map_of_the_City_Case'), ("The Shinkansen's Bomb Case", 'https://www.detectiveconanworld.com/wiki/The_Shinkansen%27s_Bomb_Case'), ('Valentine Murder Case', 'https://www.detectiveconanworld.com/wiki/Valentine_Murder_Case'), ('Once-A-Month Present Threat Case', 'https://www.detectiveconanworld.com/wiki/Once-A-Month_Present_Threat_Case'), ('Art Museum Owner Murder Case', 'https://www.detectiveconanworld.com/wiki/Art_Museum_Owner_Murder_Case'), ('Tenkaichi Night Festival Murder Case', 'https://www.detectiveconanworld.com

In [None]:
# Fetch a slice of `episode_links`, write one JSON file per episode into
# "episodes_json/", then zip the folder and trigger a Colab download.
HEADERS = {"User-Agent": "Mozilla/5.0"}
skip_sections = {
    "BGM listing",
    "Gallery",
    "In other languages",
    "See also"
}

# Half-open slice [START_INDEX, END_INDEX) of `episode_links` handled by this run.
START_INDEX = 300
END_INDEX = 1360
REQUEST_TIMEOUT = 30  # seconds; prevents one stalled request from hanging the run

os.makedirs("episodes_json", exist_ok=True)

batch = episode_links[START_INDEX:END_INDEX]
failed = []  # (title, url) pairs that could not be fetched or parsed

for position, (title, url) in enumerate(batch):
    # Random pause between requests. Doing it at the top of the loop means the
    # delay is also applied after a skipped episode, and never after the last.
    if position:
        time.sleep(random.uniform(5, 10))

    # Report the position within the full list, not within the slice.
    print(f"[{START_INDEX + position + 1}/{len(episode_links)}] Fetching {title}")

    try:
        resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        resp.raise_for_status()
    except requests.RequestException as exc:
        # One bad page must not abort a run that takes hours.
        print(f"  Skipped ({exc})")
        failed.append((title, url))
        continue

    soup = BeautifulSoup(resp.content, "html.parser")
    content = soup.find("div", class_="mw-parser-output")
    if content is None:
        print("  Skipped (no 'mw-parser-output' container)")
        failed.append((title, url))
        continue

    # The table of contents repeats every heading as list items; drop it.
    toc = content.find("div", id="toc")
    if toc:
        toc.decompose()

    # Infobox: every row that has both a <th> (label) and a <td> (value).
    infobox = {}
    box = soup.find("table", class_="infobox")
    if box:
        for row in box.find_all("tr"):
            header = row.find("th")
            # Named value_cell so the notebook-level `data` dict built by the
            # single-page cells is not overwritten.
            value_cell = row.find("td")
            if header and value_cell:
                infobox[header.text.strip()] = value_cell.text.strip()

    sections = {}
    current_heading = None

    for tag in content.find_all(["h2", "h3", "p", "ul"]):
        if tag.name in ("h2", "h3"):
            # Remove "[edit]" before stripping so no whitespace is left behind.
            heading = tag.text.replace("[edit]", "").strip()
            if heading and heading not in skip_sections:
                current_heading = heading
                sections[current_heading] = ""
            else:
                current_heading = None
        elif current_heading is None:
            # Content before the first heading or inside a skipped section.
            continue
        elif tag.name == "p":
            sections[current_heading] += tag.text.strip() + "\n"
        elif tag.name == "ul" and tag.find_parent("ul") is None:
            # find_all() also yields <ul> elements nested inside list items.
            # Their text is already part of the enclosing <li>, so only
            # top-level lists and their direct <li> children are walked.
            for li in tag.find_all("li", recursive=False):
                sections[current_heading] += "- " + li.text.strip() + "\n"

    episode_data = {
        "title": title,
        "url": url,
        "infobox": infobox,
        "sections": sections
    }

    # Spaces and path separators in the title are replaced for the file name.
    filename = f"episodes_json/{title.replace(' ', '_').replace('/', '_')}.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(episode_data, f, ensure_ascii=False, indent=2)

if failed:
    print(f"Saved {len(batch) - len(failed)} episodes; {len(failed)} skipped:")
    for failed_title, failed_url in failed:
        print(f"  - {failed_title} ({failed_url})")
else:
    print("All episodes saved individually.")

shutil.make_archive("episodes_json", 'zip', "episodes_json")
files.download("episodes_json.zip")

[1/1360] Fetching Parade of Malice and Saint (Part 1)
[2/1360] Fetching Parade of Malice and Saint (Part 2)
[3/1360] Fetching The Victim Who Came Back
[4/1360] Fetching The Trembling Police Headquarters: 12 Million Hostages (2 Hour Special)
[5/1360] Fetching The Unseen Suspect (Part 1)
[6/1360] Fetching The Unseen Suspect (Part 2)
[7/1360] Fetching On the Trail of a Silent Witness (Part 1)
[8/1360] Fetching On the Trail of a Silent Witness (Part 2)
[9/1360] Fetching Contact with the Black Organization (The Negotiation)
[10/1360] Fetching Contact with the Black Organization (The Pursuit)
[11/1360] Fetching Contact with the Black Organization (Desperation)
[12/1360] Fetching Festival Dolls Dyed in the Setting Sun (Part 1)
[13/1360] Fetching Festival Dolls Dyed in the Setting Sun (Part 2)
[14/1360] Fetching The Scenic Lookout with the Broken Fence
[15/1360] Fetching Place Exposed to the Sun
[16/1360] Fetching The Sullied Masked Hero (Part 1)
[17/1360] Fetching The Sullied Masked Hero (Par

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>