In [1]:
# ──────────────────────────────────────────────────────────────────────────────
# CELL: Imports & session setup
# ──────────────────────────────────────────────────────────────────────────────
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin
import re # Import regular expressions

BASE_URL = "https://highspell.fandom.com"

# ──────────────────────────────────────────────────────────────────────────────
# CELL: 1) Scrape the list of attackable NPC links
# ──────────────────────────────────────────────────────────────────────────────
list_page_url = f"{BASE_URL}/wiki/NPC"
print(f"Fetching NPC list from: {list_page_url}")

# Start from an empty list so every path below (missing header, missing <ul>,
# failed request) leaves npc_links defined for the later cells.
npc_links = []
try:
    list_response = requests.get(list_page_url, timeout=15)
    # Treat HTTP errors (404, 5xx, ...) as fetch failures instead of parsing
    # the error page and reporting a misleading "header not found" warning.
    list_response.raise_for_status()
    list_page = list_response.text
    soup      = BeautifulSoup(list_page, "html.parser")

    attackable_header = soup.find("span", id="List_of_Attackable_NPCs")
    if attackable_header:
        # The span is expected to sit inside an <h2>; guard against a page
        # layout where it does not, rather than failing on None.
        header_h2 = attackable_header.find_parent("h2")
        attackable_ul = header_h2.find_next_sibling("ul") if header_h2 else None
        if attackable_ul:
            # href=True keeps only anchors with an href attribute; the extra
            # truthiness test drops anchors whose href is an empty string.
            npc_links = [
                urljoin(BASE_URL, a["href"])
                for a in attackable_ul.find_all("a", href=True)
                if a["href"]
            ]
            print(f"Found {len(npc_links)} attackable NPC links.")
        else:
            print("Warning: Could not find the <ul> element after the 'List of Attackable NPCs' header.")
    else:
        print("Warning: Could not find the 'List of Attackable NPCs' header span.")

except requests.exceptions.RequestException as e:
    print(f"Error fetching NPC list page: {e}")
    npc_links = [] # Ensure npc_links is empty if fetch fails


# ──────────────────────────────────────────────────────────────────────────────
# CELL: 2) Function to parse NPC page data (Traits, Probabilities, Loot Tables)
# ──────────────────────────────────────────────────────────────────────────────
# Strips math-markup residue such as "{\displaystyle ...}" from table cell text.
_MATH_MARKUP_RE = re.compile(r'\{\\displaystyle.*?\}')

# Mapped columns a table row is allowed to lack; a missing cell becomes "".
_OPTIONAL_COLUMNS = frozenset({"chance"})

# rare_prob values (compared lower-cased) for which no rare loot table is expected.
_NO_RARE_MARKERS = frozenset({"", "no", "n/a", "unknown", "never", "0", "0%"})


def _find_infobox_section(soup, header_matches):
    """Return the first infobox group whose header text satisfies *header_matches*, else None."""
    for section in soup.select("aside.portable-infobox section.pi-group"):
        header = section.select_one("h2.pi-header")
        if header and header_matches(header.get_text(strip=True)):
            return section
    return None


def _iter_infobox_items(section):
    """Yield (lower-cased label text, value text) for every labelled data row in *section*."""
    for div in section.select("div.pi-item.pi-data"):
        label_tag = div.select_one("h3.pi-data-label")
        value_tag = div.select_one("div.pi-data-value")
        if label_tag and value_tag:
            yield label_tag.get_text(strip=True).lower(), value_tag.get_text(strip=True)


def _parse_table_rows(rows, mapping):
    """Convert table rows into dicts using *mapping* (column name -> <td> index).

    A row is skipped when it has no <td> cells or lacks a cell for any
    non-optional column. Optional columns (see _OPTIONAL_COLUMNS) that are
    missing from a row are stored as an empty string.
    """
    if not mapping:
        return []
    # Only the required columns decide whether a row is usable; otherwise the
    # optional-column fallback below could never be reached.
    required_max = max(
        (idx for name, idx in mapping.items() if name not in _OPTIONAL_COLUMNS),
        default=-1,
    )

    parsed_data = []
    for row in rows:
        cells = row.find_all("td")
        if not cells or len(cells) <= required_max:
            continue
        entry = {}
        for name, idx in mapping.items():
            if idx < len(cells):
                cell_text = cells[idx].get_text(strip=True)
                entry[name] = _MATH_MARKUP_RE.sub('', cell_text).strip()
            else:
                # Only optional columns can get here (required ones were checked above).
                entry[name] = ""
        parsed_data.append(entry)
    return parsed_data


def parse_npc_traits(npc_url):
    """Fetch an NPC wiki page and extract its traits, drop probabilities and loot tables.

    Args:
        npc_url: Absolute URL of the NPC page.

    Returns:
        None if the HTTP request fails. Otherwise a dict containing:
          - "name": infobox title, or "Unknown" when no title is found
          - "url": the requested URL
          - one key per entry in the infobox "Traits" group (label lower-cased,
            spaces replaced by underscores)
          - "rare_prob" / "root_prob": text from the "loot table probability"
            group, or "" when absent
          - "drop_table" / "rare_drop_table": JSON-encoded lists of row dicts
            ("item", "quantity", "rarity" and, where available, "chance")
    """
    try:
        resp = requests.get(npc_url, timeout=15)
        resp.raise_for_status()
        s = BeautifulSoup(resp.text, "html.parser")
    except requests.exceptions.RequestException as e:
        print(f"  Error fetching page {npc_url}: {e}")
        return None

    result = {"name": "Unknown", "url": npc_url}

    # --- 1) Name ---
    name_tag = s.select_one("aside.portable-infobox h2.pi-title")
    if name_tag:
        result["name"] = name_tag.get_text(strip=True)

    # --- 2) Traits ---
    traits_section = _find_infobox_section(s, lambda text: text.lower() == "traits")
    if traits_section:
        for label, val in _iter_infobox_items(traits_section):
            result[label.replace(" ", "_")] = val

    # --- 3) Rare & Root drop probabilities ---
    result["rare_prob"] = ""
    result["root_prob"] = ""
    prob_section = _find_infobox_section(
        s, lambda text: re.search("loot table probability", text, re.IGNORECASE)
    )
    if prob_section:
        for label, val in _iter_infobox_items(prob_section):
            if "rare" in label:
                result["rare_prob"] = val
            elif "root" in label:
                result["root_prob"] = val

    # --- 4) Main Loot Table ---
    drop_table_data = []
    main_table_found = False

    # Strategy 1: H2 with id="Loot_table" + following table.article-table
    loot_heading_span = s.find("span", id="Loot_table")
    loot_heading_h2 = loot_heading_span.find_parent("h2") if loot_heading_span else None
    if loot_heading_h2:
        main_table = loot_heading_h2.find_next("table", class_="article-table")
        if main_table:
            # This table usually has Item=1, Quantity=2, Rarity=3
            mapping = {"item": 1, "quantity": 2, "rarity": 3}
            # Skip header row (usually first row in tbody)
            rows_to_parse = main_table.select("tbody tr")[1:]
            drop_table_data = _parse_table_rows(rows_to_parse, mapping)
            main_table_found = True

    # Strategy 2: table.fandom-table with caption "Loot Table" (if Strategy 1 failed)
    if not main_table_found:
        for tbl in s.select("table.fandom-table"):
            caption = tbl.find("caption")
            if not caption:
                continue
            caption_text = caption.get_text(strip=True)
            # Caption must contain "Loot Table" but NOT "Rare"
            if not re.search("loot table", caption_text, re.IGNORECASE):
                continue
            if re.search("rare", caption_text, re.IGNORECASE):
                continue

            # Determine mapping based on header (4 or 5 columns)
            header_cells = tbl.select("thead th, tbody tr:first-child th")
            if len(header_cells) >= 5 and re.search('chance', header_cells[4].get_text(), re.IGNORECASE):
                mapping = {"item": 1, "quantity": 2, "rarity": 3, "chance": 4}
            elif len(header_cells) >= 4: # Barbarian case
                mapping = {"item": 1, "quantity": 2, "rarity": 3}
            else:
                continue # Skip this table if headers are unclear

            rows_to_parse = tbl.select("tbody tr")[1:] # Skip header row
            drop_table_data = _parse_table_rows(rows_to_parse, mapping)
            main_table_found = True
            break # Found the table

    if not main_table_found:
        print(f"  Info: Main loot table not found for {result['name']} using H2/article-table or Caption/fandom-table patterns.")

    result["drop_table"] = json.dumps(drop_table_data, ensure_ascii=False)

    # --- 5) Rare Loot Table ---
    rare_drop_data = []
    rare_table_found = False
    for tbl in s.select("table"): # Check all tables
        caption = tbl.find("caption")
        # Look for captions containing "Rare Drop" or "Rare Loot" case-insensitively
        if caption and re.search(r'rare (drop|loot)', caption.get_text(strip=True), re.IGNORECASE):
            # Assume standard 5-column mapping for rare tables
            mapping = {"item": 1, "quantity": 2, "rarity": 3, "chance": 4}
            rows_to_parse = tbl.select("tbody tr")[1:]
            rare_drop_data = _parse_table_rows(rows_to_parse, mapping)
            rare_table_found = True
            break

    # Only report a missing rare table when the rare probability suggests one *should* exist
    if not rare_table_found and result["rare_prob"].strip().lower() not in _NO_RARE_MARKERS:
        print(f"  Info: Rare loot table not found for {result['name']} despite non-zero rare probability ('{result['rare_prob']}'). Checked tables with 'Rare Drop/Loot' in caption.")

    result["rare_drop_table"] = json.dumps(rare_drop_data, ensure_ascii=False)

    return result

# ──────────────────────────────────────────────────────────────────────────────
# CELL: 3) Loop over NPC links, parse data, and build DataFrame
# ──────────────────────────────────────────────────────────────────────────────
records = []
if not npc_links:
    print("\nExecution stopped: No NPC links found on the initial page.")
else:
    # Parse every NPC page; a failure on one page must not abort the whole run.
    for url in npc_links:
        try:
            npc_data = parse_npc_traits(url)
        except Exception as e:
            print(f"!! Unexpected Error processing {url}: {e}")
        else:
            if npc_data:
                records.append(npc_data)

    if not records:
        print("\nNo NPC records were successfully parsed. DataFrame is empty.")
    else:
        df = pd.DataFrame.from_records(records)

        # Preferred column order; columns not listed here are appended afterwards.
        desired_order = [
            'name', 'url', 'level', 'hitpoints', 'max_hit', 'aggressive',
            'accuracy', 'strength', 'defence', 'magic', 'range', 'speed',
            'respawn_timer', 'rare_prob', 'root_prob', 'drop_table', 'rare_drop_table'
        ]
        scraped_cols = df.columns.tolist()
        known_cols = [name for name in desired_order if name in scraped_cols]
        unexpected_cols = [name for name in scraped_cols if name not in desired_order]
        df = df[known_cols + unexpected_cols]

        # ──────────────────────────────────────────────────────────────────────────────
        # CELL: 4) Display DataFrame and save to CSV
        # ──────────────────────────────────────────────────────────────────────────────
        print("\n--- Scraping Complete ---")
        if df.empty:
            print("\nNo NPC data was successfully parsed into the DataFrame.")
        else:
            try:
                from IPython.display import display
                # Show every row/column with untruncated cell content.
                for option, value in (
                    ('display.max_rows', None),
                    ('display.max_columns', None),
                    ('display.width', 1000),
                    ('display.max_colwidth', None),
                ):
                    pd.set_option(option, value)
                display(df)
            except ImportError:
                # Outside IPython: fall back to a plain-text dump.
                print(df.to_string())

            output_filename = "attackable_npcs_complete_data_v3.csv"
            try:
                df.to_csv(output_filename, index=False)
                print(f"\n→ Saved data for {len(df)} NPCs to {output_filename}")
            except Exception as e:
                print(f"\nError saving CSV file: {e}")

Fetching NPC list from: https://highspell.fandom.com/wiki/NPC
Found 81 attackable NPC links.
  Info: Main loot table not found for Bandit (Archer) using H2/article-table or Caption/fandom-table patterns.
  Info: Rare loot table not found for Barbarian despite non-zero rare probability ('0.1%'). Checked tables with 'Rare Drop/Loot' in caption.
  Info: Rare loot table not found for Beach Gnome despite non-zero rare probability ('0.1'). Checked tables with 'Rare Drop/Loot' in caption.
  Info: Rare loot table not found for Blood Mage (Lvl 27) despite non-zero rare probability ('0.02'). Checked tables with 'Rare Drop/Loot' in caption.
  Info: Rare loot table not found for Blood Mage (Lvl 44) despite non-zero rare probability ('0.025'). Checked tables with 'Rare Drop/Loot' in caption.
  Info: Rare loot table not found for Charred Skeleton (Lvl 36) despite non-zero rare probability ('0.0075'). Checked tables with 'Rare Drop/Loot' in caption.
  Info: Rare loot table not found for Charred Skele

Unnamed: 0,name,url,level,hitpoints,max_hit,aggressive,accuracy,strength,defence,magic,range,speed,respawn_timer,rare_prob,root_prob,drop_table,rare_drop_table
0,Bandit,https://highspell.fandom.com/wiki/Bandit,10,14,2,Yes,6 + 5 bonus,6 + 4 bonus,6,1,1,2,30s,1/200,Never,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Coins"", ""quantity"": ""5"", ""rarity"": ""1/2.5""}, {""item"": ""Coins"", ""quantity"": ""10"", ""rarity"": ""~1/6.66""}, {""item"": ""Bass"", ""quantity"": ""1"", ""rarity"": ""1/8""}, {""item"": ""Tomato"", ""quantity"": ""1"", ""rarity"": ""1/20""}, {""item"": ""Bronze gloves"", ""quantity"": ""1"", ""rarity"": ""1/20""}, {""item"": ""Red cape"", ""quantity"": ""1"", ""rarity"": ""1/25""}, {""item"": ""Orange cape"", ""quantity"": ""1"", ""rarity"": ""1/25""}, {""item"": ""Leather gloves"", ""quantity"": ""1"", ""rarity"": ""~1/28.57""}, {""item"": ""Bronze helm"", ""quantity"": ""1"", ""rarity"": ""1/40""}, {""item"": ""Bronze longsword"", ""quantity"": ""1"", ""rarity"": ""1/40""}, {""item"": ""Iron gloves"", ""quantity"": ""1"", ""rarity"": ""1/50""}, {""item"": ""Iron longsword"", ""quantity"": ""1"", ""rarity"": ""~1/66.66""}, {""item"": ""Black cape"", ""quantity"": ""1"", ""rarity"": ""1/250""}, {""item"": ""Bandit mask"", ""quantity"": ""1"", ""rarity"": ""1/1,000""}]","[{""item"": ""Rough Amethyst"", ""quantity"": ""1"", ""rarity"": ""20%"", ""chance"": ""1/500""}, {""item"": ""Rough Sapphire"", ""quantity"": ""1"", ""rarity"": ""17.5%"", ""chance"": ""1/800""}, {""item"": ""Rough Emerald"", ""quantity"": ""1"", ""rarity"": ""15%"", ""chance"": ""1/1333.4""}, {""item"": ""Rough Topaz"", ""quantity"": ""1"", ""rarity"": ""12.5%"", ""chance"": ""1/2666.6""}, {""item"": ""Rough Citrine"", ""quantity"": ""1"", ""rarity"": ""10%"", ""chance"": ""1/4000""}, {""item"": ""Rough Ruby"", ""quantity"": ""1"", ""rarity"": ""7.5%"", ""chance"": ""1/8000""}, {""item"": ""Rough Diamond"", ""quantity"": ""1"", ""rarity"": ""5%"", ""chance"": ""1/13,333.3""}, {""item"": ""Rough Carbonado"", ""quantity"": ""1"", ""rarity"": ""2.5%"", ""chance"": ""1/20,000""}, {""item"": ""Warp 
Scroll"", ""quantity"": ""50"", ""rarity"": ""1%"", ""chance"": ""1/20,000""}, {""item"": ""Alchemy Scroll"", ""quantity"": ""50"", ""rarity"": ""1%"", ""chance"": ""1/20,000""}, {""item"": ""Celadium Ore"", ""quantity"": ""1"", ""rarity"": ""1%"", ""chance"": ""1/40,816.4""}]"
1,Bandit (Archer),https://highspell.fandom.com/wiki/Bandit_(Lvl_18),18,14,?,Yes,6,6,6,1,20,2,Unknown,,,[],[]
2,Barbarian,https://highspell.fandom.com/wiki/Barbarian,20,26,???,Yes,15 + 10 bonus,15 + 15 bonus,15 + 10 bonus,1,1,1,Unknown,0.1%,0%,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Coins"", ""quantity"": ""3"", ""rarity"": ""0.3""}, {""item"": ""Coins"", ""quantity"": ""5"", ""rarity"": ""0.15""}, {""item"": ""Coins"", ""quantity"": ""10"", ""rarity"": ""0.1""}, {""item"": ""Potato"", ""quantity"": ""1"", ""rarity"": ""0.05""}, {""item"": ""Iron Ore"", ""quantity"": ""1"", ""rarity"": ""0.05""}, {""item"": ""Raw Beef"", ""quantity"": ""1"", ""rarity"": ""0.05""}, {""item"": ""Raw Chicken"", ""quantity"": ""1"", ""rarity"": ""0.05""}, {""item"": ""Steak"", ""quantity"": ""1"", ""rarity"": ""0.035""}, {""item"": ""Chicken"", ""quantity"": ""1"", ""rarity"": ""0.035""}, {""item"": ""Iron Helm"", ""quantity"": ""1"", ""rarity"": ""0.03""}, {""item"": ""Iron Gloves"", ""quantity"": ""1"", ""rarity"": ""0.03""}, {""item"": ""Iron Battleaxe"", ""quantity"": ""1"", ""rarity"": ""0.025""}, {""item"": ""Coal"", ""quantity"": ""1"", ""rarity"": ""0.025""}, {""item"": ""Barbarian Helm"", ""quantity"": ""1"", ""rarity"": ""0.02""}, {""item"": ""Raw Salmon"", ""quantity"": ""1"", ""rarity"": ""0.01""}]",[]
3,Beach Gnome,https://highspell.fandom.com/wiki/Beach_gnome,18,25,???,No,20,15 + 10 bonus,15,10 + 5 bonus,1,1,Unknown,0.1,No,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Coins"", ""quantity"": ""8"", ""rarity"": ""0.4""}, {""item"": ""Scroll (IOU)"", ""quantity"": ""2"", ""rarity"": ""0.1""}, {""item"": ""Coins"", ""quantity"": ""15"", ""rarity"": ""0.075""}, {""item"": ""Raw Crab"", ""quantity"": ""1"", ""rarity"": ""0.05""}, {""item"": ""Logs"", ""quantity"": ""1"", ""rarity"": ""0.05""}, {""item"": ""Fire Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Water Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Energy Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Crab"", ""quantity"": ""1"", ""rarity"": ""0.025""}, {""item"": ""Beer"", ""quantity"": ""1"", ""rarity"": ""0.025""}, {""item"": ""Clownfish"", ""quantity"": ""1"", ""rarity"": ""0.025""}, {""item"": ""Coins"", ""quantity"": ""20"", ""rarity"": ""0.025""}, {""item"": ""Gnome's Hat"", ""quantity"": ""1"", ""rarity"": ""0.02""}, {""item"": ""Amethyst Necklace"", ""quantity"": ""1"", ""rarity"": ""0.02""}, {""item"": ""Coconut"", ""quantity"": ""1"", ""rarity"": ""0.01""}]",[]
4,Big Squirrel,https://highspell.fandom.com/wiki/Big_squirrel,12,15,???,Yes,10,12,12,1,1,1,Unknown,No,No,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Raw Rodent Meat"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Squirrel Pelt"", ""quantity"": ""1"", ""rarity"": ""0.15""}]",[]
5,Black Bear,https://highspell.fandom.com/wiki/Black_bear,32,40,???,Unknown,30,35,25,1,1,1,Unknown,No,No,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Raw Game Meat"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Raw Game Meat"", ""quantity"": ""1"", ""rarity"": ""0.4""}, {""item"": ""Black Bear Paw"", ""quantity"": ""1"", ""rarity"": ""0.025""}]",[]
6,Blood Mage (Lvl 27),https://highspell.fandom.com/wiki/Blood_mage_(Lvl_27),27,32,???,Yes,24,28,20 + 10 bonus,30 + 15 bonus,1,1,Unknown,0.02,0.1,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Coins"", ""quantity"": ""10"", ""rarity"": ""0.25""}, {""item"": ""Coins"", ""quantity"": ""15"", ""rarity"": ""0.1""}, {""item"": ""Fire Scroll"", ""quantity"": ""1"", ""rarity"": ""0.075""}, {""item"": ""Water Scroll"", ""quantity"": ""1"", ""rarity"": ""0.075""}, {""item"": ""Nature Scroll"", ""quantity"": ""1"", ""rarity"": ""0.075""}, {""item"": ""Fire Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Water Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Nature Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Energy Scroll"", ""quantity"": ""2"", ""rarity"": ""0.05""}, {""item"": ""Fury Scroll"", ""quantity"": ""2"", ""rarity"": ""0.04""}, {""item"": ""Rage Scroll"", ""quantity"": ""2"", ""rarity"": ""0.025""}, {""item"": ""Warp Scroll"", ""quantity"": ""1"", ""rarity"": ""0.025""}, {""item"": ""Alchemy Scroll"", ""quantity"": ""2"", ""rarity"": ""0.025""}, {""item"": ""Blood Robe Bottoms"", ""quantity"": ""1"", ""rarity"": ""0.004""}, {""item"": ""Blood Hat"", ""quantity"": ""1"", ""rarity"": ""0.004""}, {""item"": ""Potion of Magic (1)"", ""quantity"": ""1"", ""rarity"": ""0.0025""}]",[]
7,Blood Mage (Lvl 44),https://highspell.fandom.com/wiki/Blood_mage_(Lvl_44),44,42,???,Yes,35,35,30 + 15 bonus,40 + 15 bonus,1,1,Unknown,0.025,0.15,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Coins"", ""quantity"": ""15"", ""rarity"": ""0.2""}, {""item"": ""Coins"", ""quantity"": ""25"", ""rarity"": ""0.1""}, {""item"": ""Fire Scroll"", ""quantity"": ""2"", ""rarity"": ""0.075""}, {""item"": ""Water Scroll"", ""quantity"": ""2"", ""rarity"": ""0.075""}, {""item"": ""Nature Scroll"", ""quantity"": ""2"", ""rarity"": ""0.075""}, {""item"": ""Fire Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Water Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Nature Scroll"", ""quantity"": ""3"", ""rarity"": ""0.05""}, {""item"": ""Energy Scroll"", ""quantity"": ""3"", ""rarity"": ""0.04""}, {""item"": ""Fury Scroll"", ""quantity"": ""2"", ""rarity"": ""0.04""}, {""item"": ""Warp Scroll"", ""quantity"": ""1"", ""rarity"": ""0.025""}, {""item"": ""Rage Scroll"", ""quantity"": ""2"", ""rarity"": ""0.025""}, {""item"": ""Alchemy Scroll"", ""quantity"": ""2"", ""rarity"": ""0.025""}, {""item"": ""Blood Robe Top"", ""quantity"": ""1"", ""rarity"": ""0.004""}, {""item"": ""Blood Robe Bottoms"", ""quantity"": ""1"", ""rarity"": ""0.004""}, {""item"": ""Blood Hood"", ""quantity"": ""1"", ""rarity"": ""0.004""}, {""item"": ""Potion of Magic (1)"", ""quantity"": ""1"", ""rarity"": ""0.0025""}]",[]
8,Brown Bear,https://highspell.fandom.com/wiki/Brown_bear,25,30,???,Unknown,20,30,20,1,1,1,Unknown,No,No,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Raw Game Meat"", ""quantity"": ""1"", ""rarity"": ""Always""}, {""item"": ""Raw Game Meat"", ""quantity"": ""1"", ""rarity"": ""0.25""}, {""item"": ""Brown Bear Paw"", ""quantity"": ""1"", ""rarity"": ""0.025""}]",[]
9,Brute,https://highspell.fandom.com/wiki/Brute,48,50,?,Yes,15,40,25,1,1,1,Unknown,,,"[{""item"": ""Bones"", ""quantity"": ""1"", ""rarity"": ""Always""}]",[]



→ Saved data for 81 NPCs to attackable_npcs_complete_data_v3.csv


In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# CELL: 5) Load data from CSV into DataFrame (skip scraping)
# ──────────────────────────────────────────────────────────────────────────────
import os
import pandas as pd

data_file = "attackable_npcs_complete_data_v3.csv"

# Fail fast when the scraper output is missing.
if not os.path.isfile(data_file):
    raise FileNotFoundError(f"{data_file} not found. Please run the scraper (Cell 1–4) first.")

df = pd.read_csv(data_file)
print(f"✅ Loaded {len(df)} NPC records from {data_file}")

# (Optional) Re-apply display settings so the full DataFrame can be inspected.
for option, value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ('display.width', 1000),
    ('display.max_colwidth', None),
):
    pd.set_option(option, value)
display(df)


In [31]:
import pandas as pd
import json

# Define target keywords (matched case-insensitively as substrings of item names)
keywords = ["celadium"]

# Function to filter items from a JSON string list
def filter_items(json_str):
    """Return the entries of a JSON-encoded drop list whose item name matches a keyword.

    Args:
        json_str: JSON text encoding a list of drop dicts, each expected to
            carry an "item" string. Non-string input (e.g. None or NaN) is
            tolerated.

    Returns:
        A JSON string with only the matching entries, or None when the input
        cannot be decoded, is not a list, or contains no matching entry.
    """
    try:
        items = json.loads(json_str)
    except (TypeError, json.JSONDecodeError):
        return None
    if not isinstance(items, list):
        return None

    wanted = [kw.lower() for kw in keywords]
    # Entries without a string "item" are skipped rather than raising.
    filtered = [
        entry for entry in items
        if isinstance(entry, dict)
        and isinstance(entry.get("item"), str)
        and any(kw in entry["item"].lower() for kw in wanted)
    ]
    # ensure_ascii=False matches how the scraper serialises the loot tables.
    return json.dumps(filtered, ensure_ascii=False) if filtered else None

# Run the keyword filter over both loot columns, storing results in helper columns
for source_col, helper_col in (
    ('drop_table', 'filtered_drop'),
    ('rare_drop_table', 'filtered_rare_drop'),
):
    df[helper_col] = df[source_col].apply(filter_items)

# Keep only NPCs with a match in at least one of the two tables
has_match = df['filtered_drop'].notnull() | df['filtered_rare_drop'].notnull()
df_filtered = df[has_match].copy()

# Replace the full loot tables with their filtered versions
df_filtered['drop_table'] = df_filtered['filtered_drop']
df_filtered['rare_drop_table'] = df_filtered['filtered_rare_drop']

# The helper columns are no longer needed on the filtered frame
df_filtered = df_filtered.drop(columns=['filtered_drop', 'filtered_rare_drop'])


# RAW FILTERED
#display(df_filtered)


# ──────────────────────────────────────────────────────────────────────────────
# CELL: 6) Present filtered NPCs sorted by level with readable drops
# ──────────────────────────────────────────────────────────────────────────────
import json
import pandas as pd
from IPython.display import display, HTML

# Helper to turn JSON list into newline-separated "Item: probability"
def format_drops(json_str):
    """Render a JSON-encoded drop list as one "item: rarity" line per entry.

    Args:
        json_str: JSON text encoding a list of dicts with "item" and "rarity"
            keys, or a missing value (None, "", NaN).

    Returns:
        The newline-joined lines, or "" when there is nothing to render.

    Raises:
        json.JSONDecodeError: if a non-empty string is not valid JSON.
        KeyError: if an entry lacks "item" or "rarity".
    """
    # A pandas missing value is a NaN float, which is truthy, so a plain
    # falsiness test would pass it on to json.loads and raise TypeError.
    if not isinstance(json_str, str) or not json_str:
        return ""
    entries = json.loads(json_str)
    return "\n".join(f"{e['item']}: {e['rarity']}" for e in entries)

# Select just the desired columns and create display DataFrame
cols = ['name', 'level', 'aggressive', 'accuracy', 'strength', 'defence', 'rare_prob', 'root_prob']
df_display = df_filtered[cols].copy()

# ─── New: sort by level ascending ──────────────────────────────────────────────
df_display['level'] = df_display['level'].astype(int)
df_display.sort_values('level', inplace=True)

# Apply formatted drop columns (assignment aligns on the index, so the sort order is kept)
df_display['Drops']      = df_filtered['drop_table'].apply(format_drops)
df_display['Rare Drops'] = df_filtered['rare_drop_table'].apply(format_drops)

# Ensure pandas shows full cell content
pd.set_option('display.max_colwidth', None)

# The cell text comes from scraped wiki pages, so it must not reach the notebook
# as raw HTML. Line breaks are therefore swapped for a placeholder that survives
# HTML escaping, the whole table is rendered with escaping ON, and only the
# placeholder is turned into <br> afterwards.
LINE_BREAK_TOKEN = "@@BR@@"
formatters = {
    'Drops':      lambda v: v.replace('\n', LINE_BREAK_TOKEN) if isinstance(v, str) else '',
    'Rare Drops': lambda v: v.replace('\n', LINE_BREAK_TOKEN) if isinstance(v, str) else ''
}

# Work on a copy so df_display keeps its newline-separated text
df_render = df_display.copy()
for column, to_placeholder in formatters.items():
    df_render[column] = df_render[column].map(to_placeholder)

# Render sorted table with real line breaks
html = df_render.to_html(escape=True, index=False).replace(LINE_BREAK_TOKEN, '<br>')
display(HTML(html))



name,level,aggressive,accuracy,strength,defence,rare_prob,root_prob,Drops,Rare Drops
Bandit,10,Yes,6 + 5 bonus,6 + 4 bonus,6,1/200,Never,,Celadium Ore: 1%
Pirate,29,Yes,30 + 5 bonus,25 + 4 bonus,30,0.02,0.075,,Celadium Ore: 1%
Wizard (Lvl 40),40,No,30 + 3 bonus,30 + 3 bonus,25 + 3 bonus,0.04,0.05,,Celadium Ore: 1%
Forest Warrior,49,Yes,40 + 36 bonus,40 + 36 bonus,40 + 36 bonus,0.02,0.05,,Celadium Ore: 1%
Dragon Hatchling (Fire),52,Yes,40,45,40 + 10 bonus,0.05,0.125,,Celadium Ore: 1%
Forest giant,58,Yes,45 + 42 bonus,45 + 42 bonus,45 + 30 bonus,1/50,1/4,,Celadium Ore: 1%
Frost Warrior,59,Yes,50 + 20 bonus,50 + 25 bonus,50 + 20 bonus,0.025,0.075,,Celadium Ore: 1%
Frost Giant,73,Yes,60 + 25 bonus,60 + 35 bonus,55 + 20 bonus,0.04,0.075,,Celadium Ore: 1%
Hell Warrior,77,Yes,50 + 20 bonus,50 + 30 bonus,50 + 30 bonus,0.025,Never,,Celadium Ore: 1%
Plains Dragon,84,Yes,70,70,50 + 30 bonus,0.02,0.08,,Celadium Ore: 1%


name,level,aggressive,accuracy,strength,defence,Drops,Rare Drops
Bandit,10,Yes,6 + 5 bonus,6 + 4 bonus,6,,Celadium Ore: 1%
Pirate,29,Yes,30 + 5 bonus,25 + 4 bonus,30,,Celadium Ore: 1%
Wizard (Lvl 40),40,No,30 + 3 bonus,30 + 3 bonus,25 + 3 bonus,,Celadium Ore: 1%
Forest Warrior,49,Yes,40 + 36 bonus,40 + 36 bonus,40 + 36 bonus,,Celadium Ore: 1%
Dragon Hatchling (Fire),52,Yes,40,45,40 + 10 bonus,,Celadium Ore: 1%
Forest giant,58,Yes,45 + 42 bonus,45 + 42 bonus,45 + 30 bonus,,Celadium Ore: 1%
Frost Warrior,59,Yes,50 + 20 bonus,50 + 25 bonus,50 + 20 bonus,,Celadium Ore: 1%
Frost Giant,73,Yes,60 + 25 bonus,60 + 35 bonus,55 + 20 bonus,,Celadium Ore: 1%
Hell Warrior,77,Yes,50 + 20 bonus,50 + 30 bonus,50 + 30 bonus,,Celadium Ore: 1%
Plains Dragon,84,Yes,70,70,50 + 30 bonus,,Celadium Ore: 1%
