In [26]:
import re
import requests
from bs4 import BeautifulSoup
import numpy as np
import csv

In [2]:
# scrape from the ID List website (https://minecraft-ids.grahamedgecombe.com/)

#retrieve the body content from a link
def getLinkSoup(link, timeout=30):
    """Download a page and parse its body with BeautifulSoup.

    Args:
        link: URL to fetch with an HTTP GET.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout a stalled server would block
            the notebook indefinitely.

    Returns:
        A BeautifulSoup object built from the response body using the
        'html.parser' backend, or None when the response status is not 200
        (an error line is printed in that case).
    """
    response = requests.get(
        url=link,
        timeout=timeout,
    )
    if response.status_code != 200:
        # Every non-200 status ends up here, so report the actual code
        # instead of claiming the link was "not found".
        print(f"> ERROR: Link [{link}] returned HTTP status {response.status_code}...")
        return None
    return BeautifulSoup(response.content, 'html.parser')


In [18]:
# retrieve from the tables
soup = getLinkSoup("https://minecraft-ids.grahamedgecombe.com/")
if soup is None:
    # getLinkSoup returns None on a non-200 response; stop here with a clear
    # message instead of an AttributeError on the find_all calls below.
    raise RuntimeError("Could not download the Minecraft ID list page")

# Collect the three kinds of elements by tag name and CSS class.
ids_raw = soup.find_all("td", {"class": "id"})
names_raw = soup.find_all("span", {"class": "name"})
mc_id_names_raw = soup.find_all("span", {"class": "text-id"})

# clean up
ids = [x.text for x in ids_raw]
names = [x.text for x in names_raw]
# drop the "(minecraft:" prefix and the ")" so only the bare tag text remains
mc_id_names = [x.text.replace(")","").replace("(minecraft:","") for x in mc_id_names_raw]

# Later cells pair these lists position-by-position (zip / shared index), so a
# length mismatch would silently misalign or truncate the exported data.
if not (len(ids) == len(names) == len(mc_id_names)):
    print(
        f"> WARNING: scraped list lengths differ "
        f"(ids={len(ids)}, names={len(names)}, mc_id_names={len(mc_id_names)})"
    )

In [20]:
# Preview every scraped entry as "<id>: <display name> (<tag>)".
for block_id, block_name, block_tag in zip(ids, names, mc_id_names):
    print(f"{block_id}: {block_name} ({block_tag})")

0: Air (air)
1: Stone (stone)
1:1: Granite (stone)
1:2: Polished Granite (stone)
1:3: Diorite (stone)
1:4: Polished Diorite (stone)
1:5: Andesite (stone)
1:6: Polished Andesite (stone)
2: Grass (grass)
3: Dirt (dirt)
3:1: Coarse Dirt (dirt)
3:2: Podzol (dirt)
4: Cobblestone (cobblestone)
5: Oak Wood Plank (planks)
5:1: Spruce Wood Plank (planks)
5:2: Birch Wood Plank (planks)
5:3: Jungle Wood Plank (planks)
5:4: Acacia Wood Plank (planks)
5:5: Dark Oak Wood Plank (planks)
6: Oak Sapling (sapling)
6:1: Spruce Sapling (sapling)
6:2: Birch Sapling (sapling)
6:3: Jungle Sapling (sapling)
6:4: Acacia Sapling (sapling)
6:5: Dark Oak Sapling (sapling)
7: Bedrock (bedrock)
8: Flowing Water (flowing_water)
9: Still Water (water)
10: Flowing Lava (flowing_lava)
11: Still Lava (lava)
12: Sand (sand)
12:1: Red Sand (sand)
13: Gravel (gravel)
14: Gold Ore (gold_ore)
15: Iron Ore (iron_ore)
16: Coal Ore (coal_ore)
17: Oak Wood (log)
17:1: Spruce Wood (log)
17:2: Birch Wood (log)
17:3: Jungle Wood (l

In [33]:
# export to a bunch of csv files

# write the distinctly named minecraft blocks (with real and assigned ID)
# newline='' hands line-ending control to the csv module; without it the
# writer's "\r\n" terminator is translated again on Windows, leaving a blank
# line after every row.
with open('../block_data_files/block_names.csv', 'w', newline='') as f:
    writer = csv.writer(f)

    writer.writerow(["ID","REAL_ID","BLOCK_NAME"])
    # ID is the position in the scraped list; REAL_ID is the scraped id string
    # at that same position (e.g. "1:1").
    for i, name in enumerate(names):
        writer.writerow([i, ids[i], name])
        
        
# write the minecraft id tag and their id number
# positions of the entries whose id has no ":" (i.e. no sub-id part)
ind_single_ids = [i for i in range(len(ids)) if ":" not in ids[i]]

# newline='' hands line-ending control to the csv module (prevents the blank
# line after every row that otherwise appears on Windows).
with open('../block_data_files/block_tags.csv', 'w', newline='') as f:
    writer = csv.writer(f)

    writer.writerow(["ID","REAL_ID","MC_TAG"])
    # ID counts only the colon-free entries; ind points back into the full lists.
    for i, ind in enumerate(ind_single_ids):
        writer.writerow([i, ids[ind], mc_id_names[ind]])
        
# group the block names under the part of their id that precedes any ":"
id_dict = {}
for raw_id, block_name in zip(ids, names):
    # split(":")[0] is the whole string when there is no ":" in it
    super_id = raw_id.split(":")[0]
    id_dict.setdefault(super_id, []).append(block_name)

# one line per super id, written as "<super id>:<name>,<name>,..."
with open('../block_data_files/block_id_arr.txt', 'w+') as out:
    out.write("SUPER_ID,BLOCKS\n")
    out.writelines(
        f"{super_key}:{','.join(members)}\n"
        for super_key, members in id_dict.items()
    )

## Using the Official Minecraft Wiki

In [53]:
# retrieve from the tables
soup2 = getLinkSoup("https://minecraft.fandom.com/wiki/Tag")
if soup2 is None:
    # getLinkSoup returns None on a non-200 response; fail with a clear message
    # instead of an AttributeError on find_all.
    raise RuntimeError("Could not download the Minecraft wiki Tag page")

wiki_tables = soup2.find_all("table", {"class": "wikitable"})
if len(wiki_tables) < 2:
    raise RuntimeError(
        f"Expected at least 2 'wikitable' tables on the Tag page, found {len(wiki_tables)}"
    )
# the second 'wikitable' on the page is used — presumably the block-tag table; confirm if the page layout changes
block_table = wiki_tables[1]

all_tags = []
for tr in block_table.find_all("tr"):
    td = tr.find_all("td")
    # rows with fewer than two <td> cells have no second column to read
    if len(td) < 2:
        continue
    tag_cell = td[1].text
    items = tag_cell.split(",")
    # keep comma-separated entries that contain neither "</a>" nor "#", and
    # drop entries that are empty after stripping (empty cell, trailing comma)
    valid_tags = [
        x.strip()
        for x in items
        if "</a>" not in x and "#" not in x and x.strip()
    ]
    all_tags.extend(valid_tags)
print(all_tags)

['note_block', 'attached_melon_stem', 'attached_pumpkin_stem', 'azalea', 'bamboo', 'barrel', 'bee_nest', 'beehive', 'beetroots', 'big_dripleaf_stem', 'big_dripleaf', 'bookshelf', 'brown_mushroom_block', 'brown_mushroom', 'campfire', 'carrots', 'cartography_table', 'carved_pumpkin', 'cave_vines_plant', 'cave_vines', 'chest', 'chorus_flower', 'chorus_plant', 'cocoa', 'composter', 'crafting_table', 'crimson_fungus', 'daylight_detector', 'dead_bush', 'fern', 'fletching_table', 'glow_lichen', 'grass', 'hanging_roots', 'jack_o_lantern', 'jukebox', 'ladder', 'large_fern', 'lectern', 'lily_pad', 'loom', 'melon_stem', 'melon', 'mushroom_stem', 'nether_wart', 'potatoes', 'pumpkin_stem', 'pumpkin', 'red_mushroom_block', 'red_mushroom', 'scaffolding', 'small_dripleaf', 'smithing_table', 'soul_campfire', 'spore_blossom', 'sugar_cane', 'sweet_berry_bush', 'tall_grass', 'trapped_chest', 'twisting_vines_plant', 'twisting_vines', 'vine', 'warped_fungus', 'weeping_vines_plant', 'weeping_vines', 'wheat',

In [56]:
# np.unique de-duplicates the tags; sorted() returns them as an ordered list
uniq_all_tags = sorted(np.unique(all_tags))
# move "air" to the front so it is written with ID 0
# (list.remove raises ValueError if "air" is not among the scraped tags)
uniq_all_tags.remove("air")
uniq_all_tags.insert(0,"air")

# newline='' hands line-ending control to the csv module (prevents the blank
# line after every row that otherwise appears on Windows).
with open('../block_data_files/wiki_block_tags.csv', 'w', newline='') as f:
    writer = csv.writer(f)

    writer.writerow(["ID","BLOCK_TAG"])
    # ID is the tag's position in the reordered list
    for i, tag in enumerate(uniq_all_tags):
        writer.writerow([i, tag])