# Finding the Pigstep Disc

I want that music disc. And I'm not above a little big of shenangians to find it.

## Imports, Setup and Macros

In [20]:
import json
from os import environ
from pathlib import Path
from typing import Any, Collection, Dict, Set

import mutf8
import pandas as pd
from IPython.display import Markdown, display
from nbt import nbt, region

In [2]:
def format_file_size(path: Path) -> str:
    """Print the size of the specified file in
    human-readible form (KB / MB / GB)

    Parameters
    ----------
    path : Path
        The path to the file

    Returns
    -------
    str
        A prettily formatted file size

    Notes
    -----
    I would be shocked if there isn't a utility already built
    into the standard library to do this, but all I could find
    via Googling was a bunch of recipes and examples
    """
    size = path.stat().st_size  # in bytes
    for unit in ("B", "KB", "MB", "GB"):
        if size < 1024 / 2:
            return f"{size:.1f} {unit}"
        size = size / 1024
    return f"{size} TB"

In [3]:
def summarize_keystore(keystore: Dict[str, Any]) -> None:
    """Display a summary of the contents of a key-value store

    Parameters
    ----------
    keystore : dict
        The keystore to summarize

    Returns
    -------
    None
    """

    def _summarize_keystore(keystore: Dict[str, Any]) -> str:
        summary = ""
        for k, v in keystore.items():
            summary += f"\n - `{k}` : "
            if isinstance(v, (str, nbt.TAG_String)):
                summary += f'`"{v}"`'
            elif not isinstance(v, Collection):
                summary += f"`{str(v)}`"
            else:
                length = len(v)
                if 0 < length < 3:
                    summary += "\n"
                    if not isinstance(v, Dict):
                        v = {i: item for i, item in enumerate(v)}
                    summary += "\n".join(
                        (f"\t{line}" for line in _summarize_keystore(v).split("\n"))
                    )
                else:
                    summary += f"({len(v)} items)"
        return summary

    display(Markdown(_summarize_keystore(keystore)))

In [4]:
save_folder = Path(environ["SAVE_PATH"])

# make sure this is set correctly
for path in sorted(save_folder.glob("*")):
    print(f"- {path.name} ({'folder' if path.is_dir() else format_file_size(path)})")

- DIM-1 (folder)
- DIM1 (folder)
- advancements (folder)
- data (folder)
- datapacks (folder)
- entities (folder)
- icon.png (8.6 KB)
- level.dat (12.3 KB)
- level.dat_old (12.3 KB)
- level11326231134829879582.dat (0.0 B)
- level14927678268100607923.dat (0.0 B)
- level1786655981796876926.dat (0.0 B)
- level4463453738642305340.dat (0.0 B)
- level6425531070021529407.dat (0.0 B)
- level7443636089696258371.dat (0.0 B)
- level8832581565660323154.dat (0.0 B)
- playerdata (folder)
- poi (folder)
- region (folder)
- serverconfig (folder)
- session.lock (3.0 B)
- stats (folder)


In [6]:
idiot = tuple(v for v in level["Data"]["Player"]["UUID"])

## What's in a Chest?

Well I guess the first thing to do is to actually find a chest

In [7]:
all_entitiy_files = sorted(
    (save_folder / "entities").glob("*"), key=lambda path: -path.stat().st_size
)
for path in all_entitiy_files[:10]:
    print(f"- {path.name} ({'folder' if path.is_dir() else format_file_size(path)})")
print(f"... {len(all_entitiy_files) - 10} more")

- r.-5.-1.mca (3.9 MB)
- r.-6.-1.mca (1.7 MB)
- r.-2.-9.mca (0.9 MB)
- r.-2.-11.mca (0.8 MB)
- r.6.0.mca (0.8 MB)
- r.-1.-12.mca (0.8 MB)
- r.-3.-11.mca (0.8 MB)
- r.3.-8.mca (0.8 MB)
- r.-2.-12.mca (0.7 MB)
- r.3.-2.mca (0.7 MB)
... 250 more


Let's look at all the unique entity IDs in the largest chunk

In [21]:
largest_region = region.RegionFile(all_entitiy_files[0])

entity_ids = set()  # type: Set[str]

for chunk in largest_region.iter_chunks():
    for entity in chunk["Entities"]:
        entity_ids.add(entity["id"].value)
for entity_id in entity_ids:
    print(f" - {entity_id}")

 - minecraft:wolf
 - minecraft:pig
 - minecraft:chicken
 - minecraft:zombie_villager
 - minecraft:creeper
 - minecraft:turtle
 - minecraft:enderman
 - minecraft:sheep
 - minecraft:rabbit
 - minecraft:chest_minecart
 - minecraft:iron_golem
 - minecraft:salmon
 - minecraft:fox
 - minecraft:llama
 - minecraft:villager
 - minecraft:cow
 - minecraft:bat
 - minecraft:squid
 - minecraft:falling_block
 - minecraft:zombie
 - minecraft:horse
 - minecraft:skeleton
 - minecraft:spider
 - minecraft:item
 - minecraft:glow_squid


Interesting that I *don't* see "chest" (but I do see "chest_minecart"). Let's dig deeper into the "items":

In [24]:
for chunk in largest_region.iter_chunks():
    for entity in chunk["Entities"]:
        if entity["id"].value == "minecraft:item":
            summarize_keystore(entity)
            break
    else:
        continue
    break


 - `Motion` : (3 items)
 - `Health` : `5`
 - `Invulnerable` : `0`
 - `Air` : `300`
 - `OnGround` : `0`
 - `PortalCooldown` : `0`
 - `Rotation` : 
	
	 - `0` : `119.47333526611328`
	 - `1` : `0.0`
 - `FallDistance` : `0.0`
 - `Item` : 
	
	 - `0` : `"id"`
	 - `1` : `"Count"`
 - `Pos` : (3 items)
 - `PickupDelay` : `0`
 - `Fire` : `0`
 - `id` : `"minecraft:item"`
 - `UUID` : (4 items)
 - `Age` : `5557`

In [26]:
entity["Item"]["id"]

minecraft:glow_ink_sac

Oh, yep. Something died.

In [28]:
item_ids = set()  # type: Set[str]

for chunk in largest_region.iter_chunks():
    for entity in chunk["Entities"]:
        if entity["id"].value == "minecraft:item":
            item_ids.add(entity["Item"]["id"].value)

for item_id in item_ids:
    print(f" - {item_id}")

 - minecraft:mutton
 - minecraft:white_wool
 - minecraft:egg
 - minecraft:glow_ink_sac


Yeah, so these are all drop items--I'm beginning to wonder whether chests can even be found in the entity files. But let's go ahead and be thorough:

In [31]:
%%time
entity_ids = set()  # type: Set[str]

for region_file in all_entitiy_files:
    region_data = region.RegionFile(region_file)
    for chunk in region_data.iter_chunks():
        for entity in chunk["Entities"]:
            entity_ids.add(entity["id"].value)
for entity_id in entity_ids:
    print(f" - {entity_id}")

 - minecraft:elder_guardian
 - minecraft:wolf
 - minecraft:trident
 - minecraft:pig
 - minecraft:parrot
 - minecraft:trader_llama
 - minecraft:chicken
 - minecraft:zombie_villager
 - minecraft:creeper
 - minecraft:glow_item_frame
 - minecraft:mule
 - minecraft:mooshroom
 - minecraft:bee
 - minecraft:turtle
 - minecraft:enderman
 - minecraft:dolphin
 - minecraft:sheep
 - minecraft:rabbit
 - minecraft:chest_minecart
 - minecraft:iron_golem
 - minecraft:polar_bear
 - minecraft:salmon
 - minecraft:goat
 - minecraft:zombified_piglin
 - minecraft:slime
 - minecraft:cod
 - minecraft:fox
 - minecraft:drowned
 - minecraft:llama
 - minecraft:cat
 - minecraft:villager
 - minecraft:donkey
 - minecraft:cow
 - minecraft:item_frame
 - minecraft:bat
 - minecraft:arrow
 - minecraft:squid
 - minecraft:ocelot
 - minecraft:armor_stand
 - minecraft:falling_block
 - minecraft:zombie
 - minecraft:boat
 - minecraft:piglin
 - minecraft:horse
 - minecraft:wandering_trader
 - minecraft:snow_golem
 - minecraft:sk

Okay, so yeah, no chests here. We'll have to check the block data.

## Emphasis on the "block" part

From [the wiki](https://minecraft.fandom.com/wiki/Chunk_format#Block_entity_format):

> A block entity **(not related to entity)** is used by Minecraft to store information about a block that can't be stored in the block's block states. 

(emphasis mine)

In [33]:
all_region_files = sorted(
    (save_folder / "region").glob("*"), key=lambda path: -path.stat().st_size
)
for path in all_region_files[:10]:
    print(f"- {path.name} ({'folder' if path.is_dir() else format_file_size(path)})")
print(f"... {len(all_region_files) - 10} more")

- r.-5.-1.mca (113.2 MB)
- r.-6.-1.mca (45.1 MB)
- r.-5.0.mca (27.4 MB)
- r.-6.-2.mca (13.7 MB)
- r.-5.-2.mca (12.3 MB)
- r.-2.-12.mca (12.1 MB)
- r.-2.-4.mca (12.0 MB)
- r.-2.1.mca (11.8 MB)
- r.-3.1.mca (11.7 MB)
- r.-3.0.mca (11.3 MB)
... 381 more


In [37]:
largest_region = region.RegionFile(all_region_files[0])
first_chunk = next(largest_region.iter_chunks())
summarize_keystore(first_chunk)


 - `Status` : `"full"`
 - `zPos` : `-32`
 - `block_entities` : (0 items)
 - `yPos` : `-4`
 - `LastUpdate` : `12817424`
 - `structures` : 
	
	 - `0` : `"References"`
	 - `1` : `"starts"`
 - `InhabitedTime` : `0`
 - `xPos` : `-160`
 - `Heightmaps` : (4 items)
 - `sections` : (24 items)
 - `isLightOn` : `1`
 - `block_ticks` : (187 items)
 - `PostProcessing` : (24 items)
 - `DataVersion` : `2865`
 - `fluid_ticks` : 
	
	 - `0` : (6 items)

Oh hey, `block_entities`. Right there.

Let's see what an example block entity looks like.

In [39]:
for chunk in largest_region.iter_chunks():
    if len(chunk["block_entities"]) > 0:
        summarize_keystore(chunk["block_entities"][0])
        break


 - `MaxNearbyEntities` : `6`
 - `RequiredPlayerRange` : `16`
 - `SpawnCount` : `4`
 - `SpawnData` : 
	
	 - `0` : `"entity"`
 - `MaxSpawnDelay` : `800`
 - `Delay` : `20`
 - `keepPacked` : `0`
 - `x` : `-2557`
 - `y` : `9`
 - `z` : `-439`
 - `id` : `"minecraft:mob_spawner"`
 - `SpawnRange` : `4`
 - `MinSpawnDelay` : `200`
 - `SpawnPotentials` : (0 items)

Okay, cool. And we have our "id" field.

In [40]:
entity_ids = set()  # type: Set[str]

for chunk in largest_region.iter_chunks():
    for entity in chunk["block_entities"]:
        entity_ids.add(entity["id"].value)

for entity_id in entity_ids:
    print(f" - {entity_id}")

 - minecraft:smoker
 - minecraft:brewing_stand
 - minecraft:ender_chest
 - minecraft:chest
 - minecraft:barrel
 - minecraft:banner
 - minecraft:furnace
 - minecraft:bed
 - minecraft:bell
 - minecraft:sign
 - minecraft:mob_spawner


Yay we have chests!

In [41]:
for chunk in largest_region.iter_chunks():
    for entity in chunk["block_entities"]:
        if entity["id"].value == "minecraft:chest":
            summarize_keystore(entity)
            break
    else:
        continue
    break


 - `LootTable` : `"minecraft:chests/simple_dungeon"`
 - `keepPacked` : `0`
 - `x` : `-2541`
 - `y` : `-33`
 - `z` : `-506`
 - `id` : `"minecraft:chest"`
 - `LootTableSeed` : `442060047808683048`

Ah, I'd wondered about this, whether chests generate their loot on worldgen or when you open them.

Unfortunately, **this answers my question: without being able to translate seed into contents, there is no way to find out in advance which chests contain which swag**.