# Hermits Need to Wax Their Signs

Look—I don't know for sure that waxing written (vs. unwritten) signs reduces lag, but I still think it's fair to say that it's **good practice** when it comes to preparing an archival world download. And the sheer number of unwaxed signs I've found throughout the world is making me cry.

I could just go around with a bunch of honeycomb... or I could just write a script to fix that for everyone.

In [1]:
import json
import re
from collections import Counter
from collections.abc import Collection
from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import partial
from os import environ
from pathlib import Path
from typing import Any

import mutf8
import pandas as pd
from IPython.display import Markdown, display
from nbt import nbt, region

In [2]:
def format_file_size(path: Path) -> str:
    """Return the size of the specified file in
    human-readable form (B / KB / MB / GB / TB)

    Parameters
    ----------
    path : Path
        The path to the file

    Returns
    -------
    str
        A prettily formatted file size, always with one decimal place

    Notes
    -----
    Units are binary (1 KB = 1024 B). A size gets promoted to the next
    unit as soon as it reaches half of that unit, so 512 bytes is
    reported as "0.5 KB" rather than "512.0 B".

    I would be shocked if there isn't a utility already built
    into the standard library to do this, but all I could find
    via Googling was a bunch of recipes and examples
    """
    size: float = path.stat().st_size  # in bytes
    for unit in ("B", "KB", "MB", "GB"):
        if size < 1024 / 2:
            return f"{size:.1f} {unit}"
        size /= 1024
    # anything still too big for GB is reported in TB, using the same
    # one-decimal formatting as every other unit
    return f"{size:.1f} TB"

In [3]:
def summarize_keystore(keystore: dict[str, Any]) -> None:
    """Render a Markdown bullet-list overview of a key-value store

    Parameters
    ----------
    keystore : dict
        The keystore to summarize

    Returns
    -------
    None

    Notes
    -----
    Strings are shown quoted, other non-collection values are shown
    via ``str()``, collections holding one or two items are expanded
    as an indented sub-list and every other collection is collapsed
    to an item count.
    """

    def _render(store: dict[str, Any]) -> str:
        pieces: list[str] = []
        for key, value in store.items():
            pieces.append(f"\n - `{key}` : ")

            # strings are collections too, so they have to be handled first
            if isinstance(value, (str, nbt.TAG_String)):
                pieces.append(f'`"{value}"`')
                continue

            if not isinstance(value, Collection):
                pieces.append(f"`{str(value)}`")
                continue

            size = len(value)
            if not 0 < size < 3:
                pieces.append(f"({size} items)")
                continue

            # small collection: recurse, keying sequences by position
            nested = value if isinstance(value, dict) else dict(enumerate(value))
            pieces.append("\n")
            pieces.append(
                "\n".join(f"\t{line}" for line in _render(nested).split("\n"))
            )
        return "".join(pieces)

    display(Markdown(_render(keystore)))

In [4]:
save_folder = Path(environ["SAVE_PATH"])

# sanity check: list everything at the top level of the save folder
for path in sorted(save_folder.glob("*")):
    if path.is_dir():
        size_label = "folder"
    else:
        size_label = format_file_size(path)
    print(f"- {path.name} ({size_label})")

- .git (folder)
- .gitignore (1.0 B)
- .gsb_manifest (110.0 B)
- DIM-1 (folder)
- DIM1 (folder)
- advancements (folder)
- audio_player_data (folder)
- carpet-fixes.conf (22.0 B)
- carpet.conf (57.0 B)
- data (folder)
- datapacks (folder)
- entities (folder)
- icon.png (9.0 KB)
- level.dat (25.8 KB)
- level.dat_old (25.8 KB)
- playerdata (folder)
- poi (folder)
- region (folder)
- resources.zip (34.0 MB)
- scripts (folder)
- session.lock (3.0 B)
- stats (folder)


In [5]:
# collect the region files of each dimension, largest file first
all_overworld_regions = sorted(
    (save_folder / "region").glob("*"), key=lambda path: -path.stat().st_size
)
all_nether_regions = sorted(
    (save_folder / "DIM-1" / "region").glob("*"), key=lambda path: -path.stat().st_size
)
all_end_regions = sorted(
    (save_folder / "DIM1" / "region").glob("*"), key=lambda path: -path.stat().st_size
)

# NOTE: each dimension is sorted on its own and the lists are then simply
# concatenated, so the combined list starts with the overworld regions
all_region_files = all_overworld_regions + all_nether_regions + all_end_regions

# preview the first few entries
for path in all_region_files[:10]:
    print(f"- {path.name} ({'folder' if path.is_dir() else format_file_size(path)})")

# only mention a remainder when there is one (the bare subtraction would
# report a negative count for a world with fewer than 10 region files)
remaining = len(all_region_files) - 10
if remaining > 0:
    print(f"... {remaining} more")

- r.-1.-1.mca (13.5 MB)
- r.-2.-1.mca (13.3 MB)
- r.2.-1.mca (13.3 MB)
- r.-2.0.mca (12.8 MB)
- r.-4.-1.mca (12.2 MB)
- r.0.0.mca (12.1 MB)
- r.0.-6.mca (12.0 MB)
- r.-1.0.mca (11.9 MB)
- r.-3.-1.mca (11.7 MB)
- r.-3.0.mca (11.5 MB)
... 309 more


## Find a Sign

We did this [yesterday](There%27s%20Your%20Sign.ipynb)

In [6]:
%%time
# walk regions -> chunks -> block entities and stop at the very first sign;
# the for/else ladder propagates the innermost `break` out through every loop
for path in all_region_files:
    region_data = region.RegionFile(path)
    for chunk in region_data.iter_chunks():
        for entity in chunk["block_entities"]:
            if entity["id"].value == "minecraft:sign":
                break  # found one; `entity` stays bound to it
        else:
            continue  # no sign in this chunk, move on to the next chunk
        break  # the entity loop hit `break`, so leave the chunk loop too
    else:
        continue  # no sign anywhere in this region, move on to the next one
    break  # a sign was found, so stop scanning regions
summarize_keystore(entity)


 - `z` : `-485`
 - `x` : `-506`
 - `is_waxed` : `0`
 - `id` : `"minecraft:sign"`
 - `y` : `113`
 - `front_text` : (3 items)
 - `keepPacked` : `0`
 - `components` : (0 items)
 - `back_text` : (3 items)

CPU times: user 19.9 ms, sys: 3.99 ms, total: 23.9 ms
Wall time: 23.8 ms


In [7]:
# read the waxed flag off the sign that the previous cell left in `entity`
entity["is_waxed"].value

0

And now let's see if we can find a _waxed_ sign.

In [8]:
%%time
# same search as before, except a sign only counts once its waxed flag is
# set; the for/else ladder propagates the innermost `break` outwards
for path in all_region_files:
    region_data = region.RegionFile(path)
    for chunk in region_data.iter_chunks():
        for entity in chunk["block_entities"]:
            if entity["id"].value == "minecraft:sign":
                if entity["is_waxed"].value != 0:
                    break  # found a waxed sign; `entity` stays bound to it
        else:
            continue  # no waxed sign in this chunk, move on to the next chunk
        break  # the entity loop hit `break`, so leave the chunk loop too
    else:
        continue  # no waxed sign in this region, move on to the next one
    break  # a waxed sign was found, so stop scanning regions
summarize_keystore(entity)
# last expression of the cell, so the notebook displays the flag's value
entity["is_waxed"].value


 - `z` : `-470`
 - `x` : `-499`
 - `is_waxed` : `1`
 - `id` : `"minecraft:sign"`
 - `y` : `126`
 - `front_text` : (3 items)
 - `keepPacked` : `0`
 - `components` : (0 items)
 - `back_text` : (3 items)

CPU times: user 31.6 ms, sys: 1.09 ms, total: 32.7 ms
Wall time: 32.6 ms


1

Cool, so it's just a matter of setting it from 0 to 1.

## Sizing the Damage

I'm just curious what fraction of signs on HermitCraft were actually waxed.

In [9]:
def count_waxed_and_unwaxed_signs(path: Path) -> Counter[int]:
    """Count and return the number of waxed and unwaxed signs in a given region

    Parameters
    ----------
    path : Path
        The path of the region file to scan

    Returns
    -------
    Counter of int to int
        The counts of signs (regular and hanging) by their waxed
        state (0: unwaxed, 1: waxed)
    """
    waxed_counts: Counter[int] = Counter()
    region_data = region.RegionFile(path)
    try:
        for chunk in region_data.iter_chunks():
            for entity in chunk["block_entities"]:
                if entity["id"].value not in ("minecraft:sign", "minecraft:hanging_sign"):
                    continue

                waxed_counts[entity["is_waxed"].value] += 1
    finally:
        # release the file handle right away instead of leaving it
        # to the garbage collector
        region_data.close()
    return waxed_counts

In [10]:
%%time
combined_waxed_counts: Counter[int] = Counter()
with ProcessPoolExecutor(max_workers=24) as executor:
    # fan the per-region scans out across the worker processes
    futures = [
        executor.submit(count_waxed_and_unwaxed_signs, region_file_path)
        for region_file_path in all_region_files
    ]
    # fold each region's tally into the running total as it completes
    for result in as_completed(futures):
        combined_waxed_counts += result.result()
combined_waxed_counts

CPU times: user 61.6 ms, sys: 75.2 ms, total: 137 ms
Wall time: 10.7 s


Counter({0: 12892, 1: 4006})

... not even a **quarter** of the **almost 17000** signs on HermitCraft were waxed. 😭

## Welp, let's fix that

In [11]:
def wax_all_signs(path: Path) -> None:
    """Wax all signs in a given region, modifying the region file in place

    Only chunks that actually contained an unwaxed sign are written
    back; every other chunk is left untouched on disk.

    Parameters
    ----------
    path : Path
        The path of the region file to update

    Returns
    -------
    None
    """
    region_data = region.RegionFile(path)
    try:
        for chunk in region_data.iter_chunks():
            chunk_modified = False
            for entity in chunk["block_entities"]:
                if entity["id"].value not in ("minecraft:sign", "minecraft:hanging_sign"):
                    continue

                if entity["is_waxed"].value == 1:
                    continue  # already waxed, nothing to change

                entity["is_waxed"].value = 1
                chunk_modified = True

            # skip the rewrite for chunks where nothing changed
            if chunk_modified:
                region_data.write_chunk(chunk.loc.x, chunk.loc.z, chunk)
    finally:
        # close explicitly rather than leaving a file that was just
        # written to for the garbage collector to clean up
        region_data.close()

In [12]:
%%time
# wax every region file in parallel, one task per region
with ProcessPoolExecutor(max_workers=24) as executor:
    futures = []
    for region_file_path in all_region_files:
        futures.append(executor.submit(wax_all_signs, region_file_path))  # type: ignore[arg-type]
    for result in as_completed(futures):
        # wax_all_signs returns nothing, but calling .result() re-raises any
        # exception from the worker; without it a failed region would be
        # skipped silently and the run would still look successful
        result.result()

CPU times: user 65.2 ms, sys: 78.8 ms, total: 144 ms
Wall time: 25.1 s


### Verify that it worked

In [13]:
%%time
# start from a fresh tally so the earlier counts don't leak into this one
combined_waxed_counts = Counter()
with ProcessPoolExecutor(max_workers=24) as executor:
    # re-scan every region in parallel
    futures = [
        executor.submit(count_waxed_and_unwaxed_signs, region_file_path)
        for region_file_path in all_region_files
    ]
    # merge the per-region tallies as the workers finish
    for result in as_completed(futures):
        combined_waxed_counts += result.result()
combined_waxed_counts

CPU times: user 69.8 ms, sys: 71.9 ms, total: 142 ms
Wall time: 10.8 s


Counter({1: 16898})