In [2]:
import json
import pandas as pd

In [3]:
def parse_json(filename, class_type, encoding="utf-8"):
    with open(filename, "r", encoding=encoding) as f:
        return class_type(**json.load(f))

In [4]:
from tools.dump_format import RecipeStacks
from tools.nerd_format import RecipeFile, Recipe
from tools.st_format import STDataFile
from tools.config_format import Config
stacks = parse_json("data/recipes_stacks.json", RecipeStacks)
st_file = parse_json("data/st_ex_data.json", STDataFile)
recipe_file = parse_json("data/recipes_preprocessed.json", RecipeFile, encoding="cp1252")

In [7]:
config = parse_json("config.json", Config)
config

Config(filter=FilterConfig(handler_names=['crafting'], handler_mods=['GT: New Horizons', 'GregTech', 'BartWorks', 'GTNH: Lanthanides', 'Railcraft', 'Good Generator', 'Ender IO'], exclude_handler_names=['gt.recipe.packager', 'gt.recipe.unpackager', 'gregtech.nei.multiblockhandler', 'gt.recipe.mixer', 'gt.recipe.centrifuge', 'gt.recipe.electrolyzer', 'gtpp.recipe.chemicaldehydrator', 'gt.recipe.chemicalreactor'], exclude_recipes=[RecipeFilter(machines=['Shaped Crafting', 'Shapeless Crafting'], inputs=IngredientExactFilter(kind='exactly_match', oredict=['dust.*'], num_matches=[1]), outputs=IngredientAnyFilter(kind='all_match_any', oredict=['dustTiny.*', 'dustSmall.*'])), RecipeFilter(machines=None, inputs=IngredientAnyFilter(kind='any_match_any', oredict=['tool.*', 'craftingTool.*']), outputs=None)]))

In [8]:
st_file.recipes.items()

dict_items([('Recipe_AILimiter_C', Recipe(slug='ai-limiter', name='AI Limiter', className='Recipe_AILimiter_C', alternate=False, time=12, manualTimeMultiplier=1.0, ingredients=[RecipeItem(item='Desc_CopperSheet_C', amount=5.0), RecipeItem(item='Desc_HighSpeedWire_C', amount=20.0)], forBuilding=False, inMachine=True, inHand=True, inWorkshop=False, products=[RecipeItem(item='Desc_CircuitBoardHighSpeed_C', amount=1.0)], producedIn=['Desc_AssemblerMk1_C'], isVariablePower=False, minPower=None, maxPower=None)), ('Recipe_Alternate_AdheredIronPlate_C', Recipe(slug='alternate-adhered-iron-plate', name='Alternate: Adhered Iron Plate', className='Recipe_Alternate_AdheredIronPlate_C', alternate=True, time=16, manualTimeMultiplier=1.0, ingredients=[RecipeItem(item='Desc_IronPlate_C', amount=3.0), RecipeItem(item='Desc_Rubber_C', amount=1.0)], forBuilding=False, inMachine=True, inHand=False, inWorkshop=False, products=[RecipeItem(item='Desc_IronPlateReinforced_C', amount=1.0)], producedIn=['Desc_As

In [9]:
handlers = pd.read_csv("data/handlers.csv")
handlers.describe()

Unnamed: 0,Handler Recipe Name,Handler Class,Overlay Identifier,Mod DisplayName,ItemStack
count,301,301,222,301,301
unique,268,171,201,47,266
top,§9NASA Workbench,gregtech.nei.GT_NEI_DefaultHandler,crafting,GregTech,Unknown
freq,11,131,19,113,14


In [10]:
# Map slug->oredict
oredict = pd.read_csv("data/oredict.csv", index_col="ItemStack")


item_df = pd.DataFrame.from_records([(k, v.name) for k, v in stacks.items.items()], columns=["slug", "name"], index="slug")
# Some items have the damage included in ".damage" notation, which doesn't match with the oredict (and is also inconsistent)
# Let's remove it
halves = item_df["name"].str.rsplit(".", n=1, expand=True)
item_df["name"][halves[1].notna() & halves[1].str.isnumeric()] = halves[0]
# Add 1x for stack notation
item_df["stack"] = "1x" + item_df["name"]

# Wildcard items are ones where damage actually means damage, and not a different item
# Match them with the items with damage values removed
wildcard_oredict = oredict[oredict["Wildcard"] == True]
wildcard_oredict.index = wildcard_oredict.index.str.rsplit("@", n=1, expand=True).get_level_values(0)
item_df = item_df.merge(wildcard_oredict["Ore Name"], left_on="stack", right_index=True, how="left")

# For non-wildcard cases, we must add the damage to the stack using @ notation
item_df["stack"] = item_df["stack"] + "@" + item_df.index.str.split("d", n=1, expand=True).get_level_values(1)
item_df = item_df.merge(oredict["Ore Name"], left_on="stack", right_index=True, how="left", suffixes=("_wild", "_normal"))

combined_orenames = item_df["Ore Name_normal"].fillna(item_df["Ore Name_wild"])
item_df["Ore Name"] = combined_orenames
item_df.drop(columns=["Ore Name_normal", "Ore Name_wild"], inplace=True)

In [11]:
slug_to_oredict = item_df["Ore Name"]
slug_to_oredict

slug
6574d0               dustImpureFlorencite
472d4878            oreRedgraniteOilsands
472d4877           oreRedgraniteSoapstone
482d5                           blockOpal
4350d19101        plateTriplePlutonium241
                         ...             
5805d0        plateDoubleMaragingSteel250
6779d0           dustFormaldehydeCatalyst
6383d0                dustPureZirconolite
5925d0               plateDenseHastelloyW
4351d2299             toolHeadShovelStone
Name: Ore Name, Length: 31290, dtype: object

In [12]:
# Precompute oredict matches for every item
exclude_recipe_filters = config.filter.exclude_recipes
all_filters = set()
for f in exclude_recipe_filters:
    if f.inputs: all_filters.update(f.inputs.oredict)
    if f.outputs: all_filters.update(f.outputs.oredict)
slug_matches = pd.DataFrame(index=item_df.index, columns=list(all_filters), dtype=bool)

for filter_regex in all_filters:
    slug_matches[filter_regex] = slug_to_oredict.str.fullmatch(filter_regex, case=True)

slug_matches.describe()
# slug_matches.loc[["6574d0", "472d4877"]]["dust.*"].sum()
# stacks.items["4350d934"]

Unnamed: 0,dustSmall.*,tool.*,craftingTool.*,dustTiny.*,dust.*
count,21929,21929,21929,21929,21929
unique,2,2,2,2,2
top,False,False,False,False,False
freq,21168,18747,21868,21168,19237


In [13]:
import itertools
missing_slugs = set(slug_matches.index[slug_matches.isna().any(axis=1)]) - set(itertools.chain(*[[i.slug for i in r.inputs + r.outputs] for r in recipe_file.recipes]))
print(len(missing_slugs)) # WHY SO MANY??
print(next(iter(missing_slugs))) # 7002d0
print(stacks.items[next(iter(missing_slugs))]) # Entry still exists, but it's not in the oredict. I did a left join!!
print(all([i in stacks.items for i in missing_slugs])) #WHY!!!??

2979
4352d32042
id=4352 regName='gregtech:gt.metaitem.03' name='gt.metaitem.03.32042' displayName='NOR Memory Chip (Wafer)' nbt={'id': '4352S', 'Count': '1B', 'Damage': '32042S'}
True


In [14]:
# RECIPE FILTERING
from collections import Counter
import itertools

import re
from tools.config_format import IngredientListFilter, RecipeFilter
from tools.nerd_format import Stack


filter_machines = set(
    handlers[
        (
            (
                handlers["Overlay Identifier"].apply(
                    lambda x: x in config.filter.handler_names
                )
            )
            | (
                handlers["Mod DisplayName"].apply(
                    lambda x: x in config.filter.handler_mods
                )
            )
        )
        & ~(
            handlers["Overlay Identifier"].apply(
                lambda x: x in config.filter.exclude_handler_names
            )
        )
    ]["Handler Recipe Name"]
)

missing_slugs_counter = 0

def matches_ingredient_list_filter(ingredients: list[Stack], filter: IngredientListFilter):
    slugs = [i.slug for i in ingredients]
    try:
        matches = slug_matches.loc[slugs]
        if filter.kind == "all_match_any":
            return matches[filter.oredict].any(axis=1).all()
        elif filter.kind == "any_match_any":
            return matches[filter.oredict].any().any()
        elif filter.kind == "exactly_match":
            return (matches[filter.oredict].sum(axis=0) == filter.num_matches).all()
        else: 
            raise ValueError("Invalid filter kind")
    except KeyError:
        global missing_slugs_counter
        missing_slugs_counter += 1
        return False

def matches_recipe_filter(recipe: Recipe, filter: RecipeFilter):
    if filter.machines and recipe.machine not in filter.machines: return False
    if filter.inputs and not matches_ingredient_list_filter(recipe.inputs, filter.inputs): return False
    if filter.outputs and not matches_ingredient_list_filter(recipe.outputs, filter.outputs): return False
    return True


def passes_filter(recipe: Recipe):
    return (recipe.machine in filter_machines) and not any(matches_recipe_filter(recipe, filter) for filter in exclude_recipe_filters)

In [15]:
original_len = len(recipe_file.recipes)

# filtered_len = len(list())
filtered = list(filter(passes_filter, recipe_file.recipes))

print("Original length:", original_len, "Filtered length:", len(filtered))
c = Counter(map(lambda x: x.machine, filtered))
print("Missing slugs:", missing_slugs_counter) # Few enough, I guess
print(c)

Original length: 159402 Filtered length: 85916
Missing slugs: 30817
Counter({'Arc Furnace Recycling': 9436, 'Macerator Recycling': 9357, 'Fluid Extractor Recycling': 8302, 'Bending Machine': 5720, 'Cutting Machine': 5073, 'Assembler': 4947, 'Shaped Crafting': 4527, 'Macerator': 3997, 'Fluid Solidifier': 3906, 'Alloy Smelter Recycling': 3707, 'Fluid Canner': 2898, 'Extruder': 2838, 'Chemical Bath': 2394, 'Forge Hammer': 1793, 'Shapeless Crafting': 1408, 'Alloy Smelter Molding': 1171, 'Lathe': 1104, 'Fluid Extractor': 1089, 'Multiblock Centrifuge': 1027, 'Wiremill': 1024, 'Laser Engraver': 966, 'Implosion Compressor': 772, 'Thermal Centrifuge': 688, 'Large Chemical Reactor': 658, 'Ore Washer': 654, 'Simple Dust Washer': 592, 'Alloy Smelter': 498, 'Compressor': 411, 'Distillery': 367, 'Blast Furnace': 339, 'Multiblock Mixer': 301, 'Multiblock Electrolyzer': 277, 'Primitive Blast Furnace': 266, 'Vacuum Freezer': 237, 'Autoclave': 204, 'Cryogenic Freezer': 193, 'Forming Press': 187, 'Electr

4527

In [17]:
# Too friggin complicated, just use the slow loop method

# # Convert the recipe_file.recipes list into a DataFrame
# recipe_slugs_df = pd.DataFrame.from_records({"machine": r.machine, "inputs": [i.slug for i in r.inputs], "outputs": [o.slug for o in r.outputs]} for r in recipe_file.recipes)

# recipe_outputs_df = recipe_slugs_df[["outputs"]].explode('outputs')
# recipe_inputs_df = recipe_slugs_df[["inputs"]].explode('inputs')

# # Create a DataFrame for the filter machines
# filter_machines_df = pd.DataFrame({'machine': list(filter_machines)})

# # Merge the recipes_df with the filter_machines_df to add a column indicating whether the recipe machine is in the filter machines
# recipe_slugs_df = pd.merge(recipe_slugs_df, filter_machines_df, on='machine', how='inner')

# filts = pd.DataFrame.from_records([f.__dict__ for f in exclude_recipe_filters])



In [24]:
def print_item(item):
    slug = item.slug
    name = None
    if "d" in slug:
        if slug not in stacks.items:
            name = f"{slug} [MISSING]"
        else:
            name = f"{stacks.items[slug].displayName} [{slug_to_oredict.loc[slug]}]"
    else:
        if slug not in stacks.fluids:
            name = f"{slug} [MISSING]"
        else: 
            name =  stacks.fluids[slug].localizedName if hasattr(stacks.fluids[slug],"localizedName") else stacks.fluids[slug].unlocalizedName
    return f"({item.amount:10f}x) {slug:<16}: {name}"

def print_nice(recipe: Recipe):
    inputs = "\n\t".join(map(lambda x: print_item(x), recipe.inputs))
    outputs = "\n\t".join(map(lambda x: print_item(x), recipe.outputs))
    print("Inputs:\n\t"+ inputs)
    print("Outputs:\n\t"+ outputs)
    print("Machine:", recipe.machine)

In [22]:
print_nice(next(filter(lambda x: x.machine=="Shaped Crafting", filtered)))

Inputs:
	(         6x) 4350d11337      : Shadow Steel Ingot [ingotShadowSteel]
	(         1x) 4350d23305      : Steel Rod [stickSteel]
Outputs:
	(         2x) 4355d12         : Hammer [craftingToolHardHammer]
Machine: Shaped Crafting


In [None]:
slug_to_oredict[["4350d2809", "4355d24"]]

slug
4350d2809               dustWood
4350d2809               pulpWood
4355d24       craftingToolMortar
4355d24      toolMortarandpestle
Name: Ore Name, dtype: object

In [None]:
filtered = list(set(filtered) - set(handtools))
len(filtered)

97320

In [25]:
for i in range(0, len(filtered), len(filtered)//50):
    print_nice(filtered[i])
    print("*"*80)

Inputs:
	(  1.000000x) 459d1343        : 8x Cupronickel Wire [nan]
Outputs:
	(  8.000000x) 459d1340        : 1x Cupronickel Wire [nan]
Machine: Shapeless Crafting
********************************************************************************
Inputs:
	(  1.000000x) 6824d0          : Teflon Plate [plateTeflon]
Outputs:
	(144.000000x) 1056            : molten.teflon
Machine: Fluid Extractor
********************************************************************************
Inputs:
	(  1.000000x) 4350d23043      : Rubidium Rod [stickRubidium]
	( 42.000000x) 52              : oxygen
Outputs:
	(  4.000000x) 4350d9043       : Rubidium Nugget [nuggetRubidium]
Machine: Arc Furnace Recycling
********************************************************************************
Inputs:
	(  1.000000x) 4350d11028      : Titanium Ingot [ingotTitanium]
Outputs:
	(144.000000x) 294             : molten.titanium
Machine: Fluid Extractor Recycling
****************************************************************

In [26]:
filtered_file = RecipeFile(recipes=filtered, dump_version=recipe_file.dump_version, dump_sha=recipe_file.dump_sha)

with open("data/recipes_filtered.json", "w", encoding="utf-8") as f:
    f.write(filtered_file.model_dump_json())