In [11]:
# Using PuLP dependency to seriously simplify setting up LP problem. It's just a wrapper to help set things up,
#  but it does come with a built-in solver (which we will also use)
from pulp import LpVariable, LpProblem, LpMinimize, lpDot, LpStatus, value
import numpy as np

from src.graph import Graph
from src.graph._preProcessing import connectGraph, removeBackEdges
from src.data.basicTypes import Ingredient, IngredientCollection, Recipe
from factory_graph import ProgramContext

In [12]:
class LPSolver:
    def __init__(self, graph):
        self.graph = graph
        self.variables = []
        # self.variable_idx_counter = 0 # Autogen current "head" index for variable number
        # self.system = []
        self.solved_vars = None # Result from linear solver

        self.lookup = {} # (machine, product, direction, multi_idx) -> variable index
        self.edge_from_perspective_to_index = {} # (edge, machine_id) -> variable index

def graphPreProcessing(self):
    connectGraph(self)
    # if not self.graph_config.get('KEEP_BACK_EDGES', False):
    #     removeBackEdges(self)
    Graph.createAdjacencyList(self)

def linearProgrammingSolver(self: ProgramContext, project_name: str, recipes: list[Recipe], graph_config: dict):
    g = Graph(project_name, recipes, self, graph_config=graph_config)
    self._graph = g # For test access
    graphPreProcessing(g)
    print(g.adj)
    
    


In [13]:
context = ProgramContext()

context.graph_gen = linearProgrammingSolver # Override solver
context.generate_one("power/fish/methane.yaml")

defaultdict(<function createAdjacencyList.<locals>.<lambda> at 0x000001E0C02A8540>, {'source': defaultdict(<class 'list'>, {'O': [('source', '0', 'pams fish')]}), '0': defaultdict(<class 'list'>, {'I': [('source', '0', 'pams fish')], 'O': [('0', '1', 'methane')]}), '1': defaultdict(<class 'list'>, {'I': [('0', '1', 'methane')], 'O': [('1', 'sink', 'biogas')]}), 'sink': defaultdict(<class 'list'>, {'I': [('1', 'sink', 'biogas')]})})


In [14]:
# This is me figuring out edge cases.

"""
- m: centrifuge
  tier: LV
  I:
    pams fish: 1
  O:
    methane: 96
  eut: 5
  dur: 19
  number: 1
  cost_priority:
    pams fish: 1
  # target:
  #   # methane: 250
  #   pams fish: 1
- m: distillery
  tier: LV
  I:
    methane: 125
  O:
    biogas: 375
  eut: 7
  dur: 1
"""

#  

problem = LpProblem("test", LpMinimize)
cost_inputs = ["pams fish"]
recipe_vars = ["pams fish", "methane"]
items = ["pams fish", "methane", "biogas"]
target_vector = [0,0,1000]
ing_matrix = {
  "fuge": [-1, 96, 0],
  "dist": [0, -125, 375]
}
# How do you figure out which variables should be inputs?
input_vectors = {
  "fish_in": [1, 0, 0],
}
# rec_mat = np.array(list(recipe_vectors.values())+ list(input_vectors.values()))
# display(rec_mat)
# rec_mat.transpose()
all_vectors = ing_matrix.copy()
all_vectors.update(input_vectors)
# Item constraints are transpose of values of constraint vectors (all vectors)
item_constraints = np.array(list(all_vectors.values())).transpose()


In [15]:
# problem.variables()
# Create variables
recipe_vars = LpVariable.dicts("rec", all_vectors.keys(), lowBound=0, cat="Continuous")

# Add objective
problem += recipe_vars["fish_in"]
# Add constraints
for item_constraint_vec, target in zip(item_constraints, target_vector):
    problem += lpDot(recipe_vars.values(), item_constraint_vec) >= target
# problem += lpDot(x.values(), target_vector) >= 0

problem.solve()
LpStatus[problem.status]

'Optimal'

In [16]:
for v in problem.variables():
    print(v.name, "=", v.varValue)

rec_dist = 2.6666667
rec_fish_in = 3.4722222
rec_fuge = 3.4722222


In [17]:
# That works, now let's mess around with ways to identify input candidates
# Cases that need to be inputs are useful to report the user, 
# but we want to find "eliminated" variables as described by https://github.com/ClaudeMetz/FactoryPlanner/blob/master/modfiles/backend/calculation/matrix_engine.lua
# - that is, variables that are net-zero after all loops/etc and don't need to be system inputs.

# I'm theorizing that the "eliminated" variables might be findable via the reduced row echelon form of the matrix.

from sympy import Matrix
# m = Matrix(np.hstack([item_constraints, np.array(target_vector).reshape(-1,1)]))
# m.rref()

# One example is a loop in the palladium component of the platline,
# which inputs palladium enriched ammonia and loops that and pall met pow dust
# [ammonia, pall enriched ammonia, pall met pow, pall salt, repre pall]
# and cols are:
# [lcr pall met pow recycle, lcr repre, sifter salt recycle]
pall_loop = [
    [-1000, 0,      0],
    [1000,  -9000,  0],
    [-1,    -9,     0.95],
    [0,     16,     -1],
    [0,     2,      0],
]
 
positive_cost_cycle = Matrix(pall_loop)
 
positive_cost_cycle.rref()
#  Oh yeah, duh, rref on an over-defined matrix. Uhhhh
# This matrix converts recipe executions to items produced, 
# and solving it finds necessary recipes executions/unit time to produce desired items
# We're looking for input-item-to-output item conversions. How do we find those?
# - 

(Matrix([
 [1, 0, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 0, 0],
 [0, 0, 0]]),
 (0, 1, 2))

In [18]:
positive_cost_cycle.singular_value_decomposition()

(Matrix([
 [  -0.0123427693881819,     0.999920219926959, -0.00266637715277894],
 [    0.999921784104044,     0.012337578452739, -0.00197762613328934],
 [ 0.000975236276396803,   0.00201218158793192,    0.688745946762978],
 [ -0.00175569605886587,  -0.00179957317684196,   -0.724995172566176],
 [-0.000219462003270191, -0.000224946177417711,   1.0320007302374e-6]]),
 Matrix([
 [9056.0851669197,                0,                0],
 [              0, 993.812582692507,                0],
 [              0,                0, 1.37930382200068]]),
 Matrix([
 [  0.111777170775024,   -0.99373329625207,  3.6777441977233e-6],
 [ -0.993733296258832,  -0.111777170773151, 7.11721275761968e-7],
 [2.96173288126787e-7, 3.73425105498543e-6,   0.999999999992984]]))

In [19]:
positive_cost_cycle * positive_cost_cycle.transpose()

Matrix([
[ 1000000, -1000000,    1000,       0,      0],
[-1000000, 82000000,   80000, -144000, -18000],
[    1000,    80000, 82.9025, -144.95,    -18],
[       0,  -144000, -144.95,     257,     32],
[       0,   -18000,     -18,      32,      4]])

In [20]:
simpler_pos_cost = Matrix([
    [-1, 2],
    [0.25, -1]
])
simpler_pos_cost.rref()
# I don't think this is going to work for determining which should be inputs, but I think I can figure it out using LP

(Matrix([
 [1, 0],
 [0, 1]]),
 (0, 1))

In [21]:
# One example is a loop in the palladium component of the platline,
# which inputs palladium enriched ammonia and loops that and pall met pow dust
# [ammonia, pall enriched ammonia, pall met pow, pall salt, repre pall]
# and cols are:
# [lcr pall met pow recycle, lcr repre, sifter salt recycle]
pall_loop = [
    [-1000, 0,      0],
    [1000,  -9000,  0],
    [-1,    -9,     0.95],
    [0,     16,     -1],
    [0,     2,      0],
]
target_vector = [0, 0, 0, 0, 10]

# New theory: for anything that is ever an input, make an input vector, but tell LP to minimize number used
# Possible upgrade: seek to maximize earliness of inputs in a topological sort or derivative.

In [22]:
# Priorities: [recipe tax, pall enriched ammonia, additional vector cost]
priority_ratio = 9000/0.95 # Smallest/biggest

from pulp import LpProblem, LpMinimize, LpBinary, LpVariable, value, lpSum, lpDot

problem = LpProblem("min input test", LpMinimize)
recipe_vars = LpVariable.dicts("recipe",  ["lcr pall met pow recycle", "lcr repre", "sifter salt recycle"],0)
variables =  ["ammonia", "pall enriched ammonia", "pall met pow", "pall salt", "repre salt"] 
# Filtered only by things that are ever an input
inputs = ["ammonia", "pall enriched ammonia", "pall met pow", "pall salt"] 
explicit_inputs = ["pall enriched ammonia"]
explicit_input_amounts = LpVariable.dicts("input", explicit_inputs, 0)
# Filtered to remove explicit inputs
additional_inputs = ["ammonia", "pall met pow", "pall salt"] 
additional_input_subtractors = LpVariable.dicts("input", additional_inputs, 0)
additional_input_switches = LpVariable.dicts("in_switch", additional_inputs, cat=LpBinary)
try:
    for item, recipe_coeffs, target in zip(variables, pall_loop, target_vector):
        # To reach each target amount, use a combination of the recipe outputs and switched inputs
        # The amount for each input is unlimited, but there is a high cost for switching each one on
        if item in explicit_inputs:
            input_term = explicit_input_amounts[item]
        elif item in additional_inputs:
            input_term = additional_input_subtractors[item] * additional_input_switches[item]
        else:
            input_term = 0
        problem += lpDot(recipe_vars.values(), recipe_coeffs) + input_term >= target
        
    # Objective, in increasing order of priority:
    # 1: Per-recipe tax
    # (2, n-1): explicit costs to minimize (explicit inputs)
    # n: High-cost additional inputs
    problem += (priority_ratio**0 * lpSum(recipe_vars.values())
        + priority_ratio**1 * explicit_input_amounts["pall enriched ammonia"]
        + priority_ratio**2 * lpSum(additional_input_switches))
except TypeError as e:
    print("Well poop.")
    print(e)

Well poop.
Non-constant expressions cannot be multiplied




In [23]:
# Little essay about a new approach to solve this which-inputs-should-we-choose problem:

# Alright, so making the linear optimizer solve for minimal inputs while also solving the problem ain't going to work.
# ... because this formulation is quadratic and I can't think of a nice model that isn't.
# ... we could get rid of the toggle and treat new inputs cumulatively as the highest priority class
# ... but that has lots of edge cases and naturally gives a higher weight to minimizing large-number stuff, like fluids.
# ... we could counteract *that* by normalizing quantities (so 9000L of oxygen becomes 9kL of oxygen, or a less nice unit)
# ... but I can't think of a good normalization scheme that doesn't have problems with the different ranges of quantities between recipes.

# New insight: Focus on solving for minimal inputs first, then solve problem.
# Model this as a cover problem: we want to find the smallest set of inputs which reaches the inputs of all recipes.

# We can once again use Linear Optimization to solve this problem.
# We want to find the minimum number of inputs we must provide so that every recipe *can* be run.
# We want to prioritize hitting every recipe, *then* minimizing the number of inputs.
# Note that the linear optimizer might not use all resources if it choses not to include a relevant recipe in a solution.
# Note that if you can run a parent recipe (e.g. ammonia and oxygen for nitric acid), 
# ... you can run descendent recipes (e.g. nitric acid [and ammonia] for ammonium chloride)

# I'm going to model this as a bipartite graph in my head.
# - Every resource that is ever an input to any recipe becomes a requirement (constraint in LP terms)
#       - (one for oxygen, hydrochloric, ilmenite, whatever)
#       - These are nodes on the right-hand side of the graph.
#       - These are the inputs to "cover" with as few new/raw/actual inputs as possible. 
#       - I will call them "requirement" resources to avoid confusing myself.
# - We also take this same list of inputs and make a binary (on or off) variable for every resource
#       - Picture these as nodes on the left-hand side of the graph
#       - Turning on these inputs corresponds to selecting that variable as an input.
#       - I will call them "providable" resources to avoid confusing myself.
# - In graph terms, we put an edge between every providable resource and the requirements it covers. This includes:
#       - The resource itself (providing oxygen obviously covers oxygen requirements)
#       - For every recipe which uses this resource, cover all of its outputs
#       - For all of *those* resources, keep covering - this becomes DFS.
#       - In LP terms, every constraint is sum(every binary variable that covers this required resource) >= 1
# - Finally, the objective is the minimize the sum of all those binary variables (aka find fewest selected inputs)
# - As a preprocessing step, all requirements that are indirectly covered by explicitly provided inputs can be deleted.

# Something in my head tickles that this is NP-Complete, but I don't feel like putting in the work to test/prove it.
# Regardless, LP solvers can totally tackle NPC problems like Knapsack (and they're reasonably optimized about it)

# One big issue with this approach: selecting a valuable resource before a loop 
# (for example, palladium salt dust, which loops around and indirectly supplies *many* recipes)
# ... will have high value and cover many resources. 
# Theory #1: We could use a heuristic-based "cyclic graph toposort" to minimize back-edges (loop-backs)
# ... use that sort to form a DAG that mostly moves ingredient->output, and then use this algorithm
# Theory #2: By the nature of most GTNH recipe graphs, especially when at least one start-of-the-chain ingredient has been provided,
# ... a solution which uses just-before-a-loop resources as an input won't usually be "minimal".
# Further, this algorithm doesn't need to *perfectly* solve this problem, 
# ... because the user can fix wierd behavior by explicitly providing more resources.


In [24]:
# One example is a loop in the palladium component of the platline,
# which inputs palladium enriched ammonia and loops that and pall met pow dust
# [ammonia, pall enriched ammonia, pall met pow, pall salt, repre pall]
# and cols are:
# [lcr pall met pow recycle, lcr repre, sifter salt recycle]
pall_loop = [
    [-1000, 0,      0],
    [1000,  -9000,  0],
    [-1,    -9,     0.95],
    [0,     16,     -1],
    [0,     2,      0],
]
target_vector = [0, 0, 0, 0, 10]
item_recipe_covers = {
    "ammonia": {"lcr pall met pow recycle": {"pall enriched ammonia"}},
    "pall met pow": {"lcr pall met pow recycle": {"pall enriched ammonia"}},
    "pall enriched ammonia": {"lcr repre": {"pall salt", "repre pall"}},
    "pall met pow": {"lcr repre": {"pall salt", "repre pall"}},
    "pall salt":{"sifter salt recycle": {"pall met pow"}}
}
recipes = {
    "lcr pall met pow recycle": {
        "I": {"ammonia":1000, "pall met pow": 1},
        "O": {"pall enriched ammonia": 1000},
    },
    "lcr repre": {
        "I": {"pall enriched ammonia": 9000, "pall met pow": 9},
        "O": {"pall salt": 16, "repre pall": 2},
    },
    "sifter salt recycle": {
        "I": {"pall salt": 1},
        "O": {"pall met pow": 0.95},
    }
}

In [25]:
import re
from collections import Counter
from src.data.loadMachines import recipesFromConfig
def genSlug(s, try_acronym=False):
    slug = re.sub(r"[^a-zA-Z0-9_]", "", re.sub(r"\W+", "_", s.lower()))
    words = slug.split("_")
    if len(slug) > 25 or (try_acronym and len(words)>=2):
        return "".join(word[0] for word in words)
    else:
        return slug

def genRecipeNames(recipes):
    counter = Counter()
    # name_map = {"sink": "sink", "source": "source"}
    names = []
    for recipe in recipes:
        name = genSlug(recipe.machine, True) + "_" + genSlug(recipe.I[0].name)
        if name in counter:
            counter[name] += 1
            name += f"_{counter[name]}"
        names.append(name)
    return names

In [26]:
def stripBrackets(ing):
    prefix = False
    if ing[:2] == '\u2588 ':
        prefix = True
    stripped = ing.split(']')[-1].strip()
    if prefix and stripped[:2] != '\u2588 ': 
        stripped = '\u2588 ' + stripped
    return stripped

def getItemCovers(recipes):
    item_covers = {}

    item_to_covered_recipes = {}
    recipe_to_covered_items = {}
    items_to_cover = set()
    # Just use indices to identify recipes here
    for covered_recipe, io in enumerate(recipes):
        for ing in io.I:
            ing_name = stripBrackets(ing.name)
            item_to_covered_recipes[ing_name] = item_to_covered_recipes.get(ing_name, []) + [covered_recipe]
            items_to_cover.add(ing_name)
        for out in io.O:
            out_name = stripBrackets(out.name)
            recipe_to_covered_items[covered_recipe] = recipe_to_covered_items.get(covered_recipe, []) + [out_name]


    # Cheeky DFS for each item to figure out what it covers.
    # This could be done all-at-once with some shenanigans, but I think we want to not allow loops on a per-item basis
    # My gut is telling me that will be a slightly more restricted/less-prone-to-error heuristic.
    for item in items_to_cover:
        used_recipes = set()
        covered_items = {item}
        frontier = [item]
        while not len(frontier) == 0:
            covered_item = frontier.pop()
            if covered_item not in item_to_covered_recipes: continue # item covers no recipes
            for covered_recipe in item_to_covered_recipes[covered_item]:
                if covered_recipe in used_recipes: continue # Recipe already used (we've looped)
                used_recipes.add(covered_recipe)
                if covered_recipe not in recipe_to_covered_items: continue # recipe covers no items (we deleted its items in preprocessing)
                for outgoing_item in recipe_to_covered_items[covered_recipe]:
                    if outgoing_item in covered_items: continue # We've already hit this item
                    covered_items.add(outgoing_item)
                    frontier.append(outgoing_item)
        item_covers[item] = covered_items
    return item_covers
recipes = recipesFromConfig("devtest/palladium_loop.yaml")
getItemCovers(recipes)
# Only ammonia covers ammonia (good), but I think this loopy example is a bad example.

{'palladium enriched ammonia': {'palladium enriched ammonia',
  'palladium metallic powder dust',
  'palladium salt dust',
  'reprecipitated palladium dust'},
 'ammonia': {'ammonia',
  'palladium enriched ammonia',
  'palladium metallic powder dust',
  'palladium salt dust',
  'reprecipitated palladium dust'},
 'palladium metallic powder dust': {'palladium enriched ammonia',
  'palladium metallic powder dust',
  'palladium salt dust',
  'reprecipitated palladium dust'},
 'palladium salt dust': {'palladium enriched ammonia',
  'palladium metallic powder dust',
  'palladium salt dust',
  'reprecipitated palladium dust'}}

In [27]:
bauxite_recipes = recipesFromConfig("223_bauxite_line.yaml")
baux_covers = getItemCovers(bauxite_recipes)

display({item: len(covered) for (item, covered) in baux_covers.items()})
set(baux_covers.keys()) - set(baux_covers["bauxite dust"])
# That spooked me at first but turns out yeah, bauxite line has one input: bauxite. Lets try another.

{'chlorine': 11,
 'magnesium chloride dust': 11,
 'bauxite dust': 15,
 'salt': 11,
 'magnesium dust': 11,
 'titanium tetrachloride': 11,
 'rutile dust': 12,
 'sodium dust': 11,
 'hot titanium ingot': 2,
 'carbon dust': 11,
 'carbon monoxide': 11}

set()

In [28]:
bauxite_recipes = recipesFromConfig("2200_tungstate.yaml")
baux_covers = getItemCovers(bauxite_recipes)

display({item: len(covered) for (item, covered) in baux_covers.items()})
set(baux_covers.keys()) - set(baux_covers["endstone dust"])
# Okay, this finally proves a concern I had - that this binary "cover/not cover" approach
# won't handle when an input would reach an input, but not *enough*
# for tungstate, endstone covers most inputs, but byproduct hydrogen only covers *some* of the needed amount
# I thought this might be a non-event, but nay, it's an event. We'll need to start calculating ratios.

{'scheelite dust': 12,
 'endstone dust': 17,
 'hydrochloric acid': 13,
 'salt': 12,
 'hydrogen': 2,
 'tungstic acid dust': 3,
 'tungsten trioxide dust': 2,
 'calcium chloride dust': 12,
 'sodium dust': 12,
 'sodium hydroxide dust': 12,
 'water': 13,
 'tungstate dust': 13,
 'sodium tungstate': 12}

{'hydrochloric acid', 'water'}

In [29]:
# New essay on how to avoid doing what the last essay was talking about 
# (solving for min inputs one at a time is hard, lets do it all at once again!)

# It might be possible to frame this as an LP (actually just an rref),
# but I think it would still require a DFS from me to figure out which recipes to include in the solve
# so I'm going to run the ratios myself.
# Main Idea: if you run across a recipe you've already seen and think you can cover
#   an additional recipe-ingredient using a byproduct, confirm you have enough of that byproduct to meet that requirement.
# Problem: what if you attempt this when you don't have enough byproduct, but later another recipe gives you more of that byproduct?
# Problem: what if you dfs and hit this recipe with the byproduct ingredient first and the main ingredient later?
#   - In this case, you show as producing excess of the main ingredient

# Example of this: "insufficient_byproduct_fake.png", where a sodium tungstate input can either:
#   - Cover the EBF with scheelite, then produce byproduct hydrogen, which is not enough to cover the EBF's hydrogen input 
#       - This requires an extra hydrogen input, which is desired behavior.
#   - Cover the salt LCR with salt for hydrogen, then cover the EBF with hydrogen. 
#       - When we reach the EBF with scheelite, it looks like we have excess - great! (not desired behavior)

#.... poop. I can't think of a way to prioritize "main ingredients" like scheelite without requiring a priori knowledge of ingredient value.
#   - An LP problem which asks to maximize reachable ingredients will of course use the route that covers both hydrogen and scheelite
#   - An LP problem which minimizes reachable ingredients... finds no reachable ingredients?
#   - What about an LP problem that is constrained to reach as many ingredients as possible but minimizes out these byproduct events?

# AH! Theory: a reformulation of the quadratic issue before. We find a quantity of each input that is  
# "The most this problem could possibly use" - get to that later. Point is, that's wired to a binary input we want to minimize.
# Then, another variable subtracts from that binary-with-big-coefficient number.
# If the binary is off, that subtractor goes to zero (no less, because of constraints/bounds), 
# .... but if it isn't, it's equal to difference from big-number.
# Big-number minus subtractor yields amount of the switched-on input. Could work!

# But how do we figure out the most an input could possibly need? I don't love the "1 trillion or so ought to do" solution.
# I think the priority ratio math will still apply: sum of targets * (ratio of smallest to biggest value in problem)
# But there could be a setup like:
# Biggest-number input (1000 A) produces smallest-number output (0.1 B)
# Second-biggest-number input (999 B) produces second-smallest-number output (0.2 C)
# ... and so on. Those two would require (999/0.2*1000/0.1) = 49950000.0 units of A for 1 C

# This would require some sort of DFS ratio calculation that would be no fun.
# I say we use the "1 trillion or so ought to do strategy" and include a warning if any subtractor is zero (input 100% used)
# ... but no, if that much of an input really is used (maybe a VERY large liquid air distilling problem),
# ... then no solution would be found at all! Wait, not true - another input could be used as well.
# ... We'll go with this solution.

# Choose a BIG_NUMBER that doesn't intrude too far into double precision when LP starts doing its math.
# Doubles have ~15 decimal digits of precision. Let's leave... uh... 6? for recipes, leaving 9 for BIG_NUMBER.
# 1,000,000,000 - A trillion or so ought to do. That wasn't even intentional.
# This problem should be somewhat helped (in not-contrived examples) by input scale normalization (see above).

In [30]:
BIG_NUMBER = 1E15
print("Two digits with 1E15", BIG_NUMBER-2.16, BIG_NUMBER-2.12, BIG_NUMBER-2.09)
# NO, BAD - not enough precision.
BIG_NUMBER = 1E9
print("Eight Digits with 1E9", BIG_NUMBER-2.12345678, BIG_NUMBER-2.12345679, "\nSeven Digits", BIG_NUMBER-2.1234567, BIG_NUMBER-2.1234568)
# I suppose it's not so surprising when math does what it is supposed to do, but it's neat to see that (almost) work.
# Bc floats use exponents, the first 2 gets divided to 1, which is implicit in a float (so that digit doesn't count towards precision).
# I would have wanted to see Seven Digits not work bc it should be 6, but I probably chose numbers that fit
# into the gaps of "Approximately" 6 digits of precision.
# Let's try this approach with some problems and see what happens when we reduce BIG_NUMBER.

Two digits with 1E15 999999999999997.9 999999999999997.9 999999999999997.9
Eight Digits with 1E9 999999997.8765432 999999997.8765432 
Seven Digits 999999997.8765433 999999997.8765432


In [31]:
# New LP Plan:
# Constraints: (recipe contributions) + ingredient_switch*BIG_NUMBER - ingredient_subtractor >= target
# Objective Priorities (low->high):
# 0: Recipe Tax (unweighted sum of recipe amounts to avoid unnecessary work)
# (1, n-2): Explicit Ingredient Priorities (ratio**k * (sum of n-priority explicit ingredient vectors))
# n-1: Additional Ingredient Quantities (sum)
# n: Ingredient Switches (sum)

# Note that this prioritizes not using an ingredient at all over using less of it
# and it prioritizes using explicit ingredients before additional ones. 

In [32]:
# Priorities: [recipe tax, pall enriched ammonia, additional input amounts, additional input switches]
priority_ratio = 9000/0.95 # Smallest/biggest
BIG_NUMBER = 1E9

from pulp import LpProblem, LpMinimize, LpBinary, LpVariable, value, lpSum, lpDot

def solveProblem(big_number):
    problem = LpProblem("min input v2", LpMinimize)
    recipe_vars = LpVariable.dicts("recipe",  ["lcr pall met pow recycle", "lcr repre", "sifter salt recycle"],0)
    variables =  ["ammonia", "pall enriched ammonia", "pall met pow", "pall salt", "repre salt"] 
    # Filtered only by things that are ever an input
    inputs = ["ammonia", "pall enriched ammonia", "pall met pow", "pall salt"] 
    explicit_inputs = ["pall enriched ammonia"]
    explicit_input_amounts = LpVariable.dicts("input", explicit_inputs, 0)
    # Filtered to remove explicit inputs
    additional_inputs = ["ammonia", "pall met pow", "pall salt"]
    additional_input_subtractors = LpVariable.dicts("input", additional_inputs, 0, 1)
    additional_input_switches = LpVariable.dicts("in_switch", additional_inputs, 0, 1, cat=LpBinary)


    for item, recipe_coeffs, target in zip(variables, pall_loop, target_vector):
        # To reach each target amount, use a combination of the recipe outputs and switched inputs
        # The amount for each input is unlimited, but there is a high cost for switching each one on
        if item in explicit_inputs:
            input_term = explicit_input_amounts[item]
        elif item in additional_inputs:
            # input_term = 0
            input_term = big_number * (additional_input_switches[item] - additional_input_subtractors[item])
            # Also restrict input to be strictly positive (no inputting negative items)
            problem += (additional_input_switches[item] - additional_input_subtractors[item]) >= 0 
        else:
            input_term = 0
        problem += lpDot(recipe_vars.values(), recipe_coeffs) + input_term == target

    problem += (priority_ratio**0 * lpSum(recipe_vars.values())
        + priority_ratio**1 * lpSum(explicit_input_amounts)
        + priority_ratio**2 * -1 * lpSum(additional_input_subtractors.values()) # invert (to max)
        + priority_ratio**3 * lpSum(additional_input_switches)
    )
    problem.solve()
    
    print("Problem Status", LpStatus[problem.status])

    def show_values(var_dict):
        for var in var_dict:
            print(var, value(var_dict[var]))
            
    print("\nSwitches")
    show_values(additional_input_switches)
    print("\nSubtractors")
    show_values(additional_input_subtractors)
    print("\nRecipe vars")
    show_values(recipe_vars)
    print("\nExplicit Ingredient Vars")
    show_values(explicit_input_amounts)

    print("\nConstraints: ")
    for item, recipe_coeffs, target in zip(variables, pall_loop, target_vector):
        # To reach each target amount, use a combination of the recipe outputs and switched inputs
        # The amount for each input is unlimited, but there is a high cost for switching each one on
        if item in explicit_inputs:
            input_term = value(explicit_input_amounts[item])
        elif item in additional_inputs:
            input_term = big_number * (value(additional_input_switches[item]) - value(additional_input_subtractors[item]))
        else:
            input_term = 0
        recipe_sum = sum(value(v) * coeff for ((name, v), coeff) in zip(recipe_vars.items(), recipe_coeffs))
        coeff_string = " + ".join([f"{value(v)} * {coeff} [{name}]" for ((name, v), coeff) in zip(recipe_vars.items(), recipe_coeffs)])
        print(item, f"{coeff_string} + {input_term} = {recipe_sum+input_term} >= {target}")
solveProblem(1E4)
# That is a correct solution! I think there are some edge cases regarding byproduct handling...
# There are some cases where you *need* a byproduct (no solution otherwise),
# And there are some cases where you *want* a byproduct (a side-recipe cracks e.g. radioactive waste into uranium, etc)
# I'll explore solving that next. 

Problem Status Infeasible

Switches
ammonia 3.1
pall met pow 0.0
pall salt 0.0

Subtractors
ammonia 0.0
pall met pow 0.0
pall salt 0.0

Recipe vars
lcr pall met pow recycle 31.0
lcr repre 5.0
sifter salt recycle 80.0

Explicit Ingredient Vars
pall enriched ammonia 14000.0

Constraints: 
ammonia 31.0 * -1000 [lcr pall met pow recycle] + 5.0 * 0 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 31000.0 = 0.0 >= 0
pall enriched ammonia 31.0 * 1000 [lcr pall met pow recycle] + 5.0 * -9000 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 14000.0 = 0.0 >= 0
pall met pow 31.0 * -1 [lcr pall met pow recycle] + 5.0 * -9 [lcr repre] + 80.0 * 0.95 [sifter salt recycle] + 0.0 = 0.0 >= 0
pall salt 31.0 * 0 [lcr pall met pow recycle] + 5.0 * 16 [lcr repre] + 80.0 * -1 [sifter salt recycle] + 0.0 = 0.0 >= 0
repre salt 31.0 * 0 [lcr pall met pow recycle] + 5.0 * 2 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 0 = 10.0 >= 10


In [33]:
# Find behavior with a nearly-too-small BIG_NUMBER
solveProblem(1E5)

Problem Status Optimal

Switches
ammonia 1.0
pall met pow 0.0
pall salt 0.0

Subtractors
ammonia 0.69
pall met pow 0.0
pall salt 0.0

Recipe vars
lcr pall met pow recycle 31.0
lcr repre 5.0
sifter salt recycle 80.0

Explicit Ingredient Vars
pall enriched ammonia 14000.0

Constraints: 
ammonia 31.0 * -1000 [lcr pall met pow recycle] + 5.0 * 0 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 31000.000000000004 = 3.637978807091713e-12 >= 0
pall enriched ammonia 31.0 * 1000 [lcr pall met pow recycle] + 5.0 * -9000 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 14000.0 = 0.0 >= 0
pall met pow 31.0 * -1 [lcr pall met pow recycle] + 5.0 * -9 [lcr repre] + 80.0 * 0.95 [sifter salt recycle] + 0.0 = 0.0 >= 0
pall salt 31.0 * 0 [lcr pall met pow recycle] + 5.0 * 16 [lcr repre] + 80.0 * -1 [sifter salt recycle] + 0.0 = 0.0 >= 0
repre salt 31.0 * 0 [lcr pall met pow recycle] + 5.0 * 2 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 0 = 10.0 >= 10


In [34]:
# Find behavior with a too-small BIG_NUMBER
solveProblem(1E4)
# This failed how we would want to it - straight-out
# - but I suspect it will stop failing like this as we ease off on byproduct restrictions for byproduct-y problems.

Problem Status Infeasible

Switches
ammonia 3.1
pall met pow 0.0
pall salt 0.0

Subtractors
ammonia 0.0
pall met pow 0.0
pall salt 0.0

Recipe vars
lcr pall met pow recycle 31.0
lcr repre 5.0
sifter salt recycle 80.0

Explicit Ingredient Vars
pall enriched ammonia 14000.0

Constraints: 
ammonia 31.0 * -1000 [lcr pall met pow recycle] + 5.0 * 0 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 31000.0 = 0.0 >= 0
pall enriched ammonia 31.0 * 1000 [lcr pall met pow recycle] + 5.0 * -9000 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 14000.0 = 0.0 >= 0
pall met pow 31.0 * -1 [lcr pall met pow recycle] + 5.0 * -9 [lcr repre] + 80.0 * 0.95 [sifter salt recycle] + 0.0 = 0.0 >= 0
pall salt 31.0 * 0 [lcr pall met pow recycle] + 5.0 * 16 [lcr repre] + 80.0 * -1 [sifter salt recycle] + 0.0 = 0.0 >= 0
repre salt 31.0 * 0 [lcr pall met pow recycle] + 5.0 * 2 [lcr repre] + 80.0 * 0 [sifter salt recycle] + 0 = 10.0 >= 10


In [54]:
# Modify the solver to use generic problems and allow penalized byproducts.

# Priorities: [recipe tax, pall enriched ammonia, additional input amounts, additional input switches, byproducts]
priority_ratio = 39/0.2*5 # biggest/Smallest
BIG_NUMBER = 1E4

from pulp import LpProblem, LpMinimize, LpBinary, LpVariable, value, lpSum, lpDot
from collections import Counter

def genSlug(s, try_acronym=False):
    slug = re.sub(r"[^a-zA-Z0-9_]", "", re.sub(r"\W+", "_", s.lower()))
    words = slug.split("_")
    if len(slug) > 40 or (try_acronym and len(words)>=2):
        return "".join(word[0] for word in words if len(word) > 0)
    else:
        return slug[:25]

def genRecipeNames(recipes):
    counter = Counter()
    names = []
    for recipe in recipes:
        name = genSlug(recipe.machine, True) + "_" + genSlug(recipe.I[0].name)
        if name in counter:
            counter[name] += 1
            name += f"_{counter[name]}"
        names.append(name)
    return names

class LpProject:
    def __init__(self, inputs, explicit_inputs, outputs, targets, variables, recipe_names, ing_matrix, target_vector):
        self.inputs = inputs
        self.explicit_inputs = explicit_inputs
        self.outputs = outputs
        self.targets = targets
        self.variables = variables
        self.recipe_names = recipe_names
        self.ing_matrix = ing_matrix
        self.target_vector = target_vector
        
    @staticmethod
    def fromRecipes(recipes):
        inputs = set()
        explicit_inputs = set()
        outputs = set()
        targets = {}
        # Just use indices to identify recipes here
        for recipe, io in enumerate(recipes):
            for ing in io.I:
                ing_name = stripBrackets(ing.name)
                inputs.add(ing_name)
            for out in io.O:
                out_name = stripBrackets(out.name)
                outputs.add(out_name)
            if hasattr(io, "cost"):
                for explicit_input in getattr(io, "cost"):
                    explicit_inputs.add(explicit_input)
            if hasattr(io, "target"):
                for target, quant in getattr(io, "target").items():
                    targets[target] = quant
                    
        if not all(target in outputs for target in targets): 
            raise RuntimeError("Encountered target which is never an output (likely a spelling mistake). targets: " +str(targets))
        if not all(cost in inputs for cost in explicit_inputs):
            raise RuntimeError("Encountered cost/explicit input which is never an input (likely a spelling mistake). costs: " +str(explicit_inputs))
        
        variables = list(inputs | outputs)
        
        recipe_vectors = []
        variable_indices = {var: i for i, var in enumerate(variables)}
        print(variable_indices)
        for recipe in recipes:
            vector = [0] * len(variables)
            for ing in recipe.I:
                vector[variable_indices[stripBrackets(ing.name)]] = -1 * ing.quant
            for out in recipe.O:
                vector[variable_indices[stripBrackets(out.name)]] = out.quant
            recipe_vectors.append(vector)
        recipe_names = genRecipeNames(recipes)
        print("Recipe Vectors", list(zip(recipe_names, recipe_vectors)))
        ing_vectors = list(zip(*recipe_vectors)) # Transpose (constraints are per-item, not per-recipe)
        target_vector = [targets.get(var, 0) for var in variables]
        
        return LpProject(inputs, explicit_inputs, outputs, targets, variables, recipe_names, ing_vectors, target_vector)
    
    @staticmethod
    def fromConfig(config_path):
        recipes = recipesFromConfig(config_path)
        return LpProject.fromRecipes(recipes)
    

def solveProblem(recipes):
    project = LpProject.fromRecipes(recipes)    
    
    additional_inputs = project.inputs -project.explicit_inputs
    print(additional_inputs)
    # Outputs that are only ever outputs (and not targets) are likely desired.        
    desired_byproducts = (project.outputs - project.inputs) - set(project.targets.keys())
    minimized_byproducts = set(project.variables) - desired_byproducts
    
    problem = LpProblem("gtnh_flow_lp_solver", LpMinimize)
    
    recipe_vars = LpVariable.dicts("recipe", project.recipe_names, 0)
    explicit_input_amounts = LpVariable.dicts("input", project.explicit_inputs, 0)
    additional_input_subtractors = LpVariable.dicts("in_sub", additional_inputs, lowBound= 0, upBound= 1)
    additional_input_switches = LpVariable.dicts("in_switch", additional_inputs, 0, 1, cat=LpBinary)
    byproduct_amounts = LpVariable.dicts("byproduct", project.variables, 0)

    for item, recipe_coeffs, target in zip(project.variables, project.ing_matrix, project.target_vector):
        # To reach each target amount, use a combination of the recipe outputs and switched inputs
        # The amount for each input is unlimited, but there is a high cost for switching each one on
        if item in project.explicit_inputs:
            input_term = explicit_input_amounts[item]
        elif item in additional_inputs:
            # input_term = 0
            input_term = BIG_NUMBER * (additional_input_switches[item] - additional_input_subtractors[item])
            # Also restrict input to be strictly positive (no inputting negative items)
            problem += (additional_input_switches[item] - additional_input_subtractors[item]) >= 0 
        else:
            input_term = 0
        
        # Most of the time, products beyond the target should be heavily penalized
        # to encourage using other recipes to reprocess them.
        # However, some items are the output of byproduct processing (like uranium from radioactive waste byproduct)
        # our heuristic is that if something is only output (never an input), it should have a small production priority
        problem += lpDot(recipe_vars.values(), recipe_coeffs) + input_term == target + byproduct_amounts[item]
        # problem += lpDot(recipe_vars.values(), recipe_coeffs) + input_term == target

    desired_byproduct_amounts = [byproduct_amounts[var] for var in desired_byproducts]
    minimized_byproduct_amounts = [byproduct_amounts[var] for var in minimized_byproducts]
    print("Desired Byproducts:", desired_byproducts)
    print("Minimized Byproducts:", minimized_byproducts)
    problem += (priority_ratio**0 * lpSum(recipe_vars)
        + priority_ratio**1 * -1 * lpSum(desired_byproduct_amounts) # Byproducts are lowest priority (nearly)
        + priority_ratio**2 * lpSum(explicit_input_amounts)
        + priority_ratio**3 * -1 * lpSum(additional_input_subtractors) # invert (to max)
        + priority_ratio**4 * 2 * lpSum(additional_input_switches) # Subtractors are <= 1, so we can use smaller priority step
        + priority_ratio**4 * 4 * lpSum(minimized_byproduct_amounts) # Same as above
    )
    problem.solve()
    
    print("Problem Status:", LpStatus[problem.status], "in", problem.solutionCpuTime, "cpu-seconds")

    def show_values(var_dict):
        for var in var_dict:
            print(var, value(var_dict[var]))
            
    print("\nSwitches")
    show_values(additional_input_switches)
    print("\nSubtractors")
    show_values(additional_input_subtractors)
    print("\nRecipe vars")
    show_values(recipe_vars)
    print("\nExplicit Ingredient Vars")
    show_values(explicit_input_amounts)
    print("\nSurplus Vars")
    show_values(byproduct_amounts)

    print("\nConstraints: ")
    for item, recipe_coeffs, target in zip(project.variables, project.ing_matrix, project.target_vector):
        # To reach each target amount, use a combination of the recipe outputs and switched inputs
        # The amount for each input is unlimited, but there is a high cost for switching each one on
        if item in project.explicit_inputs:
            input_term = value(explicit_input_amounts[item])
        elif item in additional_inputs:
            input_term = BIG_NUMBER * (value(additional_input_switches[item]) - value(additional_input_subtractors[item]))
        else:
            input_term = 0
        recipe_sum = sum(value(v) * coeff for ((name, v), coeff) in zip(recipe_vars.items(), recipe_coeffs))
        coeff_string = " + ".join([f"{value(v)} * {coeff} [{name}]" for ((name, v), coeff) in zip(recipe_vars.items(), recipe_coeffs)])
        surplus_term = value(byproduct_amounts[item])
        print(item, f"{coeff_string} + {input_term} = {recipe_sum+input_term} == {target} + {surplus_term}")
    return problem
        
# recipes = recipesFromConfig("devtest/insufficient_byproduct_fake.yaml")
recipes = recipesFromConfig("devtest/bauxite.yaml")
# recipes = recipesFromConfig("devtest/palladium_loop.yaml")
problem = solveProblem(recipes)
            
    

{'heated bauxite slurry': 0, 'nickel dust': 1, 'antimony dust': 2, 'calcite': 3, 'sluice juice': 4, 'sodium carbonate': 5, 'sodium aluminate': 6, 'water': 7, 'bauxite slag': 8, 'oxygen': 9, 'carbon dioxide': 10, 'gallium': 11, 'hydrogen': 12, 'rutile': 13, 'silicon dioxide': 14, 'sodium hydroxide': 15, 'stone dust': 16, 'aluminium dust': 17, 'purified bauxite': 18, 'carbon dust': 19, 'copper dust': 20, 'quicklime': 21, 'bauxite slurry': 22, 'alumina': 23, 'sodium dust': 24, 'iron dust': 25, 'tin dust': 26, 'steam': 27, 'aluminium hydroxide': 28}
Recipe Vectors [('mixer_purified_bauxite', [0, 0, 0, 0, 0, 0, 0, -5, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, -32, 0, 0, -4, 8, 0, 0, 0, 0, 0, 0]), ('oc_steam', [32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -32, 0, 0, 0, 0, -2, 0]), ('lcr_carbon_dioxide', [-32, 0, 0, 10, 5, 9, 0, 0, 16, 0, -5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, -1]), ('lcr_calcite', [0, 0, 0, -5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 

In [36]:
problem.constraints

OrderedDict([('_C1',
              -1*in_sub_heated_bauxite_slurry + 1*in_switch_heated_bauxite_slurry + 0 >= 0),
             ('_C2',
              -1*byproduct_heated_bauxite_slurry + -10000.0*in_sub_heated_bauxite_slurry + 10000.0*in_switch_heated_bauxite_slurry + -32*recipe_lcr_carbon_dioxide + 32*recipe_oc_steam + 0.0 = 0),
             ('_C3',
              -1*byproduct_nickel_dust + 0.2*recipe_centrifuge_sluice_juice + 0.0 = 0),
             ('_C4',
              -1*byproduct_antimony_dust + 0.2*recipe_centrifuge_sluice_juice + 0.0 = 0),
             ('_C5', -1*in_sub_calcite + 1*in_switch_calcite + 0 >= 0),
             ('_C6',
              -1*byproduct_calcite + -10000.0*in_sub_calcite + 10000.0*in_switch_calcite + -5*recipe_lcr_calcite + 10*recipe_lcr_carbon_dioxide + 0.0 = 0),
             ('_C7',
              -1*in_sub_sluice_juice + 1*in_switch_sluice_juice + 0 >= 0),
             ('_C8',
              -1*byproduct_sluice_juice + -10000.0*in_sub_sluice_juice + 10000.0*in

In [37]:
problem.variables

<bound method LpProblem.variables of gtnh_flow_lp_solver:
MINIMIZE
3614751562500.0*byproduct_alumina + 3614751562500.0*byproduct_aluminium_dust + 3614751562500.0*byproduct_aluminium_hydroxide + -975.0*byproduct_antimony_dust + 3614751562500.0*byproduct_bauxite_slag + 3614751562500.0*byproduct_bauxite_slurry + 3614751562500.0*byproduct_calcite + 3614751562500.0*byproduct_carbon_dioxide + -975.0*byproduct_carbon_dust + -975.0*byproduct_copper_dust + -975.0*byproduct_gallium + 3614751562500.0*byproduct_heated_bauxite_slurry + -975.0*byproduct_hydrogen + -975.0*byproduct_iron_dust + -975.0*byproduct_nickel_dust + -975.0*byproduct_oxygen + 3614751562500.0*byproduct_purified_bauxite + 3614751562500.0*byproduct_quicklime + -975.0*byproduct_rutile + -975.0*byproduct_silicon_dioxide + 3614751562500.0*byproduct_sluice_juice + 3614751562500.0*byproduct_sodium_aluminate + 3614751562500.0*byproduct_sodium_carbonate + 3614751562500.0*byproduct_sodium_dust + 3614751562500.0*byproduct_sodium_hydroxide

In [38]:
# That looks like it's working! Next on the chopping block is what I'm calling "Matrix Normalization" 
# ... although I'm not even sure that's the right name.
# The idea is to scale both item and recipe vectors so that the largest value in each is 1, 
# ... AND we minimize the ratio between the largest and smallest values in the matrix (which we use as the priority ratio)
# This should make the LP solver more numerically stable and less likely to run into precision issues.
# It will also help with the "1 trillion or so ought to do" problem, as we can use a smaller BIG_NUMBER,
# and help put items in the same priority level on the same-ish scale.

# The rows can be scaled for free - divide the row (all ingredients) by the largest value in the row.
# The challenge is finding a scale factor for the *columns* which minimizes the ratio between the largest and smallest
# ... values in the *matrix*.

# One way to formulate this in LP would be to minimize the difference between
# ... the largest and smallest values in the values, which are themselves variables that are solved for.

# I'mma ask ChatGPT for this one... oh boy did GPT deliver. I love this stuff.

# Here's its write-up:
# This approach aims to scale a matrix such that each column is adjusted by a 
# scaling factor to ensure numerical stability in linear programming (LP) solvers. 
# First, each vector (row) in the matrix is normalized so that the largest value in 
# each vector becomes 1. Using the PuLP library, an optimization problem is then set 
# up with the objective of minimizing the difference between the maximum and minimum 
# scaled values in the matrix. The variables in this optimization problem are the 
# scaling factors for each column. Constraints are added to ensure that each element 
# in the matrix, when scaled, lies between the defined maximum and minimum values. 
# The optimization problem is solved to determine the optimal scaling factors, 
# which are then applied to the normalized matrix. 
# This process ensures the matrix values are balanced and the ratio between the 
# largest and smallest values is minimized, enhancing the numerical stability of 
# LP solvers.


In [44]:
def getIngMatrix(recipes):
    inputs = set()
    explicit_inputs = set()
    outputs = set()
    targets = {}
    # Just use indices to identify recipes here
    for recipe, io in enumerate(recipes):
        for ing in io.I:
            ing_name = stripBrackets(ing.name)
            inputs.add(ing_name)
        for out in io.O:
            out_name = stripBrackets(out.name)
            outputs.add(out_name)
        if hasattr(io, "cost"):
            for explicit_input in getattr(io, "cost"):
                explicit_inputs.add(explicit_input)
        if hasattr(io, "target"):
            for target, quant in getattr(io, "target").items():
                targets[target] = quant
                
    if not all(target in outputs for target in targets): 
        raise RuntimeError("Encountered target which is never an output (likely a spelling mistake). targets: " +str(targets))
    if not all(cost in inputs for cost in explicit_inputs):
        raise RuntimeError("Encountered cost/explicit input which is never an input (likely a spelling mistake). costs: " +str(explicit_inputs))
    
    variables = list(inputs | outputs) # Make it a list because order matters
    additional_inputs = inputs - explicit_inputs
    # print(additional_inputs)
    # Outputs that are only ever outputs (and not targets) are likely desired.        
    desired_byproducts = (outputs - inputs) - set(targets.keys())
    minimized_byproducts = set(variables) - desired_byproducts
    
    recipe_vectors = []
    variable_indices = {var: i for i, var in enumerate(variables)}
    # print(variable_indices)
    for recipe in recipes:
        vector = [0] * len(variables)
        for ing in recipe.I:
            vector[variable_indices[stripBrackets(ing.name)]] = -1 * ing.quant
        for out in recipe.O:
            vector[variable_indices[stripBrackets(out.name)]] = out.quant
        recipe_vectors.append(vector)
    recipe_names = genRecipeNames(recipes)
    print("Recipe Vectors", list(zip(recipe_names, recipe_vectors)))
    ing_vectors = list(zip(*recipe_vectors)) # Transpose (constraints are per-item, not per-recipe)
    return ing_vectors
    # target_vector = [targets.get(var, 0) for var in variables]
    # row_scales,col_scales, scaled_matrix = scale_matrix(ing_vectors)
    
    # print(list(zip(variables, row_scales)))
    # print(list(zip(recipe_names, col_scales)))
    # print(col_scales)
    # print()
    # # for row in scaled_matrix: print(row)
    # import csv
    # with open("test_mat.csv", "w") as f:
    #     csv.writer(f).writerows(scaled_matrix)
    # print(value(problem.objective))
    # print(list(zip(scaled_matrix)))

getIngMatrix(recipesFromConfig("renewables/calcium/ashes.yaml"))

Recipe Vectors [('centrifuge_ashes', [5.4, -36, 0, 1, 4, 0.45, 0, 11.52, 2]), ('electrolyzer_quicklime_dust', [0, 0, 1, 0, 0, 0, 1000, -2, 0])]


[(5.4, 0),
 (-36, 0),
 (0, 1),
 (1, 0),
 (4, 0),
 (0.45, 0),
 (0, 1000),
 (11.52, -2),
 (2, 0)]

In [41]:
# ... and here's the routine:
import pulp

# Function to normalize a matrix with the added constraint
def pulp_scale_matrix(matrix):
    
    num_rows = len(matrix)
    num_cols = len(matrix[0])
   
    # Create a PuLP problem instance
    prob = pulp.LpProblem("MinimizeMatrixRatio", pulp.LpMinimize)
    
    # Variables: scaling factors for each row and column
    row_scale_factors = [pulp.LpVariable(f"rs_{i}", lowBound=1) for i in range(num_rows)]
    
    # Constraints and objective function
    max_val = pulp.LpVariable("max_val", lowBound=0)
    min_val = pulp.LpVariable("min_val", lowBound=0)
    
    # Define constraints and objective for each element in the matrix
    for i in range(num_rows):
        for j in range(num_cols):
            if abs(matrix[i][j]) > 0:
                scaled_value = abs(matrix[i][j]) * row_scale_factors[i]
                prob += max_val >= scaled_value
                prob += min_val <= scaled_value
    
    # Objective: min (max_val - min_val)
    prob += max_val - min_val
    
    # Solve the problem
    prob.solve()
    
    # Extract the scale factors
    row_scale_factors = [scale_factor.varValue for scale_factor in row_scale_factors]
    
    # Apply the scaling factors to the normalized matrix
    scaled_matrix = [[matrix[i][j] * row_scale_factors[i] for j in range(num_cols)] for i in range(num_rows)]
    
    return row_scale_factors, scaled_matrix
    # print(value(min_val), value(max_val))
    # return row_scales, col_scales, scaled_matrix

# Example usage
pall_loop = [
    [-1000, 0,      0],
    [1000,  -9000,  0],
    [-1,    -9,     0.95],
    [0,     16,     -1],
    [0,     2,      0],
]

pulp_row_scales, pulp_matrix = pulp_scale_matrix(pall_loop)
for row in pulp_matrix:
    print(row)

[-9000.0, 0.0, 0.0]
[1000.0, -9000.0, 0.0]
[-592.10526, -5328.947340000001, 562.499997]
[0.0, 9000.0, -562.5]
[0.0, 9000.0, 0.0]


In [51]:
# I think solving jointly for both row and column scale factors will yield a better ratio, but that requires
# some quadratic solving. I think the cvxpy library may do the trick, but I want to confirm that the 
# ratio improvement is actually good enough for adding the dependency (or switching to it from pulp)

import cvxpy as cp
import numpy as np

def qp_scale_matrix(matrix):
    num_rows = len(matrix)
    num_cols = len(matrix[0])
    
    # Variables: scaling factors for each row and column
    row_scale_factors = cp.Variable(num_rows, pos=True)
    col_scale_factors = cp.Variable(num_cols, pos=True)
    
    # Constraints and objective function
    constraints = []
    max_val = cp.Variable(pos=True)
    min_val = cp.Variable(pos=True)
    
    # Define constraints and objective for each element in the matrix
    for i in range(num_rows):
        for j in range(num_cols):
            if abs(matrix[i][j]) > 0:
                scaled_value = abs(matrix[i][j]) * row_scale_factors[i] * col_scale_factors[j]
                constraints.append(max_val >= scaled_value)
                constraints.append(min_val <= scaled_value)
    
    # Objective: min (max_val - min_val)
    objective = cp.Minimize(max_val / min_val)
    
    # Create the problem
    problem = cp.Problem(objective, constraints)
    
    # Solve the problem
    problem.solve(gp=True)
    print("Solve time", problem.solver_stats.solve_time)
    
    # Extract the scale factors
    row_scale_factors = row_scale_factors.value
    col_scale_factors = col_scale_factors.value
    
    # Apply the scaling factors to the normalized matrix
    scaled_matrix = np.multiply(matrix, np.outer(row_scale_factors, col_scale_factors))
    
    return row_scale_factors, col_scale_factors, scaled_matrix


def ratio_test(matrix):
    pulp_row_scales, pulp_matrix = pulp_scale_matrix(matrix)
    pulp_matrix = np.array(pulp_matrix)
    qp_row_scales, qp_col_scales, qp_matrix = qp_scale_matrix(matrix)
    og_nonzero_abs = np.abs(matrix[np.nonzero(matrix)])
    pulp_nonzero_abs = np.abs(pulp_matrix[np.nonzero(pulp_matrix)])
    qp_nonzero_abs = np.abs(qp_matrix[np.nonzero(np.array(qp_matrix))])
    print("Original Ratio", np.max(og_nonzero_abs) / np.min(og_nonzero_abs))
    print("LP Ratio", np.max(pulp_nonzero_abs) / np.min(pulp_nonzero_abs))
    print("QP Ratio", np.max(qp_nonzero_abs) / np.min(qp_nonzero_abs))
    return qp_matrix
display(ratio_test(np.array(pall_loop)))
display(np.array(pall_loop))
# That is dramatic... but I want to make sure I haven't optimized too close to the sun.
# After doing this, does the LP solution (after getting un-scaled) still work?

Solve time 8.18e-05
Original Ratio 9473.684210526317
LP Ratio 16.000000085333333
QP Ratio 1.2995725793517645


array([[-4.70937433,  0.        ,  0.        ],
       [ 4.91960199, -4.50813025,  0.        ],
       [-4.50813025, -4.1310737 ,  5.3686301 ],
       [ 0.        ,  5.3686301 , -4.1310737 ],
       [ 0.        ,  4.70937433,  0.        ]])

array([[-1.0e+03,  0.0e+00,  0.0e+00],
       [ 1.0e+03, -9.0e+03,  0.0e+00],
       [-1.0e+00, -9.0e+00,  9.5e-01],
       [ 0.0e+00,  1.6e+01, -1.0e+00],
       [ 0.0e+00,  2.0e+00,  0.0e+00]])