In [1]:
import json, math, random, itertools, copy
from collections import defaultdict, Counter

In [2]:
import re

# Single-character "vulgar fraction" glyphs and their ASCII "a/b" spelling.
FRACTION_CHARS = {
    "¼": "1/4", "½": "1/2", "¾": "3/4",
    "⅐": "1/7", "⅑": "1/9", "⅒": "1/10",
    "⅓": "1/3", "⅔": "2/3",
    "⅕": "1/5", "⅖": "2/5", "⅗": "3/5", "⅘": "4/5",
    "⅙": "1/6", "⅚": "5/6",
    "⅛": "1/8", "⅜": "3/8", "⅝": "5/8", "⅞": "7/8",
}

# Each glyph becomes " a/b " (space padded) so that "2½" turns into "2 1/2 ".
_FRACTION_TABLE = str.maketrans({ch: f" {rep} " for ch, rep in FRACTION_CHARS.items()})

# One quantity: mixed number ("2 3/4"), simple fraction ("3/4"), decimal ("1.5")
# or leading-dot decimal (".5", only when the dot does not follow a word character).
_QTY = r"(?:(?:\d+(?:\.\d+)?\s+)?\d+\s*/\s*\d+|\d+(?:\.\d+)?|(?<!\w)\.\d+)"

# The first quantity in the text, optionally followed *directly* by "- qty" / "to qty".
# Groups: 1 = first quantity, 2 = separator, 3 = second quantity.
_AMOUNT_RE = re.compile(rf"({_QTY})(?:\s*(-|to)\s*({_QTY}))?")

# Splits a single quantity into an optional whole part plus numerator/denominator.
_FRACTION_RE = re.compile(r"(?:(?P<whole>\d+(?:\.\d+)?)\s+)?(?P<num>\d+)\s*/\s*(?P<den>\d+)")


def _quantity_value(text: str) -> float:
    """Convert one quantity matched by ``_QTY`` into a float."""
    parts = _FRACTION_RE.fullmatch(text)
    if parts is None:
        # Plain decimal such as "2", "1.5" or ".5".
        return float(text)
    whole = float(parts.group("whole") or 0)
    # A zero denominator is treated as 1 instead of raising ZeroDivisionError.
    den = int(parts.group("den")) or 1
    return whole + int(parts.group("num")) / den


def parse_amount(x) -> float:
    """Parse a recipe amount into a float; fall back to 0.0 when nothing numeric is found.

    Accepted inputs:
      * ints / floats (returned as float; NaN and infinities become 0.0)
      * ``None`` or blank strings -> 0.0
      * plain numbers: "100", "1.5", ".5"
      * fractions and mixed numbers: "3/4", "2 3/4", "¾", "2½"
      * hyphenated mixed numbers: "1-1/2" -> 1.5
      * ranges: "2-3", "2 to 3", "1/2 - 1" -> mean of both ends

    The first quantity found anywhere in the text is used, so surrounding words
    ("about 2 cups", "1/2 cup") are ignored. Text without digits ("pinch",
    "to taste") yields 0.0.
    """
    if isinstance(x, (int, float)):
        value = float(x)
        return value if math.isfinite(value) else 0.0
    if x is None:
        return 0.0
    s = str(x).strip().lower()
    if not s:
        return 0.0

    # Normalise unicode fraction glyphs, separators and dash variants.
    s = s.translate(_FRACTION_TABLE)
    s = s.replace(",", " ").replace("–", "-").replace("—", "-")

    m = _AMOUNT_RE.search(s)
    if m is None:
        return 0.0

    first_txt, sep, second_txt = m.groups()
    first = _quantity_value(first_txt)
    if second_txt is None:
        return first

    second = _quantity_value(second_txt)
    tail = _FRACTION_RE.fullmatch(second_txt)
    tail_is_bare_fraction = tail is not None and tail.group("whole") is None
    # "1-1/2" is a hyphenated mixed number, not a (descending) range from 1 to 0.5.
    if (sep == "-" and first_txt.isdigit() and first >= 1
            and tail_is_bare_fraction and second < 1):
        return first + second

    # Genuine range: take the midpoint.
    return (first + second) / 2.0


In [2]:
# Load the inspiring set. The file is expected to look like
# {"recipes": [{name, ingredients: [{amount, unit, ingredient}, ...]}, ...]}.
with open("grp11_combined_cookie_knowledgebase.json", "r", encoding="utf-8") as f:
    KB = json.load(f)["recipes"]

# Every distinct ingredient name (trimmed, lower-cased) in alphabetical order.
ALL_INGS = sorted(set(
    entry["ingredient"].strip().lower()
    for recipe in KB
    for entry in recipe["ingredients"]
))

In [3]:
# Co-occurrence table built from the inspiring set: for every recipe, each
# unordered pair of its distinct ingredients is counted once. Keys are
# (a, b) tuples with a < b because the names are sorted before pairing.
pair_counts = Counter()
for r in KB:
    ing_list = sorted({i["ingredient"].strip().lower() for i in r["ingredients"]})
    pair_counts.update(itertools.combinations(ing_list, 2))

In [4]:
def flavor_compatibility(ings):
    """Average pairwise co-occurrence of a recipe's ingredients, clipped to [0, 1].

    ``ings`` is an iterable of item tuples whose first element is the
    ingredient name. Each unordered pair of distinct names is looked up in
    ``pair_counts``; the summed count is divided by ``3 * number_of_pairs``
    and capped at 1.0. Fewer than two distinct names scores 0.0.
    """
    names = sorted({item[0] for item in ings})
    if len(names) < 2:
        return 0.0

    def pair_count(first, second):
        # Try the pair as given, then in reversed order.
        key = (first, second) if (first, second) in pair_counts else (second, first)
        return pair_counts.get(key, 0)

    total = 0
    n_pairs = 0
    for first, second in itertools.combinations(names, 2):
        total += pair_count(first, second)
        n_pairs += 1

    # Dividing by a small constant per pair keeps typical scores inside [0, 1];
    # the epsilon only guards the division.
    return min(1.0, total / (n_pairs * 3.0 + 1e-9))

# Rough role lookup tables. An ingredient belongs to a role only when its
# lower-cased name is an exact member of the corresponding set.
LIQUIDS = {
    "water",
    "stock", "vegetable stock", "chicken stock", "broth",
    "milk", "coconut milk", "cream",
    "tomato puree", "tomato passata",
}
FLOURS = {
    "flour", "all-purpose flour", "plain flour",
    "self-raising flour", "bread flour",
}
FATS = {
    "butter", "salted butter", "unsalted butter",
    "oil", "olive oil", "coconut oil",
}
SUGARS = {
    "sugar", "caster sugar", "granulated sugar",
    "soft light brown sugar", "brown sugar", "powdered sugar",
}


In [5]:
def guess_role(name: str):
    """Classify an ingredient as "liquid", "flour", "fat", "sugar" or "other".

    The lower-cased name is checked for exact membership in the role sets,
    in that order; anything not listed is "other".
    """
    key = name.lower()
    role_tables = (
        ("liquid", LIQUIDS),
        ("flour", FLOURS),
        ("fat", FATS),
        ("sugar", SUGARS),
    )
    for role, members in role_tables:
        if key in members:
            return role
    return "other"

In [6]:
# ------------------------
# recipe representation
# ------------------------
def random_amount(unit="g"):
    """Draw a random integer amount whose range depends on ``unit``.

    g: 10-300, ml: 30-500, tbsp/tsp: 1-8, any other unit: 1-200 (inclusive).
    """
    bounds_by_unit = {
        "g": (10, 300),
        "ml": (30, 500),
        "tbsp": (1, 8),
        "tsp": (1, 8),
    }
    low, high = bounds_by_unit.get(unit, (1, 200))
    return random.randint(low, high)

def random_unit_for(ingredient):
    """Pick a unit for an ingredient from its role set.

    Known liquids get "ml"; known flours, sugars and fats get "g"; anything
    else gets a random choice among g / ml / tbsp / tsp.
    """
    key = ingredient.lower()
    unit_by_table = (
        (LIQUIDS, "ml"),
        (FLOURS, "g"),
        (SUGARS, "g"),
        (FATS, "g"),
    )
    for members, unit in unit_by_table:
        if key in members:
            return unit
    return random.choice(["g","ml","tbsp","tsp"])

def make_random_recipe(pool, min_items=4, max_items=9):
    """Build a random recipe from ``pool``.

    Draws between ``min_items`` and ``max_items`` distinct ingredients, gives
    each a unit and a random amount for that unit, and returns the
    canonicalized list of ``(ingredient, amount, unit)`` tuples.
    """
    n_items = random.randint(min_items, max_items)
    picked = random.sample(pool, k=n_items)

    def build_item(name):
        unit = random_unit_for(name)
        return (name, random_amount(unit), unit)

    return canonicalize([build_item(name) for name in picked])

def canonicalize(items):
    """Merge entries that share both ingredient and unit.

    Amounts of duplicates are summed as floats and rounded to one decimal.
    The result keeps the order in which each (ingredient, unit) pair first
    appeared. The same ingredient in different units stays as separate entries.
    """
    totals = defaultdict(float)
    for name, amount, unit in items:
        totals[(name, unit)] += float(amount)
    return [(name, round(total, 1), unit) for (name, unit), total in totals.items()]

In [7]:
# ------------------------
# validity & fitness
# ------------------------
def has_role(items, role):
    """Return True if at least one item's ingredient is classified as ``role``."""
    for name, _amount, _unit in items:
        if guess_role(name) == role:
            return True
    return False

def soup_valid(items):
    """A soup is valid when it contains at least one liquid ingredient."""
    return has_role(items, role="liquid")

def cookie_valid(items):
    """A cookie is valid when it contains a flour, a fat and a sugar."""
    required_roles = ("flour", "fat", "sugar")
    return all(has_role(items, needed) for needed in required_roles)

def soup_ratio_score(items):
    """Score how close the liquid:solid ratio is to 2:1 (result in [0, 1]).

    Liquid is the sum of liquid-role amounts given in "ml"; solids are the
    non-liquid amounts given in "g". Entries in other units are ignored.
    Returns 0.0 when there are no solids.
    """
    liquid_ml = 0
    solids_g = 0
    for name, amount, unit in items:
        if guess_role(name) == "liquid":
            if unit == "ml":
                liquid_ml += amount
        elif unit == "g":
            solids_g += amount

    if solids_g <= 0:
        return 0.0

    # Gaussian bump centred on the target ratio.
    mu, sigma = 2.0, 0.8
    ratio = liquid_ml / solids_g
    return math.exp(-0.5*((ratio-mu)/sigma)**2)

def cookie_balance_score(items):
    """Score how well the gram totals of flour, sugar and fat fit rough targets.

    Target bands: flour 200-400 g (weight 0.4), sugar 60-160 g (weight 0.3),
    fat 80-200 g (weight 0.3). A total inside its band scores 1.0; outside it
    the score falls off proportionally. Only amounts given in "g" are counted.
    """
    # (role, weight, lower bound, upper bound)
    targets = (
        ("flour", 0.4, 200, 400),
        ("sugar", 0.3, 60, 160),
        ("fat", 0.3, 80, 200),
    )

    grams = {role: 0 for role, _w, _lo, _hi in targets}
    for name, amount, unit in items:
        role = guess_role(name)
        if role in grams and unit == "g":
            grams[role] += amount

    def band_score(total, lo, hi):
        return total/lo if total < lo else (hi/total if total > hi else 1.0)

    return sum(weight * band_score(grams[role], lo, hi)
               for role, weight, lo, hi in targets)

def fitness(items, target="cookie"):
    """Fitness of a recipe for the given target ("soup"; anything else means cookie).

    Invalid recipes get a hard penalty of -100.0. Otherwise the score is
    0.6 * flavour compatibility + 0.35 * balance score, minus a mild penalty
    for recipes with more than 12 ingredients (0.05 per extra item, capped at 0.5).
    """
    is_soup = target == "soup"

    validator = soup_valid if is_soup else cookie_valid
    if not validator(items):
        return -100.0  # hard penalty

    flavor = flavor_compatibility(items)
    balance = soup_ratio_score(items) if is_soup else cookie_balance_score(items)

    # Keep the ingredient count reasonable.
    extra = len(items) - 12
    size_pen = min(0.5, extra*0.05) if extra > 0 else 0.0

    return 0.6*flavor + 0.35*balance - size_pen

In [8]:
# ------------------------
# GA operators
# ------------------------
def tournament_select(pop, k=3):
    """Tournament selection: draw ``k`` members (with replacement), return a deep copy of the fittest.

    Ties keep the member drawn first. With ``k == 0`` nothing is drawn and
    ``None`` is returned.
    """
    draws = (random.choice(pop) for _ in range(k))
    winner = next(draws, None)
    for challenger in draws:
        if challenger["fitness"] > winner["fitness"]:
            winner = challenger
    return copy.deepcopy(winner)

def crossover(a_items, b_items):
    """Role-aware crossover of two recipes.

    Each parent is split into liquid / flour / fat / sugar / other blocks.
    For every role the child inherits the whole block from one randomly
    chosen parent, so ingredients of the same role stay together. The child
    is canonicalized before being returned.
    """
    roles = ("liquid", "flour", "fat", "sugar", "other")

    def group_by_role(items):
        groups = {role: [] for role in roles}
        for item in items:
            bucket = groups.get(guess_role(item[0]))
            if bucket is not None:
                bucket.append(item)
        return groups

    from_a = group_by_role(a_items)
    from_b = group_by_role(b_items)

    child = []
    for role in roles:
        donor_block = random.choice([from_a[role], from_b[role]])
        child.extend(donor_block)
    return canonicalize(child)

def mutate(items, p_add=0.3, p_drop=0.2, p_tweak=0.6, pool=None):
    """Return a mutated, canonicalized copy of ``items``; the input list is not modified.

    Three independent mutations may fire, in this order:
      * add   (probability ``p_add``): append a random ingredient from ``pool``
        with a unit and random amount, unless the recipe already has 16 items
        or the pool is empty;
      * drop  (probability ``p_drop``): remove one random item, only when more
        than 3 items are present;
      * tweak (probability ``p_tweak``): scale one random amount by 0.8, 0.9,
        1.1 or 1.2, never going below 1, rounded to one decimal.

    ``pool`` defaults to the module-level ``ALL_INGS``.
    """
    if pool is None:
        # Resolved at call time: a default of ``pool=ALL_INGS`` would freeze
        # whatever list existed when this cell was executed, so re-running the
        # knowledge-base cell would silently leave mutate() on stale data.
        pool = ALL_INGS

    items = items[:]
    # add
    if random.random() < p_add and len(items) < 16 and len(pool) > 0:
        new_ing = random.choice(pool)
        unit = random_unit_for(new_ing)
        items.append((new_ing, random_amount(unit), unit))
    # drop
    if random.random() < p_drop and len(items) > 3:
        items.pop(random.randrange(len(items)))
    # tweak amounts
    if random.random() < p_tweak and items:
        i = random.randrange(len(items))
        ing, amt, unit = items[i]
        factor = random.choice([0.8, 0.9, 1.1, 1.2])
        items[i] = (ing, round(max(1, amt*factor), 1), unit)
    return canonicalize(items)

In [None]:
def _kb_recipe_items(recipe):
    """Convert one knowledge-base recipe into ``(ingredient, amount, unit)`` tuples."""
    items = []
    for entry in recipe["ingredients"]:
        name = entry["ingredient"].strip().lower()
        # Fall back to a role-based unit when the entry has none; the lookup uses
        # the normalised name so stray whitespace cannot defeat the role match.
        unit = str(entry.get("unit") or "").strip().lower() or random_unit_for(name)
        # Amounts may be text such as "2 3/4", so they go through parse_amount.
        items.append((name, parse_amount(entry.get("amount", 0)), unit))
    return items


def run_ga(target="cookie", pop_size=40, gens=30, elitism=2, seed=None):
    """Evolve recipes with a simple generational genetic algorithm.

    Parameters
    ----------
    target : "soup", or anything else for cookie; forwarded to ``fitness``.
    pop_size : number of individuals per generation (must be >= 1).
    gens : number of generations.
    elitism : how many of the best individuals are carried over unchanged
        (clamped to the range 0..pop_size).
    seed : optional seed for the ``random`` module.

    Returns
    -------
    (best, population) : the fittest individual and the final population sorted
    by descending fitness. Individuals are dicts ``{"items": [...], "fitness": float}``.

    Raises
    ------
    ValueError : if ``pop_size`` is below 1 or the knowledge base ``KB`` is empty.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")
    if not KB:
        raise ValueError("knowledge base is empty; cannot build an initial population")
    if seed is not None:
        random.seed(seed)

    # Initial population: merge the ingredients of 1-2 real recipes so the
    # search starts from sensible combinations.
    initial_pool = []
    for _ in range(pop_size):
        # Never ask for more source recipes than the knowledge base holds.
        n_sources = min(len(KB), random.choice([1, 2]))
        items = []
        for recipe in random.sample(KB, k=n_sources):
            items.extend(_kb_recipe_items(recipe))
        initial_pool.append(canonicalize(items))

    population = [{"items": it, "fitness": fitness(it, target)} for it in initial_pool]
    n_elite = max(0, min(elitism, pop_size))

    for _ in range(gens):
        population.sort(key=lambda x: x["fitness"], reverse=True)
        next_pop = population[:n_elite]  # carry over the best

        while len(next_pop) < pop_size:
            p1 = tournament_select(population, k=3)
            p2 = tournament_select(population, k=3)
            child_items = crossover(p1["items"], p2["items"])
            child_items = mutate(child_items)
            next_pop.append({"items": child_items, "fitness": fitness(child_items, target)})
        population = next_pop
        # simple adaptive mutation: if top doesn't improve for long, you could increase mutation here

    population.sort(key=lambda x: x["fitness"], reverse=True)
    return population[0], population

In [11]:
# Evolve a cookie recipe with a fixed seed, then report the winner.
best, pop = run_ga("cookie", pop_size=40, gens=30, elitism=2, seed=42)

print("BEST FITNESS:", round(best["fitness"], 3))
print("BEST RECIPE:")
# One line per ingredient: amount, unit, name.
for ing, amt, unit in best["items"]:
    print(f" - {amt} {unit} {ing}")

ValueError: could not convert string to float: '2 3/4'