In [3]:
import random
import math
import operator            # <--- CORRECCIÓN: importar operator para attrgetter
from functools import partial

from deap import base, creator, tools, gp

# ------------------ Utilities (operadores protegidos) ------------------
def protectedDiv(a, b):
    """Divide ``a`` by ``b``, yielding 1.0 whenever the division raises.

    Any exception raised by ``a / b`` (division by zero being the usual one)
    is swallowed and replaced by the constant 1.0.
    """
    try:
        quotient = a / b
    except Exception:
        return 1.0
    return quotient

def if_gt(a, b, c):
    """Select ``b`` when ``a`` is strictly greater than zero, otherwise ``c``."""
    if a > 0:
        return b
    return c

def protectedSqrt(a):
    """Return the square root of the absolute value of ``a``."""
    magnitude = abs(a)
    return math.sqrt(magnitude)

# ------------------ Entorno / simulador ------------------
class GridEnv:
    """Square grid world in which a robot delivers cookies to engineers.

    The robot occupies one cell of a ``size`` x ``size`` grid. On every step
    it either moves one cell (clamped to the grid) or attempts a delivery on
    its current cell. An episode ends once ``max_steps`` steps have elapsed.
    """

    def __init__(self, size, engineers, max_steps=50, allow_multiple_deliveries=True):
        """
        size: side length of the square grid (int)
        engineers: iterable of (x, y) engineer positions
        max_steps: simulation horizon, in steps
        allow_multiple_deliveries: if True, the same engineer may receive
            several cookies; if False, only the first delivery is rewarded
        """
        self.size = size
        self.engineers = list(engineers)
        self.max_steps = max_steps
        self.allow_multiple = allow_multiple_deliveries
        # Create the episode state right away so that step() / get_sensors()
        # work on a freshly built environment without an explicit reset().
        self.reset()

    def reset(self):
        """Start a new episode: robot in the centre, no deliveries, t = 0."""
        # Robot starts at the centre cell (floor division).
        centre = self.size // 2
        self.robot = (centre, centre)
        # Delivery counter per engineer, same order as self.engineers.
        self.deliveries = [0] * len(self.engineers)
        self.t = 0

    def nearest_engineer_info(self):
        """Locate the engineer closest (Euclidean distance) to the robot.

        Returns ``(index, dx, dy, distance)`` where ``dx``/``dy`` are the
        engineer's offsets from the robot. Ties keep the engineer that comes
        first in ``self.engineers``. With no engineers the result is
        ``(None, 0, 0, None)``.
        """
        rx, ry = self.robot
        best_idx = None
        best_dx = 0
        best_dy = 0
        best_d = None
        for idx, (ex, ey) in enumerate(self.engineers):
            dx = ex - rx
            dy = ey - ry
            d = math.hypot(dx, dy)
            # Strict comparison: the first of several equidistant engineers wins.
            if best_d is None or d < best_d:
                best_idx, best_dx, best_dy, best_d = idx, dx, dy, d
        return best_idx, best_dx, best_dy, best_d

    def step(self, action):
        """
        Apply one action and advance the clock.

        action: int 0..4 (0:N, 1:S, 2:W, 3:E, 4:DELIVER); any other value is a
            no-op that still consumes one time step
        Returns: reward (0 or 1), done (bool)
        """
        rx, ry = self.robot
        reward = 0

        if action == 0:  # North
            self.robot = (rx, max(0, ry - 1))
        elif action == 1:  # South
            self.robot = (rx, min(self.size - 1, ry + 1))
        elif action == 2:  # West
            self.robot = (max(0, rx - 1), ry)
        elif action == 3:  # East
            self.robot = (min(self.size - 1, rx + 1), ry)
        elif action == 4:  # Deliver
            # Only the first engineer listed on the robot's cell is served.
            for i, position in enumerate(self.engineers):
                if tuple(position) == self.robot:
                    # Repeat deliveries are rewarded only when allowed.
                    if self.allow_multiple or self.deliveries[i] == 0:
                        self.deliveries[i] += 1
                        reward = 1
                    break
        # Any other action value: invalid, treated as a no-op.

        self.t += 1
        done = self.t >= self.max_steps
        return reward, done

    def get_sensors(self):
        """
        Return the normalised tuple (dx, dy, dist, remaining, tleft):
        - dx, dy: offsets to the nearest engineer divided by ``size``
          (within [-1, 1] for engineers located inside the grid)
        - dist: distance to the nearest engineer divided by the grid diagonal
        - remaining: fraction of engineers that have received nothing yet
        - tleft: fraction of the horizon still available

        Degenerate setups do not raise: with no engineers ``dx``, ``dy`` and
        ``dist`` are 0; with a non-positive ``max_steps`` ``tleft`` is 0.0.
        """
        _, dx, dy, dist = self.nearest_engineer_info()
        # No engineers -> no distance; report 0.0 instead of failing on None.
        if dist is None:
            dist = 0.0

        scale = max(1.0, self.size)
        diagonal = math.hypot(self.size, self.size)
        dist_norm = dist / diagonal if diagonal > 0 else 0.0

        pending = sum(1 for count in self.deliveries if count == 0)
        remaining = pending / max(1, len(self.engineers))

        # A zero-length horizon has no time left by definition.
        if self.max_steps > 0:
            tleft = max(0, self.max_steps - self.t) / self.max_steps
        else:
            tleft = 0.0
        return (dx / scale, dy / scale, dist_norm, remaining, tleft)

# ------------------ GP setup ------------------
def make_pset():
    """Build the primitive set used to evolve controller expressions.

    The set takes five inputs, named ``dx``, ``dy``, ``dist``, ``remaining``
    and ``tleft``, and provides arithmetic, trigonometric and conditional
    primitives together with four numeric constants.
    """
    pset = gp.PrimitiveSet("MAIN", 5)
    pset.renameArguments(
        ARG0='dx',
        ARG1='dy',
        ARG2='dist',
        ARG3='remaining',
        ARG4='tleft',
    )

    # (callable, arity, name) for every function node, in registration order.
    # if_gt(cond, then_val, else_val) returns then_val when cond > 0.
    primitives = (
        (operator.add, 2, "add"),
        (operator.sub, 2, "sub"),
        (operator.mul, 2, "mul"),
        (protectedDiv, 2, "pdiv"),
        (operator.neg, 1, "neg"),
        (abs, 1, "abs"),
        (protectedSqrt, 1, "psqrt"),
        (math.sin, 1, "sin"),
        (math.cos, 1, "cos"),
        (if_gt, 3, "if_gt"),
    )
    for func, arity, label in primitives:
        pset.addPrimitive(func, arity, name=label)

    # Constant terminals.
    for constant in (0.0, 1.0, -1.0, 0.5):
        pset.addTerminal(constant)

    return pset

# ------------------ fitness: simular episodio ------------------
def evaluate_individual(individual, toolbox, env: GridEnv,
                        alpha_var=0.5, beta_idle=0.01):
    """
    Simulate one episode controlled by ``individual`` and return its fitness.

    fitness = total_deliveries - alpha_var * variance - beta_idle * idle_steps

    individual: expression tree, compiled through ``toolbox.compile``
    toolbox: object exposing ``compile(expr=...)`` that returns a callable
        taking the five sensor values
    env: environment to simulate on; it is reset at the start, so its state is
        overwritten
    alpha_var: weight of the per-engineer delivery variance (fairness penalty)
    beta_idle: weight of the number of steps that earned no reward

    Returns: one-element tuple ``(fitness,)``; higher is better.
    """
    func = toolbox.compile(expr=individual)
    env.reset()
    total_reward = 0
    idle_steps = 0

    for _ in range(env.max_steps):
        sensors = env.get_sensors()
        # Deliberately broad: a failing evolved expression must not abort the
        # whole run, so any error falls back to output 0.0.
        try:
            out = func(*sensors)
        except Exception:
            out = 0.0
        # int() raises on inf/nan, which evolved arithmetic can produce;
        # treat such outputs like a failed evaluation.
        if not math.isfinite(out):
            out = 0.0
        # Map the output onto an action in 0..4.
        action = int(abs(out)) % 5
        reward, done = env.step(action)
        if reward == 0:
            idle_steps += 1
        else:
            total_reward += reward
        if done:
            break

    # Fairness: population variance of deliveries per engineer
    # (defined as 0.0 when there are no engineers at all).
    vals = env.deliveries
    if vals:
        mean = sum(vals) / len(vals)
        var = sum((v - mean) ** 2 for v in vals) / len(vals)
    else:
        var = 0.0

    fitness = total_reward - alpha_var * var - beta_idle * idle_steps
    # Fitness is maximised; DEAP expects a tuple.
    return (fitness,)

# ------------------ main GP experiment ------------------
def run_experiment(seed=42,
                   pop_size=200,
                   ngen=30,
                   cxpb=0.6,
                   mutpb=0.3,
                   grid_size=7,
                   max_steps=50,
                   engineers=None):
    """
    Evolve a robot controller with genetic programming and replay the best one.

    seed: seed passed to ``random.seed``
    pop_size: number of individuals in the population
    ngen: number of generations
    cxpb: probability of mating each consecutive pair of offspring
    mutpb: probability of mutating each offspring
    grid_size: side length of the grid
    max_steps: episode horizon, in steps
    engineers: optional list of (x, y) engineer positions; when None, a fixed
        six-engineer layout with coordinates up to 5 is used

    Prints the best individual after every generation and the delivery counts
    of the final replay.

    Returns: ``(best, env)`` - the best individual found and the environment
    left in the state reached by replaying it.
    """
    random.seed(seed)
    if engineers is None:
        # Default layout: fixed engineer positions.
        engineers = [(1, 1), (1, 5), (5, 1), (5, 5), (3, 1), (1, 3)]
    env = GridEnv(size=grid_size, engineers=engineers, max_steps=max_steps)

    pset = make_pset()

    # Create the DEAP classes only once so that re-running the cell in a
    # notebook neither warns nor replaces the existing classes.
    if not hasattr(creator, "FitnessMax"):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()
    toolbox.register("expr_init", gp.genHalfAndHalf, pset=pset, min_=1, max_=3)
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr_init)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("compile", gp.compile, pset=pset)

    # Genetic operators.
    toolbox.register("evaluate", evaluate_individual, toolbox=toolbox, env=env)
    toolbox.register("select", tools.selTournament, tournsize=3)
    toolbox.register("mate", gp.cxOnePoint)
    toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
    toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

    # Limit tree height. staticLimit enforces the limit on the individuals the
    # decorated operator RETURNS, so those return values must be used below.
    toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
    toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))

    pop = toolbox.population(n=pop_size)
    hof = tools.HallOfFame(1)

    # Evaluate the initial population.
    for ind in pop:
        if not ind.fitness.valid:
            ind.fitness.values = toolbox.evaluate(ind)
    # Record the initial best too, otherwise it is lost if selection drops it.
    hof.update(pop)

    for gen in range(1, ngen + 1):
        offspring = toolbox.select(pop, len(pop))
        offspring = [toolbox.clone(ind) for ind in offspring]

        # Crossover on consecutive pairs (0,1), (2,3), ...
        for i in range(1, len(offspring), 2):
            if random.random() < cxpb:
                offspring[i - 1], offspring[i] = toolbox.mate(offspring[i - 1], offspring[i])
                del offspring[i - 1].fitness.values
                del offspring[i].fitness.values

        # Mutation.
        for i in range(len(offspring)):
            if random.random() < mutpb:
                offspring[i], = toolbox.mutate(offspring[i])
                del offspring[i].fitness.values

        # Re-evaluate only the individuals whose fitness was invalidated.
        for ind in offspring:
            if not ind.fitness.valid:
                ind.fitness.values = toolbox.evaluate(ind)

        pop[:] = offspring
        hof.update(pop)

        best = hof[0]
        print(f"Gen {gen} best fitness {best.fitness.values[0]:.4f} expr: {best}")

    # Final report.
    best = hof[0]
    print("\nMejor individuo (árbol):\n", best)

    # Replay the best individual, mapping outputs to actions exactly as the
    # fitness evaluation does (errors and non-finite outputs count as 0.0).
    env.reset()
    func = toolbox.compile(expr=best)
    for _ in range(env.max_steps):
        sensors = env.get_sensors()
        try:
            out = func(*sensors)
        except Exception:
            out = 0.0
        if not math.isfinite(out):
            out = 0.0
        action = int(abs(out)) % 5
        _, done = env.step(action)
        if done:
            break
    print("Entregas por ingeniero:", env.deliveries)
    print("Total entregas:", sum(env.deliveries))
    return best, env

if __name__ == "__main__":
    # Small, fast settings intended for trial runs in Colab.
    best, env = run_experiment(
        seed=1234,
        pop_size=150,
        ngen=25,
        grid_size=7,
        max_steps=50,
    )


Gen 1 best fitness -0.5000 expr: pdiv(abs(pdiv(0.5, 1.0)), mul(if_gt(tleft, tleft, dx), abs(0.5)))
Gen 2 best fitness 0.4406 expr: add(pdiv(sub(0.5, remaining), sub(tleft, abs(if_gt(0.5, 0.5, remaining)))), dx)
Gen 3 best fitness 0.4406 expr: add(pdiv(sub(0.5, remaining), sub(tleft, abs(if_gt(0.5, 0.5, remaining)))), dx)
Gen 4 best fitness 0.4406 expr: add(pdiv(sub(0.5, remaining), sub(tleft, abs(if_gt(0.5, 0.5, remaining)))), dx)
Gen 5 best fitness 0.4406 expr: add(pdiv(sub(0.5, remaining), sub(tleft, abs(if_gt(0.5, 0.5, remaining)))), dx)
Gen 6 best fitness 0.4406 expr: add(pdiv(sub(0.5, remaining), sub(tleft, abs(if_gt(0.5, 0.5, remaining)))), dx)
Gen 7 best fitness 0.4406 expr: add(pdiv(sub(0.5, remaining), sub(tleft, abs(if_gt(0.5, 0.5, remaining)))), dx)
Gen 8 best fitness 0.4406 expr: add(pdiv(sub(0.5, remaining), sub(tleft, abs(if_gt(0.5, 0.5, remaining)))), dx)
Gen 9 best fitness 0.4406 expr: add(pdiv(sub(0.5, remaining), sub(tleft, abs(if_gt(0.5, 0.5, remaining)))), dx)
Gen 1