In [None]:
# hpo_rf_ga.ipynb
import numpy as np
import random
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Seed both random sources used by this script (the stdlib `random` module
# and NumPy's global generator) to make runs repeatable.
random.seed(123)
np.random.seed(123)

# Genetic-algorithm settings.
POP_SIZE = 24          # individuals per generation
GENERATIONS = 18       # number of evolution rounds
TOURNAMENT_K = 3       # contenders drawn for each tournament selection
CROSSOVER_RATE = 0.9   # probability that a parent pair is recombined
MUTATION_RATE = 0.2    # probability of mutating each individual gene
ELITISM = True         # copy the best individual into the next generation

# Load the Iris dataset as a (features, labels) pair.
X, y = load_iris(return_X_y=True)

# Hyperparameter search space.
# Each integer range is a (low, high) pair that is unpacked into
# np.random.randint, so `high` itself is never sampled.
N_ESTIMATORS = (10, 300)     # int
MAX_DEPTH = (1, 30)          # int  (None is left out to keep the demo simple)
MIN_SAMPLES_SPLIT = (2, 20)  # int
MAX_FEATURES_CHOICES = ["sqrt", "log2", None]

def init_individual():
    """Sample one random individual from the hyperparameter search space.

    Returns:
        A dict with the keys ``n_estimators``, ``max_depth``,
        ``min_samples_split`` (integers drawn with ``np.random.randint`` from
        the corresponding module-level range) and ``max_features`` (picked
        with ``random.choice`` from ``MAX_FEATURES_CHOICES``).
    """
    integer_genes = (
        ("n_estimators", N_ESTIMATORS),
        ("max_depth", MAX_DEPTH),
        ("min_samples_split", MIN_SAMPLES_SPLIT),
    )
    # Draw the integer genes in a fixed order so the random stream is
    # consumed the same way on every call.
    genome = {name: np.random.randint(*bounds) for name, bounds in integer_genes}
    genome["max_features"] = random.choice(MAX_FEATURES_CHOICES)
    return genome

def fitness(ind):
    """Score an individual by cross-validated accuracy on the global X, y.

    Args:
        ind: Dict providing ``n_estimators``, ``max_depth``,
            ``min_samples_split`` and ``max_features``.

    Returns:
        Mean accuracy over a shuffled, stratified 5-fold split. Both the
        forest and the splitter use the fixed seed 123.
    """
    tuned = ("n_estimators", "max_depth", "min_samples_split", "max_features")
    forest = RandomForestClassifier(
        **{name: ind[name] for name in tuned},
        random_state=123,
        n_jobs=-1,
    )
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    fold_scores = cross_val_score(forest, X, y, cv=folds, scoring="accuracy")
    return fold_scores.mean()

def tournament_select(pop, fits, k=None):
    """Pick one individual by tournament selection and return a copy of it.

    Args:
        pop: Sequence of individuals (hyperparameter dicts).
        fits: Fitness scores aligned index-by-index with ``pop``; higher is
            better.
        k: Number of contenders drawn without replacement. Defaults to the
            module-level ``TOURNAMENT_K``. Values larger than ``len(pop)``
            are clamped to ``len(pop)`` so a small population does not make
            the sampling call fail.

    Returns:
        A shallow copy of the contender with the highest fitness. On ties the
        contender drawn first wins.

    Raises:
        ValueError: If ``pop`` is empty.
    """
    if k is None:
        k = TOURNAMENT_K
    if len(pop) == 0:
        raise ValueError("cannot run a tournament on an empty population")
    # Sampling without replacement cannot draw more contenders than exist.
    size = min(k, len(pop))
    idxs = np.random.choice(len(pop), size=size, replace=False)
    best = max(idxs, key=lambda i: fits[i])
    return pop[best].copy()

def uniform_crossover(a, b):
    """Recombine two parents gene by gene.

    A single uniform draw decides whether crossover happens at all: when the
    draw is at or above ``CROSSOVER_RATE`` the parents are returned as
    shallow copies. Otherwise each key of ``a`` gets its own fair coin flip
    that decides which child inherits ``a``'s value; the other child receives
    ``b``'s value for that key.

    Args:
        a: First parent dict.
        b: Second parent dict; must contain every key of ``a``.

    Returns:
        A tuple ``(child1, child2)`` of new dicts.
    """
    if np.random.rand() >= CROSSOVER_RATE:
        return a.copy(), b.copy()

    first, second = {}, {}
    for gene in a:
        straight = np.random.rand() < 0.5
        if straight:
            first[gene], second[gene] = a[gene], b[gene]
        else:
            first[gene], second[gene] = b[gene], a[gene]
    return first, second

def mutate(ind):
    """Return a mutated copy of ``ind``; the argument itself is not modified.

    Every gene is considered independently and mutates when a uniform draw
    falls below ``MUTATION_RATE``:

    * integer genes are shifted by a random step (at most 20 for
      ``n_estimators``, 3 for ``max_depth``, 2 for ``min_samples_split``, in
      either direction) and clipped to ``[low, high - 1]`` of their range;
    * ``max_features`` is re-drawn from ``MAX_FEATURES_CHOICES`` (the draw may
      return the current value again).

    Args:
        ind: Individual dict to mutate.

    Returns:
        A new dict with the (possibly) mutated genes.
    """
    mutant = ind.copy()

    # (gene name, (low, high) range, largest step in either direction)
    numeric_genes = (
        ("n_estimators", N_ESTIMATORS, 20),
        ("max_depth", MAX_DEPTH, 3),
        ("min_samples_split", MIN_SAMPLES_SPLIT, 2),
    )
    for gene, bounds, max_step in numeric_genes:
        if np.random.rand() < MUTATION_RATE:
            shifted = mutant[gene] + np.random.randint(-max_step, max_step + 1)
            # The upper bound is exclusive, matching how the range is sampled.
            mutant[gene] = int(np.clip(shifted, bounds[0], bounds[1] - 1))

    if np.random.rand() < MUTATION_RATE:
        mutant["max_features"] = random.choice(MAX_FEATURES_CHOICES)
    return mutant

# GA
# Scores already computed, keyed by genome. fitness() uses fixed seeds, so an
# individual that reappears (the elite, or an unchanged child) can reuse its
# earlier score instead of paying for another 5-fold cross-validation.
_fitness_cache = {}


def _evaluate(ind):
    """Return fitness(ind), computing it at most once per distinct genome."""
    key = tuple(sorted(ind.items(), key=lambda item: item[0]))
    if key not in _fitness_cache:
        _fitness_cache[key] = fitness(ind)
    return _fitness_cache[key]


population = [init_individual() for _ in range(POP_SIZE)]
fitnesses = [_evaluate(ind) for ind in population]

# Track the best individual seen in any generation, so the final report does
# not depend on it surviving into the last population when ELITISM is off.
top = int(np.argmax(fitnesses))
best, best_fitness = population[top].copy(), fitnesses[top]

for gen in range(GENERATIONS):
    new_pop = []
    if ELITISM:
        # Carry the current best over unchanged (no crossover, no mutation).
        elite = population[int(np.argmax(fitnesses))].copy()
        new_pop.append(elite)
    while len(new_pop) < POP_SIZE:
        p1 = tournament_select(population, fitnesses)
        p2 = tournament_select(population, fitnesses)
        c1, c2 = uniform_crossover(p1, p2)
        c1, c2 = mutate(c1), mutate(c2)
        new_pop.extend([c1, c2])
    # Children are added in pairs, so drop any overshoot past POP_SIZE.
    population = new_pop[:POP_SIZE]
    fitnesses = [_evaluate(ind) for ind in population]

    top = int(np.argmax(fitnesses))
    if fitnesses[top] > best_fitness:
        best, best_fitness = population[top].copy(), fitnesses[top]
    print(f"Gen {gen+1:02d} | best={np.max(fitnesses):.4f} | avg={np.mean(fitnesses):.4f}")

print("Best hyperparams:", best)
print("Best CV accuracy:", best_fitness)
