In [None]:
!pip install deap

In [None]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import time
from copy import deepcopy
from functools import partial
import multiprocessing
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from deap import base, creator, tools, algorithms
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K

In [None]:
# Seed the three RNGs this notebook uses: Python's random, NumPy and TensorFlow.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
# Genetic-algorithm settings; these are the default arguments of run_ga.
# POP_SIZE: individuals in the population, GENERATIONS: generations to evolve,
# CX_PB / MUT_PB: crossover and mutation probabilities handed to eaSimple.
POP_SIZE = 20
GENERATIONS = 12
CX_PB = 0.6
MUT_PB = 0.3
# Worker processes for fitness evaluation: all CPUs but one, never fewer than 1.
N_JOBS = max(1, multiprocessing.cpu_count() - 1)
# NOTE(review): EPOCHS is not referenced in the visible cells (per-individual
# epochs come from EPOCHS_OPTIONS) - confirm whether it is still needed.
EPOCHS = 25
# Discrete search space. Genes store an index into the matching list.
BATCH_OPTIONS = [16, 32, 64]
LR_OPTIONS = [1e-3, 5e-4, 1e-4]
ACTIVATIONS = ['relu', 'tanh', 'elu']
NEURON_CHOICES = [8, 16, 32, 64, 128, 256]
# Upper bound on hidden layers; also the number of neuron genes in a genome.
MAX_HIDDEN_LAYERS = 3

In [None]:
def load_data_from_optional_csv(csv_path=None, target_col=None):
    """Load features and labels from a CSV file, or fall back to a built-in dataset.

    Args:
        csv_path: Path of the CSV file to read. When None, the dataset returned
            by ``load_breast_cancer()`` is used instead.
        target_col: Name of the label column. Required when ``csv_path`` is given.

    Returns:
        A tuple ``(X, y, feature_names)``: the feature values, the label values
        and the names of the feature columns.

    Raises:
        ValueError: ``csv_path`` was given without ``target_col``.
        KeyError: ``target_col`` is not a column of the CSV file.
    """
    if csv_path is None:
        dataset = load_breast_cancer()
        return dataset.data, dataset.target, dataset.feature_names

    # Validate the arguments before touching the file so a missing target_col
    # fails immediately instead of after a (possibly large) CSV has been parsed.
    if target_col is None:
        raise ValueError("Si subes CSV debes proveer target_col (nombre de columna objetivo).")

    df = pd.read_csv(csv_path)
    if target_col not in df.columns:
        # Same exception type as a plain df[target_col] lookup, with a clearer message.
        raise KeyError(
            "La columna objetivo {!r} no existe en el CSV. Columnas disponibles: {}".format(
                target_col, list(df.columns)
            )
        )

    # Drop the target once and reuse the result for both the values and the names.
    features = df.drop(columns=[target_col])
    return features.values, df[target_col].values, features.columns.values

In [None]:
# Leave csv_path as None to use the built-in dataset; to use your own data set
# both csv_path and target_col.
csv_path = None
target_col = None
X, y, feature_names = load_data_from_optional_csv(csv_path, target_col)

# Train/Validation/Test split
# 15% is held out for test; 0.1765 of the remaining 85% is ~15% of the full
# data, so the result is roughly 70/15/15. Both splits are stratified on the labels.
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.15, random_state=SEED, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1765, random_state=SEED, stratify=y_trainval)

# Fit the scaler on the training split only, then apply it to validation and test.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

INPUT_DIM = X_train.shape[1]
# N_CLASSES is used as the number of output units: 1 when there are exactly two
# distinct labels (single sigmoid unit), otherwise one unit per distinct label.
N_CLASSES = 1 if len(np.unique(y)) == 2 else len(np.unique(y))

print("Dataset shape: X_train", X_train.shape, "X_val", X_val.shape, "X_test", X_test.shape)
print("Input dim:", INPUT_DIM)

In [None]:
# Further option lists for the search space; genes store an index into them.
DROPOUT_OPTIONS = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
EPOCHS_OPTIONS = [8, 12, 25, 40]

# Genome length. Layout, in order: n_hidden, MAX_HIDDEN_LAYERS neuron indices,
# activation index, dropout index, learning-rate index, batch index, epochs index.
IND_SIZE = 1 + MAX_HIDDEN_LAYERS + 1 + 1 + 1 + 1 + 1


def decode_individual(ind):
    """Translate a genome (sequence of integer genes) into a hyperparameter dict.

    Gene 0 is the number of hidden layers. The next MAX_HIDDEN_LAYERS genes are
    indices into NEURON_CHOICES, of which only the first ``n_hidden`` are used.
    The five genes after that index ACTIVATIONS, DROPOUT_OPTIONS, LR_OPTIONS,
    BATCH_OPTIONS and EPOCHS_OPTIONS, in that order.

    Returns:
        dict with keys 'n_hidden', 'neurons', 'activation', 'dropout', 'lr',
        'batch' and 'epochs_eval'.
    """
    # Position of the first gene after the per-layer neuron genes.
    tail = 1 + MAX_HIDDEN_LAYERS
    depth = int(ind[0])
    # Neuron genes beyond the active depth are ignored.
    layer_sizes = [NEURON_CHOICES[int(gene)] for gene in ind[1:tail][:depth]]
    return {
        'n_hidden': depth,
        'neurons': layer_sizes,
        'activation': ACTIVATIONS[int(ind[tail])],
        'dropout': DROPOUT_OPTIONS[int(ind[tail + 1])],
        'lr': LR_OPTIONS[int(ind[tail + 2])],
        'batch': BATCH_OPTIONS[int(ind[tail + 3])],
        'epochs_eval': int(EPOCHS_OPTIONS[int(ind[tail + 4])]),
    }

def build_model_from_genome(genome):
    """Build an uncompiled Keras Sequential model described by a decoded genome.

    The Keras session is cleared first. With ``genome['n_hidden'] == 0`` the
    model is a single output layer fed directly by the INPUT_DIM inputs.
    Otherwise one Dense layer is added per entry of ``genome['neurons']``, each
    followed by a Dropout layer when ``genome['dropout'] > 0``, and then the
    output layer.

    The output layer is one sigmoid unit when N_CLASSES == 1, otherwise
    N_CLASSES softmax units.
    """
    K.clear_session()
    net = Sequential()

    # Output head depends only on the module-level N_CLASSES.
    if N_CLASSES == 1:
        out_units, out_activation = 1, 'sigmoid'
    else:
        out_units, out_activation = N_CLASSES, 'softmax'

    if genome['n_hidden'] == 0:
        net.add(Dense(out_units, input_dim=INPUT_DIM, activation=out_activation))
        return net

    def add_dropout_if_enabled():
        # A dropout rate of 0 means "no Dropout layer at all".
        if genome['dropout'] > 0:
            net.add(Dropout(genome['dropout']))

    widths = genome['neurons']
    # Only the first hidden layer declares the input dimension.
    net.add(Dense(widths[0], input_dim=INPUT_DIM, activation=genome['activation']))
    add_dropout_if_enabled()
    for width in widths[1:]:
        net.add(Dense(width, activation=genome['activation']))
        add_dropout_if_enabled()

    net.add(Dense(out_units, activation=out_activation))
    return net

def evaluate_individual(individual, X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val):
    """Train the network encoded by ``individual`` and score it on the validation data.

    The genome is decoded, a model is built and compiled with Adam at the
    genome's learning rate, and it is trained with early stopping on
    ``val_accuracy`` (patience 4, best weights restored). The dataset arguments
    default to the module-level splits as they were when this function was defined.

    Returns:
        One-element tuple ``(validation_accuracy,)``. If training or evaluation
        raises, the error is printed and the fitness is ``(0.0,)``.
    """
    genome = decode_individual(individual)
    model = build_model_from_genome(genome)
    optimizer = Adam(learning_rate=genome['lr'])

    # One sigmoid unit -> binary loss; otherwise integer labels with softmax.
    loss = 'binary_crossentropy' if N_CLASSES == 1 else 'sparse_categorical_crossentropy'
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    stopper = EarlyStopping(monitor='val_accuracy', patience=4, restore_best_weights=True, verbose=0)
    fit_options = {
        'validation_data': (X_val, y_val),
        'epochs': genome['epochs_eval'],
        'batch_size': genome['batch'],
        'callbacks': [stopper],
        'verbose': 0,
    }
    try:
        model.fit(X_train, y_train, **fit_options)
        scores = model.evaluate(X_val, y_val, verbose=0)
        # scores is [loss, accuracy] when a metric is present; fall back to the
        # first entry if only one value came back.
        val_acc = scores[1] if len(scores) > 1 else scores[0]
    except Exception as e:
        # Best effort: a failing individual gets the worst fitness instead of
        # aborting the whole search.
        print("Error entrenando individuo:", e)
        val_acc = 0.0
    K.clear_session()
    return (float(val_acc),)
# DEAP types: a single-objective fitness with weight +1.0 (maximised) and a
# list-based individual that carries that fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# Gene samplers. n_hidden is drawn from 1..MAX_HIDDEN_LAYERS inclusive, so a
# freshly sampled genome always has at least one hidden layer. Every other gene
# is a random index into its option list.
toolbox.register("n_hidden_attr", random.randint, 1, MAX_HIDDEN_LAYERS)
toolbox.register("neuron_attr", random.randrange, 0, len(NEURON_CHOICES))
toolbox.register("act_attr", random.randrange, 0, len(ACTIVATIONS))
toolbox.register("dropout_attr", random.randrange, 0, len(DROPOUT_OPTIONS))
toolbox.register("lr_attr", random.randrange, 0, len(LR_OPTIONS))
toolbox.register("batch_attr", random.randrange, 0, len(BATCH_OPTIONS))
# NOTE(review): alias is "epochattr" (no underscore), unlike the other "*_attr" aliases.
toolbox.register("epochattr", random.randrange, 0, len(EPOCHS_OPTIONS))

def create_individual():
    """Sample a random genome and wrap it in a ``creator.Individual``.

    Genes are drawn in genome order: the hidden-layer count, MAX_HIDDEN_LAYERS
    neuron indices, then the activation, dropout, learning-rate, batch and
    epochs indices.
    """
    genes = [toolbox.n_hidden_attr()]
    genes.extend(toolbox.neuron_attr() for _ in range(MAX_HIDDEN_LAYERS))
    # Remaining single-valued genes, sampled in the order they appear in the genome.
    trailing_samplers = (
        toolbox.act_attr,
        toolbox.dropout_attr,
        toolbox.lr_attr,
        toolbox.batch_attr,
        toolbox.epochattr,
    )
    for sample_gene in trailing_samplers:
        genes.append(sample_gene())
    return creator.Individual(genes)
# Individuals are produced by create_individual; a population is a list built
# by calling toolbox.individual repeatedly.
toolbox.register("individual", create_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Crossover operator: DEAP's two-point crossover.
toolbox.register("mate", tools.cxTwoPoint)

In [None]:
def mutate_architecture(individual, indpb=0.2):
    """Mutate a genome in place, resampling each gene independently.

    Every gene is replaced with probability ``indpb`` by a fresh draw from its
    own valid range: 1..MAX_HIDDEN_LAYERS for the hidden-layer count, and an
    index into the matching option list for all other genes.

    Returns:
        One-element tuple containing the same (mutated) individual.
    """
    if random.random() < indpb:
        individual[0] = random.randint(1, MAX_HIDDEN_LAYERS)

    # (gene position, option list) for every index-valued gene, in genome order.
    first_tail_gene = 1 + MAX_HIDDEN_LAYERS
    index_genes = [(position, NEURON_CHOICES) for position in range(1, first_tail_gene)]
    tail_options = (ACTIVATIONS, DROPOUT_OPTIONS, LR_OPTIONS, BATCH_OPTIONS, EPOCHS_OPTIONS)
    index_genes.extend(
        (first_tail_gene + offset, options) for offset, options in enumerate(tail_options)
    )

    for position, options in index_genes:
        if random.random() < indpb:
            individual[position] = random.randrange(0, len(options))
    return (individual,)
# Mutation is registered with indpb=0.25, overriding the function default of 0.2.
toolbox.register("mutate", mutate_architecture, indpb=0.25)
# Selection operator: DEAP tournament selection with tournsize=3.
toolbox.register("select", tools.selTournament, tournsize=3)

def evaluate_pool_wrapper(individual):
    """Evaluate one individual; this is the function run_ga registers as "evaluate".

    Delegates to evaluate_individual with its default datasets and returns the
    resulting fitness tuple unchanged.
    """
    fitness = evaluate_individual(individual)
    return fitness

def run_ga(pop_size=POP_SIZE, gens=GENERATIONS, cxpb=CX_PB, mutpb=MUT_PB, n_jobs=N_JOBS):
    """Run the evolutionary search with DEAP's eaSimple.

    Args:
        pop_size: Number of individuals in the population.
        gens: Number of generations to evolve.
        cxpb: Crossover probability passed to eaSimple.
        mutpb: Mutation probability passed to eaSimple.
        n_jobs: When greater than 1, fitness evaluations are mapped over a
            multiprocessing pool with that many processes; otherwise the
            built-in ``map`` is used.

    Returns:
        Tuple ``(pop, log, hall_of_fame, elapsed)``: the final population, the
        logbook returned by eaSimple, a HallOfFame of size 3 and the wall-clock
        seconds spent inside eaSimple.
    """
    pop = toolbox.population(n=pop_size)

    pool = multiprocessing.Pool(processes=n_jobs) if n_jobs and n_jobs > 1 else None
    # NOTE(review): the workers train Keras models; whether that is safe depends
    # on the platform's process start method - confirm on the target machine.

    # Always (re)register "map": the toolbox is a module-level object, so a
    # previous parallel run must not leave a closed pool's map behind.
    toolbox.register("map", pool.map if pool is not None else map)
    toolbox.register("evaluate", evaluate_pool_wrapper)

    hall_of_fame = tools.HallOfFame(3)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    finished = False
    try:
        start_time = time()
        pop, log = algorithms.eaSimple(pop, toolbox, cxpb=cxpb, mutpb=mutpb,
                                       ngen=gens, stats=stats, halloffame=hall_of_fame, verbose=True)
        elapsed = time() - start_time
        finished = True
    finally:
        if pool is not None:
            # Shut the workers down even when the search raises or is
            # interrupted; on failure do not wait for in-flight tasks.
            if finished:
                pool.close()
            else:
                pool.terminate()
            pool.join()
            # Put the built-in map back so the shared toolbox stays usable.
            toolbox.register("map", map)
    return pop, log, hall_of_fame, elapsed

In [None]:
# Launch the search with the defaults from the configuration cell.
print("Iniciando GA con pop_size", POP_SIZE, "generations", GENERATIONS, "n_jobs", N_JOBS)
pop, log, hof, elapsed = run_ga()
print("GA terminado en {:.1f}s".format(elapsed))

# Take the first hall-of-fame entry as the best individual and decode it into
# a hyperparameter dict.
best = hof[0]
best_genome = decode_individual(best)
print("Mejor genoma encontrado:\n", best_genome)

# Retrain the best model on TRAIN+VAL combined, with more epochs
X_full_train = np.vstack([X_train, X_val])
y_full_train = np.hstack([y_train, y_val])

# Rebuild the winning architecture from scratch and compile it with the same
# loss selection used during the search.
final_model = build_model_from_genome(best_genome)
optimizer = Adam(learning_rate=best_genome['lr'])
if N_CLASSES == 1:
    final_model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
else:
    final_model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Up to 80 epochs, stopped early on val_accuracy (patience 6, best weights restored).
# NOTE(review): Keras documents validation_split as holding out the last
# fraction of the arrays before shuffling; X_val is stacked last here, so the
# held-out rows would all come from the old validation split - confirm intended.
es_final = EarlyStopping(monitor='val_accuracy', patience=6, restore_best_weights=True, verbose=1)
history_final = final_model.fit(X_full_train, y_full_train,
                                validation_split=0.1,
                                epochs=80,
                                batch_size=best_genome['batch'],
                                callbacks=[es_final],
                                verbose=1)

# Final evaluation on the test set
y_pred_prob = final_model.predict(X_test)
# Single sigmoid output: threshold at 0.5. Softmax output: pick the argmax class.
if N_CLASSES == 1:
    y_pred = (y_pred_prob > 0.5).astype(int).flatten()
else:
    y_pred = np.argmax(y_pred_prob, axis=1)

print("\nAccuracy final en test:", accuracy_score(y_test, y_pred))
print("\nClassification Report (test):\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix (test):\n", confusion_matrix(y_test, y_pred))
# Persist the retrained model, the GA logbook and the best genome.
# (A stray bare name `s` used to sit here and raised NameError, aborting the
# cell before anything was written.)
os.makedirs("neuroevo_results", exist_ok=True)
final_model.save("neuroevo_results/best_model.h5")
pd.DataFrame(log).to_csv("neuroevo_results/logbook.csv", index=False)
with open("neuroevo_results/best_genome.txt", "w") as f:
    f.write(str(best_genome))

# Training vs. validation accuracy per epoch for the final retrain.
fig, ax = plt.subplots(figsize=(8, 4))
for history_key, series_label in (('accuracy', 'train_acc'), ('val_accuracy', 'val_acc')):
    ax.plot(history_final.history[history_key], label=series_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
ax.set_title('Training history - best model (final retrain)')
plt.show()

# Best fitness recorded in the logbook for each generation.
gens = []
maxs = []
for entry in log:
    gens.append(entry['gen'])
    maxs.append(entry['max'])

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(gens, maxs, marker='o')
ax.set_xlabel('Generation')
ax.set_ylabel('Max fitness (val acc)')
ax.set_title('GA progress')
plt.show()

print("Resultados guardados en ./neuroevo_results/")