### Requerimientos

In [1]:
import numpy as np
import heapq 
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import arff
from sklearn.model_selection import KFold, StratifiedKFold # Stratified es mejor para clasificaci√≥n
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from lightgbm import LGBMClassifier
from sklearn.impute import SimpleImputer
from sklearn.base import clone
from sklearn.metrics import accuracy_score, f1_score # M√©tricas de clasificaci√≥n
from sklearn.preprocessing import StandardScaler
import gc

import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", message="X does not have valid feature names")

### Lectura de los datos
#### Input:
  - $file\_path$: Nombre completo con path de la base de datos .arff a cargar
  
#### Output:
  - $X$: Atributos de entrada numéricos
  - $y$: Etiquetas de clase codificadas como enteros (0, 1, 2, ...)

In [2]:
from sklearn.preprocessing import LabelEncoder


# --- CELDA LOAD DATA (ADAPTADA A CLASIFICACI√ìN) ---
def LoadData(file_path):
    """
    Load an .arff classification dataset and return it as numeric arrays.

    The last column is taken as the class label (for files whose path
    contains "wine", a column named 'class' is first moved to the end).
    Rows without a label are dropped, text/categorical features are
    label-encoded, remaining missing feature values are replaced by the
    column mean, and the labels are encoded as integers 0, 1, 2, ...

    Parameters
    ----------
    file_path : str
        Full path of the .arff file to load.

    Returns
    -------
    (X, y) : tuple of numpy arrays, or (None, None) if anything fails
        (the error is printed rather than raised).
    """
    try:
        # 1. Parse the file with liac-arff
        with open(file_path, 'r') as f:
            dataset = arff.load(f)

        col_names = [attr[0] for attr in dataset['attributes']]
        df = pd.DataFrame(dataset['data'], columns=col_names)

        # Missing values arrive as None; normalise them to NaN
        df.replace([None], np.nan, inplace=True)

        filename = file_path.lower()

        # --- DATASET-SPECIFIC FIXES ---
        # WINE: 'class' is sometimes the first column; move it to the end.
        if 'wine' in filename and 'class' in df.columns:
            if df.columns[-1] != 'class':
                cols = [c for c in df.columns if c != 'class'] + ['class']
                df = df[cols]

        # 2. Drop unlabeled rows BEFORE any statistic is computed, so rows
        #    that are discarded anyway cannot influence the imputation means
        #    or the category encodings.
        df = df[df.iloc[:, -1].notnull()].reset_index(drop=True)

        # 3. Split features (X) and target (y); the class is the LAST column.
        #    .copy() makes X an independent frame, so the column assignments
        #    below never write through a slice of df.
        X = df.iloc[:, :-1].copy()
        y = df.iloc[:, -1]

        # 4. Encode text/categorical feature columns as integers.
        #    NOTE: missing values in these columns become the string 'nan'
        #    and are therefore encoded as a category of their own.
        cat_cols = X.select_dtypes(include=['object', 'category']).columns
        for col in cat_cols:
            # A fresh encoder per column keeps the fitted state independent
            X[col] = LabelEncoder().fit_transform(X[col].astype(str))

        # 5. Mean-impute whatever is still missing in X
        if X.isnull().sum().sum() > 0:
            imputer = SimpleImputer(strategy='mean')
            X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

        # 6. Encode the target labels as integers (0, 1, 2, ...)
        y = LabelEncoder().fit_transform(y.astype(str))

        return np.array(X), np.array(y)

    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip
        # this dataset instead of aborting the whole experiment.
        print(f"‚ùå Error cargando {file_path}: {e}")
        return None, None

### Parámetros del algoritmo evolutivo
- $G$: Número de generaciones
- $N$: Tamaño de la población
- $p\_c$: Probabilidad de cruce
- $p\_m$: Probabilidad de mutación
- $random\_state$: Semilla para reproducibilidad
- $X,y$: Datos
- $Phi$: Algoritmos de aprendizaje

In [3]:
import os
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Nuevos imports para clasificaci√≥n
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# --- 1. GENETIC PARAMETERS ---
G = 15          # Number of generations (kept small to shorten runs)
N = 20          # Population size (kept small to shorten runs; EA requires it to be even)
p_c = 0.7       # Crossover probability
p_m = 0.1       # Mutation probability

# --- 2. PATHS ---
DATA_DIR = "../data/classification"
# Output folder for the classification experiments
RESULTS_DIR = "../results/classification"
os.makedirs(RESULTS_DIR, exist_ok=True)
# One sub-folder per voting mode
RESULTS_SOFT_DIR = os.path.join(RESULTS_DIR, 'soft')
RESULTS_HARD_DIR = os.path.join(RESULTS_DIR, 'hard')
os.makedirs(RESULTS_SOFT_DIR, exist_ok=True)
os.makedirs(RESULTS_HARD_DIR, exist_ok=True)

# Dataset display name -> path of its .arff file
files = {
    'BreastW':          os.path.join(DATA_DIR, 'breastw.arff'),
    'Wine':          os.path.join(DATA_DIR, 'wine.arff'),
    'Glass': os.path.join(DATA_DIR, 'glass.arff'),
    'Sonar':         os.path.join(DATA_DIR, 'sonar.arff'),
    'Ionosphere':    os.path.join(DATA_DIR, 'ionosphere.arff')
}
'''

# --- 3. MODELOS BASE OPTIMIZADOS (Velocidad) ---
def get_base_models(random_state=1):
    return [
        # 1. Random Forest Classifier "Frenado":
        # - max_depth=10: Evita que el √°rbol crezca infinito y memorice datos.
        # - max_features='sqrt': Ayuda a generalizar mejor.
        RandomForestClassifier(n_estimators=30, max_depth=10, max_features='sqrt', n_jobs=-1, random_state=random_state),

        # 2. SVC Suave (Equivalente a SVR):
        # - probability=True: IMPRESCINDIBLE para Soft Voting.
        # - C=10.0: Valor intermedio.
        make_pipeline(StandardScaler(), SVC(probability=True, C=10.0, kernel='rbf', random_state=random_state)),

        # 3. Logistic Regression (Equivalente a Ridge):
        # - Modelo lineal robusto para clasificaci√≥n.
        make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, random_state=random_state))
    ]
'''
def get_base_models(random_state=42, pool_size=30):
    """
    Build a diverse pool of unfitted base classifiers for the EA to pick from.

    The pool contains k-NN, SVC (RBF and linear kernels), Random Forest,
    Extra Trees, Logistic Regression and Gaussian Naive Bayes models; the
    scale-sensitive ones are wrapped in a pipeline with a StandardScaler.

    Parameters
    ----------
    random_state : int
        Seed handed to every stochastic model and used for the optional
        sub-sampling of the pool.
    pool_size : int
        Maximum number of models returned. If more models were generated,
        ``pool_size`` of them are drawn at random without replacement.

    Returns
    -------
    list
        The (unfitted) estimators.
    """
    models = []
    # Local generator: same draws as seeding the global NumPy RNG with
    # `random_state`, but without resetting global random state as a hidden
    # side effect of merely building the pool.
    rng = np.random.RandomState(random_state)

    # 1. k-NN - captures local structure; varying k changes noise sensitivity
    for k in [1, 3, 5, 7, 9]:
        models.append(make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k)))

    # 2. Support Vector Machines with different kernels.
    #    probability=True is required for 'soft' voting (predict_proba).
    for C in [1, 10, 50]:
        # RBF kernel
        models.append(make_pipeline(StandardScaler(), SVC(C=C, kernel='rbf', probability=True, random_state=random_state)))
        # Linear kernel
        models.append(make_pipeline(StandardScaler(), SVC(C=C, kernel='linear', probability=True, random_state=random_state)))

    # 3. Random Forests with limited depth to curb individual overfitting
    for n_est in [10, 50]:
        models.append(RandomForestClassifier(n_estimators=n_est, max_depth=5, max_features='sqrt', random_state=random_state))
        models.append(RandomForestClassifier(n_estimators=n_est, max_depth=10, max_features='log2', random_state=random_state+1))

    # 4. Extra Trees
    models.append(ExtraTreesClassifier(n_estimators=50, max_depth=8, min_samples_split=5, random_state=random_state))

    # 5. Simple linear models (Logistic Regression)
    models.append(make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, C=0.5, random_state=random_state)))
    models.append(make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, C=1.0, random_state=random_state)))

    # 6. Naive Bayes
    models.append(make_pipeline(StandardScaler(), GaussianNB()))

    # Random sub-selection when more models were generated than requested
    if len(models) > pool_size:
        indices = rng.choice(len(models), pool_size, replace=False)
        return [models[i] for i in indices]

    return models

# Build one pool with the default seed; the prints below only report its
# size and where results will be written.
pool = get_base_models()
print(f"‚úÖ Pool generado con {len(pool)} modelos diversos.")


print("‚úÖ Configuraci√≥n cargada.")
print(f"   Modelos optimizados para velocidad.")
print(f"   Resultados ir√°n a: {os.path.abspath(RESULTS_DIR)}")

‚úÖ Pool generado con 19 modelos diversos.
‚úÖ Configuraci√≥n cargada.
   Modelos optimizados para velocidad.
   Resultados ir√°n a: C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification


### Definición del problema de optimización

$\textit{Minimizar} \ f(\textbf{x}) = 1 - Accuracy(\textbf{x},D_{train},D_{val},\Phi)$ (equivalente a maximizar la accuracy)
- $\textbf{x}=(\textbf{matriz},\textbf{vector})$ es el vector de variables de decisión, donde:
  - $matriz_{j,i}\in\{0,1\}$ indica si, para el algoritmo de aprendizaje $i$, se ha seleccionado el atributo $j$.
  - $vector_i\in\{0,1\}$ indica si el algoritmo de aprendizaje $i$ se ha seleccionado
- $D_{train}=(X_{train},y_{train})$: Conjunto de datos de entrenamiento
- $D_{val}=(X_{val},y_{val})$: Conjunto de datos de validación
- $\Phi$: Algoritmos de aprendizaje

In [4]:
from sklearn.metrics import accuracy_score
from sklearn.base import clone
from scipy.stats import mode as stats_mode
from sklearn.model_selection import StratifiedKFold

class Problem:
    """
    Ensemble-selection problem scored by stratified cross-validation.

    A candidate solution is a pair ``(matriz, vector)``:
      * ``vector[i] == 1`` activates base learner ``Phi[i]``;
      * ``matriz[j, i] == 1`` lets learner ``i`` use feature ``j``
        (a learner with no feature selected falls back to all features).

    The fitness ``f`` is the mean ensemble error rate (1 - accuracy) over
    the pre-computed folds, so lower is better.
    """

    def __init__(self, X_train, y_train, Phi, voting='soft', n_splits=3):
        """
        Parameters
        ----------
        X_train, y_train : array-like
            Data used for the internal cross-validation.
        Phi : list
            Unfitted base estimators (cloned before every fit).
        voting : str
            'soft' sums predicted probabilities; any other value uses
            majority ('hard') voting.
        n_splits : int
            Number of stratified folds.
        """
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)
        self.Phi = Phi
        self.voting = voting
        self.n_splits = n_splits
        self.n = self.X_train.shape[1]   # number of features
        self.m = len(self.Phi)           # number of base learners
        self.classes = np.unique(self.y_train)  # sorted distinct labels
        self.n_classes = len(self.classes)
        # Fixed seed: every individual is scored on exactly the same folds
        self.skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        self.folds = list(self.skf.split(self.X_train, self.y_train))

    def f(self, matriz, vector):
        """Return the mean CV error of the ensemble; 1.0 if no learner is active."""
        if np.sum(vector) == 0:
            return 1.0
        errors = [
            self._evaluate_ensemble(
                matriz, vector,
                self.X_train[train_idx], self.y_train[train_idx],
                self.X_train[val_idx], self.y_train[val_idx],
            )
            for train_idx, val_idx in self.folds
        ]
        return np.mean(errors)

    def _fit_member(self, matriz, index, X_t, y_t):
        """Fit a fresh clone of learner ``index`` on its selected features.

        Returns the fitted model and the column indices it was trained on.
        """
        cols = np.flatnonzero(matriz[:, index])
        if len(cols) == 0:
            cols = np.arange(self.n)
        model = clone(self.Phi[index])
        model.fit(X_t[:, cols], y_t)
        return model, cols

    def _evaluate_ensemble(self, matriz, vector, X_t, y_t, X_v, y_v):
        """Train the active learners on (X_t, y_t); return 1 - accuracy on (X_v, y_v)."""
        vector_index = np.flatnonzero(vector)
        if self.voting == 'soft':
            probas_sum = np.zeros((X_v.shape[0], self.n_classes))
            for index in vector_index:
                model, cols = self._fit_member(matriz, index, X_t, y_t)
                probas = model.predict_proba(X_v[:, cols])
                # predict_proba columns follow model.classes_, which omits any
                # class absent from this training split. Scatter them into the
                # columns of the full class list instead of assuming that the
                # shapes match. (Assumes every label in y_t also occurs in the
                # y_train given to the constructor.)
                positions = np.searchsorted(self.classes, model.classes_)
                probas_sum[:, positions] += probas
            # Map the winning column back to its label rather than assuming
            # that labels coincide with column indices.
            final_pred = self.classes[np.argmax(probas_sum, axis=1)]
        else:
            votes = np.array([
                model.predict(X_v[:, cols])
                for model, cols in (
                    self._fit_member(matriz, index, X_t, y_t)
                    for index in vector_index
                )
            ])
            # Majority vote per validation sample (axis 0 runs over learners)
            final_pred, _ = stats_mode(votes, axis=0, keepdims=False)
            if final_pred.ndim > 1:
                final_pred = final_pred.flatten()
        return 1.0 - accuracy_score(y_v, final_pred)

    def evaluate_on(self, matriz, vector, X_t, y_t, X_v, y_v):
        """Score the ensemble on an explicit train/evaluation split (e.g. the test set)."""
        return self._evaluate_ensemble(matriz, vector, np.asarray(X_t), np.asarray(y_t), np.asarray(X_v), np.asarray(y_v))

### Definición de individuo

In [5]:
class Individuo:
    """Candidate solution of the EA: a feature mask per learner plus a learner mask."""

    def __init__(self, problem):
        n_features, n_models = problem.n, problem.m
        # Fitness placeholder; overwritten once the individual is evaluated
        self.f = 0.0
        # vector[i] == 1 -> base learner i is part of the ensemble
        self.vector = np.zeros(n_models, dtype=int)
        # matriz[j, i] == 1 -> feature j is used by base learner i
        self.matriz = np.zeros((n_features, n_models), dtype=int)

    def __lt__(self, ind):
        # Ordering by fitness lets min() / heapq rank individuals directly
        return ind.f > self.f

### Inicialización de la población

In [6]:
def InitializePopulation(P):
    """Overwrite, in place, every individual of ``P`` with uniformly random bits.

    For each individual the feature matrix is drawn first and the learner
    vector second, both from NumPy's global random generator.
    """
    for individual in P:
        random_features = np.random.randint(0, 2, size=individual.matriz.shape)
        individual.matriz[:] = random_features
        random_learners = np.random.randint(0, 2, size=individual.vector.size)
        individual.vector[:] = random_learners

### Función de reparación

In [7]:
def repair(I):
    """Make individual ``I`` feasible, modifying it in place.

    A feasible individual has at least two active base learners and at
    least one selected feature for every active learner.
    """
    n_features, n_models = I.matriz.shape
    active_count = I.vector.sum()

    # --- 1) guarantee >= 2 active base learners ---
    if active_count == 0:
        chosen = np.random.choice(n_models, 2, replace=False)
        I.vector[:] = 0
        I.vector[chosen] = 1
    elif active_count == 1:
        only_active = np.flatnonzero(I.vector)[0]
        candidates = np.delete(np.arange(n_models), only_active)
        I.vector[np.random.choice(candidates)] = 1

    # --- 2) guarantee >= 1 feature for every active learner ---
    active = np.flatnonzero(I.vector)
    features_per_learner = I.matriz[:, active].sum(axis=0)
    starving = active[features_per_learner == 0]
    if len(starving) == 0:
        return

    # One random feature row for each learner that had none
    rows = np.random.randint(0, n_features, size=len(starving))
    I.matriz[rows, starving] = 1


### Función de evaluación

In [8]:
def evaluate(I, problem):
    """Compute the fitness of ``I`` with ``problem.f`` and store it in ``I.f``."""
    fitness = problem.f(I.matriz, I.vector)
    I.f = fitness

### Copia de individuo

In [9]:
def copia(I1, I2):
    """Copy the genotype and fitness of ``I2`` into ``I1``.

    The arrays of ``I1`` are overwritten element-wise, so the two
    individuals never end up sharing storage.
    """
    I1.f = I2.f
    I1.vector[...] = I2.vector
    I1.matriz[...] = I2.matriz

### Selección por torneo binario

In [10]:
def binary_tournament_selection(P):
    """Draw two distinct members of ``P`` at random and return the smaller one.

    Members are compared with ``<``; on a tie the first one drawn wins.
    """
    first, second = np.random.choice(P, 2, replace=False)
    return second if second < first else first

### Cruce
- Cruce uniforme con probabilidad $p\_c$

In [11]:
def crossover(I1, I2, p_c):
    """Uniform crossover between ``I1`` and ``I2``, applied in place.

    With probability ``p_c`` every gene (first the learner vector, then the
    feature matrix in row-major order) is swapped between the two
    individuals with probability 0.5. Otherwise both are left untouched.
    """
    if not np.random.random() <= p_c:
        return

    for gene in range(I1.vector.size):
        if np.random.random() <= 0.5:
            kept = I1.vector[gene]
            I1.vector[gene] = I2.vector[gene]
            I2.vector[gene] = kept

    n_rows, n_cols = I1.matriz.shape[0], I1.matriz.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            if np.random.random() <= 0.5:
                kept = I1.matriz[row, col]
                I1.matriz[row, col] = I2.matriz[row, col]
                I2.matriz[row, col] = kept

### Mutación
- Mutación uniforme con probabilidad $p\_m$

In [12]:
def mutation(I, p_m):
    """Uniform mutation of ``I``, applied in place.

    Each gene (learner vector first, then the feature matrix in row-major
    order) is, with probability ``p_m``, replaced by a freshly drawn random
    bit. The new bit may equal the old one, so a mutated gene does not
    necessarily change value.
    """
    for gene in range(I.vector.size):
        if np.random.random() <= p_m:
            I.vector[gene] = np.random.randint(2)

    n_rows, n_cols = I.matriz.shape[0], I.matriz.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            if np.random.random() <= p_m:
                I.matriz[row, col] = np.random.randint(2)

### Progreso del algoritmo

In [13]:
def IniciaProgreso(best_individuo, G):
    """Create the fitness history and the progress bar, and record generation 0.

    Parameters
    ----------
    best_individuo : best individual of the initial population.
    G : number of generations the EA will run.

    Returns
    -------
    (best_outputs, progression_bar): a list with G+1 slots (slot 0 already
    filled) and the tqdm bar to pass to ActualizaProgreso.
    """
    best_outputs = [0]*(G+1)  # best fitness per generation, for the final plot
    # The bar is advanced once here (generation 0) and once per generation,
    # i.e. G+1 times in total; a total of G would overshoot on the last update.
    progression_bar = tqdm(total=G+1, leave=False)
    ActualizaProgreso(best_individuo, 0, best_outputs, progression_bar)
    return best_outputs, progression_bar
    
def ActualizaProgreso(best_individuo, gen, best_outputs, progression_bar):
    """Record the best fitness of generation ``gen`` and advance the bar by one.

    ``best_outputs[gen]`` is overwritten with ``best_individuo.f`` (the error
    rate, 1 - accuracy) and the same value is shown in the bar description.
    """
    best_fitness = best_individuo.f
    progression_bar.set_description("Gen: %d | Error (1-Acc): %.4f" % (gen, best_fitness))
    best_outputs[gen] = best_fitness  # kept for the evolution plot
    progression_bar.update(1)

### Algoritmo evolutivo

In [14]:
def EA(G, N, p_c, p_m, X, y, Phi, random_state, voting='soft'):
    """
    Run the (mu + lambda) evolutionary algorithm that selects base learners
    and per-learner feature subsets.

    Parameters
    ----------
    G : int
        Number of generations.
    N : int
        Population size (must be even and >= 2).
    p_c, p_m : float
        Crossover and mutation probabilities, both in [0, 1].
    X, y : array-like
        Data available to the algorithm.
    Phi : list
        Unfitted base estimators.
    random_state : int
        Seed for the internal split and for NumPy's global generator.
    voting : str
        Voting mode forwarded to ``Problem`` ('soft' or 'hard').

    Returns
    -------
    (best_outputs, best_f, best_individuo): best fitness per generation
    (index 0 is the initial population), the final best fitness (error rate,
    lower is better) and the best individual itself.
    """
    # Sanity checks
    assert N >= 2 and not N % 2, "El tama√±o de la poblaci√≥n debe ser par y mayor que 1."
    assert 0.0 <= p_c <= 1.0, "La probablidad de cruce debe estar entre 0 y 1."
    assert 0.0 <= p_m <= 1.0, "La probablidad de mutaci√≥n debe estar entre 0 y 1."

    # ------- STRATIFIED SPLIT -------
    # stratify=y keeps the class proportions equal in both parts.
    # NOTE(review): the 20% hold-out part is never used below, because the
    # fitness is computed by cross-validation inside Problem on the 80% part
    # only. Kept as is so results stay comparable with earlier runs; consider
    # passing (X, y) straight to Problem.
    X_train, _X_val, y_train, _y_val = train_test_split(
        X, y,
        test_size=0.2,
        random_state=random_state,
        shuffle=True,
        stratify=y
    )

    # Problem instance (folds are precomputed; voting mode is forwarded)
    problem = Problem(X_train, y_train, Phi, voting=voting)

    np.random.seed(random_state)

    # Initial population of N individuals
    P = [Individuo(problem) for _ in range(N)]
    InitializePopulation(P)

    # Repair and evaluate the initial population
    for I in P:
        repair(I)
        evaluate(I, problem)

    # Auxiliary population of size 2N (parents + offspring)
    Q = [Individuo(problem) for _ in range(2*N)]

    # Progress bar
    best_outputs, progression_bar = IniciaProgreso(min(P), G)

    try:
        # --- EVOLUTIONARY LOOP ---
        for gen in range(1, G+1):

            # 1. Copy the parents into the first half of Q
            for i in range(N):
                copia(Q[i], P[i])

            # 2. Create the offspring, two at a time, in the second half of Q
            #    (N is even, so i + 1 never runs past the end)
            for i in range(N, 2*N, 2):
                # Selection
                p1 = binary_tournament_selection(P)
                p2 = binary_tournament_selection(P)

                copia(Q[i],   p1)
                copia(Q[i+1], p2)

                # Variation operators
                crossover(Q[i],   Q[i+1], p_c)
                mutation(Q[i],    p_m)
                mutation(Q[i+1],  p_m)

                # Repair and evaluation
                repair(Q[i])
                repair(Q[i+1])

                evaluate(Q[i],   problem)
                evaluate(Q[i+1], problem)

            # 3. Elitist (mu + lambda) survival: keep the N best of Q.
            #    nsmallest because the fitness is an error to be minimised;
            #    it returns them sorted, so P[0] is the best afterwards.
            R = heapq.nsmallest(N, Q)
            for i in range(N):
                copia(P[i], R[i])

            # 4. Update the progress display
            ActualizaProgreso(P[0], gen, best_outputs, progression_bar)
    finally:
        # Always release the bar, even if an evaluation raises
        progression_bar.close()

    # min(P) is P[0] after any generation has run (P is then sorted), and it
    # is still the true best when G == 0 and P was never sorted.
    best_individuo = min(P)
    return best_outputs, best_individuo.f, best_individuo

### Ejecución del algoritmo evolutivo

In [15]:
 import pandas as pd
import time
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.base import clone

# --- GLOBAL DICTIONARY: best run per dataset and voting mode ---
# Keys are "<dataset>_<mode>"; each value holds everything needed to
# re-create and inspect that run later.
best_runs_per_dataset = {}

for name, path in files.items():
    print(f"\n{'='*60}")
    print(f"üîµ PROCESANDO: {name}")
    print(f"{'='*60}")

    if not os.path.exists(path):
        print(f"‚ö†Ô∏è Archivo no encontrado: {path}")
        continue

    X, y = LoadData(path)
    if X is None:
        continue

    # One result list per voting mode
    results_soft = []
    results_hard = []
    results_by_mode = {'soft': results_soft, 'hard': results_hard}

    # Lowest test error seen so far for each mode
    min_error_by_mode = {'soft': float('inf'), 'hard': float('inf')}

    # Seed loop (0 to 9)
    for seed in range(10):
        print(f"   üå± Semilla {seed}/9...", end=" ")

        # A) Global train/test split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=seed, stratify=y
        )

        # B) Base models (shared by both voting modes)
        Phi = get_base_models(random_state=seed)

        # C) BASELINE: every base model alone, on all features.
        # NOTE(review): the "best baseline" is picked by its TEST error, so it
        # is an optimistic reference, not something selectable in practice.
        baseline_errors = []
        for model in Phi:
            try:
                m_base = clone(model)
                m_base.fit(X_train, y_train)
                pred_base = m_base.predict(X_test)
                baseline_errors.append(1.0 - accuracy_score(y_test, pred_base))
            except Exception as exc:
                # A failing model is excluded from the baseline instead of
                # aborting the run, but the failure is no longer silent and
                # Ctrl-C / SystemExit are no longer swallowed.
                warnings.warn(f"Modelo base {type(model).__name__} no pudo evaluarse: {exc}")
                baseline_errors.append(float('inf'))

        best_base_err = min(baseline_errors)
        best_base_acc = 1.0 - best_base_err

        # --- D) VOTING MODES (SOFT and HARD) ---
        modes = ['soft', 'hard']

        for mode in modes:
            start_t = time.time()

            # 1. Run the EA with this voting mode
            best_outputs, best_f, best_solution = EA(G, N, p_c, p_m, X_train, y_train, Phi, random_state=seed, voting=mode)

            # 2. Final evaluation: retrain on the full training split and
            #    score on the held-out test split
            problem_test = Problem(X_train, y_train, Phi, voting=mode, n_splits=3)
            test_error = problem_test.evaluate_on(best_solution.matriz, best_solution.vector, X_train, y_train, X_test, y_test)
            test_acc = 1.0 - test_error

            elapsed = time.time() - start_t

            # 3. Store the result in the list of its mode
            res_dict = {
                'Dataset': name,
                'Semilla': seed,
                'Mode': mode,
                'Error_EA_Test': test_error,
                'Accuracy_EA_Test': test_acc,
                'Error_Best_Baseline': best_base_err,
                'Accuracy_Best_Baseline': best_base_acc,
                'Error_EA_Val': best_f,
                'N_Modelos': np.sum(best_solution.vector),
                'N_Features': np.sum(best_solution.matriz),
                'Time_s': elapsed
            }
            results_by_mode[mode].append(res_dict)

            # Remember the run with the lowest test error for this mode
            if test_error < min_error_by_mode[mode]:
                min_error_by_mode[mode] = test_error
                best_runs_per_dataset[f"{name}_{mode}"] = {
                    'seed': seed, 'solution': best_solution, 'outputs': best_outputs, 'Phi': Phi,
                    'X_train': X_train, 'y_train': y_train, 'X_test': X_test, 'y_test': y_test
                }

        # Per-seed summary
        soft_err = results_soft[-1]['Error_EA_Test']
        hard_err = results_hard[-1]['Error_EA_Test']
        print(f"-> Soft Err: {soft_err:.4f} | Hard Err: {hard_err:.4f} | Base Err: {best_base_err:.4f}")

    # --- SAVE TWO CSV FILES PER DATASET ---
    # Soft voting
    path_soft = os.path.join(RESULTS_SOFT_DIR, f"{name}_soft_results.csv")
    pd.DataFrame(results_soft).to_csv(path_soft, index=False)

    # Hard voting
    path_hard = os.path.join(RESULTS_HARD_DIR, f"{name}_hard_results.csv")
    pd.DataFrame(results_hard).to_csv(path_hard, index=False)

    print(f"‚úÖ Guardados en subcarpetas: {os.path.abspath(path_soft)} y {os.path.abspath(path_hard)}")

print(f"\nüöÄ ¬°EXPERIMENTO DE CLASIFICACI√ìN (SOFT & HARD) COMPLETADO!")


üîµ PROCESANDO: BreastW
   üå± Semilla 0/9... 

                                                                                

-> Soft Err: 0.0286 | Hard Err: 0.0429 | Base Err: 0.0214
   üå± Semilla 1/9... 

                                                                                

-> Soft Err: 0.0429 | Hard Err: 0.0357 | Base Err: 0.0429
   üå± Semilla 2/9... 

                                                                                

-> Soft Err: 0.0357 | Hard Err: 0.0286 | Base Err: 0.0143
   üå± Semilla 3/9... 

                                                                                

-> Soft Err: 0.0500 | Hard Err: 0.0571 | Base Err: 0.0500
   üå± Semilla 4/9... 

                                                                                

-> Soft Err: 0.0500 | Hard Err: 0.0429 | Base Err: 0.0357
   üå± Semilla 5/9... 

                                                                                

-> Soft Err: 0.0429 | Hard Err: 0.0714 | Base Err: 0.0429
   üå± Semilla 6/9... 

                                                                                

-> Soft Err: 0.0357 | Hard Err: 0.0357 | Base Err: 0.0214
   üå± Semilla 7/9... 

                                                                                

-> Soft Err: 0.0143 | Hard Err: 0.0286 | Base Err: 0.0071
   üå± Semilla 8/9... 

                                                                                

-> Soft Err: 0.0500 | Hard Err: 0.0357 | Base Err: 0.0286
   üå± Semilla 9/9... 

                                                                                

-> Soft Err: 0.0500 | Hard Err: 0.0500 | Base Err: 0.0357
‚úÖ Guardados en subcarpetas: C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\soft\BreastW_soft_results.csv y C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\hard\BreastW_hard_results.csv

üîµ PROCESANDO: Wine
   üå± Semilla 0/9... 

                                                                                

-> Soft Err: 0.0000 | Hard Err: 0.0000 | Base Err: 0.0000
   üå± Semilla 1/9... 

                                                                                

-> Soft Err: 0.0000 | Hard Err: 0.0000 | Base Err: 0.0000
   üå± Semilla 2/9... 

                                                                                

-> Soft Err: 0.0000 | Hard Err: 0.0000 | Base Err: 0.0000
   üå± Semilla 3/9... 

                                                                                

-> Soft Err: 0.0000 | Hard Err: 0.0000 | Base Err: 0.0000
   üå± Semilla 4/9... 

                                                                                

-> Soft Err: 0.0000 | Hard Err: 0.0000 | Base Err: 0.0000
   üå± Semilla 5/9... 

                                                                                

-> Soft Err: 0.0000 | Hard Err: 0.0278 | Base Err: 0.0000
   üå± Semilla 6/9... 

                                                                                

-> Soft Err: 0.0278 | Hard Err: 0.0556 | Base Err: 0.0278
   üå± Semilla 7/9... 

                                                                                

-> Soft Err: 0.0278 | Hard Err: 0.0556 | Base Err: 0.0000
   üå± Semilla 8/9... 

                                                                                

-> Soft Err: 0.0556 | Hard Err: 0.0000 | Base Err: 0.0000
   üå± Semilla 9/9... 

                                                                                

-> Soft Err: 0.0000 | Hard Err: 0.0000 | Base Err: 0.0000
‚úÖ Guardados en subcarpetas: C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\soft\Wine_soft_results.csv y C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\hard\Wine_hard_results.csv

üîµ PROCESANDO: Glass
   üå± Semilla 0/9... 

                                                                                

-> Soft Err: 0.1860 | Hard Err: 0.2791 | Base Err: 0.1860
   üå± Semilla 1/9... 

                                                                                

-> Soft Err: 0.2558 | Hard Err: 0.3256 | Base Err: 0.2791
   üå± Semilla 2/9... 

                                                                                

-> Soft Err: 0.2326 | Hard Err: 0.2558 | Base Err: 0.2093
   üå± Semilla 3/9... 

                                                                                

-> Soft Err: 0.2558 | Hard Err: 0.2326 | Base Err: 0.1628
   üå± Semilla 4/9... 

                                                                                

-> Soft Err: 0.3023 | Hard Err: 0.3256 | Base Err: 0.2326
   üå± Semilla 5/9... 

                                                                                

-> Soft Err: 0.2558 | Hard Err: 0.3023 | Base Err: 0.2791
   üå± Semilla 6/9... 

                                                                                

-> Soft Err: 0.3256 | Hard Err: 0.3023 | Base Err: 0.2326
   üå± Semilla 7/9... 

                                                                                

-> Soft Err: 0.2558 | Hard Err: 0.3256 | Base Err: 0.2558
   üå± Semilla 8/9... 

                                                                                

-> Soft Err: 0.2093 | Hard Err: 0.3721 | Base Err: 0.1860
   üå± Semilla 9/9... 

                                                                                

-> Soft Err: 0.2558 | Hard Err: 0.2791 | Base Err: 0.2558
‚úÖ Guardados en subcarpetas: C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\soft\Glass_soft_results.csv y C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\hard\Glass_hard_results.csv

üîµ PROCESANDO: Sonar
   üå± Semilla 0/9... 

                                                                                

-> Soft Err: 0.1667 | Hard Err: 0.1667 | Base Err: 0.0952
   üå± Semilla 1/9... 

                                                                                

-> Soft Err: 0.1667 | Hard Err: 0.1190 | Base Err: 0.0952
   üå± Semilla 2/9... 

                                                                                

-> Soft Err: 0.0952 | Hard Err: 0.0476 | Base Err: 0.0476
   üå± Semilla 3/9... 

                                                                                

-> Soft Err: 0.1190 | Hard Err: 0.2143 | Base Err: 0.1190
   üå± Semilla 4/9... 

                                                                                

-> Soft Err: 0.2381 | Hard Err: 0.2381 | Base Err: 0.1429
   üå± Semilla 5/9... 

                                                                                

-> Soft Err: 0.1429 | Hard Err: 0.1429 | Base Err: 0.0476
   üå± Semilla 6/9... 

                                                                                

-> Soft Err: 0.1905 | Hard Err: 0.2143 | Base Err: 0.1429
   üå± Semilla 7/9... 

                                                                                

-> Soft Err: 0.1667 | Hard Err: 0.1190 | Base Err: 0.1190
   üå± Semilla 8/9... 

                                                                                

-> Soft Err: 0.1905 | Hard Err: 0.1667 | Base Err: 0.0952
   üå± Semilla 9/9... 

                                                                                

-> Soft Err: 0.1667 | Hard Err: 0.2143 | Base Err: 0.1667
‚úÖ Guardados en subcarpetas: C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\soft\Sonar_soft_results.csv y C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\hard\Sonar_hard_results.csv

üîµ PROCESANDO: Ionosphere
   üå± Semilla 0/9... 

                                                                                

-> Soft Err: 0.0845 | Hard Err: 0.0986 | Base Err: 0.0563
   üå± Semilla 1/9... 

                                                                                

-> Soft Err: 0.0563 | Hard Err: 0.0704 | Base Err: 0.0282
   üå± Semilla 2/9... 

                                                                                

-> Soft Err: 0.0423 | Hard Err: 0.0282 | Base Err: 0.0141
   üå± Semilla 3/9... 

                                                                                

-> Soft Err: 0.0423 | Hard Err: 0.0282 | Base Err: 0.0282
   üå± Semilla 4/9... 

                                                                                

-> Soft Err: 0.0704 | Hard Err: 0.0704 | Base Err: 0.0423
   üå± Semilla 5/9... 

                                                                                

-> Soft Err: 0.0423 | Hard Err: 0.0423 | Base Err: 0.0000
   üå± Semilla 6/9... 

                                                                                

-> Soft Err: 0.0704 | Hard Err: 0.0704 | Base Err: 0.0423
   üå± Semilla 7/9... 

                                                                                

-> Soft Err: 0.0845 | Hard Err: 0.0563 | Base Err: 0.0704
   üå± Semilla 8/9... 

                                                                                

-> Soft Err: 0.0141 | Hard Err: 0.0282 | Base Err: 0.0141
   üå± Semilla 9/9... 

                                                                                

-> Soft Err: 0.0563 | Hard Err: 0.0704 | Base Err: 0.0563
‚úÖ Guardados en subcarpetas: C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\soft\Ionosphere_soft_results.csv y C:\Users\pepeg\PycharmProjects\TFG-EnsembleMultiobjetivo\results\classification\hard\Ionosphere_hard_results.csv

üöÄ ¬°EXPERIMENTO DE CLASIFICACI√ìN (SOFT & HARD) COMPLETADO!




### Imprimir los algoritmos de aprendizaje seleccionados, los atributos seleccionados para cada algoritmo de aprendizaje y el fitness (tasa de error, 1 - accuracy, estimada por validación cruzada interna)

In [None]:
def PrintSolution(I, Phi):
    """Print which base learners individual ``I`` uses and the features of each.

    Parameters
    ----------
    I : individual exposing ``vector`` (learner mask) and ``matriz``
        (features x learners mask).
    Phi : list of the base estimators the masks refer to.
    """
    # Indices of the active learners (vector == 1)
    Phi_index = np.where(I.vector == 1)[0]

    print(f"   ü§ñ Modelos Seleccionados: {len(Phi_index)} de {len(Phi)}")

    for index in Phi_index:
        # Feature rows this particular learner uses
        attributes_selected = np.flatnonzero(I.matriz[:, index] == 1).tolist()

        # Short model name: the text before the first '(' of its repr
        model_name = str(Phi[index]).split('(')[0]
        if 'Pipeline' in model_name:
            try:
                # For a pipeline, show the class of its final step instead
                model_name = Phi[index].steps[-1][1].__class__.__name__
            except (AttributeError, IndexError, TypeError):
                # Not a pipeline with a usable `steps` list; keep the generic
                # name. Only these lookup failures are tolerated, so Ctrl-C
                # and unrelated errors still surface.
                pass

        print(f"      üîπ {model_name}: Usa {len(attributes_selected)} Features -> {attributes_selected}")

### Visualización de la evolución del algoritmo

In [None]:
def VisualizaEvolucion(best_outputs, title):
    """Plot the best error rate per generation and show the figure.

    Parameters
    ----------
    best_outputs : sequence with the best fitness of every generation.
    title : text appended to the figure title.
    """
    figure, axes = plt.subplots(figsize=(8, 4))
    axes.set_title(f"Evoluci√≥n de Error - {title}")

    line_style = dict(marker='o', markersize=3, linestyle='-', color='tab:red')
    plt.plot(best_outputs, **line_style)

    plt.xlabel("Generaci√≥n")
    plt.ylabel("Tasa de Error (1 - Accuracy)")
    plt.grid(True, alpha=0.3)
    plt.show()

### Resultados en el conjunto de test

In [None]:
# Report, for every stored best run, the selected ensemble, its evolution
# curve and its error on the test split.
for key, data in best_runs_per_dataset.items():
    # Keys look like "<dataset>_<mode>"; split on the LAST underscore
    parts = key.rsplit('_', 1)
    dataset_name = parts[0]
    mode = parts[1] # 'soft' or 'hard'

    print(f"\n{'#'*80}")
    print(f"üìä REPORTE: {dataset_name.upper()} ({mode.upper()} VOTING)")
    print(f"   (Mejor Semilla: {data['seed']})")
    print(f"{'#'*80}\n")

    # 1. Recover the stored objects
    best_solution = data['solution']
    best_outputs = data['outputs']

    # The validation fitness is not stored in the dictionary; it is read
    # from the individual itself.
    best_f_val = best_solution.f

    Phi = data['Phi']
    X_train, y_train = data['X_train'], data['y_train']
    X_test, y_test = data['X_test'], data['y_test']

    # 2. Print the solution (models and features)
    print("--- üß¨ Estructura del Mejor Individuo ---")
    PrintSolution(best_solution, Phi)
    print(f"\nüìâ Error M√≠nimo en Validaci√≥n: {best_f_val:.4f} (Accuracy Est: {1-best_f_val:.4f})")

    # 3. Evolution plot
    VisualizaEvolucion(best_outputs, title=f"{dataset_name} ({mode})")

    # 4. Final check on the test split
    # Re-create fresh models with the same seed as the stored run
    Phi_test = get_base_models(random_state=data['seed'])

    # voting=mode so the ensemble is evaluated the same way it was evolved
    problem_test = Problem(X_train, y_train, Phi_test, voting=mode)

    # Train on the training split, measure the error on the test split
    error_test = problem_test.evaluate_on(best_solution.matriz, best_solution.vector, X_train, y_train, X_test, y_test)
    acc_test = 1.0 - error_test

    print(f"üèÅ RESULTADO FINAL TEST ({dataset_name} - {mode}):")
    print(f"   ‚ùå Tasa de Error: {error_test:.4f}")
    print(f"   ‚úÖ Accuracy:      {acc_test:.4f}")
    print("-" * 80)