## Imports e configuração de paths do projeto.

In [12]:
import os
import json
import time
import math
import random
import inspect
import argparse
from dataclasses import dataclass
from typing import Dict, List, Tuple, Callable, Any
from pathlib import Path
from datetime import datetime
from typing import Optional

import numpy as np

# ------------------------------------------------------------------------------
# Ajuste de PATH para importar o pacote do zip hifdm_optimization
# ------------------------------------------------------------------------------
import sys  # explicit import; `os.sys` only works because `os` happens to import `sys` internally

# Project directories are resolved relative to the notebook's working directory.
HERE = Path(os.getcwd())  # e.g. D:\TCCII\Dados\tcc_rafael
ROOT = HERE.parent  # e.g. D:\TCCII\Dados
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Utilities folder (for the st.py module); inserted last so it ends up ahead of ROOT
UTILS_DIR = ROOT / "hifdm_optimization" / "Utilities"
if str(UTILS_DIR) not in sys.path:
    sys.path.insert(0, str(UTILS_DIR))

# Importações do seu projeto
from hifdm_optimization.MetodoNunes2022.hifdm import hifdm as hifdm_2022_impl
from hifdm_optimization.MetodoNunes2024.hifdm import hifdm as hifdm_2024_impl
from hifdm_optimization.OpenPL4.openPL4 import readPL4, convertType
from hifdm_optimization.Utilities import st as _st  # para DIAG e ranking

# ------------------------------------------------------------------------------
# Configurações gerais
# ------------------------------------------------------------------------------
# Directory holding the .pl4 signal files used by the optimization.
# NOTE(review): absolute, machine-specific path — confirm/adjust before running on another machine.
DATA_DIR = Path(r"D:\TCCII\Dados\hifdm_optimization\data\sinais_para_otimizar_v2")
# Output folder next to the working directory; created (with parents) when this cell runs.
RESULTS_DIR = HERE / "results"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Implementação de Algoritmo Genético

### Método de 2022

In [13]:
'''
Estrutura de Algoritmo Genético (GA)
BEGIN 
    INITIALIZE population
    EVALUATE each candidate in population
    WHILE termination criteria not met DO
        SELECT parents from population
        RECOMBINE parents to produce offspring
        MUTATE offspring
        EVALUATE each candidate in offspring
        SELECT individuals for next generation from population and offspring
    END WHILE
END
'''

'\nEstrutura de Algoritmo Genético (GA)\nBEGIN \n    INITIALIZE population\n    EVALUATE each candidate in population\n    WHILE termination criteria not met DO\n        SELECT parents from population\n        RECOMBINE parents to produce offspring\n        MUTATE offspring\n        EVALUATE each candidate in offspring\n        SELECT individuals for next generation from population and offspring\n    END WHILE\nEND\n'

### Geração da população inicial


In [14]:
# Definição da população inicial (geração 0)

# Bounds (low, high) of the 2022 method parameters.
# Individual layout: [gamma, alpha, beta, beta_diff]
PARAM_BOUNDS_2022: dict[str, Tuple[float, float] | Tuple[int, int]] = {
    "gamma":     (0.0005, 0.0100),  # sensitivity of the differential threshold
    "alpha":     (1.01,   1.20  ),  # factor of the adaptive thresholds
    "beta":      (10,     60    ),  # energy weight (integer)
    "beta_diff": (1.05,   2.50  ),  # weight of the differential threshold
}

# Two useful "seed" configurations, in the same [gamma, alpha, beta, beta_diff] layout
SEEDS_2022: List[List[float]] = [
    [0.0040, 1.05, 40, 1.50],  # seed from the paper
    [0.0035, 1.04, 30, 1.30],  # more sensitive seed
]

def _sample_individual_2022(rng: random.Random) -> List[float]:
    """Draw one random individual ``[gamma, alpha, beta, beta_diff]``.

    Each gene is sampled inside its PARAM_BOUNDS_2022 interval. ``gamma`` is
    rounded to 6 decimals, ``alpha`` and ``beta_diff`` to 3, and ``beta`` is an
    integer drawn with both bounds inclusive.
    """
    bounds = PARAM_BOUNDS_2022

    # The draw order (gamma, alpha, beta, beta_diff) is kept fixed so that a
    # seeded generator always yields the same individuals.
    gamma = round(rng.uniform(*bounds["gamma"]), 6)
    alpha = round(rng.uniform(*bounds["alpha"]), 3)
    beta_lo, beta_hi = (int(limit) for limit in bounds["beta"])
    beta = rng.randint(beta_lo, beta_hi)
    bdiff = round(rng.uniform(*bounds["beta_diff"]), 3)

    return [gamma, alpha, beta, bdiff]


def init_population_2022(
    pop_size: int = 10,
    seed: int | None = 42,
    include_seeds: bool = True,
) -> List[List[float]]:
    """
    Build the initial population (generation 0) for the 2022-method GA.

    - pop_size: number of individuals to return (must be >= 2)
    - seed: seed of the private random generator, for reproducibility
    - include_seeds: put the fixed SEEDS_2022 individuals first, before the
      randomly sampled ones

    Returns a list of unique individuals, each ``[gamma, alpha, beta, beta_diff]``.
    Raises AssertionError when ``pop_size`` is smaller than 2.
    """
    assert pop_size >= 2, "população muito pequena (>=2)"
    rng = random.Random(seed)

    pop: List[List[float]] = []
    seen: set[Tuple[float, float, int, float]] = set()

    def _add_if_new(ind: List[float]) -> None:
        # Individuals are deduplicated on their exact (already rounded) genes.
        key = (ind[0], ind[1], ind[2], ind[3])
        if key not in seen:
            pop.append(ind)
            seen.add(key)

    if include_seeds:
        for s in SEEDS_2022:
            # Never return more than pop_size individuals, even if the seed
            # list grows beyond the requested population size.
            if len(pop) >= pop_size:
                break
            # Seeds get the same rounding as sampled individuals.
            _add_if_new(
                [round(float(s[0]), 6), round(float(s[1]), 3), int(s[2]), round(float(s[3]), 3)]
            )

    # Keep sampling until the population is full; duplicates are discarded.
    while len(pop) < pop_size:
        _add_if_new(_sample_individual_2022(rng))

    return pop

In [15]:
# Quick check of the initial-population generator: prints one line per individual
pop = init_population_2022(pop_size=10, seed=123, include_seeds=True)
for i, ind in enumerate(pop, 1):
    print(f"ind {i:02d}: {ind}")

ind 01: [0.004, 1.05, 40, 1.5]
ind 02: [0.0035, 1.04, 30, 1.3]
ind 03: [0.000997, 1.027, 36, 1.437]
ind 04: [0.008466, 1.176, 34, 1.827]
ind 05: [0.003656, 1.172, 20, 1.246]
ind 06: [0.005829, 1.143, 20, 1.052]
ind 07: [0.004645, 1.027, 48, 1.598]
ind 08: [0.000563, 1.149, 16, 2.363]
ind 09: [0.001381, 1.037, 60, 2.365]
ind 10: [0.003272, 1.092, 40, 1.435]


### Avaliação de cada candidato

#### Funções auxiliares

In [16]:
# ------------------------------------------------------------------------------
# Adaptadores de chamada aos detectores
# ------------------------------------------------------------------------------
@dataclass
class DetectResult:
    """Normalized outcome of a single detector call."""

    detected: bool  # True when the detector reported a trip/detection
    det_cycles: float | None  # cycles until detection (when available)
    meta: Dict[str, Any]  # extra info, e.g. the raw detector output or an error message

# ===== Novo utilitário: amostras por ciclo a partir do vetor de tempo =====
def samples_per_cycle_from_time(sig: Dict[str, Any], default_f0: float = 60.0) -> int:
    """
    Compute the number of samples per electrical cycle, preferring the 'time' vector.

    Resolution order:
      1. ``sig["time"]``: sampling rate taken from the median time step.
      2. ``sig["fs"]``: sampling rate from the metadata.
      3. 128 as the last-resort default.

    ``sig["f0"]`` (or ``default_f0`` when absent) is the fundamental frequency.
    A non-positive ``f0`` cannot define a cycle, so it goes straight to the
    default instead of dividing by zero.
    """
    f0 = float(sig.get("f0", default_f0))

    if f0 > 0:
        # 1) Try the time vector
        t = sig.get("time", None)
        if t is not None:
            t_arr = np.asarray(t, dtype=float)
            # At least two samples are needed to measure a time step.
            if t_arr.size >= 2:
                # The median step ignores occasional outliers at the start/end.
                dt = float(np.median(np.diff(t_arr)))
                if dt > 0:
                    one_cycle_t = int(round((1.0 / dt) / f0))
                    if one_cycle_t > 0:
                        return one_cycle_t

        # 2) Fall back to the metadata sampling rate
        fs_meta = float(sig.get("fs", 0.0))
        if fs_meta > 0:
            one_cycle_meta = int(round(fs_meta / f0))
            if one_cycle_meta > 0:
                return one_cycle_meta

    # 3) Last-resort default
    return 128


# ----------------
# Leitura de PL4
# ----------------
def openpl4(path: str) -> Dict[str, Any]:
    """Read a PL4 file and expose its contents as a flat dictionary.

    The result holds the converted header (``__dfHEAD__``), the raw data matrix
    (``__data__``), the file metadata plus ``filename`` (``__meta__``), one
    ``"TYPE:FROM-TO"`` entry per header row, and the ``time``, ``fs`` and
    ``f0`` entries. ``f0`` is fixed at 60.0.
    """
    header, samples, file_meta = readPL4(path)
    header = convertType(header)

    signals: Dict[str, Any] = {
        "__dfHEAD__": header,
        "__data__": samples,
        "__meta__": {**file_meta, "filename": str(path)},
    }
    # Column 0 of the data matrix is time, so header row i maps to column i + 1
    # (assumes the header index runs 0..n-1 — TODO confirm against readPL4).
    signals.update(
        (f"{row['TYPE']}:{row['FROM']}-{row['TO']}", samples[:, pos + 1])
        for pos, row in header.iterrows()
    )
    signals["time"] = samples[:, 0]
    signals["fs"] = 1.0 / file_meta["deltat"]
    signals["f0"] = 60.0
    return signals

# ---------------------------------------------
# Dataset (assume estrutura do seu repositório)
# ---------------------------------------------
def iter_dataset_pl4(data_dir: Path) -> List[Tuple[Path, int]]:
    """List every ``.pl4`` file of the dataset together with its label.

    Files under the fault folders (FAI, FAI_com_forno, FAI_com_gd,
    FAI_retificador) are labelled 1 and searched recursively; missing folders
    are skipped. Files directly inside ``NFAI`` are labelled 0 (this search is
    not recursive). Positives come first in the returned list.
    """
    root = Path(data_dir)
    fault_dirs = (
        root / name
        for name in ("FAI", "FAI_com_forno", "FAI_com_gd", "FAI_retificador")
    )
    labelled: List[Tuple[Path, int]] = []
    for folder in fault_dirs:
        if folder.exists():
            labelled.extend((pl4, 1) for pl4 in folder.rglob("*.pl4"))
    labelled.extend((pl4, 0) for pl4 in (root / "NFAI").glob("*.pl4"))
    return labelled

# -----------------------------------------------------------
# Descoberta de canais e seleção automática (método 2022)
# -----------------------------------------------------------
def _list_phase_currents(sig: Dict[str, Any]) -> List[str]:
    """
    Lista chaves de corrente de fase do tipo:
      - 'I-bran:BUSxA-MEDyA' / ...B / ...C
      - Fallback: chaves 'IA','IB','IC' se existirem
    """
    keys = []
    for k in sig.keys():
        if not isinstance(k, str):
            continue
        if k.startswith("I-bran:") and (k.endswith("A") or k.endswith("B") or k.endswith("C")):
            # filtro: parece corrente (não 'V-node', não '2:TACS', etc.)
            if k.split(":")[0] == "I-bran":
                keys.append(k)
    if not keys:
        for alt in ["IA","IB","IC"]:
            if alt in sig:
                keys.append(alt)
    return keys

def _choose_best_channel_for_2022(sig: Dict[str, Any], one_cycle: int) -> Tuple[str, np.ndarray]:
    """
    Pick the phase-current channel with the largest last-cycle / first-cycle energy ratio.

    Channels shorter than two cycles are not scored. If no channel could be
    scored, the first candidate is returned. Raises RuntimeError when ``sig``
    has no phase-current channel at all.

    Returns ``(channel_key, samples_as_float_array)``.
    """
    candidates = _list_phase_currents(sig)
    if not candidates:
        raise RuntimeError("Não encontrei canais de corrente de fase para o método 2022.")

    chosen = None
    top_ratio = -1.0
    for name in candidates:
        samples = np.asarray(sig[name], dtype=float)
        if len(samples) < 2 * one_cycle:
            continue
        head = samples[:one_cycle]
        tail = samples[-one_cycle:]
        # The small offset keeps the ratio finite when a cycle has zero energy.
        head_energy = float(np.sum(head * head)) + 1e-12
        tail_energy = float(np.sum(tail * tail)) + 1e-12
        ratio = tail_energy / head_energy
        if ratio > top_ratio:
            chosen, top_ratio = name, ratio

    if chosen is None:
        # Nothing could be scored: fall back to the first candidate
        chosen = candidates[0]
    return chosen, np.asarray(sig[chosen], dtype=float)


def _build_args_hifdm_2022(sig: Dict[str, Any], params: List[float]) -> Tuple[Tuple, Dict, str, int]:
    """Assemble the call arguments of the 2022 detector from a signal dictionary.

    Returns ``(args, kwargs, ch_key, one_cycle)``: the positional arguments
    (the selected, mean-removed 1-D signal), the keyword arguments
    (``janela`` and ``parametros``), the key of the selected channel and the
    number of samples per cycle.
    """
    # Use time[] to estimate the samples per cycle
    one_cycle = samples_per_cycle_from_time(sig)

    # Automatic selection of the phase-current channel
    ch_key, sinal_1d = _choose_best_channel_for_2022(sig, one_cycle)

    # Mean removal (helps with offset/DC)
    sinal_1d = sinal_1d - np.mean(sinal_1d)

    args  = (sinal_1d,)
    kwargs= {"janela": one_cycle, "parametros": params}
    return args, kwargs, ch_key, one_cycle
# def run_hifdm_2022(sig: Dict[str, Any], params: List[float], verbose: bool=False) -> Tuple[int, int, Dict[str, Any]]:
#     """
#     Chama o hifdm 2022 corretamente: retorna (trip, time, meta)
#     """
#     args, kwargs, ch_key, one_cycle = _build_args_hifdm_2022(sig, params)
#     if verbose:
#         print(f"[2022] canal='{ch_key}'  one_cycle={one_cycle}  params={params}")
#     out = hifdm_2022_impl(*args, **kwargs)  # (trip, time) — assinatura do código original (paper 2022) :contentReference[oaicite:1]{index=1}
#     if verbose:
#         print(f"[2022] retorno hifdm: {out}")
#     if not (isinstance(out, tuple) and len(out)>=2):
#         raise RuntimeError(f"Retorno inesperado do hifdm_2022: {out}")
#     trip, t = int(out[0]), int(out[1])
#     return trip, t, {"channel": ch_key, "one_cycle": one_cycle}

# -----------------------------------------
# Smoke test (1 HIF + 1 NFAI) — OPCIONAL
# -----------------------------------------
def smoke_test_2022(data_dir: Path, params: List[float]) -> None:
    """Run the 2022 detector on one HIF file and one NFAI file and print the outcome.

    Takes the first positive and the first negative file of the dataset. Prints
    a notice and returns when either kind is missing.
    """
    # One directory scan serves both lookups.
    items = iter_dataset_pl4(data_dir)
    p_hif = next((p for p, y in items if y == 1), None)
    p_nfi = next((p for p, y in items if y == 0), None)
    if not p_hif or not p_nfi:
        print("[SMOKE] Não achei HIF e/ou NFAI no diretório.")
        return

    for title, sample in (("HIF", p_hif), ("NFAI", p_nfi)):
        print(f"\n[SMOKE] {title}:")
        sig = openpl4(str(sample))
        trip, t, meta = run_hifdm_2022(sig, params, verbose=True)
        print(f"  arquivo={sample.name}  trip={trip}  time={t}  meta={meta}")
    
# ----------------------------------------------------------
# Avaliação (para usar no 'EVALUATE each candidate' do GA)
# ----------------------------------------------------------
def evaluate_params_2022_on_dataset(
    params: List[float],
    data_dir: Path,
    max_pos: int|None = 10,
    max_neg: int|None = 10,
    seed: int = 0,
    verbose_every: int = 0,
) -> Dict[str, Any]:
    """Evaluate one 2022-method parameter set on a sample of the dataset.

    - params: detector parameters, passed through to run_hifdm_2022
    - data_dir: dataset root, as understood by iter_dataset_pl4
    - max_pos / max_neg: cap on positive / negative files (None = all)
    - seed: seed of the shuffles that pick and order the files
    - verbose_every: print progress every N files and print errors (0 = silent)

    Returns a dict with the confusion counts (TP, TN, FP, FN), ``accuracy``,
    ``tmean_cycles`` (mean detection time over true positives, or None),
    ``n_eval``, ``n_used``, ``n_errors`` and ``elapsed_sec``.

    Files whose processing raises are counted in ``n_errors`` and left out of
    the confusion counts.
    NOTE(review): any exception is absorbed here, including a NameError from a
    helper that has not been defined yet; check ``n_errors`` before trusting
    the metrics.
    """
    rng = random.Random(seed)
    # One directory scan serves both the positive and the negative lists.
    all_items = iter_dataset_pl4(data_dir)
    pos = [p for p, y in all_items if y == 1]
    neg = [p for p, y in all_items if y == 0]
    rng.shuffle(pos)
    rng.shuffle(neg)
    if max_pos is not None:
        pos = pos[:max_pos]
    if max_neg is not None:
        neg = neg[:max_neg]
    batch = [(p, 1) for p in pos] + [(p, 0) for p in neg]
    rng.shuffle(batch)

    TP = TN = FP = FN = 0
    det_times = []
    used = errors = 0
    t0 = time.time()

    for i, (path, label) in enumerate(batch, 1):
        try:
            sig = openpl4(str(path))
            trip, tc, meta = run_hifdm_2022(sig, params, verbose=False)
            used += 1
            if trip:
                if label == 1:
                    TP += 1
                    det_times.append(tc)
                else:
                    FP += 1
            else:
                if label == 1:
                    FN += 1
                else:
                    TN += 1

            if verbose_every and (i % verbose_every == 0):
                print(f"[{i}/{len(batch)}] {path.name}  label={label}  trip={trip}  t={tc}  ch={meta['channel']}")

        except Exception as e:
            # Best effort: a failing file is counted and the evaluation goes on.
            errors += 1
            if verbose_every:
                print(f"[ERR] {path.name}: {e}")

    elapsed = time.time() - t0
    tot = max(1, TP + TN + FP + FN)  # avoids division by zero on an empty batch
    acc = (TP + TN) / tot
    tmean = (sum(det_times) / len(det_times)) if det_times else None
    return {
        "TP": TP, "TN": TN, "FP": FP, "FN": FN,
        "accuracy": acc, "tmean_cycles": tmean,
        "n_eval": tot, "n_used": used, "n_errors": errors,
        "elapsed_sec": elapsed,
    }
    
# ------------------------------------------------------------------------------
# Descoberta de triplets do tipo "BUSx-MEDy" a partir das chaves I-bran:FROM-TO
# ------------------------------------------------------------------------------
def extract_triplet_bases(sig: Dict[str, Any], max_items: int | None = 5) -> List[str]:
    """
    Varre as chaves 'I-bran:FROM-TO' e retorna bases únicas 'BUSx-MEDy' (sem a fase A/B/C).
    Ex.: 'I-bran:BUS10A-MED4A' -> base 'BUS10-MED4'
    """
    bases = []
    for k in list(sig.keys()):
        if not isinstance(k, str):
            continue
        if not k.startswith("I-bran:"):
            continue
        try:
            after = k.split("I-bran:")[1]
            FROM, TO = after.split("-")
            base_from = FROM[:-1] if FROM and FROM[-1].isalpha() else FROM
            base_to = TO[:-1] if TO and TO[-1].isalpha() else TO
            base = f"{base_from}-{base_to}"
            if base not in bases:
                bases.append(base)
        except Exception:
            continue
    if max_items is not None:
        bases = bases[:max_items]
    return bases

def inject_triplet_phases(sig: Dict[str, Any], base: str) -> Dict[str, Any]:
    """
    Copy the three phase currents of a 'FROM-TO' base into the IA/IB/IC keys of ``sig``.

    The sources are 'I-bran:{FROM}A-{TO}A', '...B-...B' and '...C-...C'; phases
    that are missing are left untouched. When all three exist and ``sig`` has
    no 'IN' yet, 'IN' is set to their sum. ``sig`` is modified in place and
    also returned.
    """
    from_id, to_id = base.split("-")
    phases = {
        letter: sig.get(f"I-bran:{from_id}{letter}-{to_id}{letter}")
        for letter in "ABC"
    }

    for letter, current in phases.items():
        if current is not None:
            sig[f"I{letter}"] = current

    all_present = all(current is not None for current in phases.values())
    if "IN" not in sig and all_present:
        sig["IN"] = phases["A"] + phases["B"] + phases["C"]
    return sig

def _build_hifdm_kwargs(
    impl: Callable, signal_dict: Dict[str, Any], parametros: List[float]
) -> tuple[tuple, dict]:
    """
    Retorna (args, kwargs) corretos p/ assinatura:
      - 2022: hifdm(sinal_1d, janela, parametros[, show])
      - 2024: hifdm(Ia, Ib, Ic, amostras, parametros)
    """
    import numpy as _np

    sig = inspect.signature(impl)
    params = list(sig.parameters.values())
    names = [p.name for p in params]

    fs = float(signal_dict.get("fs", 0.0))
    f0 = float(signal_dict.get("f0", 60.0))
    one_cycle = int(round(fs / f0)) if (fs > 0 and f0 > 0) else 0
    if one_cycle <= 0:
        raise ValueError(
            f"Não foi possível calcular amostras por ciclo (fs={fs}, f0={f0})."
        )

    IA = signal_dict.get("IA")
    IB = signal_dict.get("IB")
    IC = signal_dict.get("IC")

    # ====== 2022: hifdm(sinal, janela, parametros) — precisa de vetor 1D ======
    if names and names[0] == "sinal":
        candidates = [(IA, "IA"), (IB, "IB"), (IC, "IC")]
        candidates = [(v, n) for v, n in candidates if v is not None]
        if not candidates:
            raise ValueError("Faltam IA/IB/IC para chamar o método 2022.")
        rms_vals = [
            (float(_np.sqrt(_np.mean(_np.square(c)))), n, c) for c, n in candidates
        ]
        rms_vals.sort(reverse=True)
        sinal_1d = rms_vals[0][2]
        args = [sinal_1d]
        kwargs = {"janela": one_cycle, "parametros": parametros}
        if "show" in names:
            kwargs["show"] = False
        return tuple(args), kwargs

    # ====== 2024 original: hifdm(Ia, Ib, Ic, amostras, parametros) ======
    if names and names[0] == "Ia":
        if IA is None or IB is None or IC is None:
            raise ValueError("IA/IB/IC ausentes para chamar hifdm 2024.")
        args = [IA, IB, IC, one_cycle, parametros]
        kwargs = {}
        return tuple(args), kwargs

    # Fallback genérico (se houver variações)
    kwargs = {}
    if "parametros" in names:
        kwargs["parametros"] = parametros
    if "janela" in names:
        kwargs["janela"] = one_cycle
    if "amostras" in names:
        kwargs["amostras"] = one_cycle
    if "Ia" in names and IA is not None:
        kwargs["Ia"] = IA
    if "Ib" in names and IB is not None:
        kwargs["Ib"] = IB
    if "Ic" in names and IC is not None:
        kwargs["Ic"] = IC
    if "show" in names:
        kwargs["show"] = False
    return tuple(), kwargs

def _call_hifdm_impl(
    impl: Callable,
    signal_dict: Dict[str, Any],
    parametros: List[float],
    verbose: bool = False,
) -> DetectResult:
    """Call a detector implementation and normalize its output into a DetectResult.

    - impl: detector callable; its arguments come from _build_hifdm_kwargs
    - signal_dict: signal dictionary handed to _build_hifdm_kwargs
    - parametros: detector parameters
    - verbose: when True, print the call arguments and the raw return value
      (off by default because these dumps contain whole signal arrays)

    Accepted return shapes:
      - tuple: item 0 is the detection flag, item 1 (if present and not None)
        the detection time in cycles
      - dict: flag read from 'detected' / 'is_hif' / 'resultado' / 'trip',
        cycles from 'cycles' / 'n_ciclos' / 'tempo_ciclos'
      - anything else: its truthiness is the detection flag

    Never raises for detector failures: any exception is printed and reported
    as a non-detection whose ``meta`` carries an 'error' entry.
    """
    try:
        args, kwargs = _build_hifdm_kwargs(impl, signal_dict, parametros)
        if verbose:
            print(f"Chamando {impl.__name__} com args={args} kwargs={kwargs}")
        out = impl(*args, **kwargs)
        if verbose:
            print(f"{impl.__name__} retornou: {out}")

        if isinstance(out, tuple) and len(out) >= 1:
            detected = bool(out[0])
            det_cycles = float(out[1]) if len(out) > 1 and out[1] is not None else None
            return DetectResult(detected, det_cycles, {"raw": out})

        if isinstance(out, dict):
            detected = bool(
                out.get("detected")
                or out.get("is_hif")
                or out.get("resultado")
                or out.get("trip", False)
            )
            # Take the first key holding a value. Compared against None rather
            # than by truthiness, so a detection time of 0 cycles is kept.
            raw_cycles = next(
                (
                    out[key]
                    for key in ("cycles", "n_ciclos", "tempo_ciclos")
                    if out.get(key) is not None
                ),
                None,
            )
            det_cycles = float(raw_cycles) if raw_cycles is not None else None
            return DetectResult(detected, det_cycles, out)

        return DetectResult(bool(out), None, {"raw": out})

    except Exception as e:
        # Best effort: report the failing file and return a non-detection.
        meta = signal_dict.get("__meta__", {})
        fname = meta.get("filename") or signal_dict.get("source") or "?"
        print(f"[ERR] {impl.__name__} on {fname}: {e}")
        return DetectResult(False, None, {"error": str(e)})
# ------------------------------------------------------------------------------
# WRAPPER 2024 com 5 parâmetros [alpha, zeta, eta, C, N]
# ------------------------------------------------------------------------------
def hifdm_2024_config(
    Ia, Ib, Ic, amostras: int, parametros: List[float]
) -> Tuple[int, int]:
    """
    Run the 2024 method with tunable parameters.

      parametros = [alfa, zeta, eta, C, N]
        - alfa: factor of the fundamental-energy threshold
        - zeta: factor of the roughness threshold (3rd harmonic)
        - eta: window (in cycles) used to compute the roughness
        - C: cycles to wait after a rupture before confirming the HIF
        - N: confirmations required to trip

    Ia, Ib, Ic are the phase currents and ``amostras`` the samples per cycle.
    The neutral current Ia + Ib + Ic is processed in windows of one cycle.

    Returns ``(trip, cycle_index)``: ``trip`` is 1 on detection and 0
    otherwise; ``cycle_index`` is the index of the last processed window
    (0 when the signal is too short for any window).
    """
    import numpy as _np
    from statistics import median, stdev

    alfa = float(parametros[0])  # ~[1.01..2.5]
    zeta = float(parametros[1])  # ~[1.01..3.0]
    eta = int(parametros[2])  # roughness window (cycles) ~[6..18]
    C = int(parametros[3])  # post-rupture wait ~[3..20]
    N = int(parametros[4])  # confirmations ~[3..12]

    # Sliding buffers of length N (newest value first)
    E1 = [0.0] * N
    GAMMA = [0.0] * N

    gamma_ene = 0.01
    gamma_r = 0.01

    In = _np.array(Ia) + _np.array(Ib) + _np.array(Ic)

    def transformada(trecho):
        # `_st` is imported with `from hifdm_optimization.Utilities import st`,
        # which may bind either the function or the st.py module.
        # NOTE(review): when it is the module, the transform is assumed to be
        # its `st` attribute — confirm against Utilities/st.py.
        st_fn = _st if callable(_st) else getattr(_st, "st")
        return st_fn(trecho, 2)

    def energia(espectro) -> float:
        # Sum of squared magnitudes. Replaces the undefined `np_sum_abs2`
        # helper; the formula follows that helper's name — TODO confirm.
        return float(_np.sum(_np.abs(espectro) ** 2))

    def desvio(valores: List[float]) -> float:
        # stdev needs at least two distinct values to be meaningful here.
        return stdev(valores) if len(set(valores)) > 1 else 0.0

    # Roughness from the last 'eta' energies of the 3rd harmonic
    def rugosidade(energias: List[float]) -> float:
        n = len(energias)
        if n < 2:
            return 1e-2
        acc = 0.0
        for i in range(1, n):
            d = energias[i] - energias[i - 1]
            acc += d * d
        return max(acc / n, 1e-2)

    tau_1 = 0
    tau_2 = 0
    cont_rupt = 0
    detect_rupt = False
    trip = 0
    ciclo = 0

    # Sliding history for the roughness (up to eta values of e3)
    hist_E3: List[float] = []

    # Walk the signal in windows of one cycle
    for ciclo in range(0, len(In) - amostras, amostras):
        # Transform of the neutral current in this window
        espectro = transformada(In[ciclo : ciclo + amostras])
        fund = espectro[1]
        h3 = espectro[3]

        e1 = energia(fund)
        e3 = energia(h3)

        # Update the histories
        E1 = [e1] + E1[:-1]
        hist_E3.append(e3)
        if len(hist_E3) > eta:
            hist_E3.pop(0)

        # Roughness of the 3rd harmonic over the last 'eta' cycles
        R = rugosidade(hist_E3)

        # --- Threshold update, with alfa and zeta as tunable factors
        if e1 <= max(E1):
            gamma_ene = alfa * (median(E1) + desvio(E1))

        if len(GAMMA) > 0:
            if R <= max(GAMMA) + desvio(GAMMA):
                GAMMA = [R] + GAMMA[:-1]
                gamma_r = zeta * (max(GAMMA) + desvio(GAMMA))

        # --- Rupture detection (sustained rise of the fundamental energy)
        if e1 >= gamma_ene:
            tau_1 += 1
        else:
            tau_1 = 0

        if tau_1 >= N:
            detect_rupt = True

        if detect_rupt:
            cont_rupt += 1

        # --- After waiting C cycles, confirm the HIF with the 3rd-harmonic roughness
        if cont_rupt >= C:
            if R >= gamma_r:
                tau_2 += 1
            else:
                tau_2 = max(tau_2 - 1, 0)

            if tau_2 >= N:
                trip = 1
                break

    return trip, ciclo // amostras

# ------------------------------------------------------------------------------
# Avaliação em lote (dataset)
# ------------------------------------------------------------------------------
def evaluate_params_on_dataset(
    method: str,
    parametros: List[float],
    data_dir: Path,
    max_pos: int | None = None,
    max_neg: int | None = None,
    seed: int = 42,
) -> Dict[str, Any]:
    """
    Executa o detector em um subconjunto (ou no total) do dataset e produz métricas.
    """
    rng = random.Random(seed)
    all_items = iter_dataset_pl4(data_dir)

    pos = [(p, y) for p, y in all_items if y == 1]
    neg = [(p, y) for p, y in all_items if y == 0]
    rng.shuffle(pos)
    rng.shuffle(neg)
    if max_pos is not None:
        pos = pos[:max_pos]
    if max_neg is not None:
        neg = neg[:max_neg]

    batch = pos + neg
    rng.shuffle(batch)

    TP = FP = TN = FN = 0
    det_times = []
    errors = 0
    used = 0
    t0 = time.time()

    for path, label in batch:
        sig = openpl4(str(path))

        # Normalização mínima: se não houver neutro explícito, compute IN = IA+IB+IC quando possível
        IA = sig.get("IA")
        IB = sig.get("IB")
        IC = sig.get("IC")
        if IA is None or IB is None or IC is None:
            bases = extract_triplet_bases(sig, max_items=1)
            if bases:
                inject_triplet_phases(sig, bases[0])
                IA, IB, IC = sig.get("IA"), sig.get("IB"), sig.get("IC")
        if (
            sig.get("IN") is None
            and IA is not None
            and IB is not None
            and IC is not None
        ):
            sig["IN"] = IA + IB + IC

        fs = float(sig.get("fs", 0.0))
        f0 = float(sig.get("f0", 60.0))
        one_cycle = int(round(fs / f0)) if (fs > 0 and f0 > 0) else 0
        if one_cycle <= 0:
            errors += 1
            continue

        if method == "2024":
            # parametros = [alpha, zeta, eta, C, N]
            if len(parametros) >= 5:
                C = int(parametros[3])
                N = int(parametros[4])
                n_cycles_total = int(
                    len(sig.get("IN", IA if IA is not None else [])) // one_cycle
                )
                if n_cycles_total < (C + N + 5):
                    continue

        if method == "2022":
            print("passou aqui 1", sig)
            print("passou aqui 2", parametros)
            res = _call_hifdm_impl(hifdm_2022_impl, sig, parametros)
        elif method == "2024":
            if IA is None or IB is None or IC is None:
                errors += 1
                continue
            trip, cyc = hifdm_2024_config(IA, IB, IC, one_cycle, parametros)
            res = DetectResult(bool(trip), float(cyc), {"raw": (trip, cyc)})
        else:
            raise ValueError("method deve ser '2022' ou '2024'")

        used += 1
        detected = res.detected
        if "error" in res.meta:
            errors += 1

        if label == 1 and detected:
            TP += 1
            if res.det_cycles is not None:
                det_times.append(res.det_cycles)
        elif label == 1 and not detected:
            FN += 1
        elif label == 0 and detected:
            FP += 1
        else:
            TN += 1

    elapsed = time.time() - t0
    tot = max(1, TP + TN + FP + FN)
    acc = (TP + TN) / tot
    tmean = (sum(det_times) / len(det_times)) if det_times else None

    return {
        "TP": TP,
        "TN": TN,
        "FP": FP,
        "FN": FN,
        "accuracy": acc,
        "tmean_cycles": tmean,
        "n_errors": errors,
        "n_eval": tot,
        "n_used": used,
        "elapsed_sec": elapsed,
    }

# ------------------------------------------------------------------------------
# Função-objetivo
# ------------------------------------------------------------------------------
def objective_from_metrics(
    m: Dict[str, Any], w_fn: float = 2.0, w_fp: float = 1.0, w_time: float = 0.05
) -> float:
    """
    Turn evaluation metrics into a single cost (lower is better).

    The cost is ``w_fn * FN-rate + w_fp * FP-rate + w_time * (tmean / 9)``:
    false negatives (missed HIFs) weigh most with the defaults, then false
    positives, then the mean detection time in cycles. A missing mean
    detection time (None) is taken as 9 cycles.
    """
    hits, rejects, false_pos, false_neg = (m[key] for key in ("TP", "TN", "FP", "FN"))

    # max(1, ...) keeps the rates defined when a class has no samples.
    positives = max(1, hits + false_neg)
    negatives = max(1, rejects + false_pos)

    mean_cycles = m["tmean_cycles"]
    if mean_cycles is None:
        mean_cycles = 9.0

    miss_term = w_fn * (false_neg / positives)
    alarm_term = w_fp * (false_pos / negatives)
    delay_term = w_time * (mean_cycles / 9.0)
    return miss_term + alarm_term + delay_term

#### Função de avaliação

In [17]:
# ----------------
# Leitura de PL4
# ----------------
def openpl4(path: str) -> Dict[str, Any]:
    """Read a PL4 file and expose its contents as a flat dictionary.

    The result holds the converted header (``__dfHEAD__``), the raw data matrix
    (``__data__``), the file metadata plus ``filename`` (``__meta__``), one
    ``"TYPE:FROM-TO"`` entry per header row, and the ``time``, ``fs`` and
    ``f0`` entries. ``f0`` is fixed at 60.0.

    NOTE(review): a function with this name is also defined earlier in this
    file; this later definition replaces it once the cell runs.
    """
    header, samples, file_meta = readPL4(path)
    header = convertType(header)

    signals: Dict[str, Any] = {
        "__dfHEAD__": header,
        "__data__": samples,
        "__meta__": {**file_meta, "filename": str(path)},
    }
    # Column 0 of the data matrix is time, so header row i maps to column i + 1
    # (assumes the header index runs 0..n-1 — TODO confirm against readPL4).
    signals.update(
        (f"{row['TYPE']}:{row['FROM']}-{row['TO']}", samples[:, pos + 1])
        for pos, row in header.iterrows()
    )
    signals["time"] = samples[:, 0]
    signals["fs"] = 1.0 / file_meta["deltat"]
    signals["f0"] = 60.0
    return signals

# ---------------------------------------------
# Dataset (assume estrutura do seu repositório)
# ---------------------------------------------
def iter_dataset_pl4(data_dir: Path) -> List[Tuple[Path, int]]:
    """List every ``.pl4`` file of the dataset together with its label.

    Files under the fault folders (FAI, FAI_com_forno, FAI_com_gd,
    FAI_retificador) are labelled 1 and searched recursively; missing folders
    are skipped. Files directly inside ``NFAI`` are labelled 0 (this search is
    not recursive). Positives come first in the returned list.

    NOTE(review): a function with this name is also defined earlier in this
    file; this later definition replaces it once the cell runs.
    """
    root = Path(data_dir)
    fault_dirs = (
        root / name
        for name in ("FAI", "FAI_com_forno", "FAI_com_gd", "FAI_retificador")
    )
    labelled: List[Tuple[Path, int]] = []
    for folder in fault_dirs:
        if folder.exists():
            labelled.extend((pl4, 1) for pl4 in folder.rglob("*.pl4"))
    labelled.extend((pl4, 0) for pl4 in (root / "NFAI").glob("*.pl4"))
    return labelled

# -----------------------------------------------------------
# Descoberta de canais e seleção automática (método 2022)
# -----------------------------------------------------------
def _list_phase_currents(sig: Dict[str, Any]) -> List[str]:
    """
    Lista chaves de corrente de fase do tipo:
      - 'I-bran:BUSxA-MEDyA' / ...B / ...C
      - Fallback: chaves 'IA','IB','IC' se existirem
    """
    keys = []
    for k in sig.keys():
        if not isinstance(k, str):
            continue
        if k.startswith("I-bran:") and (k.endswith("A") or k.endswith("B") or k.endswith("C")):
            # filtro: parece corrente (não 'V-node', não '2:TACS', etc.)
            if k.split(":")[0] == "I-bran":
                keys.append(k)
    if not keys:
        for alt in ["IA","IB","IC"]:
            if alt in sig:
                keys.append(alt)
    return keys

def _choose_best_channel_for_2022(sig: Dict[str, Any], one_cycle: int) -> Tuple[str, np.ndarray]:
    """
    Pick the phase-current channel with the largest last-cycle / first-cycle energy ratio.

    Channels shorter than two cycles are not scored. If no channel could be
    scored, the first candidate is returned. Raises RuntimeError when ``sig``
    has no phase-current channel at all.

    Returns ``(channel_key, samples_as_float_array)``.

    NOTE(review): a function with this name is also defined earlier in this
    file; this later definition replaces it once the cell runs.
    """
    candidates = _list_phase_currents(sig)
    if not candidates:
        raise RuntimeError("Não encontrei canais de corrente de fase para o método 2022.")

    chosen = None
    top_ratio = -1.0
    for name in candidates:
        samples = np.asarray(sig[name], dtype=float)
        if len(samples) < 2 * one_cycle:
            continue
        head = samples[:one_cycle]
        tail = samples[-one_cycle:]
        # The small offset keeps the ratio finite when a cycle has zero energy.
        head_energy = float(np.sum(head * head)) + 1e-12
        tail_energy = float(np.sum(tail * tail)) + 1e-12
        ratio = tail_energy / head_energy
        if ratio > top_ratio:
            chosen, top_ratio = name, ratio

    if chosen is None:
        # Nothing could be scored: fall back to the first candidate
        chosen = candidates[0]
    return chosen, np.asarray(sig[chosen], dtype=float)

# def _build_args_hifdm_2022(sig: Dict[str, Any], params: List[float]) -> Tuple[Tuple, Dict]:
#     fs = float(sig.get("fs", 0.0))
#     f0 = float(sig.get("f0", 60.0))
#     one_cycle = int(round(fs / f0)) if (fs>0 and f0>0) else 0
#     if one_cycle <= 0:
#         raise ValueError(f"fs/f0 inválidos para janela (fs={fs}, f0={f0}).")

#     ch_key, sinal_1d = _choose_best_channel_for_2022(sig, one_cycle)

#     # Remoção de média ajuda um pouco com variações DC
#     sinal_1d = sinal_1d - np.mean(sinal_1d)

#     args  = (sinal_1d,)
#     kwargs= {"janela": one_cycle, "parametros": params}
#     return args, kwargs, ch_key, one_cycle




def run_hifdm_2022(sig: Dict[str, Any], params: List[float], verbose: bool=False) -> Tuple[int, int, Dict[str, Any]]:
    """
    Call the 2022 hifdm implementation and normalise its result.

    The positional/keyword arguments, the chosen channel and the cycle length
    all come from ``_build_args_hifdm_2022(sig, params)``.

    Returns:
        (trip, time, meta) where ``trip`` and ``time`` are the first two items
        of the implementation's return value cast to int, and ``meta`` carries
        the ``"channel"`` and ``"one_cycle"`` that were used.

    Raises:
        RuntimeError: the implementation did not return a tuple with at least
            two items.
    """
    call_args, call_kwargs, channel, samples_per_cycle = _build_args_hifdm_2022(sig, params)
    if verbose:
        print(f"[2022] canal='{channel}'  one_cycle={samples_per_cycle}  params={params}")

    # Expected to be a (trip, time) tuple; validated right below.
    result = hifdm_2022_impl(*call_args, **call_kwargs)
    if verbose:
        print(f"[2022] retorno hifdm: {result}")

    if not isinstance(result, tuple) or len(result) < 2:
        raise RuntimeError(f"Retorno inesperado do hifdm_2022: {result}")

    trip_flag = int(result[0])
    detect_time = int(result[1])
    return trip_flag, detect_time, {"channel": channel, "one_cycle": samples_per_cycle}

# -----------------------------------------
# Smoke test (1 HIF + 1 NFAI) — OPCIONAL
# -----------------------------------------
def smoke_test_2022(data_dir: Path, params: List[float]) -> None:
    """
    Run the 2022 detector verbosely on one HIF file and one NFAI file.

    The first dataset item labelled 1 (HIF) and the first labelled 0 (NFAI)
    are used. If either is missing a message is printed and nothing is run.
    """
    def first_with_label(wanted: int):
        # A fresh dataset iteration per label, so each search starts from the top.
        return next((p for p, y in iter_dataset_pl4(data_dir) if y == wanted), None)

    p_hif = first_with_label(1)
    p_nfi = first_with_label(0)
    if not (p_hif and p_nfi):
        print("[SMOKE] Não achei HIF e/ou NFAI no diretório.")
        return

    for banner, path in (("\n[SMOKE] HIF:", p_hif), ("\n[SMOKE] NFAI:", p_nfi)):
        print(banner)
        trip, t, meta = run_hifdm_2022(openpl4(str(path)), params, verbose=True)
        print(f"  arquivo={path.name}  trip={trip}  time={t}  meta={meta}")

# ----------------------------------------------------------
# Avaliação (para usar no 'EVALUATE each candidate' do GA)
# ----------------------------------------------------------
def evaluate_params_2022_on_dataset(
    params: List[float],
    data_dir: Path,
    max_pos: int|None = 10,
    max_neg: int|None = 10,
    seed: int = 0,
    verbose_every: int = 0,
) -> Dict[str, Any]:
    """
    Run the 2022 detector with ``params`` on a random labelled subset of ``data_dir``.

    Args:
        params: detector parameters forwarded to ``run_hifdm_2022``.
        data_dir: dataset directory handed to ``iter_dataset_pl4``.
        max_pos: max number of label-1 (HIF) files to use; ``None`` = all.
        max_neg: max number of label-0 (NFAI) files to use; ``None`` = all.
        seed: seed of the private RNG used for the shuffles, so a given
            (dataset, seed) pair always yields the same subset and order.
        verbose_every: when non-zero, print a progress line every that many
            files and print each per-file error.

    Returns:
        Dict with the confusion counts (``TP``, ``TN``, ``FP``, ``FN``),
        ``accuracy``, ``tmean_cycles`` (mean detection time of the true
        positives, ``None`` if there were none; presumably in cycles, as the
        key name suggests), ``n_eval`` (files that produced a decision),
        ``n_used``, ``n_errors`` and ``elapsed_sec``.
    """
    rng = random.Random(seed)

    # Single pass over the dataset, split by label.
    pos, neg = [], []
    for p, y in iter_dataset_pl4(data_dir):
        if y == 1:
            pos.append(p)
        elif y == 0:
            neg.append(p)

    # Shuffle order (pos, neg, batch) is part of the reproducibility contract.
    rng.shuffle(pos)
    rng.shuffle(neg)
    if max_pos is not None:
        pos = pos[:max_pos]
    if max_neg is not None:
        neg = neg[:max_neg]
    batch = [(p, 1) for p in pos] + [(p, 0) for p in neg]
    rng.shuffle(batch)

    TP = TN = FP = FN = 0
    det_times = []
    used = errors = 0
    t0 = time.time()

    for i, (path, label) in enumerate(batch, 1):
        try:
            sig = openpl4(str(path))
            trip, tc, meta = run_hifdm_2022(sig, params, verbose=False)
        except Exception as e:
            # Deliberate best-effort: one unreadable/failing file must not
            # abort the whole evaluation. The failure is counted in n_errors.
            errors += 1
            if verbose_every:
                print(f"[ERR] {path.name}: {e}")
            continue

        used += 1
        if trip:
            if label == 1:
                TP += 1
                det_times.append(tc)
            else:
                FP += 1
        elif label == 1:
            FN += 1
        else:
            TN += 1

        if verbose_every and (i % verbose_every == 0):
            print(f"[{i}/{len(batch)}] {path.name}  label={label}  trip={trip}  t={tc}  ch={meta['channel']}")

    elapsed = time.time() - t0
    n_decided = TP + TN + FP + FN
    # Guard only the division; n_eval reports the real count (0 when nothing ran).
    acc = (TP + TN) / max(1, n_decided)
    tmean = (sum(det_times) / len(det_times)) if det_times else None
    return {
        "TP": TP, "TN": TN, "FP": FP, "FN": FN,
        "accuracy": acc, "tmean_cycles": tmean,
        "n_eval": n_decided, "n_used": used, "n_errors": errors,
        "elapsed_sec": elapsed,
    }

def samples_per_cycle_from_time(sig: Dict[str, Any], default_f0: float = 60.0) -> int:
    """
    Estimate the number of samples per fundamental cycle of a signal dict.

    Resolution order:
      1. the median step of ``sig["time"]`` (needs at least 3 samples),
      2. the ``sig["fs"]`` metadata,
      3. the constant 128.

    Args:
        sig: signal dict; the keys ``"time"``, ``"fs"`` and ``"f0"`` are read
            if present.
        default_f0: fundamental frequency used when ``sig`` has no ``"f0"``.

    Returns:
        A positive number of samples per cycle.
    """
    import math

    last_resort = 128

    f0 = float(sig.get("f0", default_f0))
    # A zero, negative or NaN fundamental makes both estimates meaningless
    # (and would divide by zero below), so go straight to the last resort.
    if not f0 > 0:
        return last_resort

    # 1) Prefer the median step of the time vector (robust to outliers).
    t = sig.get("time", None)
    if t is not None:
        arr_t = np.asarray(t, dtype=float).reshape(-1)
        if arr_t.size >= 3:
            dt = float(np.median(np.diff(arr_t)))
            if dt > 0:
                cycles = (1.0 / dt) / f0
                # Non-finite ratios cannot be rounded to an int.
                if math.isfinite(cycles):
                    one_cycle = int(round(cycles))
                    if one_cycle > 0:
                        return one_cycle

    # 2) Fallback: sampling-rate metadata.
    fs_meta = float(sig.get("fs", 0.0))
    if fs_meta > 0:
        oc = int(round(fs_meta / f0))
        if oc > 0:
            return oc

    # 3) Last resort.
    return last_resort



#### Teste de avaliação da população

In [18]:
# ====== LOCAL TEST ======
# 1) Build an initial population of 10 individuals.
# NOTE: init_population_2022 is defined in the GA cell further down this
# notebook, so that cell has to be executed before this one.
pop = init_population_2022(pop_size=10, seed=123, include_seeds=True)
print("População inicial (10 indivíduos):")

# Print each individual with a 1-based, zero-padded index.
for i, ind in enumerate(pop, 1):
    print(f"  ind {i:02d}: {ind}")

# 2) Evaluate the population (2022 method) on a short subset so it runs fast.
# print("\nAvaliando população (subset_pos=10, subset_neg=10)...")
# Seed individual originally used in the project (2022 paper): [gamma, alfa, beta, beta_diff]
#seed_params_2022 = [0.004, 1.05, 40, 1.5]  # the initial population also contains this individual

# 3) Quick smoke test (one HIF and one NFAI) -- currently disabled.
#smoke_test_2022(DATA_DIR, seed_params_2022)

# 4) Evaluate on a small subset (the GA's "EVALUATE" step) -- currently
# disabled by wrapping it in a string literal; the cell only echoes the string.
'''
metrics = evaluate_params_2022_on_dataset(
    seed_params_2022, DATA_DIR, max_pos=10, max_neg=10, seed=42, verbose_every=4
)
print("\n[MÉTRICAS 2022]", metrics)
'''



População inicial (10 indivíduos):
  ind 01: [0.004, 1.05, 40, 1.5]
  ind 02: [0.0035, 1.04, 30, 1.3]
  ind 03: [0.000997, 1.027, 36, 1.437]
  ind 04: [0.008466, 1.176, 34, 1.827]
  ind 05: [0.003656, 1.172, 20, 1.246]
  ind 06: [0.005829, 1.143, 20, 1.052]
  ind 07: [0.004645, 1.027, 48, 1.598]
  ind 08: [0.000563, 1.149, 16, 2.363]
  ind 09: [0.001381, 1.037, 60, 2.365]
  ind 10: [0.003272, 1.092, 40, 1.435]


'\nmetrics = evaluate_params_2022_on_dataset(\n    seed_params_2022, DATA_DIR, max_pos=10, max_neg=10, seed=42, verbose_every=4\n)\nprint("\n[MÉTRICAS 2022]", metrics)\n'

## Fitness Function

In [19]:
# ===============================
# FITNESS para o GA (método 2022)
# ===============================

from functools import lru_cache
from typing import Optional

# 1) Definição do "custo" (menor é melhor) a partir das métricas
def cost_from_metrics(
    m: Dict[str, Any],
    w_fn: float = 3.0,     # weight of the miss rate (missing a HIF is worst)
    w_fp: float = 1.5,     # weight of the false-alarm rate
    w_time: float = 0.03,  # weak weight of the mean detection time
    max_cycles_norm: int = 300,  # tmean normaliser (~5 s @60Hz ≈ 300 cycles)
) -> float:
    """
    Turn a metrics dict into a scalar cost (lower is better).

    cost = w_fn * FN/(TP+FN) + w_fp * FP/(TN+FP) + w_time * min(1, tmean/max_cycles_norm)

    Both denominators are floored at 1. A missing or ``None``
    ``"tmean_cycles"`` is treated as ``max_cycles_norm``, i.e. the time term
    takes its maximum value of 1.
    """
    tp, tn, fp, fn = (m[key] for key in ("TP", "TN", "FP", "FN"))

    miss_rate = fn / max(1, tp + fn)
    false_alarm_rate = fp / max(1, tn + fp)

    mean_cycles = m.get("tmean_cycles")
    if mean_cycles is None:
        # No detections at all: use the conservative default.
        mean_cycles = max_cycles_norm
    normalized_time = min(1.0, float(mean_cycles) / float(max_cycles_norm))

    return float(
        (w_fn * miss_rate) + (w_fp * false_alarm_rate) + (w_time * normalized_time)
    )


# 2) Converte custo → fitness (maior é melhor)
def fitness_from_cost(cost: float) -> float:
    """
    Map a cost to a fitness in (0, 1] (higher is better).

    Negative costs are treated as 0, so ``cost <= 0`` gives fitness 1.
    """
    non_negative = cost if cost > 0.0 else 0.0
    return 1.0 / (1.0 + non_negative)


# 3) Avalia um indivíduo (parâmetros) com 1..N repetições para reduzir variância
def evaluate_candidate_fitness_2022(
    params: List[float],
    data_dir: Path,
    subset_pos: Optional[int] = 10,
    subset_neg: Optional[int] = 10,
    repeats: int = 2,         # repetitions with different seeds, to stabilise
    base_seed: int = 0,
    verbose: bool = False,
) -> Tuple[float, Dict[str, Any]]:
    """
    Score one individual over ``repeats`` dataset subsets.

    Each repetition calls ``evaluate_params_2022_on_dataset`` with the seed
    ``base_seed + r * 9973`` and converts its metrics to a cost. The fitness
    is derived from the mean cost over the repetitions.

    Args:
        params: individual to evaluate.
        data_dir: dataset directory.
        subset_pos / subset_neg: per-repetition caps on positive / negative
            files (``None`` = no cap).
        repeats: number of repetitions; must be >= 1.
        base_seed: seed of the first repetition.
        verbose: print the cost and metrics of every repetition.

    Returns:
        (fitness, metrics_agg) where ``metrics_agg`` holds the summed
        confusion counts, the resulting accuracy, the detection-time mean
        weighted by each repetition's TP count (``None`` without detections),
        ``repeats``, ``mean_cost`` and ``fitness``.

    Raises:
        ValueError: ``repeats`` is smaller than 1.
    """
    # Explicit check instead of `assert`: asserts vanish under `python -O`,
    # which would later turn into a division by zero on the empty cost list.
    if repeats < 1:
        raise ValueError(f"repeats must be >= 1 (got {repeats}).")

    agg = {"TP": 0, "TN": 0, "FP": 0, "FN": 0, "tmean_cycles": 0.0, "tmean_count": 0}
    costs = []

    for r in range(repeats):
        seed = base_seed + (r * 9973)
        m = evaluate_params_2022_on_dataset(
            params, data_dir, max_pos=subset_pos, max_neg=subset_neg, seed=seed, verbose_every=0
        )
        c = cost_from_metrics(m)
        costs.append(c)

        # Accumulate the confusion counts.
        for k in ("TP", "TN", "FP", "FN"):
            agg[k] += int(m.get(k, 0))

        # Accumulate detection times as a TP-weighted sum of per-repetition means.
        if m.get("tmean_cycles") is not None and m["TP"] > 0:
            agg["tmean_cycles"] += float(m["tmean_cycles"]) * float(m["TP"])
            agg["tmean_count"] += int(m["TP"])

        if verbose:
            print(f"[rep {r+1}/{repeats}] cost={c:.4f}  metrics={m}")

    # Mean cost over the repetitions drives the fitness.
    mean_cost = sum(costs) / len(costs)
    fitness = fitness_from_cost(mean_cost)

    # Close the aggregated metrics.
    agg_tot = max(1, agg["TP"] + agg["TN"] + agg["FP"] + agg["FN"])
    metrics_agg = {
        "TP": agg["TP"], "TN": agg["TN"], "FP": agg["FP"], "FN": agg["FN"],
        "accuracy": (agg["TP"] + agg["TN"]) / agg_tot,
        "tmean_cycles": (agg["tmean_cycles"] / agg["tmean_count"]) if agg["tmean_count"] > 0 else None,
        "repeats": repeats,
        "mean_cost": mean_cost,
        "fitness": fitness,
    }
    return fitness, metrics_agg


# 4) Avalia a população inteira e imprime ranking
def evaluate_population_fitness_2022(
    population: List[List[float]],
    data_dir: Path,
    subset_pos: Optional[int] = 10,
    subset_neg: Optional[int] = 10,
    repeats: int = 2,
    base_seed: int = 0,
) -> List[Tuple[List[float], float, Dict[str, Any]]]:
    """
    Score every individual and rank the population.

    Individual number ``i`` (1-based) is evaluated with the seed
    ``base_seed + i * 13``.

    Returns:
        A list of ``(individual, fitness, metrics_agg)`` tuples sorted by
        descending fitness; individuals with equal fitness keep their
        original relative order.
    """
    scored = [
        (
            individual,
            *evaluate_candidate_fitness_2022(
                individual,
                data_dir,
                subset_pos=subset_pos,
                subset_neg=subset_neg,
                repeats=repeats,
                base_seed=base_seed + position * 13,
                verbose=False,
            ),
        )
        for position, individual in enumerate(population, 1)
    ]
    return sorted(scored, key=lambda entry: entry[1], reverse=True)


### Teste da Fitness Function

In [20]:
# Example: evaluate the whole initial population with repeats=2.
# Currently disabled: the code is wrapped in a string literal, so running
# the cell only echoes that string.
''' 
results = evaluate_population_fitness_2022(
    pop, DATA_DIR, subset_pos=8, subset_neg=8, repeats=2, base_seed=42
)
best_ind, best_fit, best_metrics = results[0]
print("\nMELHOR ATÉ AGORA:")
print("indivíduo:", best_ind)
print("fitness  :", round(best_fit, 4))
print("métricas :", best_metrics)
'''

' \nresults = evaluate_population_fitness_2022(\n    pop, DATA_DIR, subset_pos=8, subset_neg=8, repeats=2, base_seed=42\n)\nbest_ind, best_fit, best_metrics = results[0]\nprint("\nMELHOR ATÉ AGORA:")\nprint("indivíduo:", best_ind)\nprint("fitness  :", round(best_fit, 4))\nprint("métricas :", best_metrics)\n'

## Loop do Algoritmo Genético

In [None]:
# ==========================================
# COMPLETE GA to optimise the 2022 method
# ==========================================


# ---------- Search space ----------
# Inclusive (low, high) bounds per gene index; an individual is
# [gamma, alfa, beta, beta_diff].
BOUNDS_2022 = {
    0: (5e-6, 1e-1),   # gamma
    1: (1.01, 2),   # alfa
    2: (10, 90),       # beta (integer gene)
    3: (1.05, 2.50),   # beta_diff
}

def clamp_gene(idx: int, val: float):
    """
    Clip ``val`` to the bounds of gene ``idx`` in ``BOUNDS_2022``.

    Gene 2 (beta) is rounded first and returned as ``int``; every other gene
    is returned as ``float``.
    """
    low, high = BOUNDS_2022[idx]
    is_integer_gene = idx == 2
    candidate = round(val) if is_integer_gene else val
    bounded = min(high, max(low, candidate))
    return int(bounded) if is_integer_gene else float(bounded)

# ---------- População inicial ----------
def init_population_2022(
    pop_size=10,
    seed: int = 123,
    include_seeds: bool = True,
) -> list[list[float]]:
    """
    Build the initial GA population of ``[gamma, alfa, beta, beta_diff]`` individuals.

    Args:
        pop_size: number of individuals to return.
        seed: seed of the private RNG used for the random individuals.
        include_seeds: start the population with the hand-picked individuals
            below (as many as fit in ``pop_size``).

    Returns:
        Exactly ``pop_size`` individuals (an empty list for ``pop_size <= 0``):
        the hand-picked ones first, then individuals drawn uniformly inside
        ``BOUNDS_2022``.
    """
    rng = random.Random(seed)

    known_good = [
        [0.0040, 1.05, 40, 1.50],
        [0.0035, 1.04, 30, 1.30],
        [0.003765718335758652, 1.0252561383191456, 30, 1.05],
        [0.005419895645744238, 1.027009354023173, 26, 1.0909437959596677],
        [0.005419895645744238, 1.027009354023173, 55, 2.2822170682854335],
    ]

    pop = []
    if include_seeds:
        # Never exceed the requested size, even when it is smaller than the
        # number of hand-picked individuals.
        pop.extend(known_good[:max(0, pop_size)])

    while len(pop) < pop_size:
        gamma = rng.uniform(*BOUNDS_2022[0])
        alfa = rng.uniform(*BOUNDS_2022[1])
        beta = rng.randint(int(BOUNDS_2022[2][0]), int(BOUNDS_2022[2][1]))
        bdiff = rng.uniform(*BOUNDS_2022[3])
        pop.append([gamma, alfa, int(beta), bdiff])

    return pop

# ---------- Seleção ----------
def tournament_select(
    scored_pop: list[tuple[list[float], float, dict]],
    k: int = 3,
    rng: Optional[random.Random] = None,
) -> list[float]:
    """
    Pick one parent by tournament.

    ``min(k, len(scored_pop))`` entries are sampled without replacement and
    the individual of the entry with the highest fitness is returned (the
    earliest sampled one on a tie). The module-level ``random`` is used when
    no ``rng`` is given.
    """
    source = rng if rng else random
    entrants = source.sample(scored_pop, k=min(k, len(scored_pop)))

    champion = entrants[0]
    for challenger in entrants[1:]:
        if challenger[1] > champion[1]:
            champion = challenger
    return champion[0]

# ---------- Crossover ----------
def crossover_one_point(p1: list[float], p2: list[float], rng: Optional[random.Random]=None) -> tuple[list[float], list[float]]:
    """
    One-point crossover of two 4-gene individuals.

    A cut position in 1..3 is drawn; each child takes the genes before the
    cut from one parent and the rest from the other. Every gene of both
    children then goes through ``clamp_gene`` (which also keeps beta, index
    2, an integer).

    Raises:
        ValueError: either parent does not have exactly 4 genes.
    """
    source = rng if rng else random
    if not (len(p1) == 4 and len(p2) == 4):
        raise ValueError("Indivíduos devem ter 4 genes.")

    cut = source.randint(1, 3)
    child_a = [clamp_gene(pos, gene) for pos, gene in enumerate(p1[:cut] + p2[cut:])]
    child_b = [clamp_gene(pos, gene) for pos, gene in enumerate(p2[:cut] + p1[cut:])]
    return child_a, child_b

# ---------- Mutação ----------
def mutate_2022(ind: list[float], pm: float = 0.25, rng: Optional[random.Random]=None) -> list[float]:
    """
    Return a mutated copy of ``ind``; the input is left untouched.

    Each of the 4 genes mutates independently with probability ``pm``:
      * gamma (0): uniform noise within +/- 25% of its bound width;
      * alfa (1) and beta_diff (3): uniform noise within +/- 20% of their
        bound width;
      * beta (2): an integer step drawn from {-4..-1, 1..4}.
    The mutated value is clipped with ``clamp_gene``.
    """
    source = rng if rng else random
    mutated = ind[:]

    # Fraction of the bound width used as the maximum step of each real gene.
    width_fraction = {0: 0.25, 1: 0.20, 3: 0.20}

    for gene in range(4):
        if not source.random() < pm:
            continue
        if gene == 2:
            delta = source.choice([-4, -3, -2, -1, 1, 2, 3, 4])
        else:
            low, high = BOUNDS_2022[gene]
            span = width_fraction[gene] * (high - low)
            delta = source.uniform(-span, span)
        mutated[gene] = clamp_gene(gene, mutated[gene] + delta)
    return mutated

def _auto_seed():
    # entropia de sistema + relógio
    return int.from_bytes(os.urandom(8), "little") ^ int(time.time_ns() & 0xFFFFFFFF)

# ---------- GA Loop ----------
@dataclass
class GALogEntry:
    """One record per evaluated generation, appended to the history by ``run_ga_2022``."""
    gen: int  # generation number (run_ga_2022 counts from 1)
    best_ind: list[float]  # top-ranked individual of that generation
    best_fit: float  # fitness of best_ind
    best_metrics: dict  # aggregated metrics dict of best_ind
    mean_fit: float  # mean fitness over the evaluated population

def run_ga_2022(
    data_dir: Path,
    pop_size: int = 10,
    generations: int = 12,
    subset_pos: int = 8,
    subset_neg: int = 8,
    repeats: int = 2,            # repetitions, to reduce variance
    base_seed: int | None = 42,
    tournament_k: int = 3,
    cx_prob: float = 0.9,
    mut_prob: float = 0.25,
    elitism: int = 2,            # how many of the best pass through unchanged
    patience: int = 5,           # early stop when there is no improvement
    ind_mut_prob: float = 0.1,   # chance that a child goes through mutation at all
) -> tuple[list[float], float, dict, list[GALogEntry], list[tuple[list[float], float, dict]]]:
    """
    Genetic algorithm that tunes ``[gamma, alfa, beta, beta_diff]`` for the 2022 method.

    Per generation: evaluate and rank the population, log the best individual,
    check early stopping, copy the elites, then fill the rest of the next
    population with tournament selection + one-point crossover + mutation.

    Args:
        data_dir: dataset directory used for every evaluation.
        pop_size: target population size.
        generations: maximum number of generations.
        subset_pos / subset_neg: per-repetition caps on positive / negative files.
        repeats: evaluation repetitions per individual.
        base_seed: seed for the GA's RNG and the evaluation subsets; ``None``
            draws one from ``_auto_seed()``.
        tournament_k: tournament size.
        cx_prob: probability that a selected pair is crossed over (otherwise
            the parents are copied).
        mut_prob: per-gene mutation probability handed to ``mutate_2022``.
        elitism: number of top individuals copied unchanged; clamped to
            ``[0, pop_size]``.
        patience: stop after this many generations whose best fitness did not
            beat the best seen so far by more than 1e-6.
        ind_mut_prob: probability that a child is passed to ``mutate_2022``
            (previously a hard-coded 0.1).

    Returns:
        ``(best_ind, best_fit, best_metrics, history, final_scored)`` where the
        first three describe the top entry of ``final_scored``, the ranking
        obtained by re-evaluating the final population with the seed
        ``base_seed + 999``. ``history`` has one ``GALogEntry`` per evaluated
        generation.
    """
    if base_seed is None:
        base_seed = _auto_seed()

    rng = random.Random(base_seed)

    # 1) Initial population
    population = init_population_2022(pop_size=pop_size, seed=base_seed, include_seeds=True)

    # Keep the elite count inside [0, pop_size]: a larger value would make the
    # population grow, and a negative one would slice from the wrong end.
    n_elites = max(0, min(elitism, pop_size))

    history: list[GALogEntry] = []
    best_global = (None, -1.0, None)  # (ind, fitness, metrics)
    no_improve = 0

    for gen in range(1, generations + 1):
        # 2) Evaluate the population; `scored` is sorted by descending fitness.
        scored = evaluate_population_fitness_2022(
            population,
            data_dir,
            subset_pos=subset_pos,
            subset_neg=subset_neg,
            repeats=repeats,
            base_seed=base_seed + gen * 131,
        )
        best_ind, best_fit, best_metrics = scored[0]
        mean_fit = sum(f for _, f, _ in scored) / len(scored)
        history.append(GALogEntry(gen, best_ind, best_fit, best_metrics, mean_fit))

        # 3) Early stopping
        if best_fit > (best_global[1] + 1e-6):
            best_global = (best_ind, best_fit, best_metrics)
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                # Leave with the population that was just evaluated.
                break

        # 4) Elitism: the best individuals survive unchanged.
        new_pop: list[list[float]] = [ind for ind, _, _ in scored[:n_elites]]

        # 5) Fill the rest with selection + crossover + mutation.
        # The order of the RNG calls below defines the reproducible run.
        while len(new_pop) < pop_size:
            p1 = tournament_select(scored, k=tournament_k, rng=rng)
            p2 = tournament_select(scored, k=tournament_k, rng=rng)
            if rng.random() < cx_prob:
                c1, c2 = crossover_one_point(p1, p2, rng=rng)
            else:
                c1, c2 = p1[:], p2[:]

            if rng.random() < ind_mut_prob:
                c1 = mutate_2022(c1, pm=mut_prob, rng=rng)
            if rng.random() < ind_mut_prob:
                c2 = mutate_2022(c2, pm=mut_prob, rng=rng)

            new_pop.append(c1)
            if len(new_pop) < pop_size:
                new_pop.append(c2)

        population = new_pop

    # Re-evaluate the final population to return a final ranking.
    final_scored = evaluate_population_fitness_2022(
        population, data_dir,
        subset_pos=subset_pos, subset_neg=subset_neg,
        repeats=repeats, base_seed=base_seed + 999
    )
    best_ind, best_fit, best_metrics = final_scored[0]
    return best_ind, best_fit, best_metrics, history, final_scored


### Execução do Algoritmo Genético

In [None]:
# ===== Run the GA ===== #
# Runs the full optimisation on DATA_DIR and unpacks the best individual,
# its fitness and metrics, the per-generation history and the final ranking.
best_ind, best_fit, best_metrics, history, final_scored = run_ga_2022(
    DATA_DIR,
    pop_size=10,
    generations=50,      # upper limit; early stopping may end the run sooner
    subset_pos=10,
    subset_neg=10,
    repeats=2,          # evaluation repetitions per individual
    base_seed=2000,
    tournament_k=3,
    cx_prob=0.9,
    mut_prob=0.1,       # forwarded to mutate_2022 as the per-gene probability
    elitism=4,
    patience=4,         # generations without improvement before stopping
)

# Report the top entry of the final ranking.
print("\n=== MELHOR INDIVÍDUO ===")
print("params   :", best_ind)       # [gamma, alfa, beta, beta_diff]
print("fitness  :", round(best_fit, 4))
print("metrics  :", best_metrics)



=== MELHOR INDIVÍDUO ===
params   : [5e-06, 1.01, 30, 1.05]
fitness  : 0.504
metrics  : {'TP': 17, 'TN': 13, 'FP': 7, 'FN': 3, 'accuracy': 0.75, 'tmean_cycles': 89.58823529411765, 'repeats': 2, 'mean_cost': 0.9840742857142857, 'fitness': 0.504013386595548}
