# In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import json
from sklearn.impute import KNNImputer
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import StackingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Ridge, ElasticNet
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor
from typing import Dict, Any, List, Tuple
import warnings
warnings.filterwarnings('ignore')

# ========== PREPARAÇÃO DE DADOS ==========

def get_clean_data(df: pd.DataFrame, target_col: str = "Vazao_BBR") -> pd.DataFrame:
    """
    Return a copy of ``df`` without the rows whose target equals -1.

    -1 is the marker used for an invalid target value. The surviving rows
    are renumbered from 0 and a one-line summary of kept/dropped rows is
    printed. The input frame is left untouched.
    """
    is_valid = df[target_col].ne(-1)
    kept = df.loc[is_valid].copy()
    kept.reset_index(drop=True, inplace=True)

    n_dropped = len(df) - len(kept)
    print(f"  [DADOS LIMPOS] {len(kept)} amostras válidas (removidos {n_dropped} com -1)")
    return kept

def apply_random_mask(df: pd.DataFrame, missing_fraction: float, seed: int = None) -> pd.DataFrame:
    """
    Flag a random subset of rows as "hidden" for the baseline evaluation.

    A copy of ``df`` is returned with an extra integer column
    ``mask_applied`` (1 for the selected rows, 0 otherwise). The target
    values themselves are not modified here.

    Args:
        df: Input frame; it is not modified.
        missing_fraction: Fraction of rows to flag. At least one row is
            flagged for a non-empty frame, and never more rows than exist.
        seed: When given, the selection is reproducible and identical to
            seeding NumPy's legacy generator with this value, but the
            process-wide ``np.random`` state is left untouched. When None,
            the global ``np.random`` state is used.

    Returns:
        The copied frame with the ``mask_applied`` column added.
    """
    df_masked = df.copy()
    n_samples = len(df_masked)
    df_masked['mask_applied'] = 0

    # Clamp to [1, n_samples]: sampling without replacement cannot draw more
    # rows than exist, and an empty frame has nothing to draw from.
    n_mask = min(n_samples, max(1, int(missing_fraction * n_samples)))

    if n_mask > 0:
        # A private RandomState yields the same draw as np.random.seed(seed)
        # followed by np.random.choice, without reseeding the global RNG.
        rng = np.random if seed is None else np.random.RandomState(seed)
        mask_indices = rng.choice(df_masked.index, size=n_mask, replace=False)
        df_masked.loc[mask_indices, 'mask_applied'] = 1

    print(f"    Máscara aplicada: {n_mask}/{n_samples} amostras ({missing_fraction*100:.0f}%)")
    return df_masked

# ========== FEATURE ENGINEERING ==========

def engineer_features_for_imputation(df: pd.DataFrame, target_col: str = "Vazao_BBR") -> pd.DataFrame:
    """
    Build the feature set used by the imputation models, without target leakage.

    Rules:
    - The CURRENT value of the target is never used to build a row's
      history features: lags, differences, percentage change, rolling and
      expanding statistics are all computed from the target shifted by at
      least one row.
    - inf/NaN in the engineered numeric columns are replaced by safe values.

    Args:
        df: Frame containing ``Atraso(ms)``, ``Hop_count`` and ``target_col``;
            ``Data`` is optional and, when present, is parsed as datetime and
            expanded into hour / day-of-week / day-of-month features.
        target_col: Name of the target column; -1 in it marks an invalid value
            and is ignored when building history features.

    Returns:
        A copy of ``df`` with the engineered columns appended. The target
        column keeps its original values (including any -1 markers).
    """
    df = df.copy()

    # Basic calendar features, only when a timestamp column is available.
    if 'Data' in df.columns:
        df['Data'] = pd.to_datetime(df['Data'])
        df['hour'] = df['Data'].dt.hour
        df['day_of_week'] = df['Data'].dt.dayofweek
        df['day_of_month'] = df['Data'].dt.day

    # Features derived from the non-target columns.
    delay = df["Atraso(ms)"]
    hops = df["Hop_count"]
    df["Atraso_log"] = np.log1p(delay.clip(lower=0))
    df["Hop_inv"] = 1 / (hops + 1)
    df["Atraso_x_Hop"] = delay * hops
    df["Atraso_sq"] = delay ** 2
    df["Hop_sq"] = hops ** 2

    if 'hour' in df.columns:
        df["Atraso_x_hour"] = delay * df["hour"]
        df["Hop_x_hour"] = hops * df["hour"]
        df["hour_sin"] = np.sin(2 * np.pi * df["hour"] / 24)
        df["hour_cos"] = np.cos(2 * np.pi * df["hour"] / 24)

    # Target history: invalid (-1) entries become NaN so they do not pollute
    # the statistics. This works on a separate Series; df[target_col] itself
    # is never modified.
    target_series = df[target_col].where(df[target_col] != -1)
    past = target_series.shift(1)  # value of the previous row
    lag2 = target_series.shift(2)

    # Lags
    for lag in [1, 2, 3, 6]:
        df[f"Vazao_lag{lag}"] = target_series.shift(lag)

    # Differences and percentage change of the PAST series. Computing these
    # on the unshifted target would expose the current value
    # (lag1 + diff1 == current target).
    df["Vazao_diff1"] = past.diff(1)
    df["Vazao_diff2"] = past.diff(2)
    df["Vazao_pct_change"] = past / lag2 - 1

    # Rolling statistics over the previous rows only.
    for w in [3, 6]:
        window = past.rolling(window=w, min_periods=w)
        df[f"Vazao_roll_mean_{w}"] = window.mean()
        df[f"Vazao_roll_std_{w}"] = window.std()
    window6 = past.rolling(window=6, min_periods=6)
    df["Vazao_roll_max_6"] = window6.max()
    df["Vazao_roll_min_6"] = window6.min()

    # Ratios between the previous target value and the network columns.
    df["Vazao_lag1_div_Atraso"] = past / (delay + 1)
    df["Vazao_lag1_div_Hops"] = past / (hops + 1)
    df["Efficiency_lag1"] = past / ((delay + 1) * (hops + 1))

    # Safe transforms (clipped at 0 so log/sqrt are defined).
    df["Vazao_lag1_log"] = np.log1p(past.clip(lower=0))
    df["Vazao_lag1_sqrt"] = np.sqrt(past.clip(lower=0))

    # Expanding-window statistics of the previous rows.
    df["Vazao_expanding_mean"] = past.expanding(min_periods=1).mean()
    df["Vazao_expanding_std"] = past.expanding(min_periods=3).std()

    # Frame-wide target statistics (constant columns).
    # NOTE(review): these are computed over every valid target in the frame,
    # the current row included, so they are not strictly "past only" — confirm
    # this is intended before relying on the no-leakage rule for them.
    df['Feature_Vazao_bbr_median'] = target_series.median()
    df['Feature_Vazao_bbr_mean'] = target_series.mean()

    # Final clean-up: infinities become NaN, then NaN is filled per column.
    df = df.replace([np.inf, -np.inf], np.nan)

    history_markers = ['lag', 'roll', 'diff', 'pct', 'expanding']
    for col in df.select_dtypes(include=[np.number]).columns:
        if col == target_col or not df[col].isna().any():
            continue
        # History features have no value at the start of the series: use 0.
        # Any other numeric column falls back to its own median.
        is_history = any(marker in col for marker in history_markers)
        fill_value = 0 if is_history else df[col].median()
        # Plain assignment instead of chained inplace fillna, which does not
        # update the frame under pandas Copy-on-Write.
        df[col] = df[col].fillna(fill_value)

    return df


# ========== CÁLCULO DE MÉTRICAS ==========

def calculate_metrics(y_true: np.ndarray, y_pred: np.ndarray, prediction_time: float = None) -> Dict[str, Any]:
    """
    Compute regression metrics robustly.

    Pairs in which either value is NaN or infinite are discarded first
    (``None`` entries are treated as NaN). When fewer than two valid pairs
    remain, or when the computation fails, every metric is returned as None.

    Args:
        y_true: Reference values.
        y_pred: Predicted values, same length as ``y_true``.
        prediction_time: Optional total prediction time in seconds.

    Returns:
        Dict with:
        - ``rmse``: RMSE divided by 1,000,000, rounded to 2 decimals.
        - ``nrmse``: RMSE as a percentage of the mean of ``y_true``.
        - ``r2``: coefficient of determination, or None when not finite.
        - ``mape``: mean absolute percentage error (in %).
        - ``prediction_time_per_sample``: milliseconds per valid sample,
          or None when ``prediction_time`` is not given.
    """
    empty = {"rmse": None, "nrmse": None, "r2": None, "mape": None, "prediction_time_per_sample": None}

    # Coerce to float so object arrays / None entries become NaN instead of
    # making the finiteness check raise.
    y_true = np.asarray(y_true, dtype=float).flatten()
    y_pred = np.asarray(y_pred, dtype=float).flatten()

    # Keep only pairs where both values are finite.
    mask = np.isfinite(y_true) & np.isfinite(y_pred)

    if mask.sum() < 2:
        return empty

    y_true = y_true[mask]
    y_pred = y_pred[mask]

    try:
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        # The small epsilon avoids a division by zero when the mean is 0.
        nrmse = (rmse / (np.mean(y_true) + 1e-8)) * 100
        rmse_normalized = rmse / 1_000_000  # reported in millions

        r2 = r2_score(y_true, y_pred)
        r2 = r2 if np.isfinite(r2) else None

        mape = np.mean(np.abs((y_true - y_pred) / (np.abs(y_true) + 1e-8))) * 100

        # Time per sample, in milliseconds.
        time_per_sample = None
        if prediction_time is not None and len(y_true) > 0:
            time_per_sample = round((prediction_time / len(y_true)) * 1000, 4)

        return {
            "rmse": round(rmse_normalized, 2),
            "nrmse": round(nrmse, 2),
            "r2": r2,
            "mape": round(mape, 2),
            "prediction_time_per_sample": time_per_sample
        }
    except Exception:
        # Best effort: a failed metric computation yields "no metrics"
        # rather than aborting the evaluation. KeyboardInterrupt and
        # SystemExit are deliberately not caught.
        return empty

# ========== AVALIAÇÃO BASELINES ==========

def evaluate_baselines(df_clean: pd.DataFrame, missing_fraction: float, target_col: str = "Vazao_BBR") -> Dict:
    """
    Evaluate the baseline imputation methods on artificially hidden values.

    Steps:
    1. Flag a random subset of rows (fixed seed 42) and remember their
       original target values.
    2. Replace the flagged target values with NaN.
    3. Fill them with each baseline method.
    4. Score each method against the remembered values.

    Args:
        df_clean: Frame without invalid target values.
        missing_fraction: Fraction of rows to hide.
        target_col: Name of the target column.

    Returns:
        Dict mapping the method name (``Mean``, ``Median``, ``KNNImputer``,
        ``ForwardFill``, ``BackwardFill``, ``RollingMean``,
        ``LinearInterpolation``) to the dict returned by
        ``calculate_metrics``. A method that raises is reported and gets a
        dict with every metric set to None.
    """
    print(f"  [BASELINE] Avaliando fração {missing_fraction:.0%}")

    # Choose which rows to hide and keep their true values for scoring.
    df_masked = apply_random_mask(df_clean, missing_fraction, seed=42)
    mask_indices = df_masked.index[df_masked['mask_applied'] == 1]
    y_true = df_masked.loc[mask_indices, target_col].values

    # Target with the hidden values replaced by NaN. Cast to float first so
    # NaN can be stored even when the column was read as integers.
    observed = df_masked[target_col].astype(float)
    observed.loc[mask_indices] = np.nan

    def _knn_fill(series: pd.Series) -> pd.Series:
        # KNNImputer is fitted on the target column alone.
        imputer = KNNImputer(n_neighbors=min(5, len(df_clean) // 2))
        filled = imputer.fit_transform(series.to_frame()).ravel()
        return pd.Series(filled, index=series.index)

    # Each entry maps the NaN-holed target to a fully filled target.
    imputers = {
        'Mean': lambda s: s.fillna(s.mean()),
        'Median': lambda s: s.fillna(s.median()),
        'KNNImputer': _knn_fill,
        'ForwardFill': lambda s: s.ffill().bfill(),
        'BackwardFill': lambda s: s.bfill().ffill(),
        'RollingMean': lambda s: s.rolling(window=3, min_periods=1).mean().ffill().bfill(),
        'LinearInterpolation': lambda s: s.interpolate(method='linear').ffill().bfill(),
    }

    # Same keys as calculate_metrics returns, so every entry has one shape.
    failed = {"rmse": None, "nrmse": None, "r2": None, "mape": None, "prediction_time_per_sample": None}

    results = {}
    for name, impute in imputers.items():
        try:
            y_pred = impute(observed).loc[mask_indices].values
            results[name] = calculate_metrics(y_true, y_pred)
        except Exception as e:
            # One failing baseline must not abort the others, but say why.
            print(f"    ⚠️ {name} falhou: {e}")
            results[name] = dict(failed)

    return results

# ========== TREINAMENTO DE MODELOS STACKING ==========

def train_stacking_models(X_train, y_train, X_test, y_test):
    """
    Fit the stacking ensemble on standardized training data.

    Features and target are standardized with separate ``StandardScaler``
    instances. The ensemble stacks XGBoost, random forest, gradient boosting
    and a distance-weighted KNN regressor under a Ridge meta-model.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values (array-like).
        X_test: Unused; kept so existing callers keep working.
        y_test: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(stacking, scaler_X, scaler_y)``: the fitted ensemble and the
        fitted scalers. Predictions of ``stacking`` are in the scaled target
        space and must be passed through ``scaler_y.inverse_transform``.
    """
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    # np.asarray lets a pandas Series (which has no reshape) be passed too.
    y_train_scaled = scaler_y.fit_transform(
        np.asarray(y_train, dtype=float).reshape(-1, 1)
    ).ravel()

    n_train = len(X_train_scaled)

    base_models = [
        ('xgb', XGBRegressor(n_estimators=100, max_depth=3, learning_rate=0.1,
                             random_state=42, verbosity=0)),
        ('rf', RandomForestRegressor(n_estimators=100, max_depth=10,
                                      random_state=42, n_jobs=-1)),
        ('gb', GradientBoostingRegressor(n_estimators=100, max_depth=3,
                                         learning_rate=0.1, random_state=42)),
        # At least one neighbour, even for very small training sets.
        ('knn', KNeighborsRegressor(n_neighbors=max(1, min(5, n_train // 3)),
                                    weights='distance')),
    ]

    stacking = StackingRegressor(
        estimators=base_models,
        final_estimator=Ridge(alpha=1.0),
        # Cross-validation needs at least 2 folds; n_train // 10 is 0 or 1
        # for fewer than 20 samples.
        cv=max(2, min(3, n_train // 10)),
        n_jobs=-1
    )

    stacking.fit(X_train_scaled, y_train_scaled)

    return stacking, scaler_X, scaler_y

# ========== AVALIAÇÃO STACKING COM CROSS-VALIDATION ==========

def evaluate_stacking_cv(df_clean: pd.DataFrame, test_fraction: float, target_col: str = "Vazao_BBR") -> Dict:
    """
    Evaluate the stacking ensemble with temporal cross-validation.

    ``TimeSeriesSplit`` provides the folds; each test fold is trimmed to its
    last ``test_fraction * len(df_clean)`` rows. Features are engineered
    separately for the train and test part of every fold so that no
    statistic is shared between them.

    Args:
        df_clean: Frame without invalid target values, in temporal order.
        test_fraction: Desired test size as a fraction of ``df_clean``.
        target_col: Name of the target column.

    Returns:
        Dict with the metrics averaged over the folds that produced a
        result (``rmse``, ``nrmse``, ``r2``, ``mape``,
        ``prediction_time_per_sample``), plus ``total_prediction_time``
        (mean prediction time per fold, in seconds) and
        ``total_samples_predicted`` (test rows predicted in those folds).
        When no fold produced a result, every metric is None.
    """
    import time

    def _fmt(value, spec):
        # A metric may legitimately be None (e.g. a non-finite R²).
        return "N/A" if value is None else format(value, spec)

    empty_result = {"rmse": None, "nrmse": None, "r2": None, "mape": None, "prediction_time_per_sample": None}

    print(f"  [STACKING] Avaliando com test_fraction={test_fraction:.0%}")

    if test_fraction <= 0:
        # 1 / test_fraction below would divide by zero or go negative.
        print(f"    ❌ Nenhuma métrica válida calculada")
        return empty_result

    # Temporal cross-validation set-up.
    n_splits = max(3, min(5, int(1 / test_fraction)))
    tscv = TimeSeriesSplit(n_splits=n_splits)

    # At least one row: a size of 0 would make test_idx[-0:] keep the whole
    # fold instead of trimming it.
    desired_test_size = max(1, int(len(df_clean) * test_fraction))

    all_metrics = []
    all_prediction_times = []
    samples_predicted = 0
    exclude_cols = {target_col, 'Data', 'mask_applied'}
    print(f"    Cross-validation com {n_splits} splits")

    for fold, (train_idx, test_idx) in enumerate(tscv.split(df_clean)):
        # Trim the test fold to the requested size (keep its last rows).
        if len(test_idx) > desired_test_size:
            test_idx = test_idx[-desired_test_size:]

        # Feature engineering is done per fold and per split side.
        df_train = engineer_features_for_imputation(df_clean.iloc[train_idx], target_col)
        df_test = engineer_features_for_imputation(df_clean.iloc[test_idx], target_col)

        # Numeric columns only: a text column would make the scaler fail.
        feature_cols = [
            c for c in df_train.columns
            if c not in exclude_cols and pd.api.types.is_numeric_dtype(df_train[c])
        ]

        df_train = df_train.dropna(subset=feature_cols + [target_col])
        df_test = df_test.dropna(subset=feature_cols + [target_col])

        if df_train.empty or df_test.empty:
            print(f"      ⚠️ Fold {fold+1} ignorado (dados insuficientes após limpeza).")
            continue

        # No NaN is left in these columns after the dropna above.
        X_train, y_train = df_train[feature_cols].values, df_train[target_col].values
        X_test, y_test = df_test[feature_cols].values, df_test[target_col].values

        print(f"      Fold {fold+1}: treino={len(X_train)}, teste={len(X_test)} ({len(X_test)/len(df_clean)*100:.1f}%)")

        try:
            # Scaling
            scaler_X = StandardScaler()
            scaler_y = StandardScaler()

            X_train_scaled = scaler_X.fit_transform(X_train)
            X_test_scaled = scaler_X.transform(X_test)
            y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()

            # Base models
            base_models = [
                ('xgb', XGBRegressor(
                    n_estimators=100, max_depth=3, learning_rate=0.1,
                    random_state=42, verbosity=0
                )),
                ('rf', RandomForestRegressor(
                    n_estimators=100, max_depth=10,
                    random_state=42, n_jobs=-1
                )),
                ('gb', GradientBoostingRegressor(
                    n_estimators=100, max_depth=3,
                    learning_rate=0.1, random_state=42
                )),
                ('knn', KNeighborsRegressor(
                    # At least one neighbour for very small folds.
                    n_neighbors=max(1, min(5, len(X_train) // 3)), weights='distance'
                )),
            ]

            stacking = StackingRegressor(
                estimators=base_models,
                final_estimator=Ridge(alpha=1.0),
                # Cross-validation needs at least 2 folds.
                cv=max(2, min(3, len(X_train) // 10)),
                n_jobs=-1
            )

            stacking.fit(X_train_scaled, y_train_scaled)

            # Time only the prediction step.
            start_time = time.perf_counter()
            y_pred_scaled = stacking.predict(X_test_scaled)
            prediction_time = time.perf_counter() - start_time

            y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

            metrics = calculate_metrics(y_test, y_pred, prediction_time)

            if metrics['rmse'] is not None:
                all_metrics.append(metrics)
                all_prediction_times.append(prediction_time)
                samples_predicted += len(X_test)
                print(f"        ✅ RMSE: {_fmt(metrics['rmse'], '.2f')}M, R²: {_fmt(metrics['r2'], '.4f')}, "
                      f"Tempo: {_fmt(metrics['prediction_time_per_sample'], '.4f')}ms/amostra")

        except Exception as e:
            print(f"        ⚠️ Erro no fold {fold+1}: {e}")
            continue

    if not all_metrics:
        print(f"    ❌ Nenhuma métrica válida calculada")
        return empty_result

    # Average each metric over the folds that reported it.
    avg_metrics = {}
    for metric in ['rmse', 'nrmse', 'r2', 'mape', 'prediction_time_per_sample']:
        values = [m[metric] for m in all_metrics if m[metric] is not None]
        avg_metrics[metric] = round(np.mean(values), 4) if values else None

    # Timing totals, counted over the folds that were actually scored.
    avg_metrics['total_prediction_time'] = round(np.mean(all_prediction_times), 4)  # seconds
    avg_metrics['total_samples_predicted'] = samples_predicted

    print(f"    📊 Média - RMSE: {_fmt(avg_metrics['rmse'], '.2f')}M, R²: {_fmt(avg_metrics['r2'], '.4f')}, "
          f"Tempo: {_fmt(avg_metrics['prediction_time_per_sample'], '.4f')}ms/amostra")
    return avg_metrics


# ========== PIPELINE COMPLETO DE AVALIAÇÃO ==========

def evaluate_file(file_path: Path, missing_fractions: List[float]) -> Dict:
    """
    Run the full evaluation pipeline on one CSV file.

    The file is loaded, rows with an invalid target (-1) are dropped and,
    for every fraction in ``missing_fractions``, the baseline methods and
    the stacking ensemble are evaluated.

    Returns a dict with ``source`` (file stem without the ``_merged`` /
    ``_largest_subseries`` markers), ``results`` (keyed by ``str(fraction)``)
    and ``n_samples``; returns None when fewer than 20 valid rows remain.
    """
    print(f"\n{'='*80}")
    print(f"[AVALIANDO] {file_path.name}")
    print(f"{'='*80}")

    target_col = "Vazao_BBR"
    raw = pd.read_csv(file_path)

    # Strip the processing markers from the file stem to get the source name.
    source = file_path.stem
    for marker in ("_merged", "_largest_subseries"):
        source = source.replace(marker, "")

    # Only the -1 rows are removed.
    df_clean = get_clean_data(raw, target_col)
    n_valid = len(df_clean)

    if n_valid < 20:
        print(f"  ⚠️ Dados insuficientes: apenas {n_valid} amostras")
        return None

    per_fraction = {}
    for frac in missing_fractions:
        print(f"\n  {'─'*60}")
        print(f"  FRAÇÃO DE MISSING: {frac:.0%}")
        print(f"  {'─'*60}")

        # Baselines first (random mask + imputation), then stacking (temporal CV).
        baseline_results = evaluate_baselines(df_clean, frac, target_col)
        stacking_results = evaluate_stacking_cv(df_clean, frac, target_col)

        per_fraction[str(frac)] = {
            "baseline": baseline_results,
            "stacking": {"mean": {"StackingRegressor": stacking_results}},
        }

    return {"source": source, "results": per_fraction, "n_samples": n_valid}

# ========== ANÁLISE DE DESEMPENHO ==========

def analyze_stacking_performance(results: Dict) -> Dict[str, Any]:
    """
    Decide whether stacking beat the baselines across the evaluated fractions.

    For every fraction that has a stacking RMSE and at least one baseline
    RMSE, the stacking RMSE is compared with the best (lowest) baseline RMSE.

    Args:
        results: Mapping ``fraction -> {"baseline": {method: metrics},
            "stacking": {"mean": {"StackingRegressor": metrics}}}``.

    Returns:
        Dict with ``should_impute`` (stacking won at least half of the
        comparisons), ``win_rate``, ``avg_improvement`` (relative RMSE gain
        over the best baseline, in %), ``total_comparisons``,
        ``stacking_wins``, ``avg_stacking_rmse``, ``avg_baseline_rmse`` and
        ``avg_prediction_time_per_sample`` (ms, or None). The same keys are
        present when nothing could be compared; in that case the two average
        RMSE values are NaN.
    """
    stacking_wins = 0
    stacking_rmse_list = []
    best_baseline_rmse_list = []
    prediction_times = []

    for data in results.values():
        baseline_data = data.get("baseline", {})
        stacking_data = data.get("stacking", {}).get("mean", {}).get("StackingRegressor", {})

        if not baseline_data or not stacking_data:
            continue

        stacking_rmse = stacking_data.get("rmse")
        if stacking_rmse is None:
            continue

        # Prediction time is collected whenever stacking produced an RMSE.
        pred_time = stacking_data.get("prediction_time_per_sample")
        if pred_time is not None:
            prediction_times.append(pred_time)

        # Best baseline = lowest RMSE among the methods that reported one.
        baseline_rmses = [
            metrics["rmse"] for metrics in baseline_data.values()
            if metrics.get("rmse") is not None
        ]
        if not baseline_rmses:
            continue

        best_baseline_rmse = min(baseline_rmses)
        stacking_rmse_list.append(stacking_rmse)
        best_baseline_rmse_list.append(best_baseline_rmse)

        if stacking_rmse < best_baseline_rmse:
            stacking_wins += 1

    total_comparisons = len(stacking_rmse_list)

    if total_comparisons == 0:
        # Same keys as the regular result, so callers that report
        # stacking_wins / average RMSE do not fail with KeyError.
        return {
            "should_impute": False,
            "win_rate": 0.0,
            "avg_improvement": 0.0,
            "total_comparisons": 0,
            "stacking_wins": 0,
            "avg_stacking_rmse": float("nan"),
            "avg_baseline_rmse": float("nan"),
            "avg_prediction_time_per_sample": None
        }

    win_rate = stacking_wins / total_comparisons
    avg_stacking_rmse = float(np.mean(stacking_rmse_list))
    avg_best_baseline_rmse = float(np.mean(best_baseline_rmse_list))

    # RMSE values may be exactly 0; a relative improvement over a zero
    # baseline is undefined, so report 0 instead of inf/NaN.
    if avg_best_baseline_rmse == 0:
        avg_improvement = 0.0
    else:
        avg_improvement = ((avg_best_baseline_rmse - avg_stacking_rmse) / avg_best_baseline_rmse) * 100

    avg_pred_time = round(np.mean(prediction_times), 4) if prediction_times else None

    return {
        "should_impute": win_rate >= 0.5,  # stacking better in >= 50% of the cases
        "win_rate": win_rate,
        "avg_improvement": avg_improvement,
        "total_comparisons": total_comparisons,
        "stacking_wins": stacking_wins,
        "avg_stacking_rmse": avg_stacking_rmse,
        "avg_baseline_rmse": avg_best_baseline_rmse,
        "avg_prediction_time_per_sample": avg_pred_time  # ms per sample
    }

# ========== IMPUTAÇÃO DE DADOS ==========

def impute_with_baselines(df: pd.DataFrame, target_col: str, output_dir: Path, source: str):
    """
    Fill the -1 target values with each baseline method and save one CSV each.

    For every method a file ``{source}_baseline_{method}.csv`` is written to
    ``output_dir`` with the columns ``Data``, ``Atraso(ms)``, ``Hop_count``,
    ``Bottleneck``, the target and ``is_imputed`` (1 for rows that were -1).
    Methods: mean, median, KNN, forward fill (then backward fill for a
    leading gap) and backward fill (then forward fill for a trailing gap).

    A method that fails is reported and skipped; the others still run.
    Nothing is written when there is no -1 value. ``df`` is not modified.
    """
    mask_missing = df[target_col] == -1
    n_missing = mask_missing.sum()

    if n_missing == 0:
        return

    print(f"    [BASELINES] Imputando {n_missing} valores...")

    # Target as float with the -1 markers replaced by NaN. Casting up front
    # lets float fill values be stored even if the column was read as int.
    observed = df[target_col].astype(float).mask(mask_missing)
    n_not_missing = int((~mask_missing).sum())
    is_imputed = mask_missing.astype(int)

    cols_to_save = ["Data", "Atraso(ms)", "Hop_count", "Bottleneck", target_col, "is_imputed"]

    def _knn_fill(series: pd.Series) -> pd.Series:
        # KNNImputer is fitted on the target column alone.
        imputer = KNNImputer(n_neighbors=min(5, n_not_missing // 2))
        filled = imputer.fit_transform(series.to_frame()).ravel()
        return pd.Series(filled, index=series.index)

    # (label shown in the log, file-name suffix, filler)
    strategies = [
        # Mean/median replace only the -1 rows.
        ("Mean", "mean", lambda s: s.where(~mask_missing, s.mean())),
        ("Median", "median", lambda s: s.where(~mask_missing, s.median())),
        ("KNN", "knn", _knn_fill),
        ("ForwardFill", "ffill", lambda s: s.ffill().bfill()),
        ("BackwardFill", "bfill", lambda s: s.bfill().ffill()),
    ]

    for label, suffix, fill in strategies:
        out_path = output_dir / f"{source}_baseline_{suffix}.csv"
        try:
            result = df.copy()
            result['is_imputed'] = is_imputed
            result[target_col] = fill(observed)
            result[cols_to_save].to_csv(out_path, index=False)
            print(f"      ✓ {label}: {out_path}")
        except Exception as e:
            print(f"      ✗ {label} falhou: {e}")

def impute_with_stacking(df: pd.DataFrame, target_col: str, output_dir: Path, source: str):
    """
    Fill the -1 target values with a stacking regressor and save the result.

    The ensemble is trained on the rows whose target is valid, using the
    features from ``engineer_features_for_imputation``. Missing rows are then
    predicted one at a time, in row order, so that each prediction can use
    the values already imputed before it. The output is written to
    ``{source}_stacking.csv`` in ``output_dir`` with an ``is_imputed`` flag.

    Nothing is done when there is no -1 value or when fewer than 10 valid
    rows are available. Any failure is reported (with traceback) instead of
    being raised. ``df`` is not modified.
    """
    mask_missing = df[target_col] == -1
    n_missing = mask_missing.sum()

    if n_missing == 0:
        return

    print(f"    [STACKING] Imputando {n_missing} valores...")

    try:
        # Rows with a valid target are the training set.
        df_clean = df[df[target_col] != -1].copy()

        if len(df_clean) < 10:
            print(f"      ✗ Dados insuficientes para treinar stacking")
            return

        df_clean_feat = engineer_features_for_imputation(df_clean, target_col)

        # Numeric features only, without the target and bookkeeping columns.
        exclude_cols = {target_col, 'Data', 'mask_applied'}
        feature_cols = [c for c in df_clean_feat.columns
                       if c not in exclude_cols and pd.api.types.is_numeric_dtype(df_clean_feat[c])]

        X_train = df_clean_feat[feature_cols].fillna(0).values
        y_train = df_clean_feat[target_col].values

        print(f"      Treinando com {len(X_train)} amostras e {len(feature_cols)} features")

        scaler_X = StandardScaler()
        scaler_y = StandardScaler()

        X_train_scaled = scaler_X.fit_transform(X_train)
        y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()

        base_models = [
            ('xgb', XGBRegressor(n_estimators=150, max_depth=4, learning_rate=0.05,
                                random_state=42, verbosity=0, subsample=0.8)),
            ('rf', RandomForestRegressor(n_estimators=150, max_depth=15,
                                         random_state=42, n_jobs=-1, min_samples_split=3)),
            ('gb', GradientBoostingRegressor(n_estimators=150, max_depth=4,
                                            learning_rate=0.05, random_state=42, subsample=0.8)),
            ('knn', KNeighborsRegressor(n_neighbors=min(7, len(X_train) // 4),
                                       weights='distance', p=1)),
        ]

        stacking = StackingRegressor(
            estimators=base_models,
            final_estimator=Ridge(alpha=10.0),
            # Cross-validation needs at least 2 folds; len // 10 is 1 for
            # the 10-19 training rows that are allowed through above.
            cv=max(2, min(5, len(X_train) // 10)),
            n_jobs=-1,
            passthrough=True  # the meta-model also sees the original features
        )

        print(f"      Treinando ensemble...")
        stacking.fit(X_train_scaled, y_train_scaled)

        # Row-by-row imputation so later rows can use earlier imputed values.
        df_imputed = df.copy()
        # Float column so predictions can be stored even if it was read as int.
        df_imputed[target_col] = df_imputed[target_col].astype(float)
        df_imputed['is_imputed'] = 0

        # Work with row/column POSITIONS: the frame's index labels are not
        # guaranteed to be 0..n-1, and the prefix slice below is positional.
        target_pos = df_imputed.columns.get_loc(target_col)
        flag_pos = df_imputed.columns.get_loc('is_imputed')
        missing_positions = np.flatnonzero(mask_missing.to_numpy())
        fallback_median = df_clean[target_col].median()
        imputed_values = []

        print(f"      Imputando {len(missing_positions)} valores...")

        for pos in missing_positions:
            # Everything up to and including the row being imputed.
            df_temp = df_imputed.iloc[:pos + 1].copy()

            # The row still holds -1: put a temporary value in its place
            # (median of the valid values seen so far, or of the whole
            # training set when there is none yet).
            if df_temp.iloc[-1, target_pos] == -1:
                valid_values = df_temp.loc[df_temp[target_col] != -1, target_col]
                if len(valid_values) > 0:
                    df_temp.iloc[-1, target_pos] = valid_values.median()
                else:
                    df_temp.iloc[-1, target_pos] = fallback_median

            df_temp_feat = engineer_features_for_imputation(df_temp, target_col)

            # Features of the last row only.
            X_pred = df_temp_feat.iloc[-1:][feature_cols].fillna(0).values
            X_pred_scaled = scaler_X.transform(X_pred)

            y_pred_scaled = stacking.predict(X_pred_scaled)
            y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()[0]

            df_imputed.iloc[pos, target_pos] = y_pred
            df_imputed.iloc[pos, flag_pos] = 1
            imputed_values.append(y_pred)

        cols_to_save = ["Data", "Atraso(ms)", "Hop_count", "Bottleneck", target_col, "is_imputed"]
        output_file = output_dir / f"{source}_stacking.csv"
        df_imputed[cols_to_save].to_csv(output_file, index=False)

        # Summary of the imputed values (reported in millions).
        imputed_array = np.array(imputed_values)
        print(f"      ✓ Stacking: {output_file}")
        print(f"      ✓ Valores imputados: {n_missing}")
        print(f"      ✓ Média: {np.mean(imputed_array)/1e6:.2f}M")
        print(f"      ✓ Desvio: {np.std(imputed_array)/1e6:.2f}M")
        print(f"      ✓ Min: {np.min(imputed_array)/1e6:.2f}M, Max: {np.max(imputed_array)/1e6:.2f}M")
        print(f"      ✓ Valores únicos: {len(np.unique(imputed_array))}/{n_missing}")

    except Exception as e:
        print(f"      ✗ Stacking falhou: {e}")
        import traceback
        traceback.print_exc()

def intelligent_imputation(file_path: Path, results: Dict, output_dir: Path) -> None:
    """
    Impute the missing target values of one file, guided by the evaluation.

    The evaluation results are summarised with ``analyze_stacking_performance``.
    Baseline imputation is always run when the file has missing values; the
    stacking imputation is run in addition only when the analysis sets
    ``should_impute`` to a truthy value.

    Args:
        file_path: CSV file to load; rows whose ``Vazao_BBR`` equals -1 are
            treated as missing.
        results: Evaluation output for the file. Must contain the keys
            ``"source"`` and ``"results"``.
        output_dir: Directory passed to the imputation routines; created if
            it does not exist.

    Returns:
        None. The function prints a report and delegates file writing to
        ``impute_with_baselines`` / ``impute_with_stacking``.
    """
    source = results["source"]
    analysis = analyze_stacking_performance(results["results"])

    print(f"\n{'='*80}")
    print(f"[ANÁLISE DE DESEMPENHO] {source}")
    print(f"{'='*80}")

    # The analysis dict is not guaranteed to carry the summary metrics
    # (indexing it directly has raised KeyError: 'stacking_wins' at runtime).
    # Only print the metric report when every value is present; otherwise
    # continue so that at least the baselines are still produced.
    metric_keys = (
        'stacking_wins',
        'total_comparisons',
        'win_rate',
        'avg_stacking_rmse',
        'avg_baseline_rmse',
        'avg_improvement',
    )
    if all(analysis.get(key) is not None for key in metric_keys):
        print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
        print(f"  RMSE Médio Stacking: {analysis['avg_stacking_rmse']:.2f}M")
        print(f"  RMSE Médio Melhor Baseline: {analysis['avg_baseline_rmse']:.2f}M")
        print(f"  Melhoria Média: {analysis['avg_improvement']:.2f}%")
    else:
        print("  ⚠️  Métricas de comparação indisponíveis na análise")

    # Prediction time is optional in the analysis output.
    if analysis.get('avg_prediction_time_per_sample') is not None:
        print(f"  ⏱️  Tempo Médio de Predição: {analysis['avg_prediction_time_per_sample']:.4f}ms/amostra")

    # Without an explicit positive verdict, fall back to baselines only.
    use_stacking = bool(analysis.get('should_impute', False))

    # Load the original (uncleaned) data: -1 marks the values to impute.
    df = pd.read_csv(file_path)
    target_col = "Vazao_BBR"

    n_missing = (df[target_col] == -1).sum()

    if n_missing == 0:
        print("  ℹ️  Nenhum valor faltante para imputar")
        return

    print(f"  📊 {n_missing} valores faltantes encontrados")

    output_dir.mkdir(parents=True, exist_ok=True)

    if use_stacking:
        print("  ✅ DECISÃO: Stacking melhor - IMPUTANDO COM TODOS OS MÉTODOS")
    else:
        print("  ❌ DECISÃO: Stacking NÃO foi melhor - APENAS BASELINES")
    print(f"\n  {'─'*60}")

    # Baselines are produced in both cases; stacking is added on top.
    impute_with_baselines(df, target_col, output_dir, source)
    if use_stacking:
        impute_with_stacking(df, target_col, output_dir, source)

    print(f"  {'─'*60}")
    if use_stacking:
        print(f"  ✅ Imputação concluída para {source}")
    else:
        print(f"  ⚠️  Stacking não aplicado para {source}")

# ========== EXECUÇÃO PRINCIPAL ==========

def main():
    """
    Run the evaluation and the evaluation-driven imputation on every CSV file.

    For each ``*_merged.csv`` under the input directory the function:
      1. evaluates the file with ``evaluate_file`` over a fixed list of
         missing fractions and rewrites the cumulative metrics JSON;
      2. calls ``intelligent_imputation`` to write the imputed data;
      3. updates the run counters (processed / stacking used / baseline only /
         failed).

    An exception while handling one file is printed with its traceback and
    counted as a failure; the loop then continues with the next file. A final
    report is printed and the counters are saved to ``imputation_summary.json``.
    """
    import traceback  # local import: used only for per-file error reporting

    # Configuration
    data_path = Path("../../datasets/multivariada-post-process")
    results_path = Path("../../results")
    imputed_path = Path("../../datasets/multivariada-imputed")

    results_path.mkdir(exist_ok=True, parents=True)
    imputed_path.mkdir(exist_ok=True, parents=True)

    # glob() gives no ordering guarantee; sort so the [i/N] progress and the
    # partially written results are reproducible between runs.
    csv_files = sorted(data_path.glob("*_merged.csv"))
    missing_fractions = [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]

    # Cumulative evaluation metrics, rewritten after every evaluated file.
    evaluation_file = results_path / "metrics_summary_complete.json"

    print(f"\n{'='*80}")
    print("PIPELINE COMPLETO: AVALIAÇÃO + IMPUTAÇÃO INTELIGENTE")
    print(f"{'='*80}")
    print(f"Arquivos encontrados: {len(csv_files)}")
    print(f"Frações de missing: {missing_fractions}")
    print(f"Pasta de resultados: {results_path}")
    print(f"Pasta de dados imputados: {imputed_path}")
    print(f"{'='*80}\n")

    all_results = {}
    summary = {
        "total_files": len(csv_files),
        "processed": 0,
        "stacking_used": 0,
        "baseline_only": 0,
        "failed": 0
    }

    for i, file_path in enumerate(csv_files, 1):
        print(f"\n{'#'*80}")
        print(f"[{i}/{len(csv_files)}] Processando: {file_path.name}")
        print(f"{'#'*80}")

        try:
            # PHASE 1: EVALUATION
            print(f"\n{'='*80}")
            print("FASE 1: AVALIAÇÃO")
            print(f"{'='*80}")

            result = evaluate_file(file_path, missing_fractions)

            # evaluate_file signals an unusable file with None; it is counted
            # under "failed" together with files that raised.
            if result is None:
                print("  ⚠️ Arquivo ignorado (dados insuficientes)")
                summary["failed"] += 1
                continue

            source = result["source"]
            all_results[source] = result["results"]

            # Persist partial results so an interrupted run keeps its metrics.
            with open(evaluation_file, 'w', encoding='utf-8') as f:
                json.dump(all_results, f, indent=4)

            print(f"\n  💾 Avaliação salva em: {evaluation_file}")

            # PHASE 2: EVALUATION-DRIVEN IMPUTATION
            print(f"\n{'='*80}")
            print("FASE 2: IMPUTAÇÃO INTELIGENTE")
            print(f"{'='*80}")

            intelligent_imputation(file_path, result, imputed_path)

            # Update the counters. The verdict is read with .get so that an
            # analysis dict lacking 'should_impute' is counted as baseline-only
            # instead of turning an already finished file into a failure.
            analysis = analyze_stacking_performance(result["results"])
            summary["processed"] += 1
            if analysis.get('should_impute', False):
                summary["stacking_used"] += 1
            else:
                summary["baseline_only"] += 1

            print(f"\n  ✅ Concluído: {source}")

        except Exception as e:
            # Per-file boundary: report the error and keep processing the rest.
            print(f"\n  ❌ Erro processando {file_path.name}: {e}")
            traceback.print_exc()
            summary["failed"] += 1
            continue

    # FINAL REPORT
    # max(1, ...) avoids a division by zero when no file was processed.
    processed_or_one = max(1, summary['processed'])
    print(f"\n{'='*80}")
    print("RELATÓRIO FINAL")
    print(f"{'='*80}")
    print(f"Total de arquivos: {summary['total_files']}")
    print(f"Processados com sucesso: {summary['processed']}")
    print(f"  ├─ Stacking usado: {summary['stacking_used']} ({summary['stacking_used']/processed_or_one*100:.1f}%)")
    print(f"  └─ Apenas baselines: {summary['baseline_only']} ({summary['baseline_only']/processed_or_one*100:.1f}%)")
    print(f"Falhas: {summary['failed']}")
    print("\n📁 Resultados salvos em:")
    print(f"  ├─ Avaliação: {evaluation_file}")
    print(f"  └─ Dados imputados: {imputed_path}")
    print(f"{'='*80}\n")

    # Save the run summary
    summary_file = results_path / "imputation_summary.json"
    with open(summary_file, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=4)

    print(f"📊 Sumário salvo em: {summary_file}\n")

# Script entry point: run the full pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


PIPELINE COMPLETO: AVALIAÇÃO + IMPUTAÇÃO INTELIGENTE
Arquivos encontrados: 615
Frações de missing: [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
Pasta de resultados: ..\..\results
Pasta de dados imputados: ..\..\datasets\multivariada-imputed


################################################################################
[1/615] Processando: ac-am_merged.csv
################################################################################

FASE 1: AVALIAÇÃO

[AVALIANDO] ac-am_merged.csv
  [DADOS LIMPOS] 1326 amostras válidas (removidos 885 com -1)

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 20%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 20%
    Máscara aplicada: 265/1326 amostras (20%)
  [STACKING] Avaliando com test_fraction=20%
    Cross-validation com 5 splits
      Fold 1: treino=221, teste=221 (16.7%)
        ✅ RMSE: 89.85M, R²: 0.3506, Tempo: 0.2843ms/amostra
      Fold 2: treino=442, teste=221 

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 767, in impute_with_stacking
    X_train_scaled = scaler_X.fit_transform(X_train)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\base.py", line 894, in fit_transform
    return self.fit(X, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 907, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users

        ✅ RMSE: 139.31M, R²: -3.4892, Tempo: 0.2845ms/amostra
      Fold 2: treino=441, teste=218 (16.6%)
        ✅ RMSE: 18.65M, R²: 0.8306, Tempo: 0.2229ms/amostra
      Fold 3: treino=659, teste=218 (16.6%)
        ✅ RMSE: 53.74M, R²: 0.4323, Tempo: 0.2242ms/amostra
      Fold 4: treino=877, teste=218 (16.6%)
        ✅ RMSE: 75.00M, R²: 0.5654, Tempo: 0.2666ms/amostra
      Fold 5: treino=1095, teste=218 (16.6%)
        ✅ RMSE: 66.89M, R²: 0.2123, Tempo: 0.2212ms/amostra
    📊 Média - RMSE: 70.72M, R²: -0.2897, Tempo: 0.2439ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 328/1313 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=265, teste=262 (20.0%)
        ✅ RMSE: 107.30M, R²: -1.7155, Tempo: 0.1969ms/amostra
      Fold 2: treino=527, teste=262 (20

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 767, in impute_with_stacking
    X_train_scaled = scaler_X.fit_transform(X_train)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\base.py", line 894, in fit_transform
    return self.fit(X, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 907, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users

        ✅ RMSE: 43.77M, R²: 0.7224, Tempo: 0.2874ms/amostra
      Fold 2: treino=411, teste=205 (16.7%)
        ✅ RMSE: 99.58M, R²: 0.6875, Tempo: 0.2452ms/amostra
      Fold 3: treino=616, teste=205 (16.7%)
        ✅ RMSE: 111.53M, R²: 0.7569, Tempo: 0.2253ms/amostra
      Fold 4: treino=821, teste=205 (16.7%)
        ✅ RMSE: 49.25M, R²: 0.9464, Tempo: 0.2480ms/amostra
      Fold 5: treino=1026, teste=205 (16.7%)
        ✅ RMSE: 45.75M, R²: 0.9441, Tempo: 0.3133ms/amostra
    📊 Média - RMSE: 69.98M, R²: 0.8115, Tempo: 0.2638ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 307/1231 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=247, teste=246 (20.0%)
        ✅ RMSE: 71.76M, R²: 0.7338, Tempo: 0.1878ms/amostra
      Fold 2: treino=493, teste=246 (20.0%)

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 767, in impute_with_stacking
    X_train_scaled = scaler_X.fit_transform(X_train)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\base.py", line 894, in fit_transform
    return self.fit(X, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 907, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users

        ✅ RMSE: 91.50M, R²: 0.3542, Tempo: 0.2177ms/amostra
      Fold 2: treino=449, teste=224 (16.7%)
        ✅ RMSE: 92.84M, R²: 0.6581, Tempo: 0.2189ms/amostra
      Fold 3: treino=673, teste=224 (16.7%)
        ✅ RMSE: 50.49M, R²: 0.9242, Tempo: 0.2216ms/amostra
      Fold 4: treino=897, teste=224 (16.7%)
        ✅ RMSE: 53.95M, R²: 0.9260, Tempo: 0.2279ms/amostra
      Fold 5: treino=1121, teste=224 (16.7%)
        ✅ RMSE: 51.78M, R²: 0.9367, Tempo: 0.2793ms/amostra
    📊 Média - RMSE: 68.11M, R²: 0.7598, Tempo: 0.2331ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 336/1345 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=269, teste=269 (20.0%)
        ✅ RMSE: 91.24M, R²: 0.4805, Tempo: 0.2033ms/amostra
      Fold 2: treino=538, teste=269 (20.0%)


Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 45.86M, R²: 0.6835, Tempo: 0.2841ms/amostra
      Fold 2: treino=337, teste=167 (16.6%)
        ✅ RMSE: 78.99M, R²: 0.5693, Tempo: 0.3831ms/amostra
      Fold 3: treino=504, teste=167 (16.6%)
        ✅ RMSE: 35.26M, R²: 0.8834, Tempo: 0.3009ms/amostra
      Fold 4: treino=671, teste=167 (16.6%)
        ✅ RMSE: 28.19M, R²: 0.9360, Tempo: 0.3105ms/amostra
      Fold 5: treino=838, teste=167 (16.6%)
        ✅ RMSE: 76.79M, R²: 0.8348, Tempo: 0.3931ms/amostra
    📊 Média - RMSE: 53.02M, R²: 0.7814, Tempo: 0.3343ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 251/1005 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=201, teste=201 (20.0%)
        ✅ RMSE: 36.93M, R²: 0.8212, Tempo: 0.2403ms/amostra
      Fold 2: treino=402, teste=201 (20.0%)
 

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 767, in impute_with_stacking
    X_train_scaled = scaler_X.fit_transform(X_train)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\base.py", line 894, in fit_transform
    return self.fit(X, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 907, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users

  ────────────────────────────────────────────────────────────
  ✅ Imputação concluída para ap-pb

  ✅ Concluído: ap-pb

################################################################################
[62/615] Processando: ap-pe_merged.csv
################################################################################

FASE 1: AVALIAÇÃO

[AVALIANDO] ap-pe_merged.csv
  [DADOS LIMPOS] 82 amostras válidas (removidos 265 com -1)

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 20%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 20%
    Máscara aplicada: 16/82 amostras (20%)
  [STACKING] Avaliando com test_fraction=20%
    Cross-validation com 5 splits
      Fold 1: treino=17, teste=13 (15.9%)
        ⚠️ Erro no fold 1: The 'cv' parameter of StackingRegressor must be an int in the range [2, inf), an object implementing 'split' and 'get_n_splits', an iterable or None or a str among {'prefit'}. Got 1 instead.
 

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 767, in impute_with_stacking
    X_train_scaled = scaler_X.fit_transform(X_train)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\base.py", line 894, in fit_transform
    return self.fit(X, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 907, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users

  ────────────────────────────────────────────────────────────
  ✅ Imputação concluída para ap-ro

  ✅ Concluído: ap-ro

################################################################################
[68/615] Processando: ap-rr_merged.csv
################################################################################

FASE 1: AVALIAÇÃO

[AVALIANDO] ap-rr_merged.csv
  [DADOS LIMPOS] 1029 amostras válidas (removidos 1106 com -1)

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 20%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 20%
    Máscara aplicada: 205/1029 amostras (20%)
  [STACKING] Avaliando com test_fraction=20%
    Cross-validation com 5 splits
      Fold 1: treino=174, teste=171 (16.6%)
        ✅ RMSE: 52.86M, R²: 0.9356, Tempo: 0.3728ms/amostra
      Fold 2: treino=345, teste=171 (16.6%)
        ✅ RMSE: 37.76M, R²: 0.9736, Tempo: 0.3783ms/amostra
      Fold 3: treino=516, teste=171 (16.6%)
   

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 311.46M, R²: 0.6945, Tempo: 0.6654ms/amostra
      Fold 2: treino=155, teste=76 (16.6%)
        ✅ RMSE: 246.74M, R²: 0.8622, Tempo: 0.7754ms/amostra
      Fold 3: treino=231, teste=76 (16.6%)
        ✅ RMSE: 166.94M, R²: 0.9189, Tempo: 0.6406ms/amostra
      Fold 4: treino=307, teste=76 (16.6%)
        ✅ RMSE: 380.62M, R²: 0.7850, Tempo: 0.6189ms/amostra
      Fold 5: treino=383, teste=76 (16.6%)
        ✅ RMSE: 262.95M, R²: 0.8241, Tempo: 0.5845ms/amostra
    📊 Média - RMSE: 273.74M, R²: 0.8169, Tempo: 0.6570ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 114/459 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=95, teste=91 (19.8%)
        ✅ RMSE: 248.76M, R²: 0.8355, Tempo: 0.5328ms/amostra
      Fold 2: treino=186, teste=91 (19.8%)
  

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 767, in impute_with_stacking
    X_train_scaled = scaler_X.fit_transform(X_train)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\base.py", line 894, in fit_transform
    return self.fit(X, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 907, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users

        ✅ RMSE: 7.73M, R²: 0.7221, Tempo: 0.3265ms/amostra
      Fold 2: treino=302, teste=151 (16.7%)
        ✅ RMSE: 57.30M, R²: 0.4367, Tempo: 0.3034ms/amostra
      Fold 3: treino=453, teste=151 (16.7%)
        ✅ RMSE: 65.96M, R²: -7.7330, Tempo: 0.4064ms/amostra
      Fold 4: treino=604, teste=151 (16.7%)
        ✅ RMSE: 10.42M, R²: 0.4679, Tempo: 0.3149ms/amostra
      Fold 5: treino=755, teste=151 (16.7%)
        ✅ RMSE: 15.78M, R²: 0.7402, Tempo: 0.3490ms/amostra
    📊 Média - RMSE: 31.44M, R²: -1.0732, Tempo: 0.3400ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 226/906 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=182, teste=181 (20.0%)
        ✅ RMSE: 7.33M, R²: 0.8298, Tempo: 0.2479ms/amostra
      Fold 2: treino=363, teste=181 (20.0%)
  

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 13.46M, R²: 0.7528, Tempo: 0.2909ms/amostra
      Fold 2: treino=429, teste=212 (16.6%)
        ✅ RMSE: 18.20M, R²: 0.4732, Tempo: 0.2933ms/amostra
      Fold 3: treino=641, teste=212 (16.6%)
        ✅ RMSE: 3.88M, R²: 0.8867, Tempo: 0.2406ms/amostra
      Fold 4: treino=853, teste=212 (16.6%)
        ✅ RMSE: 11.33M, R²: 0.7946, Tempo: 0.3163ms/amostra
      Fold 5: treino=1065, teste=212 (16.6%)
        ✅ RMSE: 7.64M, R²: 0.6971, Tempo: 0.2975ms/amostra
    📊 Média - RMSE: 10.90M, R²: 0.7209, Tempo: 0.2877ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 319/1277 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=257, teste=255 (20.0%)
        ✅ RMSE: 12.73M, R²: 0.8486, Tempo: 0.1881ms/amostra
      Fold 2: treino=512, teste=255 (20.0%)
  

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 47.31M, R²: 0.9267, Tempo: 0.3410ms/amostra
      Fold 2: treino=304, teste=150 (16.6%)
        ✅ RMSE: 61.12M, R²: 0.9203, Tempo: 0.3485ms/amostra
      Fold 3: treino=454, teste=150 (16.6%)
        ✅ RMSE: 102.04M, R²: 0.8165, Tempo: 0.3592ms/amostra
      Fold 4: treino=604, teste=150 (16.6%)
        ✅ RMSE: 32.85M, R²: 0.9718, Tempo: 0.4490ms/amostra
      Fold 5: treino=754, teste=150 (16.6%)
        ✅ RMSE: 38.98M, R²: 0.9705, Tempo: 0.3648ms/amostra
    📊 Média - RMSE: 56.46M, R²: 0.9212, Tempo: 0.3725ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 226/904 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=184, teste=180 (19.9%)
        ✅ RMSE: 48.41M, R²: 0.9392, Tempo: 0.2479ms/amostra
      Fold 2: treino=364, teste=180 (19.9%)
 

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 54.18M, R²: -0.5341, Tempo: 0.4803ms/amostra
      Fold 2: treino=245, teste=121 (16.6%)
        ✅ RMSE: 20.06M, R²: 0.3454, Tempo: 0.3648ms/amostra
      Fold 3: treino=366, teste=121 (16.6%)
        ✅ RMSE: 3.43M, R²: 0.9331, Tempo: 0.4214ms/amostra
      Fold 4: treino=487, teste=121 (16.6%)
        ✅ RMSE: 12.80M, R²: 0.8665, Tempo: 0.3982ms/amostra
      Fold 5: treino=608, teste=121 (16.6%)
        ✅ RMSE: 14.91M, R²: 0.8526, Tempo: 0.4055ms/amostra
    📊 Média - RMSE: 21.08M, R²: 0.4927, Tempo: 0.4140ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 182/729 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=149, teste=145 (19.9%)
        ✅ RMSE: 61.50M, R²: -0.9145, Tempo: 0.3141ms/amostra
      Fold 2: treino=294, teste=145 (19.9%)
 

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 10.57M, R²: 0.7190, Tempo: 0.4137ms/amostra
      Fold 2: treino=241, teste=119 (16.6%)
        ✅ RMSE: 22.30M, R²: 0.5783, Tempo: 0.4078ms/amostra
      Fold 3: treino=360, teste=119 (16.6%)
        ✅ RMSE: 51.67M, R²: 0.7356, Tempo: 0.4188ms/amostra
      Fold 4: treino=479, teste=119 (16.6%)
        ✅ RMSE: 52.70M, R²: 0.7239, Tempo: 0.4504ms/amostra
      Fold 5: treino=598, teste=119 (16.6%)
        ✅ RMSE: 13.15M, R²: 0.7505, Tempo: 0.4284ms/amostra
    📊 Média - RMSE: 30.08M, R²: 0.7015, Tempo: 0.4238ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 179/717 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=145, teste=143 (19.9%)
        ✅ RMSE: 15.33M, R²: 0.5673, Tempo: 0.3349ms/amostra
      Fold 2: treino=288, teste=143 (19.9%)
  

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 71.64M, R²: 0.8717, Tempo: 0.3900ms/amostra
      Fold 2: treino=267, teste=133 (16.6%)
        ✅ RMSE: 109.20M, R²: 0.8098, Tempo: 0.3668ms/amostra
      Fold 3: treino=400, teste=133 (16.6%)
        ✅ RMSE: 60.93M, R²: 0.9242, Tempo: 0.4575ms/amostra
      Fold 4: treino=533, teste=133 (16.6%)
        ✅ RMSE: 43.61M, R²: 0.9473, Tempo: 0.4664ms/amostra
      Fold 5: treino=666, teste=133 (16.6%)
        ✅ RMSE: 38.51M, R²: 0.9635, Tempo: 0.3783ms/amostra
    📊 Média - RMSE: 64.78M, R²: 0.9033, Tempo: 0.4118ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 199/799 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=163, teste=159 (19.9%)
        ✅ RMSE: 64.08M, R²: 0.8956, Tempo: 0.3076ms/amostra
      Fold 2: treino=322, teste=159 (19.9%)
 

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 66.49M, R²: 0.8515, Tempo: 0.3047ms/amostra
      Fold 2: treino=323, teste=159 (16.6%)
        ✅ RMSE: 26.76M, R²: 0.9336, Tempo: 0.4022ms/amostra
      Fold 3: treino=482, teste=159 (16.6%)
        ✅ RMSE: 53.27M, R²: 0.8775, Tempo: 0.3173ms/amostra
      Fold 4: treino=641, teste=159 (16.6%)
        ✅ RMSE: 51.12M, R²: 0.9237, Tempo: 0.3222ms/amostra
      Fold 5: treino=800, teste=159 (16.6%)
        ✅ RMSE: 49.97M, R²: 0.9374, Tempo: 0.3313ms/amostra
    📊 Média - RMSE: 49.52M, R²: 0.9047, Tempo: 0.3355ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 239/959 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=195, teste=191 (19.9%)
        ✅ RMSE: 53.28M, R²: 0.8877, Tempo: 0.2422ms/amostra
      Fold 2: treino=386, teste=191 (19.9%)
  

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 767, in impute_with_stacking
    X_train_scaled = scaler_X.fit_transform(X_train)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\base.py", line 894, in fit_transform
    return self.fit(X, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\LARCES_MALU\Desktop\research\ENHANCED-IMPUTATION-PREDICTION\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 907, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users

        ✅ RMSE: 17.84M, R²: 0.6436, Tempo: 0.2666ms/amostra
      Fold 2: treino=386, teste=193 (16.7%)
        ✅ RMSE: 16.80M, R²: 0.0563, Tempo: 0.3201ms/amostra
      Fold 3: treino=579, teste=193 (16.7%)
        ✅ RMSE: 11.59M, R²: 0.8957, Tempo: 0.2625ms/amostra
      Fold 4: treino=772, teste=193 (16.7%)
        ✅ RMSE: 23.65M, R²: 0.5422, Tempo: 0.2412ms/amostra
      Fold 5: treino=965, teste=193 (16.7%)
        ✅ RMSE: 12.61M, R²: 0.7445, Tempo: 0.2402ms/amostra
    📊 Média - RMSE: 16.50M, R²: 0.5765, Tempo: 0.2661ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 289/1158 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=234, teste=231 (19.9%)
        ✅ RMSE: 18.09M, R²: 0.5942, Tempo: 0.1898ms/amostra
      Fold 2: treino=465, teste=231 (19.9%)
 

Traceback (most recent call last):
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 979, in main
    intelligent_imputation(file_path, result, imputed_path)
  File "C:\Users\LARCES_MALU\AppData\Local\Temp\ipykernel_89184\2726637646.py", line 863, in intelligent_imputation
    print(f"  Vitórias do Stacking: {analysis['stacking_wins']}/{analysis['total_comparisons']} ({analysis['win_rate']*100:.1f}%)")
                                     ~~~~~~~~^^^^^^^^^^^^^^^^^
KeyError: 'stacking_wins'


        ✅ RMSE: 20.67M, R²: 0.7334, Tempo: 0.2308ms/amostra
      Fold 2: treino=426, teste=213 (16.7%)
        ✅ RMSE: 18.93M, R²: -0.1878, Tempo: 0.2347ms/amostra
      Fold 3: treino=639, teste=213 (16.7%)
        ✅ RMSE: 9.66M, R²: 0.8111, Tempo: 0.2847ms/amostra
      Fold 4: treino=852, teste=213 (16.7%)
        ✅ RMSE: 16.16M, R²: 0.7583, Tempo: 0.2508ms/amostra
      Fold 5: treino=1065, teste=213 (16.7%)
        ✅ RMSE: 86.89M, R²: 0.5694, Tempo: 0.2327ms/amostra
    📊 Média - RMSE: 30.46M, R²: 0.5369, Tempo: 0.2467ms/amostra

  ────────────────────────────────────────────────────────────
  FRAÇÃO DE MISSING: 25%
  ────────────────────────────────────────────────────────────
  [BASELINE] Avaliando fração 25%
    Máscara aplicada: 319/1278 amostras (25%)
  [STACKING] Avaliando com test_fraction=25%
    Cross-validation com 4 splits
      Fold 1: treino=258, teste=255 (20.0%)
        ✅ RMSE: 18.87M, R²: 0.7674, Tempo: 0.2209ms/amostra
      Fold 2: treino=513, teste=255 (20.0%)
