### Analisando o JSON metrics_summary.json

In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path

sns.set(style="whitegrid")
plt.rcParams['font.family'] = 'DejaVu Sans'


def final_robust_analysis_with_links(json_file_path, target_links=None):
    """Load the metrics summary JSON into a long-format DataFrame.

    The file is read as a nested mapping
    ``{link: {missing_rate: {'baseline': {method: metrics},
    'stacking': {'mean': {'StackingRegressor': metrics}}}}}`` and flattened
    into one row per (link, missing rate, method). A short summary of what
    was loaded is printed to stdout.

    Args:
        json_file_path: Path of the JSON file to read (UTF-8).
        target_links: Optional collection of link names to keep. When it is
            ``None`` or empty, every link in the file is kept.

    Returns:
        A DataFrame with the columns ``link``, ``missing_rate``,
        ``method_type``, ``method``, ``rmse``, ``nrmse``, ``r2`` and
        ``mape``. The columns are present even when no link matched, so the
        result can always be indexed by column name.

    Raises:
        KeyError: If a kept entry lacks the ``baseline`` / ``stacking``
            sections or one of the four metric keys.
        ValueError: If a missing-rate key cannot be converted to ``float``.
    """
    metric_keys = ('rmse', 'nrmse', 'r2', 'mape')
    columns = ['link', 'missing_rate', 'method_type', 'method', *metric_keys]

    def _record(link, rate, method_type, method, metrics):
        # One flat row; only the four reported metrics are copied over.
        row = {
            'link': link,
            'missing_rate': rate,
            'method_type': method_type,
            'method': method,
        }
        row.update({key: metrics[key] for key in metric_keys})
        return row

    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    records = []
    all_sources = set()

    for link, missing_data in data.items():
        if target_links and link not in target_links:
            continue

        all_sources.add(link)
        for missing_rate, methods in missing_data.items():
            missing_rate_float = float(missing_rate)

            records.extend(
                _record(link, missing_rate_float, 'baseline', method_name, metrics)
                for method_name, metrics in methods['baseline'].items()
            )
            records.append(
                _record(
                    link,
                    missing_rate_float,
                    'stacking',
                    'StackingRegressor',
                    methods['stacking']['mean']['StackingRegressor'],
                )
            )

    # Passing the column list keeps the schema stable when nothing matched;
    # without it an empty frame has no columns and the summary below would
    # raise KeyError on df['method'].
    df = pd.DataFrame(records, columns=columns)

    print("\nFINAL ANALYSIS (WITH LINK FILTERING)")
    print("=" * 80)
    print(f"Target links: {target_links}")
    print(f"Links found: {len(all_sources)}")
    print(f"Total records: {len(df)}")
    print(f"Methods: {df['method'].nunique()}")
    print(f"Missing rates: {sorted(df['missing_rate'].unique())}")

    return df

def print_average_table_by_source_imputation(df_clean, metodos_selecionados):
    """Print one table per metric with the mean value per missing rate and method.

    For each of RMSE, NRMSE, R² and MAPE the rows of ``df_clean`` are
    averaged (over every row sharing a missing rate and method, i.e. across
    links) and printed as a text table: one line per missing rate, one
    column per selected method.

    Args:
        df_clean: DataFrame with at least the columns ``missing_rate``,
            ``method``, ``rmse``, ``nrmse``, ``r2`` and ``mape``.
        metodos_selecionados: Method names to show, in column order. Methods
            with no data are shown as ``N/A``.

    Returns:
        None. The tables are written to stdout.
    """
    print("\n" + "=" * 80)
    print("AVERAGE PERFORMANCE BY SOURCE AND IMPUTATION METHOD")
    print("=" * 80)

    metric_names = {'rmse': 'RMSE', 'nrmse': 'NRMSE', 'r2': 'R²', 'mape': 'MAPE'}
    # Decimal places shown for each metric.
    decimals = {'rmse': 2, 'nrmse': 2, 'r2': 3, 'mape': 1}
    missing_rates = sorted(df_clean['missing_rate'].unique())

    methods = list(metodos_selecionados)
    rate_label = "Missing Rate"
    # Column widths come from the labels so the header and the data rows
    # share the same separators: a fixed width of 15 is too narrow for
    # 'StackingRegressor', and the rate column must be as wide as its label.
    rate_width = len(rate_label)
    widths = [max(15, len(str(method))) for method in methods]

    header = f"{rate_label} | " + " | ".join(
        f"{method:>{width}}" for method, width in zip(methods, widths)
    )

    for metric, title in metric_names.items():
        print(f"\n{title} (Average across sources):")
        print("-" * 60)

        pivot_data = df_clean.pivot_table(
            values=metric,
            index='missing_rate',
            columns='method',
            aggfunc='mean',
        )
        # Keep only the requested methods, in the requested order; methods
        # absent from the data become all-NaN columns.
        pivot_data = pivot_data.reindex(columns=methods)

        print(header)
        print("-" * len(header))

        for rate in missing_rates:
            if rate not in pivot_data.index:
                continue
            cells = []
            for method, width in zip(methods, widths):
                value = pivot_data.loc[rate, method]
                if pd.notna(value):
                    cells.append(f"{value:>{width}.{decimals[metric]}f}")
                else:
                    cells.append(f"{'N/A':>{width}}")
            print(f"{rate:>{rate_width}.2f} | " + " | ".join(cells))


# Report inputs: the imputation methods to show (in column order), the links
# to keep, and the location of the metrics summary.
metodos_selecionados = ['StackingRegressor', 'Mean', 'Median', 'KNNImputer', 'ForwardFill', 'BackwardFill']
target_links = ["sc-rn", "am-rn", "ac-rn", "mt-sc", "pr-go"]
json_path = Path("../../results/metrics_summary.json")

# Flatten the JSON for the selected links, then print the per-metric tables.
df_clean = final_robust_analysis_with_links(
    json_file_path=json_path,
    target_links=target_links,
)
print_average_table_by_source_imputation(
    df_clean=df_clean,
    metodos_selecionados=metodos_selecionados,
)



FINAL ANALYSIS (WITH LINK FILTERING)
Target links: ['sc-rn', 'am-rn', 'ac-rn', 'mt-sc', 'pr-go']
Links found: 5
Total records: 280
Methods: 8
Missing rates: [np.float64(0.2), np.float64(0.25), np.float64(0.3), np.float64(0.35), np.float64(0.4), np.float64(0.45), np.float64(0.5)]

AVERAGE PERFORMANCE BY SOURCE AND IMPUTATION METHOD

RMSE (Average across sources):
------------------------------------------------------------
Missing Rate | StackingRegressor |            Mean |          Median |      KNNImputer |     ForwardFill |    BackwardFill
--------------------------------------------------------------------------------------------------------------------------
       0.20 |          117.72 |          615.92 |          605.01 |          615.92 |          513.76 |          462.81
       0.25 |          123.48 |          629.08 |          634.87 |          629.08 |          518.12 |          479.27
       0.30 |          112.85 |          625.00 |          628.32 |          625.00 |  

### Verificando o tempo de predição do stacking

In [6]:
import json
import numpy as np
from pathlib import Path

# Location of the metrics summary JSON.
json_path = Path("../../results/metrics_summary.json")

# Links whose measurements are included in the average.
target_links = ["sc-rn", "am-rn", "ac-rn", "mt-sc", "pr-go"]

# === Mean of prediction_time_per_sample over the selected links ===
data = json.loads(json_path.read_text(encoding='utf-8'))

times = []
for link_name, per_rate in data.items():
    if link_name in target_links:
        for entry in per_rate.values():
            try:
                stacking = entry['stacking']['mean']['StackingRegressor']
                times.append(stacking['prediction_time_per_sample'])
            except KeyError:
                # Entries without a stacking timing are left out of the mean.
                pass

if not times:
    print("Nenhum valor de prediction_time_per_sample encontrado para os links selecionados.")
else:
    media_tempo = np.mean(times)
    print(f"Média de prediction_time_per_sample para os links selecionados: {media_tempo:.4f}")
    print(f"Total de medições consideradas: {len(times)}")


Média de prediction_time_per_sample para os links selecionados: 1.6190
Total de medições consideradas: 35
