In [6]:
import pandas as pd
import numpy as np
import re

class AbsolutePercentageErrorOnProductTotal:
    """Callable metric: absolute error of per-product totals, as a percentage.

    Predictions and labels are summed per product, the absolute differences
    between those per-product sums are added up, and that total is divided by
    the overall sum of the labels and multiplied by 100.
    """

    def __init__(self, df_eval, product_id_col='product_id', target_col='tn_real_futuro'):
        """Keep a private copy of the evaluation frame and the column names.

        Args:
            df_eval: DataFrame whose ``product_id_col`` column lines up
                positionally with the ``preds``/``labels`` given to
                ``__call__``.
            product_id_col: Name of the column used to group rows by product.
            target_col: Name of the target column. It is stored on the
                instance but not read by ``__call__``.
        """
        self.metric_name = 'AbsPercErrProdTotal'
        self.product_id_col = product_id_col
        self.target_col = target_col
        # Copy so later mutations of the caller's frame do not affect the metric.
        self.df_eval = df_eval.copy()

    def __call__(self, preds, labels):
        """Evaluate the metric for one set of predictions.

        Args:
            preds: Predicted values, one per row of ``df_eval``.
            labels: Observed values, one per row of ``df_eval``.

        Returns:
            A tuple ``(metric_name, metric_value, False)``.
            NOTE(review): the trailing ``False`` presumably is the
            "higher is better" flag of a LightGBM-style eval function --
            confirm against the caller.
        """
        frame = pd.DataFrame({
            self.product_id_col: self.df_eval[self.product_id_col].values,
            'preds': np.asarray(preds, dtype=float),
            'labels': np.asarray(labels, dtype=float),
        })

        # Aggregate to one row per product before comparing, so over- and
        # under-predictions inside the same product cancel out.
        totals = frame.groupby(self.product_id_col, observed=True)[['labels', 'preds']].sum()

        abs_error_total = (totals['labels'] - totals['preds']).abs().sum()
        real_total = totals['labels'].sum()

        if real_total != 0:
            score = abs_error_total / real_total * 100
        elif abs_error_total == 0:
            score = 0
        else:
            # No real volume to normalise by: report the raw absolute error.
            score = abs_error_total

        return self.metric_name, score, False

# 📂 Files to evaluate: a validation/test pair for each prediction run 5..13
rutas_csv = [
    f"{split}_predictions{run}.csv"
    for run in range(5, 14)
    for split in ("val", "test")
]

# Columns every predictions file must provide
columnas_requeridas = ('product_id', 'tn_predicha', 'tn_real_futuro')

# File names encode the split (val/test) and the prediction run number
patron_nombre = re.compile(r"(val|test)_predictions(\d+)\.csv")

# 📊 Results keyed by prediction run number, then by split
resultados = {}

# 🔄 Evaluation: a failure on one file is reported and the loop moves on
for ruta in rutas_csv:
    print(f"📂 Procesando: {ruta}")
    try:
        df = pd.read_csv(ruta)

        faltantes = [col for col in columnas_requeridas if col not in df.columns]
        if faltantes:
            print(f"❌ Faltan columnas necesarias en {ruta}")
            continue

        metrica = AbsolutePercentageErrorOnProductTotal(df)
        valor = round(metrica(df['tn_predicha'], df['tn_real_futuro'])[1], 2)

        # Pull the split and run number out of the file name
        coincidencia = patron_nombre.match(ruta)
        if coincidencia is None:
            print(f"⚠️ Nombre no reconocido: {ruta}")
            continue

        tipo = coincidencia.group(1)
        num = int(coincidencia.group(2))
        resultados.setdefault(num, {})[tipo] = valor

    except Exception as e:
        print(f"⚠️ Error al procesar {ruta}: {e}")

# 📋 One row per run number, one column per split, ordered by run number
df_resultados = pd.DataFrame.from_dict(resultados, orient='index').sort_index()

# ✅ Display the result (notebook cell output)
df_resultados


📂 Procesando: val_predictions5.csv
📂 Procesando: test_predictions5.csv
📂 Procesando: val_predictions6.csv
📂 Procesando: test_predictions6.csv
📂 Procesando: val_predictions7.csv
📂 Procesando: test_predictions7.csv
📂 Procesando: val_predictions8.csv
📂 Procesando: test_predictions8.csv
📂 Procesando: val_predictions9.csv
📂 Procesando: test_predictions9.csv
📂 Procesando: val_predictions10.csv
📂 Procesando: test_predictions10.csv
📂 Procesando: val_predictions11.csv
📂 Procesando: test_predictions11.csv
📂 Procesando: val_predictions12.csv
📂 Procesando: test_predictions12.csv
📂 Procesando: val_predictions13.csv
📂 Procesando: test_predictions13.csv


Unnamed: 0,val,test
5,23.53,25.62
6,23.1,29.27
7,20.31,27.55
8,20.05,22.86
9,18.66,24.51
10,17.79,25.52
11,23.93,23.2
12,19.28,32.59
13,23.35,22.46
