# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [1]:

import numpy as np
import pandas as pd

# Caminho para o arquivo CSV
caminho = r'C:\Users\Murilo\Documents\dados_desafio_fiap\hash\df_t.csv'

# Leitura do arquivo CSV
df = pd.read_csv(caminho)

# Exibe a tabela original
print("Tabela original:")
print(df.head())

# =========================
# CURA E TRANSFORMAÇÃO
# =========================

df_curado = df.copy()

# 1. Garantir que os IDs são strings
df_curado['nk_ota_localizer_id'] = df_curado['nk_ota_localizer_id'].astype(str)
df_curado['fk_contact'] = df_curado['fk_contact'].astype(str)

# 2. Criar o campo datetime unificando data e hora
df_curado['data_hora_compra'] = pd.to_datetime(
    df_curado['date_purchase'] + ' ' + df_curado['time_purchase'], errors='coerce'
)

# 3. Trocar "0" por np.nan nos campos de retorno
campos_com_zero_para_nulo = ['place_origin_return', 'place_destination_return']
for col in campos_com_zero_para_nulo:
    df_curado[col] = df_curado[col].replace("0", np.nan)

# Trocar "0" e "1" por np.nan na fk_return_ota_bus_company
df_curado['fk_return_ota_bus_company'] = df_curado['fk_return_ota_bus_company'].replace(["0", "1"], np.nan)

# 4. Criar coluna "classificacao_viagem"
df_curado['classificacao_viagem'] = df_curado['place_origin_return'].apply(
    lambda x: 'ida_e_volta' if pd.notna(x) else 'ida'
)

# 5. Garantir que os tipos de gmv e tickets estão corretos
df_curado['gmv_success'] = df_curado['gmv_success'].astype(float)
df_curado['total_tickets_quantity_success'] = df_curado['total_tickets_quantity_success'].astype('Int64')

# 6. Remover colunas antigas de data e hora
df_curado.drop(['date_purchase', 'time_purchase'], axis=1, inplace=True)

# 7. Criar colunas adicionais
df_curado['tipo_compra'] = df_curado['total_tickets_quantity_success'].apply(
    lambda x: 'individual' if x == 1 else 'coletiva'
)
df_curado['sem_retorno_flag'] = df_curado['place_origin_return'].isnull()
df_curado['compra_dia_util'] = df_curado['data_hora_compra'].dt.weekday < 5

# Classificação por período do dia
def classificar_periodo(hora):
    if 0 <= hora < 6:
        return 'madrugada'
    elif 6 <= hora < 12:
        return 'manhã'
    elif 12 <= hora < 18:
        return 'tarde'
    else:
        return 'noite'

df_curado['hora_periodo'] = df_curado['data_hora_compra'].dt.hour.apply(classificar_periodo)

# 8. Verifica a data da primeira compra por cliente
primeiras_compras = df_curado.groupby('fk_contact')['data_hora_compra'].min().reset_index()
primeiras_compras['primeira_compra'] = True

df_curado = df_curado.merge(primeiras_compras, on=['fk_contact', 'data_hora_compra'], how='left')
df_curado['primeira_compra'] = df_curado['primeira_compra'].fillna(False)

# 9. Renomear colunas
df_curado.rename(columns={
    'nk_ota_localizer_id': 'order_id',
    'fk_contact': 'client_id',
    'place_origin_departure': 'origin_departure',
    'place_destination_departure': 'destination_departure',
    'place_origin_return': 'origin_return',
    'place_destination_return': 'destination_return',
    'fk_departure_ota_bus_company': 'bus_company_departure',
    'fk_return_ota_bus_company': 'bus_company_return',
    'gmv_success': 'total_value',
    'total_tickets_quantity_success': 'tickets_quantity',
    'data_hora_compra': 'purchase_datetime',
    'classificacao_viagem': 'trip_type',
    'tipo_compra': 'purchase_type',
    'sem_retorno_flag': 'no_return_flag',
    'compra_dia_util': 'purchase_weekday_flag',
    'hora_periodo': 'purchase_time_period',
    'primeira_compra': 'first_purchase_flag'
}, inplace=True)

# 10. Reordenar colunas
nova_ordem = [
    'purchase_datetime', 'order_id', 'client_id', 'purchase_weekday_flag', 'purchase_time_period',
    'first_purchase_flag', 'purchase_type', 'tickets_quantity', 'total_value', 'trip_type', 'no_return_flag',
    'origin_departure', 'destination_departure', 'origin_return', 'destination_return',
    'bus_company_departure', 'bus_company_return'
]

df_curado = df_curado[nova_ordem]

# =========================
# RESULTADOS
# =========================

print("\nTabela curada:")
print(df_curado.head())

# Horas únicas por período
horas_por_periodo = df_curado.groupby('purchase_time_period')['purchase_datetime'].apply(
    lambda x: sorted(x.dt.hour.unique())
)

print("\nHoras por período:")
for periodo, horas in horas_por_periodo.items():
    print(f"{periodo}: {horas}")



Tabela original:
                                 nk_ota_localizer_id  \
0  bc02d5245bec63b30ff1102fa273fc03f58bc9cc3f674e...   
1  5432f12612dd5d749b3be880e779989cf63b5efa4bcc4e...   
2  fb3caed9b2f1b6016d45ccddb19095476e61a2c85faa8e...   
3  4dc44a6dd592b702feccb493d192210c86965aee684529...   
4  aa34ed7fd0a6b405df2df1bf9f8d68e6df9b9a868a6181...   

                                          fk_contact date_purchase  \
0  a7218ff4ee7d37d48d2b4391b955627cb089870b934912...    2018-12-26   
1  37228485e0dc83d84d1bcd1bef3dc632301bf6cb22c8b5...    2018-12-05   
2  3467ec081e2421e72c96e7203b929d21927fd00b6b5f28...    2018-12-21   
3  ab3251a2be0f69713b8f97b0e9d1579e31551f4fd4facf...    2018-12-06   
4  ceea0de820a6379f2c4215bddaec66c33994b304607e56...    2021-02-23   

  time_purchase                             place_origin_departure  \
0      15:33:35  6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d...   
1      15:07:57  10e4e7caf8b078429bb1c80b1a10118ac6f963eff098fd...   
2      18:41:54

  df_curado['primeira_compra'] = df_curado['primeira_compra'].fillna(False)



Tabela curada:
    purchase_datetime                                           order_id  \
0 2018-12-26 15:33:35  bc02d5245bec63b30ff1102fa273fc03f58bc9cc3f674e...   
1 2018-12-05 15:07:57  5432f12612dd5d749b3be880e779989cf63b5efa4bcc4e...   
2 2018-12-21 18:41:54  fb3caed9b2f1b6016d45ccddb19095476e61a2c85faa8e...   
3 2018-12-06 14:01:38  4dc44a6dd592b702feccb493d192210c86965aee684529...   
4 2021-02-23 20:08:25  aa34ed7fd0a6b405df2df1bf9f8d68e6df9b9a868a6181...   

                                           client_id  purchase_weekday_flag  \
0  a7218ff4ee7d37d48d2b4391b955627cb089870b934912...                   True   
1  37228485e0dc83d84d1bcd1bef3dc632301bf6cb22c8b5...                   True   
2  3467ec081e2421e72c96e7203b929d21927fd00b6b5f28...                   True   
3  ab3251a2be0f69713b8f97b0e9d1579e31551f4fd4facf...                   True   
4  ceea0de820a6379f2c4215bddaec66c33994b304607e56...                   True   

  purchase_time_period  first_purchase_flag purchase

In [13]:
 # ================================================
# TREINO RÁPIDO + SALVAR MODELO + CSVs (holdout/full)
# ================================================
import os, json, math, time, warnings
warnings.filterwarnings("ignore")

import joblib
import numpy as np
import pandas as pd

from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

from lightgbm import LGBMClassifier
try:
    from lightgbm import early_stopping, log_evaluation
    _HAS_EARLY = True
except Exception:
    _HAS_EARLY = False
    early_stopping = None
    log_evaluation = None

# -----------------
# CONFIG (rápido)
# -----------------
RANDOM_STATE   = 42
TEST_SIZE      = 0.20       # holdout por cliente
VAL_SIZE       = 0.10       # validação interna
TOP_K          = 3

# redução de classes (quanto mais agressivo, mais rápido)
COVERAGE       = 0.90       # cobre ~90% do volume
MIN_COUNT      = 300        # classes com < MIN_COUNT -> "__OUTROS__"

# subamostra do conjunto de treino (acelera muito o early stopping)
TRAIN_FRAC     = 0.30       # 30% do treino

# lightgbm (rápido)
EARLY_ROUNDS   = 50         # para cedo se não melhorar
N_ESTIMATORS   = 3000
LEARNING_RATE  = 0.12
NUM_LEAVES     = 63
MAX_BIN        = 63
MIN_DATA_LEAF  = 200
FEATURE_FRAC   = 0.8

# saída
ARTS_DIR       = "artifacts_destino_model"
CSV_HOLDOUT    = "predicoes_holdout.csv"
CSV_FULL       = "predicoes_full.csv"
os.makedirs(ARTS_DIR, exist_ok=True)

# -----------------
# Helpers
# -----------------
def pick_base_df(df_curado, df_sample=None):
    if ('df_sample' in globals()
        and isinstance(df_sample, pd.DataFrame)
        and not df_sample.empty):
        print(f"[INFO] Usando df_sample: {len(df_sample):,} linhas")
        return df_sample.copy()
    if df_curado is None or not isinstance(df_curado, pd.DataFrame) or df_curado.empty:
        raise ValueError("df_curado não encontrado ou vazio. Rode o bloco de limpeza antes.")
    print(f"[INFO] Usando df_curado: {len(df_curado):,} linhas")
    return df_curado.copy()

def engineer_features(df):
    df = df.copy()
    df['purchase_datetime'] = pd.to_datetime(df['purchase_datetime'], errors='coerce')
    df = df.sort_values(['client_id','purchase_datetime']).reset_index(drop=True)

    # days_since_last
    df['days_since_last'] = df.groupby('client_id')['purchase_datetime'].diff().dt.days
    df['days_since_last'] = df['days_since_last'].fillna(
        df.groupby('client_id')['days_since_last'].transform('median')
    )
    df['days_since_last'] = df['days_since_last'].fillna(df['days_since_last'].median())

    # freq e ticket médio
    freq = (df.groupby('client_id', as_index=False)['order_id']
              .count().rename(columns={'order_id':'client_total_orders'}))
    avgv = (df.groupby('client_id', as_index=False)['total_value']
              .mean().rename(columns={'total_value':'client_avg_value'}))
    df = df.merge(freq, on='client_id', how='left').merge(avgv, on='client_id', how='left')

    # flags / tempo
    if df['purchase_weekday_flag'].dtype == bool:
        df['purchase_weekday_flag'] = df['purchase_weekday_flag'].astype(int)
    df['purchase_month'] = df['purchase_datetime'].dt.month.astype('Int64')
    df['purchase_hour']  = df['purchase_datetime'].dt.hour.astype('Int64')
    return df

def reduce_classes(df, target):
    counts  = df[target].astype(str).value_counts()
    cumcov  = counts.cumsum() / counts.sum()
    keep    = set(counts.index[(cumcov <= COVERAGE) | (counts >= MIN_COUNT)])
    df[target] = df[target].astype(str).where(df[target].astype(str).isin(keep), "__OUTROS__")
    cov_reached = float(counts[counts.index.isin(keep)].sum() / counts.sum())
    print(f"[INFO] Destinos mantidos: {len(keep)} | cobertura≈{cov_reached:.3f} | '__OUTROS__' aplicado ao restante")
    return df

def make_X(df, features, cat_cols):
    X = df[features].copy()
    for c in cat_cols:
        X[c] = X[c].astype('category')
    return X

def top_k_acc(y_true, proba, model_classes, k=3):
    """Top-k robusto sem depender de sklearn >=1.0"""
    y_true = np.asarray(y_true)
    k = min(k, proba.shape[1])
    idx_map = {c:i for i, c in enumerate(model_classes)}
    ac = 0
    for yt, p in zip(y_true, proba):
        if yt in idx_map:
            if idx_map[yt] in np.argpartition(p, -k)[-k:]:
                ac += 1
    return ac / len(y_true) if len(y_true) else np.nan

def score_dataframe_in_batches(model, X_like, df_base_for_ids, id_cols,
                               classes_model, le_dest, topk=3, batch_size=250_000):
    import json
    out_frames = []
    n = len(X_like)
    for start in range(0, n, batch_size):
        end = min(start + batch_size, n)
        Xi = X_like.iloc[start:end]
        ids = df_base_for_ids.loc[Xi.index, list(id_cols)].copy()

        proba = model.predict_proba(Xi)
        top1_idx   = proba.argmax(axis=1)
        top1_codes = classes_model[top1_idx]
        ids["pred_top1"]      = le_dest.inverse_transform(top1_codes)
        ids["pred_top1_prob"] = np.round(proba[np.arange(len(proba)), top1_idx] * 100, 2)

        k = min(topk, proba.shape[1])
        topk_idx   = np.argsort(proba, axis=1)[:, -k:][:, ::-1]
        topk_codes = classes_model[topk_idx]
        topk_lbls  = [le_dest.inverse_transform(row).tolist() for row in topk_codes]
        topk_probs = [list(np.round(proba[i, topk_idx[i]]*100, 2)) for i in range(len(proba))]
        ids["pred_topk_labels"] = [json.dumps(v) for v in topk_lbls]
        ids["pred_topk_probs"]  = [json.dumps(v) for v in topk_probs]

        out_frames.append(ids.reset_index(drop=True))
        print(f"[SCORE] {end:,}/{n:,} linhas processadas...")
    return pd.concat(out_frames, ignore_index=True)

# -----------------
# 1) Base + features
# -----------------
t0 = time.time()
df_base = pick_base_df(df_curado=df_curado, df_sample=globals().get('df_sample', None))
df = engineer_features(df_base)

# -----------------
# 2) Redução de classes + encoders
# -----------------
target = 'destination_departure'
df = reduce_classes(df, target)

le_dest   = LabelEncoder(); df[target] = le_dest.fit_transform(df[target].astype(str))
le_period = LabelEncoder(); df['purchase_time_period'] = le_period.fit_transform(df['purchase_time_period'].astype(str))
le_origin = LabelEncoder(); df['origin_enc']            = le_origin.fit_transform(df['origin_departure'].astype(str))
le_bus    = LabelEncoder(); df['bus_enc']               = le_bus.fit_transform(df['bus_company_departure'].astype(str))

features = [
    'tickets_quantity','total_value',
    'purchase_weekday_flag','purchase_time_period',
    'days_since_last','client_total_orders','client_avg_value',
    'purchase_month','purchase_hour','origin_enc','bus_enc'
]
cat_cols = ['purchase_time_period','purchase_month','purchase_hour','origin_enc','bus_enc']

X_all = make_X(df, features, cat_cols)
y_all = df[target].astype(int).copy()
groups_all = df['client_id'].astype(str)

if y_all.nunique() < 2:
    raise ValueError("Target com < 2 classes após redução. Ajuste COVERAGE/MIN_COUNT.")

# -----------------
# 3) Split (holdout por cliente) + validação interna
# -----------------
gss = GroupShuffleSplit(n_splits=1, test_size=TEST_SIZE, random_state=RANDOM_STATE)
tr_idx, te_idx = next(gss.split(X_all, y_all, groups=groups_all))
X_train_all, X_test = X_all.iloc[tr_idx], X_all.iloc[te_idx]
y_train_all, y_test = y_all.iloc[tr_idx], y_all.iloc[te_idx]
groups_train = groups_all.iloc[tr_idx]

gss_val = GroupShuffleSplit(n_splits=1, test_size=VAL_SIZE, random_state=RANDOM_STATE)
tr_i, val_i = next(gss_val.split(X_train_all, y_train_all, groups=groups_train))
X_tr, X_val = X_train_all.iloc[tr_i], X_train_all.iloc[val_i]
y_tr, y_val = y_train_all.iloc[tr_i], y_train_all.iloc[val_i]

# remove classes não vistas no treino na validação (evita logloss congelado)
mask_val = y_val.isin(set(y_tr.unique()))
if not mask_val.all():
    removed = int((~mask_val).sum())
    X_val, y_val = X_val[mask_val], y_val[mask_val]
    print(f"[INFO] Removidas {removed} amostras de validação com classes não vistas no treino.")

# subamostra do treino (acelera MUITO)
if TRAIN_FRAC < 1.0:
    rs = np.random.RandomState(RANDOM_STATE)
    sel = rs.choice(X_tr.index, size=int(len(X_tr)*TRAIN_FRAC), replace=False)
    X_tr, y_tr = X_tr.loc[sel], y_tr.loc[sel]
    print(f"[INFO] Subamostrando treino: {len(X_tr):,} linhas")

# -----------------
# 4) Modelo e treino rápido (GOSS)
# -----------------
model = LGBMClassifier(
    objective='multiclass',
    boosting_type='goss',
    n_estimators=N_ESTIMATORS,
    learning_rate=LEARNING_RATE,
    num_leaves=NUM_LEAVES,
    max_depth=-1,
    max_bin=MAX_BIN,
    min_data_in_leaf=MIN_DATA_LEAF,
    feature_fraction=FEATURE_FRAC,
    n_jobs=-1,
    random_state=RANDOM_STATE,
    verbosity=-1
)

fit_kwargs = dict(
    X=X_tr, y=y_tr,
    eval_set=[(X_val, y_val)],
    eval_metric='multi_logloss',
    categorical_feature=cat_cols
)
if _HAS_EARLY:
    fit_kwargs["callbacks"] = [early_stopping(EARLY_ROUNDS), log_evaluation(25)]
model.fit(**fit_kwargs)

# -----------------
# 5) Métrica rápida no holdout
# -----------------
classes_model = model.classes_
mask_test = y_test.isin(set(classes_model))
if not mask_test.all():
    print(f"[INFO] Holdout: ignorando {int((~mask_test).sum())} linhas com classes fora do treino para métricas.")
X_test_m, y_test_m = X_test[mask_test], y_test[mask_test]

proba_test = model.predict_proba(X_test_m)
y_pred_codes = classes_model[proba_test.argmax(axis=1)]

acc  = accuracy_score(y_test_m, y_pred_codes)
tka  = top_k_acc(y_test_m, proba_test, classes_model, k=TOP_K)
print(f"\n[HOLDOUT] Accuracy: {acc:.4f}")
print(f"[HOLDOUT] Top-{TOP_K} Accuracy: {tka:.4f}")

# -----------------
# 6) Salvar artefatos p/ inferência futura
# -----------------
artifacts = {
    "model": model,
    "features": features,
    "cat_cols": cat_cols,
    "classes_model": classes_model,
    "le_dest": le_dest,
    "le_period": le_period,
    "le_origin": le_origin,
    "le_bus": le_bus,
    "config": {
        "COVERAGE": COVERAGE, "MIN_COUNT": MIN_COUNT,
        "TRAIN_FRAC": TRAIN_FRAC, "TOP_K": TOP_K
    }
}
joblib.dump(artifacts, os.path.join(ARTS_DIR, "artifacts.joblib"))
print(f"[OK] Artefatos salvos em: {ARTS_DIR}/artifacts.joblib")

# -----------------
# 7) CSV do HOLDOUT
# -----------------
ID_COLS = ('order_id','client_id','purchase_datetime','origin_departure','destination_departure')

def score_dataframe(model, X_like, df_base_for_ids, id_cols, classes_model, le_dest, topk=3):
    import json
    proba = model.predict_proba(X_like)
    top1_idx   = proba.argmax(axis=1)
    top1_codes = classes_model[top1_idx]
    top1_lbls  = le_dest.inverse_transform(top1_codes)
    top1_prob  = np.round(proba[np.arange(len(proba)), top1_idx] * 100, 2)

    k = min(topk, proba.shape[1])
    topk_idx     = np.argsort(proba, axis=1)[:, -k:][:, ::-1]
    topk_codes   = classes_model[topk_idx]
    topk_labels  = [le_dest.inverse_transform(row).tolist() for row in topk_codes]
    topk_probs   = [list(np.round(proba[i, topk_idx[i]]*100, 2)) for i in range(len(proba))]

    res = df_base_for_ids.loc[X_like.index, list(id_cols)].copy()
    res["pred_top1"]        = top1_lbls
    res["pred_top1_prob"]   = top1_prob
    res["pred_topk_labels"] = [json.dumps(v) for v in topk_labels]
    res["pred_topk_probs"]  = [json.dumps(v) for v in topk_probs]
    return res.reset_index(drop=True)

holdout_preds = score_dataframe(model, X_test, df, ID_COLS, classes_model, le_dest, topk=TOP_K)
holdout_preds.to_csv(CSV_HOLDOUT, index=False, encoding="utf-8")
print(f"[OK] {CSV_HOLDOUT} salvo ({len(holdout_preds):,} linhas)")

# -----------------
# 8) CSV da base inteira (em batches para não travar)
# -----------------
full_X = make_X(df, features, cat_cols)  # já temos as features da base inteira
full_preds = score_dataframe_in_batches(
    model=model,
    X_like=full_X,
    df_base_for_ids=df,
    id_cols=ID_COLS,
    classes_model=classes_model,
    le_dest=le_dest,
    topk=TOP_K,
    batch_size=250_000  # aumente/diminua conforme a RAM
)
full_preds.to_csv(CSV_FULL, index=False, encoding="utf-8")
print(f"[OK] {CSV_FULL} salvo ({len(full_preds):,} linhas)")

print(f"\n[Done] Tempo total: {time.time() - t0:.1f}s")


[INFO] Usando df_sample: 870,672 linhas
[INFO] Destinos mantidos: 332 | cobertura≈0.900 | '__OUTROS__' aplicado ao restante
[INFO] Subamostrando treino: 187,257 linhas
Training until validation scores don't improve for 50 rounds
[25]	valid_0's multi_logloss: 32.1666
[50]	valid_0's multi_logloss: 32.1666
Early stopping, best iteration is:
[1]	valid_0's multi_logloss: 6.83479

[HOLDOUT] Accuracy: 0.2077
[HOLDOUT] Top-3 Accuracy: 0.4723
[OK] Artefatos salvos em: artifacts_destino_model/artifacts.joblib
[OK] predicoes_holdout.csv salvo (176,010 linhas)
[SCORE] 250,000/870,672 linhas processadas...
[SCORE] 500,000/870,672 linhas processadas...
[SCORE] 750,000/870,672 linhas processadas...
[SCORE] 870,672/870,672 linhas processadas...
[OK] predicoes_full.csv salvo (870,672 linhas)

[Done] Tempo total: 258.6s


In [14]:
# Baselines (usam apenas o treino!)
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# X_train_all, y_train_all, X_test, y_test, df, le_dest, model, classes_model já existem do script

# A) Baseline global: sempre predizer o destino mais frequente do TREINO
major_code = int(pd.Series(y_train_all).value_counts().idxmax())
base_global_acc = accuracy_score(y_test, np.full_like(y_test, major_code, dtype=int))
print(f"[BASE] Global (sempre maioria do treino) - Top-1: {base_global_acc:.4f}")

# B) Baseline por origem: predizer, para cada origem, o destino mais frequente no TREINO
train_idx = X_train_all.index
test_idx  = X_test.index

df_train = df.loc[train_idx, ['origin_departure','destination_departure']].copy()
df_train['y'] = y_train_all.values
# mapeia origem -> destino (código) mais frequente
mode_by_origin = (df_train.groupby('origin_departure')['y']
                  .agg(lambda s: s.value_counts().index[0]))

df_test  = df.loc[test_idx, ['origin_departure']].copy()
pred_by_origin = df_test['origin_departure'].map(mode_by_origin).fillna(major_code).astype(int).values
base_origin_acc = accuracy_score(y_test, pred_by_origin)
print(f"[BASE] Por origem (moda no treino) - Top-1: {base_origin_acc:.4f}")

# C) Seu modelo (já calculado no script principal)
# acc, tka já existem — só reimprime para comparar:
print(f"[MODEL] LightGBM - Top-1: {acc:.4f} | Top-3: {tka:.4f}")


[BASE] Global (sempre maioria do treino) - Top-1: 0.1126
[BASE] Por origem (moda no treino) - Top-1: 0.3082
[MODEL] LightGBM - Top-1: 0.2077 | Top-3: 0.4723


In [15]:
# ============================================
# BLENDING com PRIOR P(dest | origem) + CSVs
# ============================================
import numpy as np, pandas as pd, json, os
from sklearn.metrics import accuracy_score

TOP_K = 3
OUT_HOLD = "predicoes_holdout_blend.csv"
OUT_FULL = "predicoes_full_blend.csv"

# --- helper: top-k (robusto) ---
def top_k_acc(y_true, proba, model_classes, k=3):
    y_true = np.asarray(y_true)
    k = min(k, proba.shape[1])
    idx_map = {c:i for i, c in enumerate(model_classes)}
    ac = 0
    for yt, p in zip(y_true, proba):
        if yt in idx_map:
            if idx_map[yt] in np.argpartition(p, -k)[-k:]:
                ac += 1
    return ac / len(y_true) if len(y_true) else np.nan

# --- 1) PRIOR por origem usando SOMENTE TREINO ---
n_classes = len(classes_model)
train_idx = X_train_all.index
orig_train = df.loc[train_idx, "origin_departure"].astype(str)
ytr = y_train_all.loc[train_idx].astype(int)

prior_map = {}
for orig, idxs in orig_train.groupby(orig_train).groups.items():
    yy = ytr.loc[idxs].values
    cnt = np.bincount(yy, minlength=n_classes).astype(float)
    p = cnt / cnt.sum() if cnt.sum() > 0 else np.full(n_classes, 1.0/n_classes)
    prior_map[orig] = p

glob_cnt = np.bincount(ytr.values, minlength=n_classes).astype(float)
global_prior = glob_cnt / glob_cnt.sum()

# --- 2) Grid de alpha para achar melhor mistura no HOLDOUT ---
mask_test = y_test.isin(set(classes_model))
X_test_m  = X_test[mask_test]
y_test_m  = y_test[mask_test]
orig_test = df.loc[X_test_m.index, "origin_departure"].astype(str).values

proba_model = model.predict_proba(X_test_m)
prior_mat   = np.vstack([prior_map.get(o, global_prior) for o in orig_test])

alphas = [0.0, 0.25, 0.5, 0.75, 1.0]   # pode refinar depois
best = {"alpha": None, "acc": -1, "topk": -1}
for a in alphas:
    blend = a*proba_model + (1-a)*prior_mat
    y_pred_codes = classes_model[blend.argmax(axis=1)]
    acc  = accuracy_score(y_test_m, y_pred_codes)
    tka  = top_k_acc(y_test_m, blend, classes_model, k=TOP_K)
    print(f"[BLEND] alpha={a:.2f} -> Top-1={acc:.4f} | Top-{TOP_K}={tka:.4f}")
    if acc > best["acc"]:
        best.update({"alpha": a, "acc": acc, "topk": tka})

print(f"\n[BLEND] Melhor alpha={best['alpha']:.2f} | Top-1={best['acc']:.4f} | Top-{TOP_K}={best['topk']:.4f}")

# --- 3) CSV do HOLDOUT com BLEND ---
blend = best["alpha"]*proba_model + (1-best["alpha"])*prior_mat
top1_idx   = blend.argmax(axis=1)
top1_codes = classes_model[top1_idx]
top1_lbls  = le_dest.inverse_transform(top1_codes)
top1_prob  = np.round(blend[np.arange(len(blend)), top1_idx]*100, 2)

k = min(TOP_K, blend.shape[1])
topk_idx   = np.argsort(blend, axis=1)[:, -k:][:, ::-1]
topk_codes = classes_model[topk_idx]
topk_lbls  = [le_dest.inverse_transform(row).tolist() for row in topk_codes]
topk_probs = [list(np.round(blend[i, topk_idx[i]]*100, 2)) for i in range(len(blend))]

hold = df.loc[X_test_m.index, ['order_id','client_id','purchase_datetime','origin_departure','destination_departure']].copy()
hold["pred_top1_blend"]        = top1_lbls
hold["pred_top1_prob_blend"]   = top1_prob
hold["pred_topk_labels_blend"] = [json.dumps(v) for v in topk_lbls]
hold["pred_topk_probs_blend"]  = [json.dumps(v) for v in topk_probs]
hold.to_csv(OUT_HOLD, index=False, encoding="utf-8")
print(f"[OK] {OUT_HOLD} salvo ({len(hold):,} linhas)")

# --- 4) CSV da BASE INTEIRA com BLEND (em batches) ---
def score_full_blended(model, X_like, df_base, id_cols, classes_model, le_dest, prior_map, global_prior, alpha=0.5, batch=250_000):
    out = []
    n = len(X_like)
    for s in range(0, n, batch):
        e = min(s+batch, n)
        Xi  = X_like.iloc[s:e]
        ids = df_base.loc[Xi.index, list(id_cols)].copy()
        proba = model.predict_proba(Xi)
        origins = df_base.loc[Xi.index, 'origin_departure'].astype(str).values
        prior  = np.vstack([prior_map.get(o, global_prior) for o in origins])
        blend  = alpha*proba + (1-alpha)*prior

        top1_idx   = blend.argmax(axis=1)
        top1_codes = classes_model[top1_idx]
        ids["pred_top1_blend"]      = le_dest.inverse_transform(top1_codes)
        ids["pred_top1_prob_blend"] = np.round(blend[np.arange(len(blend)), top1_idx]*100, 2)

        k = min(TOP_K, blend.shape[1])
        topk_idx   = np.argsort(blend, axis=1)[:, -k:][:, ::-1]
        topk_codes = classes_model[topk_idx]
        topk_lbls  = [le_dest.inverse_transform(row).tolist() for row in topk_codes]
        topk_probs = [list(np.round(blend[i, topk_idx[i]]*100, 2)) for i in range(len(blend))]
        ids["pred_topk_labels_blend"] = [json.dumps(v) for v in topk_lbls]
        ids["pred_topk_probs_blend"]  = [json.dumps(v) for v in topk_probs]

        out.append(ids.reset_index(drop=True))
        print(f"[SCORE BLEND] {e:,}/{n:,}")
    return pd.concat(out, ignore_index=True)

ID_COLS = ('order_id','client_id','purchase_datetime','origin_departure','destination_departure')

# reaproveita as features da base inteira (full_X foi montado no treino)
full_X = X_all  # no seu script ele já existe com o mesmo nome
full_preds_blend = score_full_blended(
    model=model,
    X_like=full_X,
    df_base=df,
    id_cols=ID_COLS,
    classes_model=classes_model,
    le_dest=le_dest,
    prior_map=prior_map,
    global_prior=global_prior,
    alpha=best["alpha"],
    batch=250_000
)
full_preds_blend.to_csv(OUT_FULL, index=False, encoding="utf-8")
print(f"[OK] {OUT_FULL} salvo ({len(full_preds_blend):,} linhas)")


[BLEND] alpha=0.00 -> Top-1=0.3083 | Top-3=0.5200
[BLEND] alpha=0.25 -> Top-1=0.3156 | Top-3=0.5499
[BLEND] alpha=0.50 -> Top-1=0.2405 | Top-3=0.5514
[BLEND] alpha=0.75 -> Top-1=0.2209 | Top-3=0.5402
[BLEND] alpha=1.00 -> Top-1=0.2077 | Top-3=0.4723

[BLEND] Melhor alpha=0.25 | Top-1=0.3156 | Top-3=0.5499
[OK] predicoes_holdout_blend.csv salvo (176,010 linhas)
[SCORE BLEND] 250,000/870,672
[SCORE BLEND] 500,000/870,672
[SCORE BLEND] 750,000/870,672
[SCORE BLEND] 870,672/870,672
[OK] predicoes_full_blend.csv salvo (870,672 linhas)


In [16]:
# Salva prior_map, global_prior e best_alpha dentro do artifacts.joblib
import joblib

arts = joblib.load("artifacts_destino_model/artifacts.joblib")

arts.update({
    "prior_map": {str(k): v.tolist() for k, v in prior_map.items()},
    "global_prior": global_prior.tolist(),
    "best_alpha": float(best["alpha"]),
    # garante que as classes também estejam salvas como lista
    "classes_model": arts.get("classes_model", classes_model).tolist() if hasattr(classes_model, "tolist") else list(classes_model),
})

joblib.dump(arts, "artifacts_destino_model/artifacts.joblib")
print("[OK] prior_map, global_prior, best_alpha gravados em artifacts.joblib")


[OK] prior_map, global_prior, best_alpha gravados em artifacts.joblib


In [18]:
# === Atualizar artifacts.joblib com prior_map / global_prior / best_alpha ===
import joblib, pandas as pd, numpy as np

art_path = "artifacts_destino_model/artifacts.joblib"
arts = joblib.load(art_path)

need_prior = not all(k in arts for k in ("prior_map","global_prior"))
need_alpha = "best_alpha" not in arts

# 1) Se já existirem em memória (variáveis do blend), usa-as
try:
    prior_map     # type: ignore
    global_prior  # type: ignore
    have_in_memory = True
except NameError:
    have_in_memory = False

if have_in_memory:
    # Garantir tipos serializáveis e alinhamento
    classes = np.array(arts["classes_model"])
    arts["prior_map"] = {str(k): np.array(v, dtype=float).tolist() for k, v in prior_map.items()}
    arts["global_prior"] = np.array(global_prior, dtype=float).tolist()
else:
    # 2) Reconstruir a partir do predicoes_full_blend.csv
    # (usa rótulos verdadeiros e alinha às classes do modelo)
    dfp = pd.read_csv("predicoes_full_blend.csv",
                      usecols=["origin_departure","destination_departure"])
    le_dest = arts["le_dest"]
    classes = np.array(arts["classes_model"])

    # Mapeia rótulos "crus" para o espaço do encoder (raras -> "__OUTROS__")
    def map_to_training(lbl: str):
        return lbl if lbl in le_dest.classes_ else "__OUTROS__"

    y_mapped = dfp["destination_departure"].astype(str).map(map_to_training)
    y_codes  = le_dest.transform(y_mapped)

    # posição de cada classe ativa na matriz de probas
    pos = {c:i for i, c in enumerate(classes)}

    # prior por origem (vetor no mesmo ordenamento de classes)
    prior_map_rec = {}
    for orig, grp in dfp.assign(yc=y_codes).groupby("origin_departure"):
        counts = np.zeros(len(classes), dtype=float)
        vals, cnts = np.unique(grp["yc"].values, return_counts=True)
        for v, cnt in zip(vals, cnts):
            if v in pos: counts[pos[v]] = cnt
        s = counts.sum()
        prior_map_rec[str(orig)] = (counts/s if s>0 else np.ones(len(classes))/len(classes)).tolist()

    # prior global
    g_counts = np.zeros(len(classes), dtype=float)
    vals, cnts = np.unique(y_codes, return_counts=True)
    for v, cnt in zip(vals, cnts):
        if v in pos: g_counts[pos[v]] = cnt
    g_prior = (g_counts / g_counts.sum()).tolist()

    arts["prior_map"]   = prior_map_rec
    arts["global_prior"]= g_prior

# 3) alpha (se faltar)
if need_alpha:
    # usa seu melhor alpha conhecido
    arts["best_alpha"] = 0.25

# 4) Garantir que classes_model está como lista
if hasattr(arts.get("classes_model"), "tolist"):
    arts["classes_model"] = arts["classes_model"].tolist()

joblib.dump(arts, art_path)
print("[OK] artifacts atualizados com prior_map, global_prior e best_alpha")



[OK] artifacts atualizados com prior_map, global_prior e best_alpha


In [19]:
import joblib
arts = joblib.load("artifacts_destino_model/artifacts.joblib")
print("tem prior_map?", "prior_map" in arts, "| tem global_prior?", "global_prior" in arts, "| best_alpha:", arts.get("best_alpha"))


tem prior_map? True | tem global_prior? True | best_alpha: 0.25


In [20]:
import json, numpy as np, pandas as pd
from joblib import load

# --- 1) Carrega artefatos e predições do holdout ---
arts = load("artifacts_destino_model/artifacts.joblib")
classes = np.array(arts["classes_model"])
le_dest = arts["le_dest"]

dfh = pd.read_csv("predicoes_holdout_blend.csv")
# colunas esperadas: destination_departure (true), pred_top1_blend, pred_top1_prob_blend, pred_topk_labels_blend, pred_topk_probs_blend
assert {"destination_departure","pred_top1_blend","pred_top1_prob_blend"}.issubset(dfh.columns)

# --- 2) Métricas globais já no arquivo (reconfere) ---
y_true = dfh["destination_departure"].astype(str).values
y_pred = dfh["pred_top1_blend"].astype(str).values
acc = (y_true == y_pred).mean()

# Top-3 do arquivo
def in_topk(row):
    try:
        labels = json.loads(row["pred_topk_labels_blend"])
        return row["destination_departure"] in labels
    except Exception:
        return False

top3 = dfh.apply(in_topk, axis=1).mean()
print(f"[CHECK] Top-1={acc:.4f} | Top-3={top3:.4f}")

# --- 3) Calibração do Top-1 ---
# binning da confiança do top1 vs acerto real
bins = np.linspace(0, 1, 11)  # 10 bins
dfh["conf"] = dfh["pred_top1_prob_blend"].astype(float)/100.0
dfh["ok"] = (dfh["destination_departure"] == dfh["pred_top1_blend"]).astype(int)
dfh["bin"] = pd.cut(dfh["conf"], bins, include_lowest=True)

calib = dfh.groupby("bin").agg(
    n=("ok","size"),
    conf_m=("conf","mean"),
    acc_m=("ok","mean")
).reset_index()
calib["gap"] = calib["acc_m"] - calib["conf_m"]
ece = (calib["n"] * calib["gap"].abs()).sum() / calib["n"].sum()
print(f"[CALIB] ECE (Top-1) = {ece:.4f}")
print(calib[["bin","n","conf_m","acc_m","gap"]])

# --- 4) Limiar de confiança ---
for tau in [0.10, 0.15, 0.20, 0.25]:
    mask = dfh["conf"] >= tau
    cov = mask.mean()
    acc_cond = dfh.loc[mask, "ok"].mean() if cov > 0 else np.nan
    print(f"[THRESH] τ={tau:.2f} -> cobertura={cov:.3f} | acc@conf≥τ={acc_cond:.3f}")

# --- 5) Métricas por segmentos (origem e mês/hora se existirem) ---
seg_cols = []
if "origin_departure" in dfh.columns: seg_cols.append("origin_departure")
if "purchase_datetime" in dfh.columns:
    ts = pd.to_datetime(dfh["purchase_datetime"], errors="coerce")
    dfh["month"] = ts.dt.month
    dfh["hour"]  = ts.dt.hour
    seg_cols += ["month","hour"]

for col in seg_cols:
    g = dfh.groupby(col)["ok"].agg(["size","mean"]).sort_values("size", ascending=False).head(15)
    print(f"\n[SEG] Top 15 {col} por volume:")
    print(g.rename(columns={"size":"n","mean":"acc"}))

# --- 6) Head vs Tail por frequência do destino (no TREINO) ---
# Reconstrói a frequência de classes do TREINO a partir do artifacts (se você salvou y_train_all, pule isso; aqui usamos prior global como proxy)
# Melhor: se você tiver um CSV com destino do TREINO, use-o. Como fallback, usamos o prior_global para ordenar classes (proxy).
prior = np.array(arts["global_prior"], dtype=float)
order = np.argsort(prior)[::-1]  # classes mais frequentes primeiro (aproximação)
# mapeia rótulo -> rank aproximado de frequência
rank = {le_dest.inverse_transform([c])[0]: i for i, c in enumerate(classes[order])}

dfh["freq_rank"] = dfh["destination_departure"].map(rank).fillna(len(rank)+1).astype(int)
# buckets (head <= 10%, mid 10-50%, tail > 50%)
q1, q5 = np.percentile(list(rank.values()), [10, 50])
def bucket(r):
    if r <= q1: return "head"
    if r <= q5: return "mid"
    return "tail"
dfh["bucket"] = dfh["freq_rank"].apply(bucket)
print("\n[HEAD/TAIL] acc por bucket:")
print(dfh.groupby("bucket")["ok"].agg(["size","mean"]).rename(columns={"size":"n","mean":"acc"}))

# --- 7) Drift simples (PSI) treino vs holdout em features numéricas se você tiver X_train_all/X_test salvos ---
# Se não tiver, pule. Exemplo de função PSI para uma coluna:
def psi(expected, actual, bins=10):
    e_bins = pd.qcut(expected, q=bins, duplicates='drop')
    a_bins = pd.cut(actual,   pd.IntervalIndex(e_bins.cat.categories))
    e_pct = e_bins.value_counts(normalize=True).sort_index()
    a_pct = a_bins.value_counts(normalize=True).reindex(e_pct.index).fillna(0)
    return ((a_pct - e_pct) * np.log((a_pct + 1e-9) / (e_pct + 1e-9))).sum()

# Exemplo (se você tiver as colunas brutas no holdout):
num_cols = [c for c in ["total_value","tickets_quantity","days_since_last","client_total_orders","client_avg_value"] if c in dfh.columns]
if num_cols:
    print("\n[DRIFT] PSI (usar treino real como 'expected' quando disponível):")
    # aqui só ilustramos PSI do holdout c/ ele mesmo (não faz sentido estatístico, apenas placeholder)
    for c in num_cols:
        v = dfh[c].dropna()
        if len(v) > 100:
            print(f"{c}: PSI≈{psi(v.sample(min(5000,len(v)), random_state=42), v.sample(min(5000,len(v)), random_state=43)):.4f} (placeholder)")


[CHECK] Top-1=0.0000 | Top-3=0.0000
[CALIB] ECE (Top-1) = 0.2845
             bin      n    conf_m  acc_m       gap
0  (-0.001, 0.1]     20  0.093415    0.0 -0.093415
1     (0.1, 0.2]  48929  0.159785    0.0 -0.159785
2     (0.2, 0.3]  81655  0.247037    0.0 -0.247037
3     (0.3, 0.4]  20329  0.342816    0.0 -0.342816
4     (0.4, 0.5]   9346  0.449467    0.0 -0.449467
5     (0.5, 0.6]   4261  0.545833    0.0 -0.545833
6     (0.6, 0.7]   4659  0.648238    0.0 -0.648238
7     (0.7, 0.8]   4198  0.744091    0.0 -0.744091
8     (0.8, 0.9]   1028  0.850412    0.0 -0.850412
9     (0.9, 1.0]   1585  0.987860    0.0 -0.987860
[THRESH] τ=0.10 -> cobertura=1.000 | acc@conf≥τ=0.000
[THRESH] τ=0.15 -> cobertura=0.894 | acc@conf≥τ=0.000
[THRESH] τ=0.20 -> cobertura=0.722 | acc@conf≥τ=0.000
[THRESH] τ=0.25 -> cobertura=0.501 | acc@conf≥τ=0.000

[SEG] Top 15 origin_departure por volume:
                                                        n  acc
origin_departure                                    

In [21]:
# === Avaliação robusta do holdout (corrige mismatch de rótulos) ===
import json, ast, numpy as np, pandas as pd
from joblib import load

ART = "artifacts_destino_model/artifacts.joblib"
CSV = "predicoes_holdout_blend.csv"

arts = load(ART)
le_dest = arts["le_dest"]
classes_codes = np.array(arts["classes_model"])  # códigos (inteiros) no mesmo eixo do predict_proba
classes_labels = le_dest.inverse_transform(classes_codes)  # nomes (strings) correspondentes

# Mapeadores úteis
code2label = {int(c): l for c, l in zip(classes_codes, classes_labels)}
labels_set = set(le_dest.classes_)  # universo de labels (strings) visto no treino (inclui "__OUTROS__")

dfh = pd.read_csv(CSV)

# --- Funções auxiliares -------------------------------------------------------
def normalize_true_labels(s: pd.Series) -> pd.Series:
    """Coloca y_true no mesmo espaço do treino: se não visto -> '__OUTROS__'."""
    s = s.astype(str).str.strip()
    mask_known = s.isin(labels_set)
    s = s.where(mask_known, "__OUTROS__")
    return s

def looks_numeric_series(s: pd.Series, thresh: float = 0.8) -> bool:
    """Retorna True se >= thresh dos valores forem parseáveis como inteiros."""
    sn = pd.to_numeric(s, errors="coerce")
    ratio = (~sn.isna()).mean()
    return ratio >= thresh

def decode_pred_series_to_labels(s: pd.Series) -> pd.Series:
    """
    Converte a coluna de predição Top-1 para labels (strings).
    - Se já são labels do treino, retorna como está.
    - Se parecem numéricas, faz mapping code->label.
    - Caso contrário, retorna strings limpas.
    """
    s_clean = s.astype(str).str.strip()
    if s_clean.isin(labels_set).mean() > 0.9:
        return s_clean
    if looks_numeric_series(s_clean):
        sn = pd.to_numeric(s_clean, errors="coerce").astype("Int64")
        return sn.map(code2label).fillna("__OUTROS__")
    return s_clean  # melhor esforço

def parse_labels_list(cell):
    """
    Lê a célula de Top-k:
    - Tenta json.loads
    - fallback: ast.literal_eval
    - Converte códigos -> labels se necessário
    - Retorna lista de strings
    """
    if pd.isna(cell):
        return []
    txt = str(cell).strip()
    if not txt:
        return []
    lst = None
    try:
        lst = json.loads(txt)
    except Exception:
        try:
            lst = ast.literal_eval(txt)
        except Exception:
            return []
    if not isinstance(lst, (list, tuple)):
        return []

    # Se for lista de números (ou strings numéricas), mapeia para labels
    arr = pd.Series(lst)
    if looks_numeric_series(arr, thresh=1.0):  # todos numéricos
        arr = pd.to_numeric(arr, errors="coerce").astype("Int64")
        return [code2label.get(int(x), "__OUTROS__") for x in arr.dropna().tolist()]
    # Se já são strings, padroniza e, se não pertencem ao universo, mantém assim (pode vir label hash já correto)
    return [str(x).strip() for x in arr.tolist()]

# --- Normalização das colunas relevantes --------------------------------------
assert "destination_departure" in dfh.columns, "CSV precisa ter destination_departure"
assert "pred_top1_blend" in dfh.columns, "CSV precisa ter pred_top1_blend"

y_true = normalize_true_labels(dfh["destination_departure"])
y_pred_top1 = decode_pred_series_to_labels(dfh["pred_top1_blend"])

# Top-3 (se existir no CSV)
has_topk = "pred_topk_labels_blend" in dfh.columns
if has_topk:
    topk_lists = dfh["pred_topk_labels_blend"].apply(parse_labels_list)
else:
    topk_lists = pd.Series([[]]*len(dfh))

# --- Métricas globais ---------------------------------------------------------
acc = (y_true == y_pred_top1).mean()
if has_topk:
    top3 = (pd.Series([yt in lst for yt, lst in zip(y_true, topk_lists)])).mean()
else:
    top3 = np.nan

print(f"[RESULT] Top-1={acc:.4f} | Top-3={0 if np.isnan(top3) else top3:.4f}")

# --- Calibração (se existir probabilidade do top1) ----------------------------
ece = np.nan
calib_df = None
if "pred_top1_prob_blend" in dfh.columns:
    conf = pd.to_numeric(dfh["pred_top1_prob_blend"], errors="coerce")/100.0
    ok = (y_true == y_pred_top1).astype(int)
    bins = np.linspace(0,1,11)
    binned = pd.cut(conf, bins, include_lowest=True)
    calib_df = pd.DataFrame({
        "n": ok.groupby(binned).size(),
        "conf_m": conf.groupby(binned).mean(),
        "acc_m": ok.groupby(binned).mean()
    }).reset_index().rename(columns={"index":"bin"})
    calib_df["gap"] = calib_df["acc_m"] - calib_df["conf_m"]
    ece = (calib_df["n"] * calib_df["gap"].abs()).sum() / calib_df["n"].sum()
    print(f"[CALIB] ECE={ece:.4f}")

# --- Segmentos (se colunas existirem) -----------------------------------------
seg_reports = {}
df_eval = pd.DataFrame({
    "y_true": y_true,
    "y_pred": y_pred_top1
})
# anexar colunas que existirem
for col in ("origin_departure","purchase_datetime"):
    if col in dfh.columns:
        df_eval[col] = dfh[col]

if "purchase_datetime" in df_eval.columns:
    ts = pd.to_datetime(df_eval["purchase_datetime"], errors="coerce")
    df_eval["month"] = ts.dt.month
    df_eval["hour"]  = ts.dt.hour

df_eval["ok"] = (df_eval["y_true"] == df_eval["y_pred"]).astype(int)

for seg in [c for c in ["origin_departure","month","hour"] if c in df_eval.columns]:
    g = df_eval.groupby(seg)["ok"].agg(n="size", acc="mean").reset_index().sort_values("n", ascending=False)
    seg_reports[seg] = g

# --- Salvar relatórios --------------------------------------------------------
out_dir = "eval_reports"
import os; os.makedirs(out_dir, exist_ok=True)

pd.DataFrame({
    "metric":["top1","top3","ece"],
    "value":[acc, float(top3) if not np.isnan(top3) else np.nan, ece]
}).to_csv(f"{out_dir}/summary_metrics.csv", index=False)

if calib_df is not None:
    calib_df.to_csv(f"{out_dir}/calibration_bins.csv", index=False)

for seg, g in seg_reports.items():
    g.to_csv(f"{out_dir}/segment_{seg}.csv", index=False)

print("[OK] Relatórios salvos em:", out_dir)


[RESULT] Top-1=0.0605 | Top-3=0.4995
[CALIB] ECE=0.2240
[OK] Relatórios salvos em: eval_reports


In [22]:
import json, ast, pandas as pd

def parse_list(cell):
    try:
        return json.loads(cell)
    except Exception:
        try:
            return ast.literal_eval(str(cell))
        except Exception:
            return []

dfh = pd.read_csv("predicoes_holdout_blend.csv")

# Top-k como lista
topk = dfh["pred_topk_labels_blend"].apply(parse_list)

# Checa concordância entre a coluna 'pred_top1_blend' e o 1º item da lista Top-k
agree = (dfh["pred_top1_blend"].astype(str)
         == topk.apply(lambda lst: str(lst[0]) if lst else "")).mean()
print(f"[CHECK] agreement pred_top1_blend vs topk[0]: {agree:.3f}")

# Se a concordância for < 0.9, vamos usar o topk[0] como top-1 "correto"
use_fixed_top1 = agree < 0.90
dfh["pred_top1_blend_fixed"] = topk.apply(lambda lst: str(lst[0]) if lst else "")

# Recalcular métricas com a coluna "fixa"
y_true = dfh["destination_departure"].astype(str)
top1_fixed = (y_true == dfh["pred_top1_blend_fixed"]).mean()
top3 = dfh.apply(lambda r: r["destination_departure"] in (parse_list(r["pred_topk_labels_blend"]) or []), axis=1).mean()

print(f"[RESULT] Top-1(recalc)= {top1_fixed:.4f} | Top-3= {top3:.4f}")

# (Opcional) salvar um CSV "corrigido" para evitar novos mismatches na equipe
dfh.to_csv("predicoes_holdout_blend_fixed.csv", index=False)
print("[OK] predicoes_holdout_blend_fixed.csv salvo")


[CHECK] agreement pred_top1_blend vs topk[0]: 1.000
[RESULT] Top-1(recalc)= 0.0000 | Top-3= 0.0000
[OK] predicoes_holdout_blend_fixed.csv salvo


In [23]:
# === Avaliação ALINHADA ao espaço de classes do modelo (códigos) ===
import json, ast, numpy as np, pandas as pd
from joblib import load

ART = "artifacts_destino_model/artifacts.joblib"
CSV = "predicoes_holdout_blend.csv"

arts = load(ART)
le_dest = arts["le_dest"]                      # LabelEncoder treinado do destino
classes_codes = np.array(arts["classes_model"])  # códigos inteiros usados no modelo
classes_labels = le_dest.inverse_transform(classes_codes)

# Mapas úteis
label2code = {lab: int(code) for lab, code in zip(classes_labels, classes_codes)}
code2label = {int(code): lab for lab, code in label2code.items()}
HAS_OUTROS = "__OUTROS__" in le_dest.classes_
OUTROS_CODE = label2code["__OUTROS__"] if HAS_OUTROS else None

def parse_list(cell):
    if pd.isna(cell): return []
    s = str(cell).strip()
    if not s: return []
    try:
        return json.loads(s)
    except Exception:
        try:
            return ast.literal_eval(s)
        except Exception:
            return []

def series_looks_numeric(s: pd.Series, thresh=0.8) -> bool:
    sn = pd.to_numeric(s, errors="coerce")
    return (~sn.isna()).mean() >= thresh

def to_codes_from_mixed(series) -> pd.Series:
    """
    Converte uma série de rótulos que podem estar como número (código) ou string (hash)
    para códigos do modelo. Valores fora do universo vão para __OUTROS__ (se existir).
    """
    s = series.copy()
    if series_looks_numeric(s):
        # Já parecem códigos
        sn = pd.to_numeric(s, errors="coerce").astype("Int64")
        if OUTROS_CODE is not None:
            sn = sn.where(~sn.isna(), OUTROS_CODE)
        return sn.astype(int)
    else:
        # Strings -> códigos via label2code
        s = s.astype(str).str.strip()
        if OUTROS_CODE is not None:
            return s.map(label2code).fillna(OUTROS_CODE).astype(int)
        else:
            # Se não houver __OUTROS__, dropamos linhas desconhecidas
            known = s.isin(label2code)
            print(f"[WARN] Sem __OUTROS__. Removendo {(~known).sum()} linhas com rótulos desconhecidos.")
            return s[known].map(label2code).astype(int)

def list_to_codes(lst):
    if not isinstance(lst, (list, tuple)): return []
    if len(lst) == 0: return []
    # Se forem números -> códigos direto
    all_num = True
    out = []
    for x in lst:
        try:
            xi = int(x)
            out.append(xi)
        except Exception:
            all_num = False
            break
    if all_num:
        return out
    # Se forem strings -> map
    mapped = []
    for x in lst:
        lab = str(x).strip()
        if lab in label2code:
            mapped.append(label2code[lab])
        elif OUTROS_CODE is not None:
            mapped.append(OUTROS_CODE)
    return mapped

# -------------------- Carrega CSV e normaliza --------------------
dfh = pd.read_csv(CSV)

assert "destination_departure" in dfh.columns, "CSV precisa de 'destination_departure'"
assert "pred_top1_blend" in dfh.columns, "CSV precisa de 'pred_top1_blend'"

y_true_codes  = to_codes_from_mixed(dfh["destination_departure"])
y_pred1_codes = to_codes_from_mixed(dfh["pred_top1_blend"])

if "pred_topk_labels_blend" in dfh.columns:
    topk_codes = dfh["pred_topk_labels_blend"].apply(parse_list).apply(list_to_codes)
else:
    topk_codes = pd.Series([[]]*len(dfh))

# Remove linhas inválidas (se aparecer algo NaN após conversão)
mask_valid = (~pd.isna(y_true_codes)) & (~pd.isna(y_pred1_codes))
n_drop = (~mask_valid).sum()
if n_drop > 0:
    print(f"[INFO] Removendo {n_drop} linhas inválidas após alinhamento.")
y_true_codes  = y_true_codes[mask_valid].astype(int)
y_pred1_codes = y_pred1_codes[mask_valid].astype(int)
topk_codes    = topk_codes[mask_valid].reset_index(drop=True)

# -------------------- Métricas --------------------
acc = (y_true_codes.values == y_pred1_codes.values).mean()
# Top-3
in_top3 = []
for yt, lst in zip(y_true_codes.values, topk_codes.values):
    in_top3.append(yt in (lst[:3] if lst else []))
top3 = np.mean(in_top3) if len(in_top3) else np.nan

print(f"[ALIGNED] Top-1={acc:.4f} | Top-3={0.0 if np.isnan(top3) else top3:.4f}")

# -------------------- (Opcional) Calibração do Top-1 --------------------
ece = np.nan
if "pred_top1_prob_blend" in dfh.columns:
    conf = pd.to_numeric(dfh.loc[mask_valid, "pred_top1_prob_blend"], errors="coerce")/100.0
    ok = (y_true_codes.values == y_pred1_codes.values).astype(int)
    bins = np.linspace(0,1,11)
    binned = pd.cut(conf, bins, include_lowest=True)
    calib = pd.DataFrame({
        "n": pd.Series(ok).groupby(binned).size(),
        "conf_m": conf.groupby(binned).mean(),
        "acc_m": pd.Series(ok).groupby(binned).mean()
    }).reset_index().rename(columns={"index":"bin"})
    calib["gap"] = calib["acc_m"] - calib["conf_m"]
    ece = (calib["n"] * calib["gap"].abs()).sum() / calib["n"].sum()
    print(f"[CALIB] ECE={ece:.4f}")

# -------------------- Salva CSV ALINHADO --------------------
aligned = pd.DataFrame({
    "y_true_code": y_true_codes.values,
    "y_pred1_code": y_pred1_codes.values,
    "in_top3": in_top3
})
# anexa, para conferência humana, as versões em label do par verdadeiro/predito:
aligned["y_true_label"]  = aligned["y_true_code"].map(code2label)
aligned["y_pred1_label"] = aligned["y_pred1_code"].map(code2label)

outf = "predicoes_holdout_blend_aligned.csv"
aligned.to_csv(outf, index=False)
print("[OK] Alinhado salvo em:", outf)


[ALIGNED] Top-1=0.3156 | Top-3=0.5499
[CALIB] ECE=0.0439
[OK] Alinhado salvo em: predicoes_holdout_blend_aligned.csv


In [26]:
import os
os.makedirs("eval_report", exist_ok=True)

thr_df.to_csv("eval_report/threshold_sweep.csv", index=False)
print("[OK] eval_report/threshold_sweep.csv salvo")


import json, ast, numpy as np, pandas as pd
from joblib import load

ART = "artifacts_destino_model/artifacts.joblib"
CSV = "predicoes_holdout_blend.csv"  # contém pred_top1_prob_blend (%)

arts = load(ART)
le_dest = arts["le_dest"]
classes_codes = np.array(arts["classes_model"])
classes_labels = le_dest.inverse_transform(classes_codes)
label2code = {lab:int(code) for lab,code in zip(classes_labels, classes_codes)}
HAS_OUTROS = "__OUTROS__" in le_dest.classes_
OUTROS_CODE = label2code["__OUTROS__"] if HAS_OUTROS else None

def series_looks_numeric(s, t=0.8): return (~pd.to_numeric(s, errors="coerce").isna()).mean()>=t
def to_codes_from_mixed(s):
    if series_looks_numeric(s):
        sn = pd.to_numeric(s, errors="coerce").astype("Int64")
        return sn.where(~sn.isna(), OUTROS_CODE).astype(int)
    s = s.astype(str).str.strip()
    return s.map(label2code).fillna(OUTROS_CODE).astype(int)

df = pd.read_csv(CSV)

y_true = to_codes_from_mixed(df["destination_departure"])
y_pred = to_codes_from_mixed(df["pred_top1_blend"])
conf   = pd.to_numeric(df["pred_top1_prob_blend"], errors="coerce")/100.0

mask = (~y_true.isna()) & (~y_pred.isna()) & (~conf.isna())
y_true = y_true[mask].values
y_pred = y_pred[mask].values
conf   = conf[mask].values

taus = np.round(np.linspace(0.05, 0.9, 18), 2)
rows = []
for t in taus:
    sel = conf >= t
    cov = sel.mean()                       # fração aceita (cobertura)
    acc = (y_true[sel] == y_pred[sel]).mean() if sel.any() else np.nan
    rows.append({"tau":float(t), "coverage":float(cov), "acc_given_tau":float(acc)})

thr_df = pd.DataFrame(rows)
thr_df.to_csv("eval_report/threshold_sweep.csv", index=False)
print(thr_df.head(10))
print("[OK] eval_report/threshold_sweep.csv salvo")


[OK] eval_report/threshold_sweep.csv salvo
    tau  coverage  acc_given_tau
0  0.05  1.000000       0.315596
1  0.10  0.999886       0.315597
2  0.15  0.893756       0.337804
3  0.20  0.722266       0.363891
4  0.25  0.501108       0.434615
5  0.30  0.258275       0.598759
6  0.35  0.190432       0.671699
7  0.40  0.142566       0.731200
8  0.45  0.115050       0.763654
9  0.50  0.089654       0.820722
[OK] eval_report/threshold_sweep.csv salvo


In [27]:
import os, json, ast
import numpy as np
import pandas as pd
from joblib import load

# ---------- Config ----------
ARTS_PATH = "artifacts_destino_model/artifacts.joblib"
INPUT_CSV = "predicoes_full_blend.csv"          # ou "predicoes_holdout_blend.csv"
OUTPUT_CSV = "predicoes_full_blend_thresholded.csv"
TAU_ACCEPT = 0.35   # aceita previsão do modelo se conf >= 0.35
TAU_PRIOR  = 0.20   # se 0.20 <= conf < 0.35 usa prior por origem; abaixo disso: abstém

# ---------- Utilidades ----------
def parse_list(cell):
    if pd.isna(cell): return []
    s = str(cell).strip()
    if not s: return []
    try:
        return json.loads(s)
    except Exception:
        try:
            return ast.literal_eval(s)
        except Exception:
            return []

# ---------- Carrega artefatos ----------
arts = load(ARTS_PATH)
le_dest   = arts["le_dest"]
classes   = np.array(arts["classes_model"])          # códigos ativos
code2lab  = {int(c): lab for c, lab in zip(classes, le_dest.inverse_transform(classes))}
lab2code  = {v:k for k,v in code2lab.items()}

prior_map = arts.get("prior_map", {})                # dict: origem_label -> destino_label (moda no treino)
global_prior = arts.get("global_prior", None)        # label mais frequente global

# ---------- Carrega previsões ----------
dfp = pd.read_csv(INPUT_CSV)

# normaliza prob para [0,1]
if "pred_top1_prob_blend" not in dfp.columns:
    raise ValueError(f"{INPUT_CSV} não tem coluna 'pred_top1_prob_blend'")

conf = pd.to_numeric(dfp["pred_top1_prob_blend"], errors="coerce")
# se vier em %, converte para [0,1]
if conf.max() > 1.0:
    conf = conf / 100.0

# top1 previsto (pode ser label ou código)
pred_top1 = dfp["pred_top1_blend"].astype(str)

# origem (para fallback)
origin_col = "origin_departure"
has_origin = origin_col in dfp.columns
if not has_origin:
    print("[AVISO] CSV não tem 'origin_departure'. Fallback usará apenas 'global_prior'.")
orig = dfp[origin_col].astype(str) if has_origin else pd.Series(["__NO_ORIGIN__"]*len(dfp))

# ---------- Função de fallback ----------
def choose_fallback(origin_label: str):
    # prior por origem -> se não houver, global
    if origin_label in prior_map:
        return prior_map[origin_label]
    return global_prior

# ---------- Decisão final ----------
# Regra:
# conf >= TAU_ACCEPT  -> usa modelo
# TAU_PRIOR <= conf < TAU_ACCEPT -> usa prior de origem (ou global)
# conf < TAU_PRIOR -> abstém (sem previsão final; você pode marcar como NA)
decision = np.where(conf >= TAU_ACCEPT, "model",
             np.where(conf >= TAU_PRIOR, "prior", "abstain"))

final_label = []
final_proba = []
for i, dec in enumerate(decision):
    if dec == "model":
        lbl = str(pred_top1.iloc[i])
        p   = float(conf.iloc[i])
    elif dec == "prior":
        lbl = choose_fallback(str(orig.iloc[i]))
        p   = np.nan   # prob. do prior (opcional: você pode preencher com frequência normalizada)
    else:
        lbl = np.nan
        p   = np.nan
    final_label.append(lbl)
    final_proba.append(p)

out = dfp.copy()
out["decision"] = decision
out["final_pred_label"] = final_label
out["final_pred_prob"]  = final_proba

# (opcional) mantenha também top-k do modelo para explicar casos aceitos
if "pred_topk_labels_blend" in out.columns:
    # já está como string/lista no CSV de origem
    pass

# ---------- Métricas se houver ground truth ----------
acc_all = np.nan
acc_model_only = np.nan
coverage_accept = (decision == "model").mean()
coverage_accept_or_prior = (decision != "abstain").mean()

if "destination_departure" in out.columns:
    y_true = out["destination_departure"].astype(str)

    mask_all = ~y_true.isna() & ~pd.isna(out["final_pred_label"])
    if mask_all.any():
        acc_all = (y_true[mask_all].values == out.loc[mask_all, "final_pred_label"].astype(str).values).mean()

    mask_model = (decision == "model")
    if mask_model.any():
        acc_model_only = (y_true[mask_model].values == out.loc[mask_model, "final_pred_label"].astype(str).values).mean()

    print(f"[METRICS] Cobertura(model)={coverage_accept:.3f} | Cobertura(model+prior)={coverage_accept_or_prior:.3f}")
    print(f"[METRICS] Acc (model-only)={acc_model_only if not np.isnan(acc_model_only) else 'NA'}")
    print(f"[METRICS] Acc (aceitos: model/prior)={acc_all if not np.isnan(acc_all) else 'NA'}")
else:
    print(f"[INFO] Sem rótulo verdadeiro no CSV. Cobertura(model)={coverage_accept:.3f} | Cobertura(total aceitos)={coverage_accept_or_prior:.3f}")

# ---------- Salva ----------
out.to_csv(OUTPUT_CSV, index=False)
print(f"[OK] CSV final salvo: {OUTPUT_CSV}")


[METRICS] Cobertura(model)=0.193 | Cobertura(model+prior)=0.722
[METRICS] Acc (model-only)=0.0
[METRICS] Acc (aceitos: model/prior)=0.0
[OK] CSV final salvo: predicoes_full_blend_thresholded.csv


In [28]:
# ----------------------------
# ALINHAMENTO + MÉTRICAS
# ----------------------------
# coberturas já calculadas:
# coverage_accept = (decision == "model").mean()
# coverage_accept_or_prior = (decision != "abstain").mean()

acc_all = np.nan
acc_model_only = np.nan

# Conjunto de labels que o modelo realmente conhece
model_labels = set(code2lab.values())  # strings invertidas do encoder (mesmas que o modelo preve)

if "destination_departure" in out.columns:
    # Ground truth bruto em string e sem espaços
    y_true_raw = out["destination_departure"].astype(str).str.strip()

    # Alinha o ground truth ao espaço de classes do modelo
    # Tudo que não estiver no espaço do modelo vira "__OUTROS__"
    def align_gt(lbl):
        return lbl if lbl in model_labels else "__OUTROS__"

    out["y_true_aligned"] = y_true_raw.apply(align_gt)

    # Normaliza a previsão final para string/strip
    out["final_pred_label"] = out["final_pred_label"].astype(str).str.strip()

    # Métrica nos exemplos aceitos (model OU prior)
    m_all = (~out["final_pred_label"].isna()) & (out["decision"] != "abstain")
    if m_all.any():
        acc_all = (out.loc[m_all, "final_pred_label"] == out.loc[m_all, "y_true_aligned"]).mean()

    # Métrica só nos aceitos pelo MODELO (sem prior)
    m_model = (out["decision"] == "model")
    if m_model.any():
        acc_model_only = (out.loc[m_model, "final_pred_label"] == out.loc[m_model, "y_true_aligned"]).mean()

    print(f"[METRICS] Cobertura(model)={coverage_accept:.3f} | Cobertura(model+prior)={coverage_accept_or_prior:.3f}")
    print(f"[METRICS] Acc (model-only)={acc_model_only if not np.isnan(acc_model_only) else 'NA'}")
    print(f"[METRICS] Acc (aceitos: model/prior)={acc_all if not np.isnan(acc_all) else 'NA'}")
else:
    print(f"[INFO] CSV sem ground truth ('destination_departure').")
    print(f"[INFO] Cobertura(model)={coverage_accept:.3f} | Cobertura(total aceitos)={coverage_accept_or_prior:.3f}")

# Salva já com y_true_aligned (útil pra auditoria)
out.to_csv(OUTPUT_CSV, index=False)
print(f"[OK] CSV final salvo: {OUTPUT_CSV}")


[METRICS] Cobertura(model)=0.193 | Cobertura(model+prior)=0.722
[METRICS] Acc (model-only)=0.04022967987623468
[METRICS] Acc (aceitos: model/prior)=0.010750345836447187
[OK] CSV final salvo: predicoes_full_blend_thresholded.csv


In [32]:
import os, json, ast
import numpy as np
import pandas as pd
from joblib import load

# =======================
# CONFIG
# =======================
ARTS_PATH   = "artifacts_destino_model/artifacts.joblib"
FULL_CSV    = "predicoes_full_blend.csv"      # já existente
OUT_DIR     = "eval_report"

MODE        = "no_prior"                      # manter simples
TAU_ACCEPT  = 0.35                            # threshold de aceitação do modelo

os.makedirs(OUT_DIR, exist_ok=True)

# =======================
# Carregar artefatos (para alinhar labels)
# =======================
arts = load(ARTS_PATH)
le_dest      = arts["le_dest"]
classes      = np.array(arts["classes_model"])
model_labels = set(le_dest.inverse_transform(classes))

def align_label(lbl: str) -> str:
    return lbl if lbl in model_labels else "__OUTROS__"

# =======================
# 1) Ler o FULL e padronizar colunas
# =======================
df = pd.read_csv(FULL_CSV)

# Predição top-1 — nomes candidatos (o seu já tem 'pred_top1_blend')
PRED_CANDIDATES = [
    "pred_top1_blend","pred_top1","pred_label","prediction","pred",
    "pred_blend_top1","pred_top1_model","pred_model_top1"
]
pred_col = next((c for c in PRED_CANDIDATES if c in df.columns), None)
if pred_col is None:
    raise ValueError(f"Não achei a coluna de predição top-1. Procurei: {PRED_CANDIDATES}.")
df["pred_top1_blend"] = df[pred_col].astype(str).str.strip()

# Confiança top-1 — adicionar 'pred_top1_prob_blend' aqui
CONF_CANDIDATES = [
    "pred_top1_prob_blend",     # <-- seu caso
    "conf_top1_blend","conf_top1","conf_blend","confidence",
    "prob_top1_blend","prob_top1","score_top1","model_conf","conf_model_top1"
]
conf_col = next((c for c in CONF_CANDIDATES if c in df.columns), None)

if conf_col is None and "pred_topk_probs_blend" in df.columns:
    # Extrai a maior prob do array/string de probs
    def _extract_max_prob(x):
        # aceita lista python em string: "[0.12, 0.08, ...]"
        if isinstance(x, str):
            try:
                arr = ast.literal_eval(x)
            except Exception:
                return np.nan
        else:
            arr = x
        try:
            return float(np.max(arr))
        except Exception:
            return np.nan
    df["conf_top1_blend"] = df["pred_topk_probs_blend"].apply(_extract_max_prob).fillna(0.0)
else:
    if conf_col is None:
        found = list(df.columns)
        raise ValueError(
            "Não encontrei coluna de confiança. Tente salvar uma das seguintes no CSV:\n"
            "  - pred_top1_prob_blend  (recomendado)\n"
            "  - conf_top1_blend / prob_top1_blend / ...\n"
            f"Colunas existentes: {found[:50]}{' ...' if len(found)>50 else ''}"
        )
    df["conf_top1_blend"] = pd.to_numeric(df[conf_col], errors="coerce").fillna(0.0)

# =======================
# 2) Threshold simples
# =======================
decision = np.where(df["conf_top1_blend"] >= TAU_ACCEPT, "model", "abstain")
df["decision"] = decision
df["final_pred_label"] = np.where(df["decision"]=="model", df["pred_top1_blend"], "")

# =======================
# 3) Métricas (se houver y_true no CSV)
# =======================
coverage_model = (df["decision"] == "model").mean()
acc_all = acc_model_only = np.nan

if "destination_departure" in df.columns:
    y_true_raw = df["destination_departure"].astype(str).str.strip()
    df["y_true_aligned"] = y_true_raw.apply(align_label)

    m_model = (df["decision"] == "model")
    if m_model.any():
        acc_model_only = (df.loc[m_model, "final_pred_label"] == df.loc[m_model, "y_true_aligned"]).mean()
    # como não estamos usando prior aqui, "aceitos" == "model"
    acc_all = acc_model_only

    print(f"[METRICS] Cobertura(model)={coverage_model:.3f}")
    print(f"[METRICS] Acc(model-only)={acc_model_only if not np.isnan(acc_model_only) else 'NA'}")
else:
    print(f"[INFO] Sem ground truth no FULL. Cobertura(model)={coverage_model:.3f}")

# =======================
# 4) Salvar resultado + métricas
# =======================
suffix = f"{MODE}_t{TAU_ACCEPT:.2f}"
OUT_CSV = f"predicoes_full_blend_thresholded_{suffix}.csv"
df.to_csv(OUT_CSV, index=False)
print(f"[OK] CSV final salvo: {OUT_CSV}")

summary = {
    "mode": MODE,
    "tau_accept": TAU_ACCEPT,
    "coverage_model": float(coverage_model),
    "acc_all": None if np.isnan(acc_all) else float(acc_all),
    "acc_model_only": None if np.isnan(acc_model_only) else float(acc_model_only),
    "n_rows": int(len(df)),
    "used_pred_col": pred_col,
    "used_conf_col": conf_col if conf_col is not None else "pred_topk_probs_blend[max]",
}
with open(os.path.join(OUT_DIR, f"metrics_{suffix}.json"), "w", encoding="utf-8") as f:
    json.dump(summary, f, ensure_ascii=False, indent=2)
print(f"[OK] Métricas salvas em: {os.path.join(OUT_DIR, f'metrics_{suffix}.json')}")


[METRICS] Cobertura(model)=1.000
[METRICS] Acc(model-only)=0.06094947351011632
[OK] CSV final salvo: predicoes_full_blend_thresholded_no_prior_t0.35.csv
[OK] Métricas salvas em: eval_report\metrics_no_prior_t0.35.json


In [33]:
import os, ast, json
import numpy as np, pandas as pd
from joblib import load

ARTS_PATH = "artifacts_destino_model/artifacts.joblib"
FULL_CSV  = "predicoes_full_blend.csv"
OUT_DIR   = "eval_report"
os.makedirs(OUT_DIR, exist_ok=True)

arts = load(ARTS_PATH)
le_dest = arts["le_dest"]
classes = np.array(arts["classes_model"])
model_labels = set(le_dest.inverse_transform(classes))
align = lambda s: s if s in model_labels else "__OUTROS__"

df = pd.read_csv(FULL_CSV)
df["pred_top1_blend"] = df["pred_top1_blend"].astype(str).str.strip()

# pega confiança (se tiver outro nome, ajuste aqui)
if "pred_top1_prob_blend" in df.columns:
    df["conf_top1_blend"] = pd.to_numeric(df["pred_top1_prob_blend"], errors="coerce").fillna(0.0)
elif "pred_topk_probs_blend" in df.columns:
    df["conf_top1_blend"] = df["pred_topk_probs_blend"].apply(lambda x: float(np.max(ast.literal_eval(x))))
else:
    raise ValueError("Não achei coluna de confiança (ex.: pred_top1_prob_blend).")

have_gt = "destination_departure" in df.columns
if have_gt:
    df["y_true_aligned"] = df["destination_departure"].astype(str).str.strip().apply(align)

rows = []
for tau in np.arange(0.05, 0.55, 0.05):
    accept = df["conf_top1_blend"] >= tau
    cov = float(accept.mean())
    if have_gt and accept.any():
        acc = float((df.loc[accept,"pred_top1_blend"] == df.loc[accept,"y_true_aligned"]).mean())
    else:
        acc = np.nan
    rows.append({"tau":round(float(tau),2), "coverage":cov, "acc_given_tau":acc})

thr = pd.DataFrame(rows)
thr.to_csv(os.path.join(OUT_DIR,"threshold_sweep_full_blend.csv"), index=False)
print(thr)


    tau  coverage  acc_given_tau
0  0.05       1.0       0.060949
1  0.10       1.0       0.060949
2  0.15       1.0       0.060949
3  0.20       1.0       0.060949
4  0.25       1.0       0.060949
5  0.30       1.0       0.060949
6  0.35       1.0       0.060949
7  0.40       1.0       0.060949
8  0.45       1.0       0.060949
9  0.50       1.0       0.060949


In [34]:
import os, ast, json
import numpy as np
import pandas as pd
from joblib import load

# -------------------------
# Config
# -------------------------
FULL_CSV  = "predicoes_full_blend.csv"
ARTS_PATH = "artifacts_destino_model/artifacts.joblib"
OUT_DIR   = "eval_report"
TAU_ACCEPT = 0.50  # escolha seu tau (depois de olhar a curva)
os.makedirs(OUT_DIR, exist_ok=True)

# -------------------------
# Funções auxiliares
# -------------------------
def find_conf_column(df):
    cand = [
        "pred_top1_prob_blend",   # <- seu caso
        "conf_top1_blend","conf_top1","conf_blend","confidence",
        "prob_top1_blend","prob_top1","score_top1","model_conf","conf_model_top1"
    ]
    for c in cand:
        if c in df.columns:
            return c
    return None

def normalize_conf(s: pd.Series) -> pd.Series:
    # to numeric (coerce), e tenta normalizar
    x = pd.to_numeric(s, errors="coerce")
    # Heurística: se já está em [0,1], não faz nada
    q95 = x.quantile(0.95)
    mx  = float(x.max())
    if (q95 <= 1.0 and mx <= 1.0):
        return x.clip(0,1)

    # Se parece estar em % (0–100), normaliza
    if mx <= 100.0:
        y = (x / 100.0).clip(0,1)
        print(f"[INFO] Confiança parece estar em %. Normalizado: max={mx:.2f} → agora max={y.max():.4f}")
        return y

    # Caso extremo: valores >100 (algo errado), normaliza por max para evitar tudo=1
    y = (x / (mx if mx != 0 else 1.0)).clip(0,1)
    print(f"[AVISO] Confiança com escala >100. Normalização por max={mx:.2f}.")
    return y

def align_label(lbl: str, model_labels: set) -> str:
    return lbl if lbl in model_labels else "__OUTROS__"

# -------------------------
# Carregar artefatos (labels válidas)
# -------------------------
arts = load(ARTS_PATH)
le_dest = arts["le_dest"]
classes = np.array(arts["classes_model"])
model_labels = set(le_dest.inverse_transform(classes))

# -------------------------
# Ler full e padronizar colunas
# -------------------------
df = pd.read_csv(FULL_CSV)

# Predição top-1
if "pred_top1_blend" not in df.columns:
    raise ValueError("Não encontrei 'pred_top1_blend' no FULL.")

df["pred_top1_blend"] = df["pred_top1_blend"].astype(str).str.strip()

# Confiança top-1
conf_col = find_conf_column(df)
if conf_col is None and "pred_topk_probs_blend" in df.columns:
    # Extrai a maior prob da lista (string) como fallback
    def _extract_max_prob(x):
        try:
            arr = ast.literal_eval(x) if isinstance(x,str) else x
            return float(np.max(arr))
        except Exception:
            return np.nan
    conf_raw = df["pred_topk_probs_blend"].apply(_extract_max_prob)
else:
    if conf_col is None:
        raise ValueError("Não achei coluna de confiança (ex.: 'pred_top1_prob_blend').")
    conf_raw = df[conf_col]

# Normaliza confiança para [0,1]
df["conf_top1_blend"] = normalize_conf(conf_raw)

# -------------------------
# Sweep de thresholds (se tiver y_true)
# -------------------------
have_gt = "destination_departure" in df.columns
rows = []
if have_gt:
    df["y_true_aligned"] = df["destination_departure"].astype(str).str.strip().apply(lambda z: align_label(z, model_labels))

for tau in np.arange(0.05, 0.95+1e-9, 0.05):
    accept = df["conf_top1_blend"] >= tau
    cov = float(accept.mean())
    if have_gt and accept.any():
        acc = float((df.loc[accept,"pred_top1_blend"] == df.loc[accept,"y_true_aligned"]).mean())
    else:
        acc = np.nan
    rows.append({"tau": round(float(tau),2), "coverage": cov, "acc_given_tau": acc})

thr = pd.DataFrame(rows)
thr_path = os.path.join(OUT_DIR, "threshold_sweep_full_blend_fixed.csv")
thr.to_csv(thr_path, index=False)
print(thr.head(12))
print(f"[OK] {thr_path} salvo")

# -------------------------
# Gerar CSV final thresholded
# -------------------------
df["decision"] = np.where(df["conf_top1_blend"] >= TAU_ACCEPT, "model", "abstain")
df["final_pred_label"] = np.where(df["decision"]=="model", df["pred_top1_blend"], "")

out_csv = f"predicoes_full_blend_thresholded_no_prior_t{TAU_ACCEPT:.2f}_fixed.csv"
df.to_csv(out_csv, index=False)
print(f"[OK] CSV final salvo: {out_csv}")

# Métricas simples (se GT existir)
if have_gt:
    m = df["decision"]=="model"
    cov = float(m.mean())
    acc_model = float((df.loc[m,"final_pred_label"] == df.loc[m,"y_true_aligned"]).mean()) if m.any() else np.nan
    print(f"[METRICS] Cobertura(model)={cov:.3f} | Acc(model-only)={acc_model if not np.isnan(acc_model) else 'NA'}")

    with open(os.path.join(OUT_DIR, f"metrics_no_prior_t{TAU_ACCEPT:.2f}_fixed.json"), "w", encoding="utf-8") as f:
        json.dump({"tau":TAU_ACCEPT,"coverage_model":cov,"acc_model_only":None if np.isnan(acc_model) else acc_model}, f, indent=2, ensure_ascii=False)


[INFO] Confiança parece estar em %. Normalizado: max=100.00 → agora max=1.0000
     tau  coverage  acc_given_tau
0   0.05  1.000000       0.060949
1   0.10  0.999956       0.060951
2   0.15  0.894498       0.063629
3   0.20  0.722327       0.039489
4   0.25  0.501794       0.047425
5   0.30  0.259833       0.034518
6   0.35  0.192917       0.040234
7   0.40  0.143620       0.042872
8   0.45  0.116343       0.029695
9   0.50  0.090623       0.031710
10  0.55  0.076121       0.034839
11  0.60  0.065925       0.037440
[OK] eval_report\threshold_sweep_full_blend_fixed.csv salvo
[OK] CSV final salvo: predicoes_full_blend_thresholded_no_prior_t0.50_fixed.csv
[METRICS] Cobertura(model)=0.091 | Acc(model-only)=0.03170982091935668


In [35]:
import os, ast, json
import numpy as np
import pandas as pd
from joblib import load

# -------------------------
# Config
# -------------------------
FULL_CSV  = "predicoes_full_blend.csv"  # tem 'pred_top1_blend' e 'pred_top1_prob_blend'
ARTS_PATH = "artifacts_destino_model/artifacts.joblib"
OUT_DIR   = "eval_report"
TAU_ACCEPT = 0.50  # ajuste depois de ver a curva IN-VOCAB
os.makedirs(OUT_DIR, exist_ok=True)

# -------------------------
# Helpers
# -------------------------
def find_conf_column(df):
    cand = [
        "pred_top1_prob_blend",
        "conf_top1_blend","conf_top1","conf_blend","confidence",
        "prob_top1_blend","prob_top1","score_top1","model_conf","conf_model_top1"
    ]
    for c in cand:
        if c in df.columns:
            return c
    return None

def normalize_conf(s: pd.Series) -> pd.Series:
    x = pd.to_numeric(s, errors="coerce")
    q95 = x.quantile(0.95)
    mx  = float(x.max())
    if (q95 <= 1.0 and mx <= 1.0):
        return x.clip(0,1)
    if mx <= 100.0:
        print(f"[INFO] Confiança parece estar em %. Normalizado: max={mx:.2f} → agora max=1.0000")
        return (x/100.0).clip(0,1)
    print(f"[AVISO] Escala estranha (>100). Normalizando por max={mx:.2f}.")
    return (x/(mx if mx else 1.0)).clip(0,1)

# -------------------------
# Carregar artefatos (labels válidas do modelo)
# -------------------------
arts = load(ARTS_PATH)
le_dest = arts["le_dest"]
classes = np.array(arts["classes_model"])          # índices das classes que o modelo usa
model_labels = set(le_dest.inverse_transform(classes))  # nomes (hashes) dessas classes

# -------------------------
# Ler full e padronizar colunas
# -------------------------
df = pd.read_csv(FULL_CSV)
if "pred_top1_blend" not in df.columns:
    raise ValueError("predicoes_full_blend.csv precisa ter 'pred_top1_blend'.")

df["pred_top1_blend"] = df["pred_top1_blend"].astype(str).str.strip()

# Confiança top-1
conf_col = find_conf_column(df)
if conf_col is None and "pred_topk_probs_blend" in df.columns:
    def _extract_max_prob(x):
        try:
            arr = ast.literal_eval(x) if isinstance(x,str) else x
            return float(np.max(arr))
        except Exception:
            return np.nan
    conf_raw = df["pred_topk_probs_blend"].apply(_extract_max_prob)
else:
    if conf_col is None:
        raise ValueError("Não achei coluna de confiança (ex.: 'pred_top1_prob_blend').")
    conf_raw = df[conf_col]

df["conf_top1_blend"] = normalize_conf(conf_raw)

# -------------------------
# Construir máscara IN-VOCAB (onde existe y_true nas classes do modelo)
# -------------------------
have_gt = "destination_departure" in df.columns
if not have_gt:
    print("[AVISO] FULL sem ground-truth; não dá pra medir acurácia.")
    # mesmo assim gera thresholded por confiança
    mask_accept = df["conf_top1_blend"] >= TAU_ACCEPT
    df["decision"] = np.where(mask_accept, "model", "abstain")
    df["final_pred_label"] = np.where(mask_accept, df["pred_top1_blend"], "")
    out_csv = f"predicoes_full_blend_thresholded_invoc_t{TAU_ACCEPT:.2f}.csv"
    df.to_csv(out_csv, index=False)
    print(f"[OK] {out_csv} salvo (sem métricas).")
else:
    df["destination_departure"] = df["destination_departure"].astype(str).str.strip()
    in_vocab = df["destination_departure"].isin(model_labels)
    out_vocab = ~in_vocab
    print(f"[SPLIT] IN-VOCAB={in_vocab.mean():.3f} | OUT-OF-VOCAB={out_vocab.mean():.3f}")

    # -------- Sweep só em IN-VOCAB --------
    rows = []
    for tau in np.arange(0.05, 0.95+1e-9, 0.05):
        accept = (df["conf_top1_blend"] >= tau) & in_vocab
        cov = float(accept.mean())  # cobertura SOBRE TODO FULL (pra ver impacto global)
        # acurácia condicional SOMENTE nas linhas aceitas IN-VOCAB
        if accept.any():
            acc = float((df.loc[accept,"pred_top1_blend"] == df.loc[accept,"destination_departure"]).mean())
        else:
            acc = np.nan
        rows.append({"tau": round(float(tau),2), "coverage_overall": cov, "acc_on_accepted_in_vocab": acc})
    thr_in = pd.DataFrame(rows)
    thr_in_path = os.path.join(OUT_DIR, "threshold_sweep_in_vocab.csv")
    thr_in.to_csv(thr_in_path, index=False)
    print(f"[OK] {thr_in_path} salvo")
    print(thr_in.head(12))

    # -------- Relato do OUT-OF-VOCAB (taxa de cauda) --------
    tail_rate = float(out_vocab.mean())
    with open(os.path.join(OUT_DIR,"tail_rate.json"),"w",encoding="utf-8") as f:
        json.dump({"tail_rate": tail_rate, "in_vocab_rate": 1.0-tail_rate}, f, indent=2, ensure_ascii=False)
    print(f"[TAIL] OUT-OF-VOCAB rate ~ {tail_rate:.3f} salvo em eval_report/tail_rate.json")

    # -------- CSV final thresholded (aceita só IN-VOCAB confiantes) --------
    accept_final = (df["conf_top1_blend"] >= TAU_ACCEPT) & in_vocab
    df["decision"] = np.where(accept_final, "model", "abstain")
    df["final_pred_label"] = np.where(accept_final, df["pred_top1_blend"], "")
    out_csv = f"predicoes_full_blend_thresholded_invoc_t{TAU_ACCEPT:.2f}.csv"
    df.to_csv(out_csv, index=False)
    print(f"[OK] CSV final salvo: {out_csv}")

    # Métricas resumidas
    cov_overall = float(accept_final.mean())
    if accept_final.any():
        acc_accept = float((df.loc[accept_final,"final_pred_label"] == df.loc[accept_final,"destination_departure"]).mean())
    else:
        acc_accept = np.nan
    with open(os.path.join(OUT_DIR, f"metrics_in_vocab_t{TAU_ACCEPT:.2f}.json"), "w", encoding="utf-8") as f:
        json.dump({
            "tau": TAU_ACCEPT,
            "coverage_overall": cov_overall,
            "acc_on_accepted_in_vocab": None if np.isnan(acc_accept) else acc_accept,
            "tail_rate": tail_rate
        }, f, indent=2, ensure_ascii=False)
    print(f"[METRICS] τ={TAU_ACCEPT:.2f} | cobertura_total={cov_overall:.3f} | acc(aceitos IN-VOCAB)={acc_accept if not np.isnan(acc_accept) else 'NA'}")


[INFO] Confiança parece estar em %. Normalizado: max=100.00 → agora max=1.0000
[SPLIT] IN-VOCAB=0.000 | OUT-OF-VOCAB=1.000
[OK] eval_report\threshold_sweep_in_vocab.csv salvo
     tau  coverage_overall  acc_on_accepted_in_vocab
0   0.05               0.0                       NaN
1   0.10               0.0                       NaN
2   0.15               0.0                       NaN
3   0.20               0.0                       NaN
4   0.25               0.0                       NaN
5   0.30               0.0                       NaN
6   0.35               0.0                       NaN
7   0.40               0.0                       NaN
8   0.45               0.0                       NaN
9   0.50               0.0                       NaN
10  0.55               0.0                       NaN
11  0.60               0.0                       NaN
[TAIL] OUT-OF-VOCAB rate ~ 1.000 salvo em eval_report/tail_rate.json
[OK] CSV final salvo: predicoes_full_blend_thresholded_invoc_t0.50.

In [36]:
import os, ast, json
import numpy as np
import pandas as pd
from joblib import load

FULL_CSV  = "predicoes_full_blend.csv"   # precisa ter pred_top1_blend e a coluna de confiança (ex: pred_top1_prob_blend)
ARTS_PATH = "artifacts_destino_model/artifacts.joblib"
OUT_DIR   = "eval_report"
TAU_ACCEPT = 0.50
os.makedirs(OUT_DIR, exist_ok=True)

def find_conf_column(df):
    cand = [
        "pred_top1_prob_blend",
        "conf_top1_blend","conf_top1","conf_blend","confidence",
        "prob_top1_blend","prob_top1","score_top1","model_conf","conf_model_top1"
    ]
    for c in cand:
        if c in df.columns:
            return c
    return None

def normalize_conf(s: pd.Series) -> pd.Series:
    x = pd.to_numeric(s, errors="coerce")
    q95 = x.quantile(0.95)
    mx  = float(x.max())
    if (q95 <= 1.0 and mx <= 1.0):
        return x.clip(0,1)
    if mx <= 100.0:
        print(f"[INFO] Confiança parece estar em %. Normalizado: max={mx:.2f} → agora max=1.0000")
        return (x/100.0).clip(0,1)
    print(f"[AVISO] Escala estranha (>100). Normalizando por max={mx:.2f}.")
    return (x/(mx if mx else 1.0)).clip(0,1)

# -------------------------
# 1) Artefatos e reconstrução robusta do vocabulário
# -------------------------
arts = load(ARTS_PATH)

def _to_set_str_lower(x):
    return set(map(lambda z: str(z).strip().lower(), x))

model_labels = set()

# a) nomes mantidos explicitamente (se existirem)
if "kept_label_names" in arts:
    model_labels |= _to_set_str_lower(arts["kept_label_names"])

# b) via le_dest + classes_model (índices ou nomes)
if "le_dest" in arts:
    le_dest = arts["le_dest"]
    if "classes_model" in arts:
        cm = np.array(arts["classes_model"])
        try:
            # se for numérico, inverta pelo encoder
            if np.issubdtype(cm.dtype, np.integer) or np.issubdtype(cm.dtype, np.floating):
                cm_int = cm.astype(int)
                inv = le_dest.inverse_transform(cm_int)
                model_labels |= _to_set_str_lower(inv)
            else:
                model_labels |= _to_set_str_lower(cm)
        except Exception as e:
            print("[AVISO] Falha ao inverse_transform classes_model; vou tentar interpretar como nomes. Erro:", e)
            model_labels |= _to_set_str_lower(cm)

# c) adiciona chaves dos priors (normalmente refletem o vocabulário treinado)
if "prior_map" in arts and isinstance(arts["prior_map"], dict):
    for src, dest_counts in arts["prior_map"].items():
        try:
            model_labels |= _to_set_str_lower(dest_counts.keys())
        except Exception:
            pass

if "global_prior" in arts and isinstance(arts["global_prior"], dict):
    model_labels |= _to_set_str_lower(arts["global_prior"].keys())

print(f"[VOCAB] Rótulos únicos recuperados: {len(model_labels)}")

# Sanidade: se ainda estiver vazio, aborta com instrução clara
if len(model_labels) == 0:
    raise RuntimeError("Vocabulário do modelo ficou vazio. Confira o artifacts.joblib: "
                       "precisa ter kept_label_names, ou classes_model + le_dest, ou prior_map/global_prior.")

# -------------------------
# 2) Ler FULL e normalizar colunas
# -------------------------
df = pd.read_csv(FULL_CSV)
if "pred_top1_blend" not in df.columns:
    raise ValueError("O CSV full precisa ter 'pred_top1_blend'.")

df["pred_top1_blend"] = df["pred_top1_blend"].astype(str).str.strip().str.lower()

# confiança top-1
conf_col = find_conf_column(df)
if conf_col is None and "pred_topk_probs_blend" in df.columns:
    def _extract_max_prob(x):
        try:
            arr = ast.literal_eval(x) if isinstance(x,str) else x
            return float(np.max(arr))
        except Exception:
            return np.nan
    conf_raw = df["pred_topk_probs_blend"].apply(_extract_max_prob)
else:
    if conf_col is None:
        raise ValueError("Não achei coluna de confiança (ex.: 'pred_top1_prob_blend').")
    conf_raw = df[conf_col]

df["conf_top1_blend"] = normalize_conf(conf_raw)

# ground-truth normalizado (se existir)
have_gt = "destination_departure" in df.columns
if not have_gt:
    print("[AVISO] FULL sem ground-truth; não dá pra medir acurácia.")
    mask_accept = (df["conf_top1_blend"] >= TAU_ACCEPT)
    df["decision"] = np.where(mask_accept, "model", "abstain")
    df["final_pred_label"] = np.where(mask_accept, df["pred_top1_blend"], "")
    out_csv = f"predicoes_full_blend_thresholded_invoc_t{TAU_ACCEPT:.2f}.csv"
    df.to_csv(out_csv, index=False)
    print(f"[OK] {out_csv} salvo (sem métricas).")
else:
    df["destination_departure"] = df["destination_departure"].astype(str).str.strip().str.lower()

    # -------------------------
    # 3) Split IN-VOCAB vs OUT-OF-VOCAB (robusto)
    # -------------------------
    in_vocab = df["destination_departure"].isin(model_labels)
    out_vocab = ~in_vocab

    # Diagnóstico se der baixo demais
    rate_in = float(in_vocab.mean())
    print(f"[SPLIT] IN-VOCAB={rate_in:.3f} | OUT-OF-VOCAB={float(out_vocab.mean()):.3f}")

    if rate_in == 0.0:
        # Mostra pistas para depurar
        gt_sample = set(df["destination_departure"].head(1000).unique())
        inter = gt_sample & model_labels
        print("[DEBUG] Nenhum match nos 1000 primeiros GT; exemplos de GT:", list(sorted(list(gt_sample))[:5]))
        print("[DEBUG] Exemplos do VOCAB:", list(sorted(list(model_labels))[:5]))
        print("→ Verifique se o FULL e o treino foram normalizados do mesmo jeito (trim/lower/hash).")
        # ainda assim, gera o CSV thresholded (tudo abstain)
        accept_final = (df["conf_top1_blend"] >= TAU_ACCEPT) & in_vocab
        df["decision"] = np.where(accept_final, "model", "abstain")
        df["final_pred_label"] = np.where(accept_final, df["pred_top1_blend"], "")
        out_csv = f"predicoes_full_blend_thresholded_invoc_t{TAU_ACCEPT:.2f}.csv"
        df.to_csv(out_csv, index=False)
        print(f"[OK] CSV final salvo (tudo abstain): {out_csv}")
    else:
        # -------------------------
        # 4) Sweep só em IN-VOCAB
        # -------------------------
        rows = []
        for tau in np.arange(0.05, 0.95 + 1e-9, 0.05):
            accept = (df["conf_top1_blend"] >= tau) & in_vocab
            cov = float(accept.mean())  # cobertura sobre o FULL (impacto global)
            if accept.any():
                acc = float((df.loc[accept, "pred_top1_blend"] ==
                             df.loc[accept, "destination_departure"]).mean())
            else:
                acc = np.nan
            rows.append({
                "tau": round(float(tau), 2),
                "coverage_overall": cov,
                "acc_on_accepted_in_vocab": acc
            })
        thr_in = pd.DataFrame(rows)
        thr_in_path = os.path.join(OUT_DIR, "threshold_sweep_in_vocab.csv")
        thr_in.to_csv(thr_in_path, index=False)
        print(f"[OK] {thr_in_path} salvo")
        print(thr_in.head(12))

        # -------------------------
        # 5) CSV final thresholded (aceita só IN-VOCAB confiantes)
        # -------------------------
        accept_final = (df["conf_top1_blend"] >= TAU_ACCEPT) & in_vocab
        df["decision"] = np.where(accept_final, "model", "abstain")
        df["final_pred_label"] = np.where(accept_final, df["pred_top1_blend"], "")
        out_csv = f"predicoes_full_blend_thresholded_invoc_t{TAU_ACCEPT:.2f}.csv"
        df.to_csv(out_csv, index=False)
        print(f"[OK] CSV final salvo: {out_csv}")

        # métricas resumidas
        cov_overall = float(accept_final.mean())
        if accept_final.any():
            acc_accept = float((df.loc[accept_final, "final_pred_label"] ==
                                df.loc[accept_final, "destination_departure"]).mean())
        else:
            acc_accept = np.nan
        with open(os.path.join(OUT_DIR, f"metrics_in_vocab_t{TAU_ACCEPT:.2f}.json"), "w", encoding="utf-8") as f:
            json.dump({
                "tau": TAU_ACCEPT,
                "coverage_overall": cov_overall,
                "acc_on_accepted_in_vocab": None if np.isnan(acc_accept) else acc_accept,
                "tail_rate": float(out_vocab.mean())
            }, f, indent=2, ensure_ascii=False)
        print(f"[METRICS] τ={TAU_ACCEPT:.2f} | cobertura_total={cov_overall:.3f} | acc(aceitos IN-VOCAB)={acc_accept if not np.isnan(acc_accept) else 'NA'}")


[VOCAB] Rótulos únicos recuperados: 333
[INFO] Confiança parece estar em %. Normalizado: max=100.00 → agora max=1.0000
[SPLIT] IN-VOCAB=0.000 | OUT-OF-VOCAB=1.000
[DEBUG] Nenhum match nos 1000 primeiros GT; exemplos de GT: ['1', '100', '102', '103', '104']
[DEBUG] Exemplos do VOCAB: ['011af72a910ac4acf367eef9e6b761e0980842c30d4e9809840f4141d5163ede', '01a0123885ebec5b37b52ddc058c20d052525c654b69e7b7bfd5feb291428bba', '0228374d12ee995cdeff8e25d819990f80d40c8773321fafe4ce1572c7df29af', '02d20bbd7e394ad5999a4cebabac9619732c343a4cac99470c03e23ba2bdc2bc', '0359bdb72fa3b3ee4adfdfcae848c4b5cc3c889bc7400efc01051e9cd67482c9']
→ Verifique se o FULL e o treino foram normalizados do mesmo jeito (trim/lower/hash).
[OK] CSV final salvo (tudo abstain): predicoes_full_blend_thresholded_invoc_t0.50.csv


In [37]:
# --- depois de:
# arts = load(ARTS_PATH)
# df = pd.read_csv(FULL_CSV)
# e antes de usar destination_departure para split/métricas ---

# 1) Garante que temos o encoder
if "le_dest" not in arts:
    raise RuntimeError("Artefatos não têm 'le_dest'. Não consigo decodificar rótulos numéricos do ground-truth.")

le_dest = arts["le_dest"]

# 2) Detecta e decodifica ground-truth numérico
def _looks_numeric_series(s: pd.Series) -> bool:
    # True se todos os valores forem inteiros válidos (inclusive strings de dígitos)
    try:
        tmp = pd.to_numeric(s, errors="coerce")
        return tmp.notna().all() and (tmp.astype(int) == tmp).all()
    except Exception:
        return False

if "destination_departure" in df.columns:
    gt = df["destination_departure"]

    if _looks_numeric_series(gt):
        # Converte para int e aplica inverse_transform
        gt_int = pd.to_numeric(gt, errors="coerce").astype("Int64")
        if gt_int.isna().any():
            raise ValueError("Há valores não numéricos em 'destination_departure' que não consigo converter.")

        # inverse_transform espera array de int sem NA
        gt_decoded = le_dest.inverse_transform(gt_int.astype(int).to_numpy())
        df["destination_departure"] = pd.Series(gt_decoded, index=df.index)
        print("[FIX] Ground-truth decodificado via le_dest.inverse_transform().")

    # Normalização final (igual ao treino)
    df["destination_departure"] = df["destination_departure"].astype(str).str.strip().str.lower()

# 3) Também normalize as colunas de predição/confiança (mantendo o que você já fazia)
df["pred_top1_blend"] = df["pred_top1_blend"].astype(str).str.strip().str.lower()

# reusa sua função find_conf_column/normalize_conf para montar df["conf_top1_blend"]...


[FIX] Ground-truth decodificado via le_dest.inverse_transform().


In [38]:
import os, json, math, ast
import numpy as np
import pandas as pd

# ====== ENTRADAS ======
ARTS_PATH   = "artifacts_destino_model/artifacts.joblib"
FULL_CSV    = "predicoes_full_blend.csv"   # já carregado no seu fluxo
OUT_DIR     = "eval_report"
TAU_DEFAULT = 0.35                         # ajuste se desejar

# Se ainda não tiver em memória:
# from joblib import load
# arts = load(ARTS_PATH)
# df   = pd.read_csv(FULL_CSV)

# ---------- 1) Normalizações e vocabulário ----------
# GT já foi decodificado via le_dest.inverse_transform() no seu passo anterior.
# Ainda assim, garantimos normalização idêntica:
df["destination_departure"] = df["destination_departure"].astype(str).str.strip().str.lower()
df["pred_top1_blend"]       = df["pred_top1_blend"].astype(str).str.strip().str.lower()

# Vocabulário do modelo
model_labels = set([str(x).strip().lower() for x in arts.get("classes_model", [])])
if not model_labels:
    # fallback: do encoder
    le_dest = arts["le_dest"]
    model_labels = set([str(x).strip().lower() for x in le_dest.classes_])

# ---------- 2) Confiança (detecta a coluna automaticamente) ----------
CONF_CANDIDATES = [
    "conf_top1_blend", "pred_top1_prob_blend", "conf_top1", "confidence",
    "prob_top1_blend", "prob_top1", "score_top1", "model_conf"
]
conf_col = None
for c in CONF_CANDIDATES:
    if c in df.columns:
        conf_col = c
        break
if conf_col is None:
    raise ValueError(f"Não encontrei coluna de confiança. Tentei: {CONF_CANDIDATES}. "
                     f"Colunas no CSV: {list(df.columns)[:40]}{' ...' if df.shape[1]>40 else ''}")

conf = pd.to_numeric(df[conf_col], errors="coerce")
# Normaliza se vier em %
if conf.max() > 1.5:
    print(f"[INFO] Confiança em % detectada (max={conf.max():.2f}). Convertendo para [0,1].")
    conf = conf / 100.0
df["conf_top1_blend"] = conf.clip(0, 1)

# ---------- 3) Top-k (se disponível) ----------
def _safe_parse_list(x):
    # aceita string "[...]" ou já lista/array
    if isinstance(x, (list, tuple, np.ndarray)):
        return list(x)
    if isinstance(x, str):
        try:
            return ast.literal_eval(x)
        except Exception:
            return []
    return []

has_topk = ("pred_topk_labels_blend" in df.columns) and ("pred_topk_probs_blend" in df.columns)
if has_topk:
    topk_labels = df["pred_topk_labels_blend"].apply(_safe_parse_list).apply(
        lambda arr: [str(z).strip().lower() for z in arr]
    )
    topk_probs  = df["pred_topk_probs_blend"].apply(_safe_parse_list)
else:
    topk_labels = None
    topk_probs  = None

# ---------- 4) Split IN-VOCAB ----------
in_vocab_mask = df["destination_departure"].isin(model_labels)
inv_rate = 1.0 - in_vocab_mask.mean()
print(f"[SPLIT] IN-VOCAB={in_vocab_mask.mean():.3f} | OUT-OF-VOCAB={inv_rate:.3f}")

os.makedirs(OUT_DIR, exist_ok=True)
# Pequeno dump do tail rate
with open(os.path.join(OUT_DIR, "tail_rate.json"), "w", encoding="utf-8") as f:
    json.dump({"out_of_vocab_rate": inv_rate}, f, ensure_ascii=False, indent=2)

# ---------- 5) Métricas Top-1 / Top-3 ----------
def top1_acc(df_, mask=None):
    m = mask if mask is not None else np.ones(len(df_), dtype=bool)
    if m.sum() == 0: return float("nan")
    return (df_.loc[m, "pred_top1_blend"] == df_.loc[m, "destination_departure"]).mean()

def top3_acc(df_, mask=None):
    if not has_topk: return float("nan")
    m = mask if mask is not None else np.ones(len(df_), dtype=bool)
    if m.sum() == 0: return float("nan")
    ok = []
    gt = df_.loc[m, "destination_departure"].values
    for i, row_labels in enumerate(topk_labels[m]):
        ok.append(gt[i] in set(row_labels[:3]))
    return float(np.mean(ok)) if ok else float("nan")

acc_overall  = top1_acc(df)
top3_overall = top3_acc(df)
print(f"[RESULT] Top-1(overall)= {acc_overall:.4f} | Top-3(overall)= {top3_overall:.4f}")

acc_in  = top1_acc(df, in_vocab_mask)
top3_in = top3_acc(df, in_vocab_mask)
print(f"[RESULT IN-VOCAB] Top-1= {acc_in:.4f} | Top-3= {top3_in:.4f}")

# ---------- 6) Calibração (ECE em 10 bins, Top-1) ----------
def ece_top1(conf, correct, n_bins=10):
    bins = np.linspace(0, 1, n_bins+1)
    idx  = np.digitize(conf, bins) - 1
    ece = 0.0; rows=[]
    for b in range(n_bins):
        m = idx == b
        if m.sum() == 0:
            rows.append((f"({bins[b]:.1f},{bins[b+1]:.1f}]", 0, np.nan, np.nan, np.nan))
            continue
        conf_m = conf[m].mean()
        acc_m  = correct[m].mean()
        rows.append((f"({bins[b]:.1f},{bins[b+1]:.1f}]", int(m.sum()), conf_m, acc_m, acc_m - conf_m))
        ece += (m.mean()) * abs(acc_m - conf_m)
    return ece, pd.DataFrame(rows, columns=["bin","n","conf_m","acc_m","gap"])

correct_overall = (df["pred_top1_blend"] == df["destination_departure"]).astype(float).values
ece, calib_df = ece_top1(df["conf_top1_blend"].values, correct_overall, n_bins=10)
calib_df.to_csv(os.path.join(OUT_DIR, "calibration_bins.csv"), index=False)
print(f"[CALIB] ECE(Top-1) = {ece:.4f} | bins salvos em {OUT_DIR}/calibration_bins.csv")

# ---------- 7) Threshold sweep (τ) ----------
def sweep_threshold(df_, taus=np.arange(0.05, 0.61, 0.05)):
    rows = []
    for t in taus:
        acc_m = float("nan")
        m = df_["conf_top1_blend"] >= t
        cov = m.mean()
        if cov > 0:
            acc_m = (df_.loc[m, "pred_top1_blend"] == df_.loc[m, "destination_departure"]).mean()
        rows.append({"tau": float(t), "coverage": float(cov), "acc_given_tau": float(acc_m if not math.isnan(acc_m) else np.nan)})
    return pd.DataFrame(rows)

thr_df = sweep_threshold(df)
thr_df.to_csv(os.path.join(OUT_DIR, "threshold_sweep_full_blend.csv"), index=False)
print(f"[OK] {OUT_DIR}/threshold_sweep_full_blend.csv salvo")

# ---------- 8) CSV final thresholded ----------
tau = TAU_DEFAULT
accepted = df["conf_top1_blend"] >= tau
out = df.copy()
out["accepted"] = accepted.astype(int)     # 1 = modelo confiante, 0 = abstain
# (opcional) marque também se o GT estava no vocabulário do modelo
out["in_vocab"] = in_vocab_mask.astype(int)

FINAL_CSV = f"predicoes_full_blend_thresholded_no_prior_t{tau:.2f}.csv".replace(",", ".")
out.to_csv(FINAL_CSV, index=False)
print(f"[OK] CSV final salvo: {FINAL_CSV}")

# ---------- 9) Métricas → JSON ----------
metrics = {
    "top1_overall": float(acc_overall),
    "top3_overall": float(top3_overall) if not math.isnan(top3_overall) else None,
    "top1_in_vocab": float(acc_in) if not math.isnan(acc_in) else None,
    "top3_in_vocab": float(top3_in) if not math.isnan(top3_in) else None,
    "ece_top1": float(ece),
    "threshold": float(tau),
    "coverage_at_tau": float(accepted.mean()),
}
with open(os.path.join(OUT_DIR, "metrics_full_blend.json"), "w", encoding="utf-8") as f:
    json.dump(metrics, f, ensure_ascii=False, indent=2)
print(f"[OK] Métricas salvas em: {OUT_DIR}/metrics_full_blend.json")



[SPLIT] IN-VOCAB=0.000 | OUT-OF-VOCAB=1.000
[RESULT] Top-1(overall)= 0.3253 | Top-3(overall)= 0.5633
[RESULT IN-VOCAB] Top-1= nan | Top-3= nan
[CALIB] ECE(Top-1) = 0.0459 | bins salvos em eval_report/calibration_bins.csv
[OK] eval_report/threshold_sweep_full_blend.csv salvo
[OK] CSV final salvo: predicoes_full_blend_thresholded_no_prior_t0.35.csv
[OK] Métricas salvas em: eval_report/metrics_full_blend.json


In [39]:
import os, json, math, ast, numpy as np, pandas as pd

OUT_DIR = "eval_report"
os.makedirs(OUT_DIR, exist_ok=True)

# --- Normalização mínima (mantenha seu df carregado) ---
df["destination_departure"] = df["destination_departure"].astype(str).str.strip().str.lower()
df["pred_top1_blend"]       = df["pred_top1_blend"].astype(str).str.strip().str.lower()

# --- VOCAB corrigido ---
le_dest = arts["le_dest"]
def _build_model_labels(arts):
    cls = np.array(list(arts.get("classes_model", [])))
    base = None
    if cls.size > 0:
        if np.issubdtype(cls.dtype, np.integer):
            try:
                base = le_dest.inverse_transform(cls)
            except Exception:
                base = None
        if base is None and cls.dtype.kind in ("U", "S", "O"):
            base = cls
    if base is None:
        base = le_dest.classes_
    return set(str(x).strip().lower() for x in base)

model_labels = _build_model_labels(arts)

# --- Confiança (detecção automática já usada antes) ---
conf_col = "conf_top1_blend" if "conf_top1_blend" in df.columns else "pred_top1_prob_blend"
conf = pd.to_numeric(df[conf_col], errors="coerce")
if conf.max() > 1.5:
    print(f"[INFO] Confiança em % detectada (max={conf.max():.2f}). Convertendo para [0,1].")
    conf = conf / 100.0
df["conf_top1_blend"] = conf.clip(0,1)

# --- Top-k (se existir) ---
def _safe_parse_list(x):
    if isinstance(x, (list, tuple, np.ndarray)): return list(x)
    if isinstance(x, str):
        try: return ast.literal_eval(x)
        except: return []
    return []
has_topk = ("pred_topk_labels_blend" in df.columns) and ("pred_topk_probs_blend" in df.columns)
if has_topk:
    topk_labels = df["pred_topk_labels_blend"].apply(_safe_parse_list).apply(lambda arr: [str(z).strip().lower() for z in arr])
else:
    topk_labels = None

# --- Split IN/OUT agora correto ---
in_vocab_mask = df["destination_departure"].isin(model_labels)
print(f"[SPLIT] IN-VOCAB={in_vocab_mask.mean():.3f} | OUT-OF-VOCAB={1 - in_vocab_mask.mean():.3f}")
with open(os.path.join(OUT_DIR, "tail_rate.json"), "w", encoding="utf-8") as f:
    json.dump({"out_of_vocab_rate": float(1 - in_vocab_mask.mean())}, f, ensure_ascii=False, indent=2)

# --- Métricas ---
def top1_acc(mask=None):
    m = np.ones(len(df), dtype=bool) if mask is None else mask
    if m.sum()==0: return float("nan")
    return (df.loc[m,"pred_top1_blend"]==df.loc[m,"destination_departure"]).mean()

def top3_acc(mask=None):
    if not has_topk: return float("nan")
    m = np.ones(len(df), dtype=bool) if mask is None else mask
    if m.sum()==0: return float("nan")
    ok=[]; gt=df.loc[m,"destination_departure"].values
    tkl = topk_labels[m]
    for i, labs in enumerate(tkl):
        ok.append(gt[i] in set(labs[:3]))
    return float(np.mean(ok)) if ok else float("nan")

acc_overall, top3_overall = top1_acc(), top3_acc()
acc_in, top3_in          = top1_acc(in_vocab_mask), top3_acc(in_vocab_mask)
print(f"[RESULT] Top-1(overall)= {acc_overall:.4f} | Top-3(overall)= {top3_overall:.4f}")
print(f"[RESULT IN-VOCAB] Top-1= {acc_in:.4f} | Top-3= {top3_in:.4f}")

# --- ECE simples ---
def ece_top1(conf, correct, n_bins=10):
    bins=np.linspace(0,1,n_bins+1); idx=np.digitize(conf,bins)-1; ece=0; rows=[]
    for b in range(n_bins):
        m = idx==b
        if m.sum()==0:
            rows.append((f"({bins[b]:.1f},{bins[b+1]:.1f}]",0,np.nan,np.nan,np.nan)); continue
        cm=conf[m].mean(); am=correct[m].mean()
        rows.append((f"({bins[b]:.1f},{bins[b+1]:.1f}]",int(m.sum()),cm,am,am-cm))
        ece += (m.mean())*abs(am-cm)
    return ece, pd.DataFrame(rows, columns=["bin","n","conf_m","acc_m","gap"])

correct = (df["pred_top1_blend"]==df["destination_departure"]).astype(float).values
ece, calib_df = ece_top1(df["conf_top1_blend"].values, correct, n_bins=10)
calib_df.to_csv(os.path.join(OUT_DIR,"calibration_bins.csv"), index=False)
print(f"[CALIB] ECE(Top-1) = {ece:.4f}")

# --- Threshold sweep e CSV final (mesmo τ que você usou) ---
def sweep(df_, taus=np.arange(0.05,0.61,0.05)):
    rows=[]
    for t in taus:
        m = df_["conf_top1_blend"]>=t
        cov = m.mean()
        acc = (df_.loc[m,"pred_top1_blend"]==df_.loc[m,"destination_departure"]).mean() if cov>0 else np.nan
        rows.append({"tau":float(t),"coverage":float(cov),"acc_given_tau":float(acc if not math.isnan(acc) else np.nan)})
    return pd.DataFrame(rows)

thr_df = sweep(df)
thr_df.to_csv(os.path.join(OUT_DIR,"threshold_sweep_full_blend.csv"), index=False)
print("[OK] threshold_sweep_full_blend.csv salvo")

TAU = 0.35
out = df.copy()
out["accepted"] = (out["conf_top1_blend"] >= TAU).astype(int)
out["in_vocab"] = in_vocab_mask.astype(int)
final_csv = f"predicoes_full_blend_thresholded_no_prior_t{TAU:.2f}.csv"
out.to_csv(final_csv, index=False)
print(f"[OK] CSV final salvo: {final_csv}")

with open(os.path.join(OUT_DIR,"metrics_full_blend.json"), "w", encoding="utf-8") as f:
    json.dump({
        "top1_overall": float(acc_overall),
        "top3_overall": float(top3_overall) if not math.isnan(top3_overall) else None,
        "top1_in_vocab": float(acc_in) if not math.isnan(acc_in) else None,
        "top3_in_vocab": float(top3_in) if not math.isnan(top3_in) else None,
        "ece_top1": float(ece),
        "threshold": float(TAU),
        "coverage_at_tau": float((out['accepted']==1).mean())
    }, f, ensure_ascii=False, indent=2)
print(f"[OK] Métricas salvas em {OUT_DIR}/metrics_full_blend.json")


[SPLIT] IN-VOCAB=1.000 | OUT-OF-VOCAB=0.000
[RESULT] Top-1(overall)= 0.3253 | Top-3(overall)= 0.5633
[RESULT IN-VOCAB] Top-1= 0.3253 | Top-3= 0.5633
[CALIB] ECE(Top-1) = 0.0459
[OK] threshold_sweep_full_blend.csv salvo
[OK] CSV final salvo: predicoes_full_blend_thresholded_no_prior_t0.35.csv
[OK] Métricas salvas em eval_report/metrics_full_blend.json


In [40]:
# ==== eval_temporal_segmentos.py ====
import os, ast, json
import numpy as np
import pandas as pd
from joblib import load

PRED_CSV = "predicoes_full_blend.csv"  # se quiser avaliar o thresholded, troque aqui
ARTS    = "artifacts_destino_model/artifacts.joblib"
OUTDIR  = "eval_report"

os.makedirs(OUTDIR, exist_ok=True)

# ---------- util ----------

def _parse_list(s):
    """Tenta converter para lista de strings (suporta JSON ou lista em string com aspas simples)."""
    if isinstance(s, (list, tuple, np.ndarray)):
        return [str(x) for x in s]
    if pd.isna(s):
        return []
    st = str(s).strip()
    try:
        return [str(x) for x in json.loads(st)]
    except Exception:
        try:
            return [str(x) for x in ast.literal_eval(st)]
        except Exception:
            # fallback: único rótulo como string
            return [st]

def top1_acc(y_true, y_pred):
    return float((y_true == y_pred).mean())

def topk_acc(y_true, y_topk_lists):
    ok = []
    for yt, lst in zip(y_true, y_topk_lists):
        lst = _parse_list(lst)
        ok.append(yt in lst)
    return float(np.mean(ok))

# ---------- carrega ----------

arts = load(ARTS)
le_dest = arts["le_dest"]
# demais encoders e modelo ficam disponíveis se precisar

dfp = pd.read_csv(PRED_CSV)

# Normaliza colunas esperadas
need_cols = {"purchase_datetime","destination_departure",
             "pred_top1_blend","pred_topk_labels_blend"}
missing = need_cols - set(dfp.columns)
if missing:
    raise ValueError(f"Faltam colunas em {PRED_CSV}: {missing}")

# Datas
dfp["purchase_datetime"] = pd.to_datetime(dfp["purchase_datetime"], errors="coerce")
dfp["ym"] = dfp["purchase_datetime"].dt.to_period("M").astype(str)
dfp["hour"] = dfp["purchase_datetime"].dt.hour

# Garante comparabilidade dos rótulos: todos como strings (mesmo “vocabulário” do treino)
# Caso o ground-truth esteja codificado numérico, decodifica com le_dest
y_true_raw = dfp["destination_departure"].astype(str)
# Heurística: se é tudo dígito e dentro do range do encoder, decodifica:
if y_true_raw.str.fullmatch(r"\d+").all():
    y_true_ids = y_true_raw.astype(int).to_numpy()
    try:
        y_true = pd.Series(le_dest.inverse_transform(y_true_ids))
    except Exception:
        y_true = y_true_raw.astype(str)
else:
    y_true = y_true_raw

y_pred1 = dfp["pred_top1_blend"].astype(str)
y_topk  = dfp["pred_topk_labels_blend"]

# ---------- métricas globais ----------
acc1 = top1_acc(y_true, y_pred1)
acc3 = topk_acc(y_true, y_topk)
print(f"[GLOBAL] Top-1={acc1:.4f} | Top-3={acc3:.4f}")

# ---------- por mês ----------
rows = []
for ym, g in dfp.groupby("ym"):
    yt = y_true.loc[g.index]
    yp = y_pred1.loc[g.index]
    yk = y_topk.loc[g.index]
    a1 = top1_acc(yt, yp)
    a3 = topk_acc(yt, yk)
    rows.append({"ym": ym, "n": len(g), "top1": a1, "top3": a3})

temporal_df = pd.DataFrame(rows).sort_values("ym")
temporal_df.to_csv(os.path.join(OUTDIR, "temporal_by_month.csv"), index=False)
print(f"[OK] temporal_by_month.csv salvo ({len(temporal_df)} linhas)")

# ---------- por origem (top N por volume) ----------
TOPN = 30
by_origin = (dfp
             .assign(_ok_top1=(y_true==y_pred1).astype(int))
             .groupby("origin_departure")
             .agg(n=("origin_departure","size"),
                  acc=("._ok_top1", "mean"))
             .rename(columns={"._ok_top1":"acc"})
             .sort_values("n", ascending=False)
             .head(TOPN)
             .reset_index())
by_origin.to_csv(os.path.join(OUTDIR, "by_origin_top30.csv"), index=False)
print(f"[OK] by_origin_top30.csv salvo")

# ---------- por hora ----------
by_hour = (dfp
           .assign(_ok_top1=(y_true==y_pred1).astype(int))
           .groupby("hour")
           .agg(n=("hour","size"),
                acc=("._ok_top1","mean"))
           .rename(columns={"._ok_top1":"acc"})
           .reset_index()
           .sort_values("hour"))
by_hour.to_csv(os.path.join(OUTDIR, "by_hour.csv"), index=False)
print(f"[OK] by_hour.csv salvo")

# Resumo de console
print("\n[MÊS] (últimos 6)")
print(temporal_df.tail(6))
print("\n[ORIGEM] Top 10")
print(by_origin.head(10))
print("\n[HORA]")
print(by_hour.head(10))


[GLOBAL] Top-1=0.3253 | Top-3=0.5633
[OK] temporal_by_month.csv salvo (128 linhas)


KeyError: "Column(s) ['._ok_top1'] do not exist"