# In [None]:
# ============================================================
# RSNA Intracranial Aneurysm Detection — GBM 2-stage (rápido)
# LGBM + XGB + CatBoost + ExtraTrees | Sin internet | Kaggle API OK
# ============================================================

# ---- Imports
import os, json, random, warnings, gc
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier

import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier

import scipy.ndimage as ndi
import pydicom
import joblib

import kaggle_evaluation.rsna_inference_server

# ============================================================
# Configuration (fast profile)
# ============================================================
INPUT_DIR = "/kaggle/input/rsna-intracranial-aneurysm-detection"
SERIES_DIR = os.path.join(INPUT_DIR, "series")
TRAIN_CSV = os.path.join(INPUT_DIR, "train.csv")

# Seed both the stdlib and the NumPy global generators.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# ---- Speed / quality trade-offs
N_FOLDS = 2          # 2 is fast; use 3 if the time budget allows
READ_INTENS = True   # compute statistics on subsampled pixel intensities
FAST_SLICES = 24     # max number of slices read per series (24/32/48)
STRIDE_INTENS = 32   # stride over flattened pixels when sampling intensities

# ---- Boosting budgets (upper bounds on rounds / iterations)
MAX_ROUNDS_LGB = 1200
MAX_ROUNDS_XGB = 1200
CAT_ITERS = 6000

# ---- Thread counts per library
LGB_NUM_THREADS = 8
XGB_NTHREAD = 8
CAT_THREAD_COUNT = 8
ET_N_JOBS = 4

# Location of the cached feature table.
FEAT_CACHE = "/kaggle/working/features_fast.parquet"

# Number of folds requested for the stacking meta-model.
META_FOLDS = 3

# ---- Targets: 13 location labels plus the series-level label
LABELS_13 = [
    'Left Infraclinoid Internal Carotid Artery',
    'Right Infraclinoid Internal Carotid Artery',
    'Left Supraclinoid Internal Carotid Artery',
    'Right Supraclinoid Internal Carotid Artery',
    'Left Middle Cerebral Artery',
    'Right Middle Cerebral Artery',
    'Anterior Communicating Artery',
    'Left Anterior Cerebral Artery',
    'Right Anterior Cerebral Artery',
    'Left Posterior Communicating Artery',
    'Right Posterior Communicating Artery',
    'Basilar Tip',
    'Other Posterior Circulation',
]
MAIN = 'Aneurysm Present'
ALL_TARGETS = [*LABELS_13, MAIN]

# ============================================================
# Utils
# ============================================================
def robust_stats(arr: np.ndarray):
    """Summarise an array with clipped mean/std and a few percentiles.

    Non-finite entries are discarded, the remaining values are clipped to
    their own 1st-99th percentile range, and every statistic is computed on
    the clipped values.

    Returns a dict with keys ``mean, std, p1, p5, p50, p95, p99``. All values
    are 0 when the input is empty or has no finite entries.
    """
    stat_names = ('mean', 'std', 'p1', 'p5', 'p50', 'p95', 'p99')
    if arr.size == 0:
        return dict.fromkeys(stat_names, 0)

    finite = arr.astype(np.float32)
    finite = finite[np.isfinite(finite)]
    if finite.size == 0:
        return dict.fromkeys(stat_names, 0)

    lo, hi = np.percentile(finite, [1, 99])
    clipped = np.clip(finite, lo, hi)

    summary = {
        'mean': float(clipped.mean()),
        # small epsilon keeps the spread strictly positive
        'std': float(clipped.std() + 1e-6),
    }
    for q in (1, 5, 50, 95, 99):
        summary[f'p{q}'] = float(np.percentile(clipped, q))
    return summary

# Intensity windows (low, high) applied to the sampled values, and the number
# of histogram bins computed over the first window.
HU_WINDOWS = [(-100, 700), (-200, 200), (500, 1500)]
HIST_BINS = 16

# Statistic names emitted by robust_stats(); used to build zero placeholders.
_STAT_KEYS = ('mean', 'std', 'p1', 'p5', 'p50', 'p95', 'p99')


def _zero_stats(prefix: str) -> dict:
    """Return zero placeholders ``{prefix}_{stat}`` for one statistics group."""
    return {f'{prefix}_{k}': 0.0 for k in _STAT_KEYS}


def _zero_hist() -> dict:
    """Return zero placeholders for every histogram-bin feature."""
    return {f'Hist_ct_bin_{i}': 0.0 for i in range(HIST_BINS)}


def _instance_sort_key(path: str):
    """Return ``(InstanceNumber, path)`` for ordering slices.

    A header that cannot be read, or a missing/invalid InstanceNumber, sorts
    as 0 instead of aborting the whole sort. The path tie-break makes the
    order independent of the arbitrary order returned by ``os.listdir``.
    """
    try:
        hdr = pydicom.dcmread(path, stop_before_pixels=True, force=True)
        num = int(getattr(hdr, 'InstanceNumber', 0) or 0)
    except Exception:
        num = 0
    return (num, path)


def extract_series_features(series_path: str, max_slices: int = FAST_SLICES):
    """Build one flat feature dict for a DICOM series directory.

    Reads header metadata from up to ``max_slices`` evenly spaced ``.dcm``
    files and, when ``READ_INTENS`` is set, subsampled pixel intensities.

    Features produced:
      * ``NumSlices`` and medians of rows, columns, pixel spacing and slice
        thickness;
      * ``Int_*``: robust statistics of the sampled intensities;
      * ``HU_w{i}_*``: robust statistics after clipping to each ``HU_WINDOWS``
        entry;
      * ``Hist_ct_bin_*``: density histogram over the first window;
      * ``Tex3_*``: robust statistics of the local 3x3 variance;
      * ``Modality_tag``, ``is_CT_like``, ``is_MR_like``.

    Args:
        series_path: Directory containing the series' ``.dcm`` files.
        max_slices: Maximum number of slices to read.

    Returns:
        dict of feature name -> value. The same keys are always present; a
        missing or empty directory yields zero/'UNK' placeholders.
    """
    try:
        files = [os.path.join(series_path, f) for f in os.listdir(series_path) if f.endswith(".dcm")]
    except (OSError, TypeError, ValueError):
        files = []
    if not files:
        out = {
            'NumSlices': 0, 'Rows_med': 0, 'Cols_med': 0,
            'PixelSpacingX_med': 0.0, 'PixelSpacingY_med': 0.0,
            'SliceThickness_med': 0.0,
            'Modality_tag': 'UNK', 'is_CT_like': 0, 'is_MR_like': 0,
        }
        out.update(_zero_stats('Int'))
        out.update(_zero_hist())
        out.update(_zero_stats('Tex3'))
        for wi in range(len(HU_WINDOWS)):
            out.update(_zero_stats(f'HU_w{wi}'))
        return out

    # Deterministic slice order: InstanceNumber first, file path as tie-break.
    files.sort(key=_instance_sort_key)

    n = len(files)
    if n <= max_slices:
        chosen = files
    else:
        chosen = [files[i] for i in np.linspace(0, n - 1, max_slices).astype(int)]

    rows, cols, px, py, th = [], [], [], [], []
    modality_tag = None
    intens_list = []
    texture_vals = []
    # Read pixels from at most ~32 of the chosen slices.
    step_slice = max(1, len(chosen) // max(1, min(len(chosen), 32)))

    for i, fp in enumerate(chosen):
        # Best effort: a slice that fails to parse is skipped.
        try:
            ds_meta = pydicom.dcmread(fp, stop_before_pixels=True, force=True)
            if modality_tag is None:
                modality_tag = getattr(ds_meta, 'Modality', None)
            if hasattr(ds_meta, 'Rows'):
                rows.append(int(ds_meta.Rows))
            if hasattr(ds_meta, 'Columns'):
                cols.append(int(ds_meta.Columns))
            if hasattr(ds_meta, 'PixelSpacing') and len(ds_meta.PixelSpacing) == 2:
                px.append(float(ds_meta.PixelSpacing[0]))
                py.append(float(ds_meta.PixelSpacing[1]))
            if hasattr(ds_meta, 'SliceThickness'):
                th.append(float(ds_meta.SliceThickness))

            if READ_INTENS and i % step_slice == 0:
                ds = pydicom.dcmread(fp, force=True)
                if hasattr(ds, 'PixelData'):
                    arr = ds.pixel_array.astype(np.float32)
                    # For CT, convert stored values with the rescale slope/intercept.
                    if modality_tag is not None and str(modality_tag).upper().startswith("CT"):
                        slope = float(getattr(ds, 'RescaleSlope', 1.0))
                        intercept = float(getattr(ds, 'RescaleIntercept', 0.0))
                        arr = arr * slope + intercept
                    # Subsampled intensities; fall back to all pixels if the
                    # stride is unusable.
                    try:
                        intens_list.append(arr.ravel()[::STRIDE_INTENS])
                    except Exception:
                        intens_list.append(arr.ravel())

                    # Local 3x3 variance via E[x^2] - E[x]^2 with uniform filters.
                    try:
                        mean_local = ndi.uniform_filter(arr, size=3)
                        mean_sq_local = ndi.uniform_filter(arr * arr, size=3)
                        local_var = mean_sq_local - mean_local * mean_local
                        texture_vals.append(local_var.ravel()[::STRIDE_INTENS])
                    except Exception:
                        pass
        except Exception:
            continue

    feats = dict(
        NumSlices=n,
        Rows_med=int(np.median(rows)) if rows else 0,
        Cols_med=int(np.median(cols)) if cols else 0,
        PixelSpacingX_med=float(np.median(px)) if px else 0.0,
        PixelSpacingY_med=float(np.median(py)) if py else 0.0,
        SliceThickness_med=float(np.median(th)) if th else 0.0,
    )

    if READ_INTENS and intens_list:
        try:
            all_int = np.concatenate(intens_list)
        except Exception:
            all_int = intens_list[0] if intens_list else np.array([])
        for k, v in robust_stats(all_int).items():
            feats[f'Int_{k}'] = v

        # Statistics after clipping to each intensity window.
        for wi, (low, high) in enumerate(HU_WINDOWS):
            for k, v in robust_stats(np.clip(all_int, low, high)).items():
                feats[f'HU_w{wi}_{k}'] = v

        # Density histogram over the first window.
        try:
            low, high = HU_WINDOWS[0]
            hist_vals, _ = np.histogram(np.clip(all_int, low, high), bins=HIST_BINS, range=(low, high), density=True)
            for i, h in enumerate(hist_vals):
                feats[f'Hist_ct_bin_{i}'] = float(h)
        except Exception:
            feats.update(_zero_hist())

        # Texture statistics pooled over the sampled slices.
        if texture_vals:
            try:
                for k, v in robust_stats(np.concatenate(texture_vals)).items():
                    feats[f'Tex3_{k}'] = v
            except Exception:
                feats.update(_zero_stats('Tex3'))
        else:
            feats.update(_zero_stats('Tex3'))
    else:
        # No intensities available: emit zero placeholders for every group.
        feats.update(_zero_stats('Int'))
        for wi in range(len(HU_WINDOWS)):
            feats.update(_zero_stats(f'HU_w{wi}'))
        feats.update(_zero_hist())
        feats.update(_zero_stats('Tex3'))

    tag = str(modality_tag) if modality_tag is not None else 'UNK'
    feats['Modality_tag'] = tag
    feats['is_CT_like'] = 1 if 'CT' in tag.upper() else 0
    feats['is_MR_like'] = 1 if 'MR' in tag.upper() else 0
    return feats

def mean_weighted_columnwise_auc(y_true_df, y_pred_df):
    """Average the main-label AUC with the mean AUC of the location labels.

    Columns whose ground truth has a single class are skipped for the
    location mean; the main AUC (or the location mean) is NaN when it cannot
    be computed.

    Returns:
        (combined, main_auc, loc_mean) where combined = (main_auc + loc_mean) / 2.
    """
    def _column_auc(col):
        # Returns None when the ground truth has only one class.
        truth, score = y_true_df[col].values, y_pred_df[col].values
        if len(np.unique(truth)) > 1:
            return roc_auc_score(truth, score)
        return None

    main_auc = _column_auc(MAIN)
    if main_auc is None:
        main_auc = np.nan

    per_location = []
    for label in LABELS_13:
        auc = _column_auc(label)
        if auc is not None:
            per_location.append(auc)

    loc_mean = float(np.mean(per_location)) if per_location else np.nan
    combined = (main_auc + loc_mean) / 2.0
    return combined, main_auc, loc_mean

# ============================================================
# Data loading and features (cached)
# ============================================================
train = pd.read_csv(TRAIN_CSV)
train['PatientSex'] = train['PatientSex'].astype(str)
train['Modality'] = train['Modality'].astype(str)
for c in ALL_TARGETS:
    train[c] = train[c].astype(float)

series_ids = train['SeriesInstanceUID'].unique().tolist()

if os.path.exists(FEAT_CACHE):
    print("Leyendo features de cache:", FEAT_CACHE)
    feat_df = pd.read_parquet(FEAT_CACHE)
else:
    print(f"Construyendo features para {len(series_ids)} series...")
    rows = []
    for i, sid in enumerate(series_ids):
        series_feats = extract_series_features(os.path.join(SERIES_DIR, sid), max_slices=FAST_SLICES)
        series_feats['SeriesInstanceUID'] = sid
        rows.append(series_feats)
        if (i + 1) % 100 == 0:
            print(f"[features] {i+1}/{len(series_ids)}")
    feat_df = pd.DataFrame(rows)
    feat_df.to_parquet(FEAT_CACHE)
    print("Cache de features guardada en", FEAT_CACHE)

# merge() returns a fresh 0..n-1 index, which later positional indexing relies on.
df = train.merge(feat_df, on='SeriesInstanceUID', how='inner')

# Minimal encoding of categorical columns (they are not used as features).
for col in ['PatientSex', 'Modality', 'Modality_tag']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))

# Feature list: numeric columns that are neither IDs, targets nor categoricals.
# Only columns produced by extract_series_features() are eligible, because
# predict() builds its input from that function alone; a numeric column that
# exists only in train.csv would be silently replaced by 0.0 at inference.
exclude = set(['SeriesInstanceUID', 'PatientSex', 'Modality', 'Modality_tag'] + ALL_TARGETS)
extractable = set(feat_df.columns)
numeric_candidates = [
    c for c in df.columns
    if c not in exclude and np.issubdtype(df[c].dtype, np.number)
]
feats = [c for c in numeric_candidates if c in extractable]
skipped_cols = [c for c in numeric_candidates if c not in extractable]
if skipped_cols:
    print("Numeric columns not available at inference, excluded from features:", skipped_cols)
print("Nº de features:", len(feats))

# ============================================================
# Folds and shared model parameters
# ============================================================
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

# Assign every row the id of the fold in which it is used for validation.
df['fold'] = -1
for fold_id, split in enumerate(skf.split(df, df[MAIN].values)):
    df.loc[split[1], 'fold'] = fold_id
df['fold'] = df['fold'].astype(int)

# Out-of-fold prediction buffers.
oof_bin = np.zeros(len(df))
oof_loc = {}
for lab in LABELS_13:
    oof_loc[lab] = np.zeros(len(df))

# Per-fold model stores.
bin_models = {name: [] for name in ('lgb', 'xgb', 'cat', 'et')}
loc_models = {lab: {name: [] for name in ('lgb', 'xgb', 'cat')} for lab in LABELS_13}

# Class-imbalance weight: negatives per positive of the main label.
pos_ratio = float((df[MAIN] == 1).mean())
scale_pos_weight = (1 - pos_ratio) / max(pos_ratio, 1e-6)

lgb_params = {
    'objective': 'binary',
    'metric': 'auc',
    'learning_rate': 0.03,
    'num_leaves': 63,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'lambda_l2': 2.0,
    'verbose': -1,
    'seed': SEED,
    'scale_pos_weight': scale_pos_weight,
    'num_threads': LGB_NUM_THREADS,
}
xgb_params_bin = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'tree_method': 'hist',
    'learning_rate': 0.03,
    'max_depth': 6,
    'min_child_weight': 8,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_lambda': 2.0,
    'seed': SEED,
    'scale_pos_weight': float(scale_pos_weight),
    'nthread': XGB_NTHREAD,
}

# ============================================================
# Training
# ============================================================
# Per-model OOF vectors that feed the stacking meta-model. Allocated before
# the loop rather than lazily inside the fold-0 iteration.
oof_stack_lgb = np.zeros(len(df))
oof_stack_xgb = np.zeros(len(df))
oof_stack_cat = np.zeros(len(df))
oof_stack_et = np.zeros(len(df))

for fold in range(N_FOLDS):
    print(f"\n========== Fold {fold} ==========")
    tr = df[df['fold'] != fold]
    va = df[df['fold'] == fold]
    X_tr, y_tr = tr[feats], tr[MAIN].values
    X_va, y_va = va[feats], va[MAIN].values

    # ---- Stage 1: binary "Aneurysm Present" models -------------------------
    # LightGBM
    dtr = lgb.Dataset(X_tr, label=y_tr)
    dva = lgb.Dataset(X_va, label=y_va)
    lgb_bin = lgb.train(
        params=lgb_params, train_set=dtr, num_boost_round=MAX_ROUNDS_LGB,
        valid_sets=[dtr, dva], valid_names=['tr', 'va'],
        callbacks=[lgb.early_stopping(200, verbose=False), lgb.log_evaluation(200)]
    )
    p_lgb = lgb_bin.predict(X_va, num_iteration=lgb_bin.best_iteration or lgb_bin.current_iteration())

    # XGBoost
    xg_tr, xg_va = xgb.DMatrix(X_tr, label=y_tr), xgb.DMatrix(X_va, label=y_va)
    xgb_bin = xgb.train(
        params=xgb_params_bin, dtrain=xg_tr, num_boost_round=MAX_ROUNDS_XGB,
        evals=[(xg_tr, 'tr'), (xg_va, 'va')], early_stopping_rounds=200, verbose_eval=False
    )
    p_xgb = xgb_bin.predict(xg_va)

    # CatBoost
    cat_bin = CatBoostClassifier(
        loss_function='Logloss', eval_metric='AUC', depth=6, learning_rate=0.03, l2_leaf_reg=6,
        iterations=CAT_ITERS, od_type='Iter', od_wait=200, random_seed=SEED,
        class_weights=[1.0, float(scale_pos_weight)], thread_count=CAT_THREAD_COUNT, verbose=False
    )
    cat_bin.fit(X_tr, y_tr, eval_set=(X_va, y_va), verbose=False)
    p_cat = cat_bin.predict_proba(X_va)[:, 1]

    # ExtraTrees (fast tabular model)
    et_clf = ExtraTreesClassifier(n_estimators=100, n_jobs=ET_N_JOBS, random_state=SEED)
    et_clf.fit(X_tr, y_tr)
    p_et = et_clf.predict_proba(X_va)[:, 1]

    # Equal-weight blend of the four binary models.
    p_bin = (p_lgb + p_xgb + p_cat + p_et) / 4.0
    oof_bin[va.index] = p_bin

    # Keep the fold models and the per-model OOF predictions for stacking.
    bin_models['lgb'].append(lgb_bin)
    bin_models['xgb'].append(xgb_bin)
    bin_models['cat'].append(cat_bin)
    bin_models['et'].append(et_clf)
    oof_stack_lgb[va.index] = p_lgb
    oof_stack_xgb[va.index] = p_xgb
    oof_stack_cat[va.index] = p_cat
    oof_stack_et[va.index] = p_et

    # ---- Stage 2: one-vs-rest location models, trained on positives only ---
    tr_pos = tr[tr[MAIN] == 1]
    if len(tr_pos) == 0:
        print("Sin positivos en fold; salto localización.")
        continue
    X_tr_pos = tr_pos[feats]

    for lab in LABELS_13:
        y_tr_loc = tr_pos[lab].values

        # A label with a single class among this fold's training positives
        # cannot be fitted (CatBoost rejects a constant target). Use the
        # observed rate as the conditional probability and store no model.
        if len(np.unique(y_tr_loc)) < 2:
            print(f"Fold {fold}: single-class target for '{lab}'; using constant rate.")
            oof_loc[lab][va.index] = p_bin * float(np.mean(y_tr_loc))
            continue

        # LightGBM (only the training set is monitored here)
        dtr_pos = lgb.Dataset(X_tr_pos, label=y_tr_loc)
        lgb_loc = lgb.train(
            params=dict(lgb_params, objective='binary'),
            train_set=dtr_pos, num_boost_round=max(600, MAX_ROUNDS_LGB // 2),
            valid_sets=[dtr_pos], valid_names=['tr'],
            callbacks=[lgb.early_stopping(120, verbose=False), lgb.log_evaluation(200)]
        )
        pl_lgb = lgb_loc.predict(X_va, num_iteration=lgb_loc.best_iteration or lgb_loc.current_iteration())

        # XGBoost; the validation DMatrix built for stage 1 is reused.
        xg_pos = xgb.DMatrix(X_tr_pos, label=y_tr_loc)
        xgb_loc = xgb.train(
            params=xgb_params_bin, dtrain=xg_pos, num_boost_round=max(600, MAX_ROUNDS_XGB // 2),
            evals=[(xg_pos, 'tr')], early_stopping_rounds=120, verbose_eval=False
        )
        pl_xgb = xgb_loc.predict(xg_va)

        # CatBoost
        cat_loc = CatBoostClassifier(
            loss_function='Logloss', eval_metric='AUC', depth=6, learning_rate=0.03, l2_leaf_reg=6,
            iterations=max(3000, CAT_ITERS // 3), od_type='Iter', od_wait=150,
            random_seed=SEED, thread_count=CAT_THREAD_COUNT, verbose=False
        )
        cat_loc.fit(X_tr_pos, y_tr_loc, verbose=False)
        pl_cat = cat_loc.predict_proba(X_va)[:, 1]

        # P(loc) = P(aneurysm) * P(loc | aneurysm)
        p_loc = p_bin * ((pl_lgb + pl_xgb + pl_cat) / 3.0)
        oof_loc[lab][va.index] = p_loc

        loc_models[lab]['lgb'].append(lgb_loc)
        loc_models[lab]['xgb'].append(xgb_loc)
        loc_models[lab]['cat'].append(cat_loc)

    gc.collect()

# ============================================================
# OOF metric
# ============================================================
# Assemble the OOF predictions into one frame aligned with df.
oof_columns = {MAIN: oof_bin}
for lab in LABELS_13:
    oof_columns[lab] = oof_loc[lab]
oof_df = pd.DataFrame(oof_columns, index=df.index)

true_df = df[ALL_TARGETS].copy()
mw_auc, auc_main, auc_13mean = mean_weighted_columnwise_auc(true_df, oof_df)

print(f"\nOOF AUC (Aneurysm Present): {auc_main:.5f}")
print(f"OOF AUC (mean 13 locs)    : {auc_13mean:.5f}")
print(f"OOF FINAL (MWC-AUC)       : {mw_auc:.5f}")

# ============================================================
# Stacking meta-model: K-fold stacking with a shallow LightGBM learner
# ============================================================
try:
    # One column per base model, in the order lgb, xgb, cat, et.
    stack_X = np.vstack([oof_stack_lgb, oof_stack_xgb, oof_stack_cat, oof_stack_et]).T
    stack_y = df[MAIN].values

    # Requested meta folds, capped by the row count and by N_FOLDS.
    requested_folds = 2 if META_FOLDS > len(df) else META_FOLDS
    fold_cap = N_FOLDS if N_FOLDS > 0 else 2
    meta_folds = min(requested_folds, fold_cap)
    skf_meta = StratifiedKFold(n_splits=meta_folds, shuffle=True, random_state=SEED)

    meta_oof = np.zeros(len(df))
    meta_models = []
    lgb_meta_params = {
        'objective': 'binary',
        'metric': 'auc',
        'learning_rate': 0.05,
        'num_leaves': 8,
        'max_depth': 3,
        'verbose': -1,
        'seed': SEED,
        'num_threads': 1,
    }
    for mt_idx, mv_idx in skf_meta.split(stack_X, stack_y):
        Xm_va = stack_X[mv_idx]
        dtr_m = lgb.Dataset(stack_X[mt_idx], label=stack_y[mt_idx])
        dva_m = lgb.Dataset(Xm_va, label=stack_y[mv_idx])
        meta_m = lgb.train(
            params=lgb_meta_params, train_set=dtr_m, num_boost_round=500,
            valid_sets=[dtr_m, dva_m], valid_names=['tr', 'va'],
            callbacks=[lgb.early_stopping(50, verbose=False), lgb.log_evaluation(50)]
        )
        use_rounds = meta_m.best_iteration or meta_m.current_iteration()
        meta_oof[mv_idx] = meta_m.predict(Xm_va, num_iteration=use_rounds)
        meta_models.append(meta_m)

    # Diagnostic: stacked vs. plain-average OOF AUC.
    meta_auc = roc_auc_score(stack_y, meta_oof)
    simple_auc = roc_auc_score(stack_y, oof_bin)
    print(f"Meta OOF AUC: {meta_auc:.5f}; Simple average OOF AUC: {simple_auc:.5f}")

    # Final meta-model on all rows, using the mean best iteration of the folds.
    if meta_models:
        final_rounds = int(np.mean([m.best_iteration or 100 for m in meta_models]))
    else:
        final_rounds = 100
    dfull = lgb.Dataset(stack_X, label=stack_y)
    final_meta = lgb.train(params=lgb_meta_params, train_set=dfull, num_boost_round=final_rounds)
    os.makedirs("/kaggle/working/models_meta", exist_ok=True)
    final_meta.save_model("/kaggle/working/models_meta/meta_lgb_final.txt")

    # Logistic-regression fallback trained on the same stacked inputs.
    logreg_fb = LogisticRegression(C=1.0, solver='lbfgs', max_iter=400)
    logreg_fb.fit(stack_X, stack_y)
    joblib.dump(logreg_fb, "/kaggle/working/models_meta/logreg_meta.pkl")
except Exception as e:
    # Stacking is optional; report the failure and continue.
    print("Meta stacking failed:", e)

# ============================================================
# Persist models and metadata for inference
# ============================================================
for out_dir in ("/kaggle/working/models_bin", "/kaggle/working/models_loc"):
    os.makedirs(out_dir, exist_ok=True)

# Stage-1 models, one file per fold and library.
for i, lgb_model in enumerate(bin_models['lgb']):
    joblib.dump(lgb_model, f"/kaggle/working/models_bin/lgb_{i}.pkl")
    bin_models['xgb'][i].save_model(f"/kaggle/working/models_bin/xgb_{i}.json")
    bin_models['cat'][i].save_model(f"/kaggle/working/models_bin/cat_{i}.cbm")
    joblib.dump(bin_models['et'][i], f"/kaggle/working/models_bin/et_{i}.pkl")

# Stage-2 models, one file per label, fold and library.
for lab in LABELS_13:
    per_label = loc_models[lab]
    for i, lgb_model in enumerate(per_label['lgb']):
        joblib.dump(lgb_model, f"/kaggle/working/models_loc/{lab}_lgb_{i}.pkl")
        per_label['xgb'][i].save_model(f"/kaggle/working/models_loc/{lab}_xgb_{i}.json")
        per_label['cat'][i].save_model(f"/kaggle/working/models_loc/{lab}_cat_{i}.cbm")

# Feature list and label names used by the inference code.
feats_payload = {'feats': feats, 'labels_13': LABELS_13, 'main': MAIN}
with open("/kaggle/working/feats_meta.json", "w") as f:
    json.dump(feats_payload, f)

print("Modelos y metadatos guardados.")

# ============================================================
# Carga liviana para inferencia
# ============================================================
def _load_models_for_infer():
    """Load every persisted model from /kaggle/working for inference.

    Fold files are discovered by probing consecutive indices until the
    LightGBM file for an index is missing.

    Returns:
        (bin_loaded, loc_loaded, meta_model) where
        * bin_loaded maps 'lgb'/'xgb'/'cat'/'et' to per-fold model lists
          (an 'et' entry is None when its file is absent),
        * loc_loaded maps each location label to 'lgb'/'xgb'/'cat' lists,
        * meta_model is the LightGBM meta booster, else the pickled sklearn
          fallback, else None.
    """
    # NOTE: joblib.load unpickles; these files are written by this same
    # script and must not be replaced with untrusted artifacts.
    bin_loaded = {'lgb': [], 'xgb': [], 'cat': [], 'et': []}
    i = 0
    while True:
        p1 = f"/kaggle/working/models_bin/lgb_{i}.pkl"
        p2 = f"/kaggle/working/models_bin/xgb_{i}.json"
        p3 = f"/kaggle/working/models_bin/cat_{i}.cbm"
        p4 = f"/kaggle/working/models_bin/et_{i}.pkl"
        if not os.path.exists(p1):
            break
        bin_loaded['lgb'].append(joblib.load(p1))
        bx = xgb.Booster()
        bx.load_model(p2)
        bin_loaded['xgb'].append(bx)
        bc = CatBoostClassifier()
        bc.load_model(p3)
        bin_loaded['cat'].append(bc)
        # Keep list positions aligned across libraries even without an ET file.
        bin_loaded['et'].append(joblib.load(p4) if os.path.exists(p4) else None)
        i += 1

    loc_loaded = {lab: {'lgb': [], 'xgb': [], 'cat': []} for lab in LABELS_13}
    for lab in LABELS_13:
        i = 0
        while True:
            p1 = f"/kaggle/working/models_loc/{lab}_lgb_{i}.pkl"
            p2 = f"/kaggle/working/models_loc/{lab}_xgb_{i}.json"
            p3 = f"/kaggle/working/models_loc/{lab}_cat_{i}.cbm"
            if not os.path.exists(p1):
                break
            loc_loaded[lab]['lgb'].append(joblib.load(p1))
            bx = xgb.Booster()
            bx.load_model(p2)
            loc_loaded[lab]['xgb'].append(bx)
            bc = CatBoostClassifier()
            bc.load_model(p3)
            loc_loaded[lab]['cat'].append(bc)
            i += 1

    # Meta-model: prefer the LightGBM booster; if it is absent OR fails to
    # load, try the sklearn fallback.
    meta_model = None
    meta_path_lgb = "/kaggle/working/models_meta/meta_lgb_final.txt"
    meta_path_sk = "/kaggle/working/models_meta/logreg_meta.pkl"
    if os.path.exists(meta_path_lgb):
        try:
            meta_model = lgb.Booster(model_file=meta_path_lgb)
        except Exception as e:
            print("Meta LightGBM load failed:", e)
            meta_model = None
    if meta_model is None and os.path.exists(meta_path_sk):
        try:
            meta_model = joblib.load(meta_path_sk)
        except Exception as e:
            print("Meta sklearn fallback load failed:", e)
            meta_model = None
    return bin_loaded, loc_loaded, meta_model

# Load the persisted models once at module level so predict() can reuse them.
BIN_MDL, LOC_MDL, META_META = _load_models_for_infer()

# Read back the metadata JSON; its 'feats' entry fixes the feature names and
# their column order for inference.
with open("/kaggle/working/feats_meta.json") as f:
    META = json.load(f)
FEATS_LIST = META['feats']

def predict_one_from_features(fnum: dict):
    """Predict all targets for one series from its numeric feature dict.

    Stage 1 blends the per-fold binary models. When a meta-model is loaded,
    its output on the per-model fold averages replaces the blend. Stage 2
    multiplies the aneurysm probability by the averaged conditional location
    probability of each label.

    Args:
        fnum: Mapping containing every name in ``FEATS_LIST``.

    Returns:
        dict mapping ``MAIN`` and each entry of ``LABELS_13`` to a float.
    """
    X = pd.DataFrame([fnum])[FEATS_LIST].astype(float)
    # Build the XGBoost input once; it is reused by every booster below.
    dX = xgb.DMatrix(X)

    pb_folds = []                            # per-fold blend of the four models
    pl_g, px_g, pc_g, pe_g = [], [], [], []  # per-model predictions across folds
    for i in range(len(BIN_MDL['lgb'])):
        pb_l = BIN_MDL['lgb'][i].predict(X)
        pb_x = BIN_MDL['xgb'][i].predict(dX)
        pb_c = BIN_MDL['cat'][i].predict_proba(X)[:, 1]
        et_model = BIN_MDL['et'][i] if i < len(BIN_MDL['et']) else None
        # A missing ExtraTrees model contributes a neutral 0.5.
        pb_e = et_model.predict_proba(X)[:, 1] if et_model is not None else np.array([0.5])
        pb_folds.append((pb_l + pb_x + pb_c + pb_e) / 4.0)
        pl_g.append(pb_l)
        px_g.append(pb_x)
        pc_g.append(pb_c)
        pe_g.append(pb_e)
    P_aneu_foldblend = float(np.mean(pb_folds)) if pb_folds else 0.5

    # Per-model averages across folds: the meta-model's input columns.
    pred_lgb_avg = float(np.mean(pl_g)) if pl_g else 0.5
    pred_xgb_avg = float(np.mean(px_g)) if px_g else 0.5
    pred_cat_avg = float(np.mean(pc_g)) if pc_g else 0.5
    pred_et_avg = float(np.mean(pe_g)) if pe_g else 0.5

    # Meta prediction (best effort: any failure falls back to the blend).
    P_meta = None
    try:
        if META_META is not None:
            xin = np.array([[pred_lgb_avg, pred_xgb_avg, pred_cat_avg, pred_et_avg]])
            if isinstance(META_META, lgb.Booster):
                P_meta = float(META_META.predict(xin)[0])
            else:
                # Index the scalar explicitly; float() on a 1-D array is deprecated.
                P_meta = float(META_META.predict_proba(xin)[0, 1])
    except Exception:
        P_meta = None

    P_aneu = P_meta if P_meta is not None else P_aneu_foldblend

    loc_probs = {}
    for lab in LABELS_13:
        models = LOC_MDL[lab]
        pl = []
        for i in range(len(models['lgb'])):
            pl_l = models['lgb'][i].predict(X)
            pl_x = models['xgb'][i].predict(dX)
            pl_c = models['cat'][i].predict_proba(X)[:, 1]
            pl.append((pl_l + pl_x + pl_c) / 3.0)
        # 0.1 is the default conditional probability when no model exists.
        P_loc_cond = float(np.mean(pl)) if pl else 0.1
        loc_probs[lab] = float(np.clip(P_aneu * P_loc_cond, 0, 1))

    out = {MAIN: P_aneu}
    out.update(loc_probs)
    return out

def predict(series_path: str) -> pd.DataFrame:
    """Return a one-row DataFrame of probabilities for one series directory.

    Features are extracted from ``series_path``, reduced to ``FEATS_LIST``
    and coerced to finite floats. A missing, non-numeric or non-finite value
    becomes 0.0.

    Args:
        series_path: Directory holding the series' DICOM files.

    Returns:
        DataFrame with one row and the columns of ``ALL_TARGETS``.
    """
    feats_dict = extract_series_features(series_path, max_slices=FAST_SLICES)
    clean = {}
    for c in FEATS_LIST:
        try:
            val = float(feats_dict.get(c, 0.0))
        except (TypeError, ValueError):
            val = 0.0
        # NaN/inf would propagate into the models; use the same 0.0 default.
        clean[c] = val if np.isfinite(val) else 0.0
    probs = predict_one_from_features(clean)
    return pd.DataFrame([probs])[ALL_TARGETS]

# ============================================================
# Evaluation server (required for submission)
# ============================================================
inference_server = kaggle_evaluation.rsna_inference_server.RSNAInferenceServer(predict)

# This environment flag distinguishes a competition rerun from a
# commit/interactive session.
is_submit = os.getenv("KAGGLE_IS_COMPETITION_RERUN", "0") == "1"

if not is_submit:
    print("Commit/interactive run -> creando preview de submission.parquet")
    inference_server.run_local_gateway()  # expected to write /kaggle/working/submission.parquet
    try:
        preview = pd.read_parquet('/kaggle/working/submission.parquet')
        display(preview.head())
    except Exception as e:
        print("Preview no disponible:", e)
else:
    print("Kaggle submit run -> starting inference server")
    inference_server.serve()   # serves requests while the test set is evaluated

# Placeholder CSV so that Kaggle enables the Submit button in interactive sessions.
placeholder = pd.DataFrame({"ok": [1]})
placeholder.to_csv("/kaggle/working/submission.csv", index=False)
print("submission.csv (placeholder) escrito.")