# EXP-015: NN фабрика + Hill Climbing (LB 0.8527, рекорд!)

**Базируется на EXP-014** (LB 0.8522)

## Идея: "фабрика NN"
Обучаем несколько NN с разной архитектурой/скалером → diversity → сильный бленд.

## Результаты:
| Версия | Архитектура | Scaler | Dropout | OOF |
|--------|------------|--------|---------|-----|
| v3 | 512→256→128 | StandardScaler | 0.30 | 0.8415 |
| v4 | 512→256→128 | RankGauss | 0.30 + InputDrop 0.10 | 0.8426 |
| v5 SWA | v4 + SWA | RankGauss | 0.30 | 0.8421 (ПРОВАЛ) |
| **v6** | **1024→512→256** | **RankGauss** | **0.40** + InputDrop 0.10 | **0.8440** |

## Финальный бленд:
- Hill Climbing per-target (XGB + v3 + v4 + v6) → **OOF 0.8493** (+0.0012)
- Средние веса: xgb=0.47, v3=0.20, v4=0.08, v6=0.25
- **LB: 0.8527** (рекорд, +0.0005 к EXP-014)

In [None]:
# ============================================================
# CELL 1: Setup + Load EXP-014 artifacts
# ============================================================
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from joblib import Parallel, delayed
import gc, time, os, json
from datetime import datetime

from google.colab import drive
drive.mount('/content/drive')


def log_msg(msg):
    """Print ``msg`` prefixed with the current local time as ``[HH:MM:SS]``."""
    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")


def _l2_macro_auc(y_true, y_score):
    """Return the unweighted mean of the per-column ROC AUC.

    The number of columns is taken from ``y_true`` instead of a hard-coded
    target count, so the metric follows whatever ``target_cols`` resolves to.
    """
    return np.mean([roc_auc_score(y_true[:, j], y_score[:, j])
                    for j in range(y_true.shape[1])])


# --- Paths (the Drive folder is data_fusion, NOT data_fusion_2026!) ---
DATA = '/content/drive/MyDrive/data_fusion'
ART_L1 = f'{DATA}/artifacts/l1_oof'
ART_L2 = f'{DATA}/artifacts/l2_stacking'

RANDOM_SEED = 42
N_FOLDS_L2 = 5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

# --- Targets (do NOT sort the columns: the author notes that sorting breaks
# --- the column order and drives AUC to 0.50) ---
target = pd.read_parquet(f'{DATA}/train_target.parquet')
target_cols = [c for c in target.columns if c.startswith('target_')]
y_train_arr = target[target_cols].values.astype(np.int8)
train_ids = target['customer_id'].values
del target; gc.collect()
print(f"Targets: {y_train_arr.shape}, cols: {len(target_cols)}")
print(f"Порядок: {target_cols[:3]} ... {target_cols[-2:]}")

# --- L1 OOF / test predictions of the three first-level models ---
oof_xgb = np.load(f'{ART_L1}/oof_xgb.npy')
oof_cb  = np.load(f'{ART_L1}/oof_cb.npy')
oof_lgb = np.load(f'{ART_L1}/oof_lgb.npy')
test_xgb = np.load(f'{ART_L1}/test_xgb.npy')
test_cb  = np.load(f'{ART_L1}/test_cb.npy')
test_lgb = np.load(f'{ART_L1}/test_lgb.npy')
print(f"L1 OOF: XGB {oof_xgb.shape}, CB {oof_cb.shape}, LGB {oof_lgb.shape}")

# --- L2 matrix (123 OOF + 82 meta = 205 features) ---
# Raw predictions of the three models side by side, followed by their
# element-wise mean and standard deviation across models.
X_l2_train = np.hstack([oof_xgb, oof_cb, oof_lgb])
X_l2_test  = np.hstack([test_xgb, test_cb, test_lgb])
oof_stack = np.stack([oof_xgb, oof_cb, oof_lgb], axis=0)
test_stack = np.stack([test_xgb, test_cb, test_lgb], axis=0)
X_l2_train = np.hstack([X_l2_train, oof_stack.mean(0), oof_stack.std(0)])
X_l2_test  = np.hstack([X_l2_test, test_stack.mean(0), test_stack.std(0)])
del oof_stack, test_stack; gc.collect()
print(f"L2 matrix: train {X_l2_train.shape}, test {X_l2_test.shape}")

# --- L2 XGB OOF (the anchor model of the blend) ---
oof_l2_xgb = np.load(f'{ART_L2}/oof_l2_xgb.npy')
test_l2_xgb = np.load(f'{ART_L2}/test_l2_xgb.npy')
xgb_macro = _l2_macro_auc(y_train_arr, oof_l2_xgb)
print(f"L2 XGB OOF Macro AUC: {xgb_macro:.4f}")

# --- L2 NN v3 OOF (the file is oof_l2_nn_v3.npy, NOT oof_l2_nn.npy!) ---
oof_l2_nn_v3 = np.load(f'{ART_L2}/oof_l2_nn_v3.npy')
test_l2_nn_v3 = np.load(f'{ART_L2}/test_l2_nn_v3.npy')
nn_v3_macro = _l2_macro_auc(y_train_arr, oof_l2_nn_v3)
print(f"L2 NN v3 OOF Macro AUC: {nn_v3_macro:.4f}")

# --- Blend baseline: fixed 60/40 mix of L2 XGB and NN v3 ---
blend = 0.6 * oof_l2_xgb + 0.4 * oof_l2_nn_v3
blend_macro = _l2_macro_auc(y_train_arr, blend)
print(f"\n{'='*60}")
print(f"BASELINE: XGB={xgb_macro:.4f}, NN_v3={nn_v3_macro:.4f}, Blend 60/40={blend_macro:.4f}")
print(f"LB 0.8522")
print(f"{'='*60}")

In [None]:
# ============================================================
# CELL 2: RankGauss + NN architectures
# ============================================================

# --- RankGauss ---
# Quantile-based transform of every L2 feature to a normal output distribution
# (QuantileTransformer with output_distribution='normal').
log_msg("Fitting QuantileTransformer (RankGauss)...")
qt = QuantileTransformer(n_quantiles=1000, output_distribution='normal', random_state=RANDOM_SEED)
# Fitted once on the full training matrix; the same fitted transformer is then
# applied to the test matrix. Both results are stored as float32.
X_train_rg = qt.fit_transform(X_l2_train).astype(np.float32)
X_test_rg = qt.transform(X_l2_test).astype(np.float32)
# Sanity print of the value range after the transform.
print(f"RankGauss range: [{X_train_rg.min():.2f}, {X_train_rg.max():.2f}]")

# --- NN v4: 512→256→128 + RankGauss + Input Dropout ---
class L2NetV4(nn.Module):
    """Three-hidden-layer MLP that maps L2 features to one logit per target.

    Pipeline: input dropout -> LayerNorm -> (Linear, LayerNorm, SiLU, Dropout)
    three times -> linear classifier. The second hidden stage additionally adds
    a linear projection of the first hidden activation, scaled by 0.5, before
    its SiLU. The output is raw logits (no sigmoid is applied here).
    """

    def __init__(self, in_dim=205, h1=512, h2=256, h3=128, n_targets=41,
                 drop_input=0.10, drop1=0.3, drop2=0.25, drop3=0.2):
        super().__init__()
        # Input stage: dropout on raw features, then normalisation.
        self.input_drop = nn.Dropout(p=drop_input)
        self.input_norm = nn.LayerNorm(in_dim)

        # Hidden stage 1.
        self.fc1 = nn.Linear(in_features=in_dim, out_features=h1)
        self.ln1 = nn.LayerNorm(h1)

        # Hidden stage 2 plus the projection used for the skip path h1 -> h2.
        self.fc2 = nn.Linear(in_features=h1, out_features=h2)
        self.ln2 = nn.LayerNorm(h2)
        self.skip_proj = nn.Linear(in_features=h1, out_features=h2)

        # Hidden stage 3 and the output head.
        self.fc3 = nn.Linear(in_features=h2, out_features=h3)
        self.ln3 = nn.LayerNorm(h3)
        self.classifier = nn.Linear(in_features=h3, out_features=n_targets)

        # Per-stage dropout applied after each activation.
        self.drop1 = nn.Dropout(p=drop1)
        self.drop2 = nn.Dropout(p=drop2)
        self.drop3 = nn.Dropout(p=drop3)

    def forward(self, x):
        """Return logits for a batch ``x`` whose last dimension is ``in_dim``."""
        feats = self.input_norm(self.input_drop(x))

        first = self.fc1(feats)
        first = self.ln1(first)
        first = self.drop1(F.silu(first))

        # Normalised main path plus the half-weighted skip projection.
        second = self.ln2(self.fc2(first))
        second = second + self.skip_proj(first) * 0.5
        second = self.drop2(F.silu(second))

        third = self.ln3(self.fc3(second))
        third = self.drop3(F.silu(third))

        return self.classifier(third)

# --- NN v6: 1024→512→256 + RankGauss + Higher Dropout ---
class L2NetV6(nn.Module):
    """Wider MLP head (defaults 1024 -> 512 -> 256) with heavier dropout.

    Layer layout: input dropout -> LayerNorm -> three blocks of
    (Linear, LayerNorm, SiLU, Dropout) -> linear classifier. In the second
    block a linear projection of the first hidden activation, multiplied by
    0.5, is added before the SiLU. The module returns raw logits.
    """

    def __init__(self, in_dim=205, h1=1024, h2=512, h3=256, n_targets=41,
                 drop_input=0.10, drop1=0.40, drop2=0.35, drop3=0.30):
        super().__init__()
        # Input stage: feature dropout followed by normalisation.
        self.input_drop = nn.Dropout(p=drop_input)
        self.input_norm = nn.LayerNorm(in_dim)

        # Hidden stage 1.
        self.fc1 = nn.Linear(in_features=in_dim, out_features=h1)
        self.ln1 = nn.LayerNorm(h1)

        # Hidden stage 2 and the h1 -> h2 projection for the skip path.
        self.fc2 = nn.Linear(in_features=h1, out_features=h2)
        self.ln2 = nn.LayerNorm(h2)
        self.skip_proj = nn.Linear(in_features=h1, out_features=h2)

        # Hidden stage 3 and the output head.
        self.fc3 = nn.Linear(in_features=h2, out_features=h3)
        self.ln3 = nn.LayerNorm(h3)
        self.classifier = nn.Linear(in_features=h3, out_features=n_targets)

        # Dropout applied after the activation of each hidden stage.
        self.drop1 = nn.Dropout(p=drop1)
        self.drop2 = nn.Dropout(p=drop2)
        self.drop3 = nn.Dropout(p=drop3)

    def forward(self, x):
        """Return logits for a batch ``x`` whose last dimension is ``in_dim``."""
        feats = self.input_norm(self.input_drop(x))

        first = self.fc1(feats)
        first = self.ln1(first)
        first = self.drop1(F.silu(first))

        # Normalised main path plus the half-weighted skip projection.
        second = self.ln2(self.fc2(first))
        second = second + self.skip_proj(first) * 0.5
        second = self.drop2(F.silu(second))

        third = self.ln3(self.fc3(second))
        third = self.drop3(F.silu(third))

        return self.classifier(third)

# Report the parameter count of each architecture built with its default sizes.
for _net_cls in (L2NetV4, L2NetV6):
    _n_params = sum(p.numel() for p in _net_cls().parameters())
    print(f"{_net_cls.__name__} params: {_n_params:,}")

In [None]:
# ============================================================
# CELL 3: Train NN helper function
# ============================================================

def _macro_auc(y_true, y_score):
    """Return the unweighted mean of the per-column ROC AUC values."""
    return np.mean([roc_auc_score(y_true[:, j], y_score[:, j])
                    for j in range(y_true.shape[1])])


def train_nn_l2(model_class, X_train_scaled, X_test_scaled, y_train_arr, target_cols,
                n_folds=5, n_epochs=60, batch=512, lr=0.001, wd=1e-5, patience=15,
                seed=42, label="NN"):
    """Train an L2 network with K-fold CV and return OOF and test predictions.

    Args:
        model_class: ``nn.Module`` subclass; instantiated with no arguments for
            every fold, so its defaults must match the feature and target counts.
        X_train_scaled: 2-D array of already scaled training features.
        X_test_scaled: 2-D array of already scaled test features.
        y_train_arr: 2-D binary label matrix, one column per target. The number
            of targets is taken from its second dimension.
        target_cols: Accepted for interface compatibility; not used here.
        n_folds: Number of stratified folds.
        n_epochs: Maximum epochs per fold (also sizes the OneCycleLR schedule).
        batch: Mini-batch size.
        lr: Peak learning rate of the OneCycleLR schedule and AdamW base lr.
        wd: AdamW weight decay.
        patience: Epochs without validation improvement before early stopping.
        seed: Random state of the fold split only; model initialisation and
            batch shuffling are not seeded by this function.
        label: Name used in log output.

    Returns:
        Tuple ``(oof, test_preds, fold_aucs)``: float32 OOF probabilities,
        float32 test probabilities averaged over folds, and the best validation
        macro AUC of each fold.

    Notes:
        Folds are stratified on the first target column only. The checkpoint of
        each fold is selected on that fold's own validation AUC, so the
        reported OOF score is optimistic with respect to truly unseen data.
    """
    n_targets = y_train_arr.shape[1]
    oof = np.zeros((len(X_train_scaled), n_targets), dtype=np.float32)
    test_preds = np.zeros((len(X_test_scaled), n_targets), dtype=np.float32)
    X_te = torch.FloatTensor(X_test_scaled).to(device)
    y_all = torch.FloatTensor(y_train_arr.astype(np.float32))
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
    fold_aucs = []
    t_total = time.time()

    print(f"\n{'='*60}")
    print(f"{label}: {n_folds}-fold, {n_epochs} ep, patience={patience}, batch={batch}, lr={lr}")
    print(f"{'='*60}")

    for fold, (tr_idx, val_idx) in enumerate(skf.split(X_train_scaled, y_train_arr[:, 0])):
        t0 = time.time()
        X_tr = torch.FloatTensor(X_train_scaled[tr_idx]).to(device)
        y_tr = y_all[tr_idx].to(device)
        X_val = torch.FloatTensor(X_train_scaled[val_idx]).to(device)
        y_val = y_train_arr[val_idx]  # hoisted: identical for every epoch of the fold
        train_dl = DataLoader(TensorDataset(X_tr, y_tr), batch_size=batch, shuffle=True)

        model = model_class().to(device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
        # The schedule is stepped once per batch, hence steps_per_epoch.
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer, max_lr=lr, epochs=n_epochs,
            steps_per_epoch=len(train_dl), pct_start=0.3)
        criterion = nn.BCEWithLogitsLoss()
        # -inf (not 0) so that the first comparable score is always recorded.
        best_auc = -np.inf
        best_state = None
        no_improve = 0

        for epoch in range(n_epochs):
            model.train()
            for xb, yb in train_dl:
                optimizer.zero_grad()
                loss = criterion(model(xb), yb)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()

            model.eval()
            with torch.no_grad():
                val_probs = torch.sigmoid(model(X_val)).cpu().numpy()
            macro = _macro_auc(y_val, val_probs)
            if macro > best_auc:
                best_auc = macro
                # Snapshot on CPU so the checkpoint does not hold GPU memory.
                best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                no_improve = 0
            else:
                no_improve += 1

            if (epoch + 1) % 10 == 0 or no_improve >= patience:
                log_msg(f"  Fold {fold} ep {epoch+1}: AUC={macro:.4f} (best={best_auc:.4f}, no_imp={no_improve})")
            if no_improve >= patience:
                break

        if best_state is not None:
            model.load_state_dict(best_state)
        else:
            # No epoch yielded a comparable score (e.g. NaN); keep the
            # last-epoch weights instead of failing in load_state_dict(None).
            log_msg(f"  Fold {fold}: no valid validation AUC, using last-epoch weights")
        model.eval()
        with torch.no_grad():
            oof[val_idx] = torch.sigmoid(model(X_val)).cpu().numpy()
            # Each fold contributes an equal share to the test prediction.
            test_preds += torch.sigmoid(model(X_te)).cpu().numpy() / n_folds

        fold_aucs.append(best_auc)
        log_msg(f"Fold {fold}: AUC={best_auc:.4f}, time={time.time()-t0:.0f}s")
        del X_tr, y_tr, X_val, model, optimizer, scheduler, best_state
        torch.cuda.empty_cache(); gc.collect()

    macro = _macro_auc(y_train_arr, oof)
    print(f"\n{label} OOF Macro AUC: {macro:.4f}")
    print(f"Per-fold: {[f'{a:.4f}' for a in fold_aucs]}")
    print(f"Total: {(time.time()-t_total)/60:.1f} min")
    return oof, test_preds, fold_aucs

# Cell-level confirmation that train_nn_l2 has been defined in this session.
print("train_nn_l2() ready")

In [None]:
# ============================================================
# CELL 4: Train NN v4 (SKIP if artifact exists)
# ============================================================

# Reuse cached NN v4 predictions only when BOTH artifacts are present.
# Checking the OOF file alone would take the SKIP path after a run that was
# interrupted between the two saves and then fail on the missing test file.
_v4_oof_path = f'{ART_L2}/oof_l2_nn_v4.npy'
_v4_test_path = f'{ART_L2}/test_l2_nn_v4.npy'

if os.path.exists(_v4_oof_path) and os.path.exists(_v4_test_path):
    print(">>> NN v4: SKIP (артефакты найдены) <<<")
    oof_l2_nn_v4 = np.load(_v4_oof_path)
    test_l2_nn_v4 = np.load(_v4_test_path)
    v4_macro = np.mean([roc_auc_score(y_train_arr[:, i], oof_l2_nn_v4[:, i])
                        for i in range(y_train_arr.shape[1])])
    print(f"  OOF Macro AUC: {v4_macro:.4f}")
else:
    oof_l2_nn_v4, test_l2_nn_v4, _ = train_nn_l2(
        L2NetV4, X_train_rg, X_test_rg, y_train_arr, target_cols, label="NN v4")
    np.save(_v4_oof_path, oof_l2_nn_v4)
    np.save(_v4_test_path, test_l2_nn_v4)
    print(f"Saved to {ART_L2}/")
    # Keep v4_macro defined on this path too (train_nn_l2 already printed it).
    v4_macro = np.mean([roc_auc_score(y_train_arr[:, i], oof_l2_nn_v4[:, i])
                        for i in range(y_train_arr.shape[1])])

In [None]:
# ============================================================
# CELL 5: Train NN v6 Wider (SKIP if artifact exists)
# ============================================================

# Reuse cached NN v6 predictions only when BOTH artifacts are present.
# Checking the OOF file alone would take the SKIP path after a run that was
# interrupted between the two saves and then fail on the missing test file.
_v6_oof_path = f'{ART_L2}/oof_l2_nn_v6.npy'
_v6_test_path = f'{ART_L2}/test_l2_nn_v6.npy'

if os.path.exists(_v6_oof_path) and os.path.exists(_v6_test_path):
    print(">>> NN v6: SKIP (артефакты найдены) <<<")
    oof_l2_nn_v6 = np.load(_v6_oof_path)
    test_l2_nn_v6 = np.load(_v6_test_path)
    v6_macro = np.mean([roc_auc_score(y_train_arr[:, i], oof_l2_nn_v6[:, i])
                        for i in range(y_train_arr.shape[1])])
    print(f"  OOF Macro AUC: {v6_macro:.4f}")
else:
    oof_l2_nn_v6, test_l2_nn_v6, _ = train_nn_l2(
        L2NetV6, X_train_rg, X_test_rg, y_train_arr, target_cols, label="NN v6 Wider")
    np.save(_v6_oof_path, oof_l2_nn_v6)
    np.save(_v6_test_path, test_l2_nn_v6)
    print(f"Saved to {ART_L2}/")
    # Keep v6_macro defined on this path too (train_nn_l2 already printed it).
    v6_macro = np.mean([roc_auc_score(y_train_arr[:, i], oof_l2_nn_v6[:, i])
                        for i in range(y_train_arr.shape[1])])

In [None]:
# ============================================================
# CELL 6: N-way blend + Hill Climbing + Submission
# ============================================================

# Reference score: the fixed 60/40 mix of L2 XGB and NN v3.
_blend_2way = 0.6*oof_l2_xgb + 0.4*oof_l2_nn_v3
baseline_2way = np.mean(
    [roc_auc_score(y_train_arr[:, i], _blend_2way[:, i]) for i in range(41)]
)
del _blend_2way
print(f"Baseline 2-way (EXP-014): {baseline_2way:.4f}")

# === N-way blends ===
# One weight vector shared by all targets. XGB, v3 and v4 come from small
# grids; v6 takes whatever is left and must land inside [0.10, 0.45].
print(f"\n=== N-way бленды ===")
best_auc = 0
best_w = None
for w_xgb in (0.45, 0.50, 0.55, 0.60):
    for w_v3 in (0.00, 0.05, 0.10, 0.15):
        for w_v4 in (0.00, 0.05, 0.10, 0.15):
            w_v6 = round(1.0 - w_xgb - w_v3 - w_v4, 2)
            if 0.10 <= w_v6 <= 0.45:
                bl = w_xgb*oof_l2_xgb + w_v3*oof_l2_nn_v3 + w_v4*oof_l2_nn_v4 + w_v6*oof_l2_nn_v6
                auc = np.mean(
                    [roc_auc_score(y_train_arr[:, i], bl[:, i]) for i in range(41)]
                )
                if auc > best_auc:
                    best_auc = auc
                    best_w = (w_xgb, w_v3, w_v4, w_v6)
                # Only combinations that clearly beat the baseline are listed.
                if auc > baseline_2way + 0.0005:
                    print(f"  XGB={w_xgb} v3={w_v3} v4={w_v4} v6={w_v6} -> {auc:.5f} ({auc-baseline_2way:+.5f})")

print(f"\nЛучший fixed: XGB={best_w[0]} v3={best_w[1]} v4={best_w[2]} v6={best_w[3]} -> {best_auc:.5f}")

# === Per-target weight search (run in parallel further below) ===
print(f"\n=== Hill Climbing per-target ===")

def hill_climb_target(i):
    """Find the best 4-model blend weights for target column ``i``.

    Despite the name, this is an exhaustive search over a 0.05-step grid:
    XGB weight in [0.30, 0.70], v6 weight in [0.15, 0.50], and v4/v3 sharing
    the remainder with v3 capped at 0.30. The weights of every candidate sum
    to 1. Candidates are scored by ROC AUC on the OOF predictions.

    The grid is enumerated in integer percent. Building it with float-step
    ``np.arange`` and subtracting floats can leave a remainder such as -1e-16
    at the boundary, which silently drops valid combinations (for example
    xgb=0.55, v6=0.45).

    Args:
        i: Column index of the target.

    Returns:
        Tuple ``(i, target_cols[i], best_weights, best_auc)`` where
        ``best_weights`` is ``(w_xgb, w_v3, w_v4, w_v6)``.
    """
    y_col = y_train_arr[:, i]
    p_xgb = oof_l2_xgb[:, i]
    p_v3 = oof_l2_nn_v3[:, i]
    p_v4 = oof_l2_nn_v4[:, i]
    p_v6 = oof_l2_nn_v6[:, i]

    best_auc_t = 0
    best_w_t = None
    for pct_xgb in range(30, 71, 5):
        for pct_v6 in range(15, 51, 5):
            pct_rest = 100 - pct_xgb - pct_v6
            if pct_rest < 0:
                continue
            for pct_v4 in range(0, pct_rest + 1, 5):
                pct_v3 = pct_rest - pct_v4
                if pct_v3 > 30:
                    continue
                w_xgb = pct_xgb / 100
                w_v3 = pct_v3 / 100
                w_v4 = pct_v4 / 100
                w_v6 = pct_v6 / 100
                bl = w_xgb * p_xgb + w_v3 * p_v3 + w_v4 * p_v4 + w_v6 * p_v6
                auc = roc_auc_score(y_col, bl)
                # Strict '>' keeps the first candidate on ties.
                if auc > best_auc_t:
                    best_auc_t = auc
                    best_w_t = (w_xgb, w_v3, w_v4, w_v6)
    return i, target_cols[i], best_w_t, best_auc_t

# Run the per-target search in parallel. The number of targets comes from the
# label matrix rather than a hard-coded 41.
results = Parallel(n_jobs=-1, verbose=1)(
    delayed(hill_climb_target)(i) for i in range(y_train_arr.shape[1]))

# Output buffers are sized from the actual arrays, so a test set of a
# different length is handled instead of failing on a fixed 250000-row shape.
oof_hill = np.zeros(y_train_arr.shape, dtype=np.float64)
test_hill = np.zeros((len(test_l2_xgb), y_train_arr.shape[1]), dtype=np.float64)
per_target_weights = {}

# Apply each target's best weights (xgb, v3, v4, v6) to both the OOF and the
# test predictions, and keep the weights plus the achieved AUC for later export.
for i, tc, w, auc in results:
    oof_hill[:, i] = w[0]*oof_l2_xgb[:, i] + w[1]*oof_l2_nn_v3[:, i] + w[2]*oof_l2_nn_v4[:, i] + w[3]*oof_l2_nn_v6[:, i]
    test_hill[:, i] = w[0]*test_l2_xgb[:, i] + w[1]*test_l2_nn_v3[:, i] + w[2]*test_l2_nn_v4[:, i] + w[3]*test_l2_nn_v6[:, i]
    per_target_weights[tc] = {'xgb': w[0], 'v3': w[1], 'v4': w[2], 'v6': w[3], 'auc': auc}

hill_macro = np.mean([roc_auc_score(y_train_arr[:, i], oof_hill[:, i])
                      for i in range(y_train_arr.shape[1])])
print(f"\nHill Climbing OOF: {hill_macro:.5f} (vs baseline {baseline_2way:.5f}, diff={hill_macro-baseline_2way:+.5f})")

import collections

# Count, per target, which model carries the largest weight. Only the four
# model keys compete; the stored 'auc' entry is not a weight.
_MODEL_KEYS = ('xgb', 'v3', 'v4', 'v6')
w_stats = collections.Counter(
    max(_MODEL_KEYS, key=weights.get) for weights in per_target_weights.values()
)
print(f"Доминантная модель: {dict(w_stats)}")

# Average weight of each model across all targets.
avg_w = {}
for _model_key in _MODEL_KEYS:
    avg_w[_model_key] = np.mean(
        [per_target_weights[tc][_model_key] for tc in target_cols]
    )
_avg_txt = ', '.join(f'{k}={v:.2f}' for k, v in avg_w.items())
print(f"Средние веса: {_avg_txt}")

# === Submission ===
# Build the submission frame: customer_id plus one 'predict_*' column per
# target, filled from the per-target blended test predictions.
print(f"\n=== Submission ===")
# Only the customer_id column is read from the test feature file.
test_df = pd.read_parquet(f'{DATA}/test_main_features.parquet', columns=['customer_id'])
sub = pd.DataFrame({'customer_id': test_df['customer_id'].values})
# NOTE(review): assumes the rows of test_hill are in the same order as
# test_main_features.parquet -- confirm against how the test arrays were built.
for i, tc in enumerate(target_cols):
    sub[tc.replace('target_', 'predict_')] = test_hill[:, i].astype(np.float64)

# Sanity checks: expected frame shape (id column + 41 predictions) and dtype.
# The dtype check inspects only the first entry of the unique dtypes.
assert sub.shape == (250000, 42)
assert sub.iloc[:, 1:].dtypes.unique()[0] == np.float64

out_path = f'{DATA}/submission_exp015_hill_climb.parquet'
sub.to_parquet(out_path, index=False)
print(f"Сохранено: {out_path}")
# The LB value in this message is a hard-coded literal, not computed here.
print(f"Hill Climbing OOF: {hill_macro:.5f} → LB: 0.8527")

# Save artifacts
# Per-target weights (and their OOF AUC) are dumped as JSON next to the L2 artifacts.
with open(f'{ART_L2}/hill_climb_weights.json', 'w') as f:
    json.dump(per_target_weights, f, indent=2)
print("hill_climb_weights.json saved!")

# Drop the id frame and trigger garbage collection.
del test_df; gc.collect()