# EXP-012b: PyTorch L2 + XGBoost L2 Blend

**LB: 0.8510** (лучший результат)

## Пайплайн
1. Загружаем L1 OOF (XGBoost per-target, Optuna params, 5-fold) — `xgb_oof_optuna.npy`
2. Logit-трансформация OOF для NN
3. Full train Multi-Task NN со skip connection на logit(OOF)
4. Бленд 60% XGBoost L2 (стекинг) + 40% PyTorch L2

## Артефакты (входные)
- `xgb_oof_optuna.npy` — L1 OOF [750k × 41]
- `xgb_test_optuna.npy` — L1 test preds [250k × 41]
- `train_target.parquet` — таргеты
- `submission_optuna_stacking.parquet` — L2 XGBoost стекинг (LB 0.8472)

## Архитектура NN
```
Input(41) → Linear(41→512) → BN → SiLU → Drop(0.19)
         → Linear(512→256) → BN → SiLU → Drop(0.22)
         → [concat с input: 256+41=297]
         → Linear(297→41) → logits → sigmoid (вне модели: внутри BCEWithLogitsLoss при обучении и явно при инференсе)
```
Skip connection: вход конкатенируется с выходом feature_extractor

In [None]:
# Step 0: Настройка окружения
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import gc
import time

# Directory with the input artifacts (Google Drive); every input path is built from it
DATA = '/content/drive/MyDrive/data_fusion'  # <- change if needed

# Report the PyTorch version and whether CUDA is usable in this runtime
print(f'PyTorch: {torch.__version__}')
print(f'CUDA: {torch.cuda.is_available()}')
# The device name is only queried when CUDA is available
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')

In [None]:
# Step 1: Load the data and apply the logit transform

# Input artifacts, all resolved relative to DATA
OOF_TRAIN_PATH = f'{DATA}/xgb_oof_optuna.npy'       # L1 OOF (750000, 41)
TEST_PREDS_PATH = f'{DATA}/xgb_test_optuna.npy'     # L1 Test (250000, 41)
TARGET_PATH = f'{DATA}/train_target.parquet'
BEST_SUB_PATH = f'{DATA}/submission_optuna_stacking.parquet'  # XGB L2, LB 0.8472

print('Загрузка данных...')

# Probabilities are clipped into [eps, 1 - eps] right after loading; the
# intent is to keep log(p / (1 - p)) finite for predictions of exactly 0 or 1.
eps = 1e-7
X_meta_train, X_meta_test = (
    np.clip(np.load(path).astype(np.float32), eps, 1 - eps)
    for path in (OOF_TRAIN_PATH, TEST_PREDS_PATH)
)

# Targets: everything except the optional customer_id column, as float32.
# NOTE(review): rows are presumably in the same order as the OOF matrix — confirm.
targets_df = pd.read_parquet(TARGET_PATH).drop(columns='customer_id', errors='ignore')
y_meta_train = targets_df.values.astype(np.float32)

# Logit transform log(p / (1 - p)): maps probabilities onto the real line for the NN
X_train_logits, X_test_logits = (
    np.log(p / (1 - p)) for p in (X_meta_train, X_meta_test)
)

print(f'Train: {X_train_logits.shape}, Test: {X_test_logits.shape}')
print(f'Targets: {y_meta_train.shape}')

In [None]:
# Step 2: Move the data to the compute device

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')

# Whole train/test matrices are placed on the device up front, so batches
# can later be taken by plain tensor indexing without a DataLoader.
X_train_t = torch.tensor(X_train_logits, dtype=torch.float32, device=device)
y_train_t = torch.tensor(y_meta_train, dtype=torch.float32, device=device)
X_test_t = torch.tensor(X_test_logits, dtype=torch.float32, device=device)

# CUDA memory statistics are only meaningful when the tensors live on a GPU;
# on the CPU fallback the line would be misleading, so it is skipped.
if device.type == 'cuda':
    print(f'GPU memory: {torch.cuda.memory_allocated()/1e6:.0f} MB')

In [None]:
# Step 3: Архитектура Multi-Task NN со skip connection
# Optuna-параметры: hidden=512, drop1=0.1907, drop2=0.2226, lr=0.02558, wd=1.157e-5

class MultiLabelMetaNN_Res(nn.Module):
    """Multi-task network with a skip connection, used as an L2 meta-learner.

    Input:  tensor of shape [batch, input_dim].
    Output: raw logits of shape [batch, output_dim]; no sigmoid is applied
    inside the module.

    The feature extractor consists of two Linear -> BatchNorm1d -> SiLU ->
    Dropout stages (input_dim -> hidden_dim -> hidden_dim // 2). Its output
    is concatenated with the untouched input before the final linear layer,
    so the classifier sees both the transformed and the raw features.
    """

    def __init__(self, input_dim=41, hidden_dim=512, drop_1=0.1907, drop_2=0.2226, output_dim=41):
        super().__init__()
        widths = (input_dim, hidden_dim, hidden_dim // 2)
        stages = []
        # One stage per consecutive pair of widths, each with its own dropout rate
        for fan_in, fan_out, drop_p in zip(widths, widths[1:], (drop_1, drop_2)):
            stages.extend([
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.SiLU(),
                nn.Dropout(drop_p),
            ])
        self.feature_extractor = nn.Sequential(*stages)
        # The classifier consumes extracted features plus the raw input (skip connection)
        self.classifier = nn.Linear(widths[-1] + input_dim, output_dim)

    def forward(self, x):
        """Return logits for ``x`` computed from [extracted features, x] joined on dim 1."""
        skip_joined = torch.cat((self.feature_extractor(x), x), dim=1)
        return self.classifier(skip_joined)

# Build the model with its default hyper-parameters on the selected device
# and report its size and layer structure.
print('Архитектура:')
model = MultiLabelMetaNN_Res().to(device)
# Total number of elements across all model parameters
total_params = sum(p.numel() for p in model.parameters())
print(f'Параметров: {total_params:,}')
print(model)

In [None]:
# Step 4: Full train (750k rows, 100 epochs)

EPOCHS = 100
BATCH_SIZE = 4096
n_samples = X_train_t.shape[0]
n_batches = -(-n_samples // BATCH_SIZE)  # ceil division: the last batch may be partial

# Fresh model for the full-data fit; the loss takes raw logits
model = MultiLabelMetaNN_Res().to(device)
criterion = nn.BCEWithLogitsLoss()
# NOTE(review): OneCycleLR presumably replaces this starting lr with
# max_lr / div_factor when it is constructed — confirm against the PyTorch docs.
optimizer = torch.optim.AdamW(model.parameters(), lr=0.02558/10, weight_decay=1.157e-05)
# The scheduler is stepped once per batch, hence steps_per_epoch=n_batches
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=0.02558, steps_per_epoch=n_batches, epochs=EPOCHS
)

print(f'Старт обучения: {EPOCHS} эпох, batch={BATCH_SIZE}, lr=0.02558')
start_time = time.time()

model.train()
for epoch in range(EPOCHS):
    train_loss = 0
    # New random row order every epoch; split() yields consecutive index chunks
    shuffled = torch.randperm(n_samples, device=device)

    for batch_idx in shuffled.split(BATCH_SIZE):
        optimizer.zero_grad()
        loss = criterion(model(X_train_t[batch_idx]), y_train_t[batch_idx])
        loss.backward()
        optimizer.step()
        scheduler.step()
        train_loss += loss.item()

    # Log the mean batch loss on the first epoch and then every fifth one
    if epoch == 0 or (epoch + 1) % 5 == 0:
        print(f'Epoch {epoch+1}/{EPOCHS} | Loss: {train_loss/n_batches:.4f}')

print(f'Обучение завершено за {time.time() - start_time:.2f} сек!')

In [None]:
# Step 5: Test inference + blend with the XGB L2 submission

print('Генерация предсказаний для теста...')
model.eval()  # inference mode for the Dropout / BatchNorm layers
nn_test_preds = []
with torch.no_grad():
    # Predict in BATCH_SIZE chunks and move each chunk of probabilities to the host
    for i in range(0, X_test_t.shape[0], BATCH_SIZE):
        X_batch = X_test_t[i:i+BATCH_SIZE]
        logits = model(X_batch)
        probs = torch.sigmoid(logits)
        nn_test_preds.append(probs.cpu().numpy())
nn_test_preds = np.vstack(nn_test_preds)

# Load the best XGB L2 submission; every column except customer_id is a prediction
best_sub = pd.read_parquet(BEST_SUB_PATH)
customer_ids = best_sub['customer_id'].values
submit_cols = [c for c in best_sub.columns if c != 'customer_id']
best_sub_preds = best_sub[submit_cols].values

# The blend below is purely positional, so fail fast on a shape mismatch
# instead of relying on NumPy broadcasting to accept or reject the inputs.
# NOTE(review): row order of the submission is assumed to match the L1 test
# predictions file — this cannot be verified from the data loaded here.
if best_sub_preds.shape != nn_test_preds.shape:
    raise ValueError(
        f'Shape mismatch between XGB L2 submission {best_sub_preds.shape} '
        f'and NN predictions {nn_test_preds.shape}'
    )

# Blend: 60% XGB L2 + 40% NN L2
blend_test_preds = 0.6 * best_sub_preds + 0.4 * nn_test_preds

# Build the submission with customer_id as the first column
final_sub = pd.DataFrame(blend_test_preds, columns=submit_cols)
final_sub.insert(0, 'customer_id', customer_ids)
final_sub['customer_id'] = final_sub['customer_id'].astype(np.int32)

sub_name = 'submission_EXP012b_nn_blend.parquet'
final_sub.to_parquet(sub_name, index=False)
print(f'Сабмит сохранён: {sub_name}')
print(f'Shape: {final_sub.shape}')
print(f'\nСтатистика предсказаний:')
print(f'  Mean: {blend_test_preds.mean():.4f}')
print(f'  Min:  {blend_test_preds.min():.6f}')
print(f'  Max:  {blend_test_preds.max():.6f}')