In [2]:
import pandas as pd
import numpy as np
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
import copy

# Глобальная фиксация
SEED = 322
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

# 1. ЗАГРУЗКА И ПОДГОТОВКА
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
train = train[train['price_p05'] > 0]
train['dt'] = pd.to_datetime(train['dt'])
test['dt'] = pd.to_datetime(test['dt'])

# 2. FEATURE ENGINEERING (из актуального CatBoost)
def create_smart_features(df, train_ref=None):
    if train_ref is not None:
        prod_price_map = train_ref.groupby('product_id')['price_p05'].mean().to_dict()
        df['global_prod_avg'] = df['product_id'].map(prod_price_map)
        cat_price_map = train_ref.groupby('third_category_id')['price_p05'].mean().to_dict()
        df['global_cat_avg'] = df['third_category_id'].map(cat_price_map)
        for col in ['management_group_id', 'first_category_id', 'third_category_id']:
            df[f'std_p05_{col}'] = df[col].map(train_ref.groupby(col)['price_p05'].std().to_dict())
            df[f'std_p95_{col}'] = df[col].map(train_ref.groupby(col)['price_p95'].std().to_dict())
    
    cat_stores_map = df.groupby('third_category_id')['n_stores'].transform('mean')
    df['store_density_ratio'] = df['n_stores'] / (cat_stores_map + 1e-6)
    df['temp_hum_index'] = df['avg_temperature'] * (df['avg_humidity'] / 100)
    df['category_breadth'] = df.groupby(['dt', 'third_category_id'])['product_id'].transform('nunique')
    df['month_sin'] = np.sin(2 * np.pi * df['dt'].dt.month / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['dt'].dt.month / 12)
    return df

train = create_smart_features(train, train_ref=train)
test = create_smart_features(test, train_ref=train)

cat_features = ['management_group_id', 'first_category_id', 'activity_flag']
num_features = [
    'n_stores', 'precpt', 'avg_temperature', 'avg_humidity', 'avg_wind_level', 
    'week_of_year', 'month_sin', 'month_cos', 'global_prod_avg', 'global_cat_avg', 
    'store_density_ratio', 'temp_hum_index', 'category_breadth'
]
std_cols = [c for c in train.columns if 'std_p' in c and 'dow' not in c and 'second' not in c]
num_features += std_cols

# Заполнение пропусков и кодирование
train[num_features] = train[num_features].fillna(train[num_features].mean())
test[num_features] = test[num_features].fillna(train[num_features].mean())

label_encoders = {}
embedding_sizes = []
for col in cat_features:
    le = LabelEncoder()
    full_data = pd.concat([train[col], test[col]]).astype(str)
    le.fit(full_data)
    train[col] = le.transform(train[col].astype(str))
    test[col] = le.transform(test[col].astype(str))
    label_encoders[col] = le
    embedding_sizes.append((len(le.classes_), min(50, (len(le.classes_) + 1) // 2)))

scaler = StandardScaler()
train[num_features] = scaler.fit_transform(train[num_features])
test[num_features] = scaler.transform(test[num_features])

# 3. КВАНТИЛЬНЫЙ ЛОСС (PINBALL LOSS)
class QuantileLoss(nn.Module):
    def __init__(self, quantile=0.5):
        super().__init__()
        self.quantile = quantile
    def forward(self, preds, target):
        errors = target - preds
        loss = torch.max((self.quantile - 1) * errors, self.quantile * errors)
        return loss.mean()

# 4. МОДЕЛЬ И ДАТАСЕТ
class TabularDataset(Dataset):
    def __init__(self, df, cats, nums, target_name=None):
        self.cats = torch.LongTensor(df[cats].values)
        self.nums = torch.FloatTensor(df[nums].values)
        self.targets = torch.FloatTensor(df[target_name].values) if target_name else None
    def __len__(self): return len(self.cats)
    def __getitem__(self, i):
        if self.targets is not None: return self.cats[i], self.nums[i], self.targets[i]
        return self.cats[i], self.nums[i]

class TabularNN(nn.Module):
    def __init__(self, emb_sizes, n_cont):
        super().__init__()
        self.embs = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in emb_sizes])
        n_emb = sum(e.embedding_dim for e in self.embs)
        self.net = nn.Sequential(
            nn.Linear(n_emb + n_cont, 512), nn.ReLU(), nn.BatchNorm1d(512), nn.Dropout(0.2),
            nn.Linear(512, 256), nn.ReLU(), nn.BatchNorm1d(256), nn.Dropout(0.1),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, 1)
        )
    def forward(self, x_cat, x_num):
        x = torch.cat([e(x_cat[:, i]) for i, e in enumerate(self.embs)], 1)
        return self.net(torch.cat([x, x_num], 1)).squeeze()

# 5. ОБУЧЕНИЕ
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

import torch.nn.functional as F

class CoverageMAE(nn.Module):
    def __init__(self, quantile, penalty_weight=10.0):
        super().__init__()
        self.quantile = quantile
        self.penalty_weight = penalty_weight

    def forward(self, preds, target):
        # 1. Базовый MAE (минимизируем отклонение)
        mae_loss = F.l1_loss(preds, target)
        
        # 2. Штраф за покрытие (Hinge-style)
        errors = target - preds
        
        if self.quantile < 0.5:
            # Для p05: штрафуем, если предсказание ВЫШЕ таргета (target < preds)
            # Мы хотим, чтобы только 5% данных были ниже границы
            violation = F.relu(-errors) 
        else:
            # Для p95: штрафуем, если предсказание НИЖЕ таргета (target > preds)
            # Мы хотим, чтобы только 5% данных были выше границы
            violation = F.relu(errors)
            
        coverage_penalty = violation.mean()
        
        # Итоговый лосс: баланс между точностью и соблюдением границы
        return mae_loss + self.penalty_weight * coverage_penalty

# Обновляем функцию обучения, чтобы она использовала новый лосс
def train_nn(train_loader, val_loader, q, epochs=150, patience=10):
    model = TabularNN(embedding_sizes, len(num_features)).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    
    # penalty_weight можно подбирать: 
    # если интервалы слишком узкие — увеличиваем, если слишком широкие — уменьшаем.
    criterion = CoverageMAE(quantile=q, penalty_weight=15.0) 
    
    # Добавим планировщик скорости обучения для лучшей сходимости
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)
    
    best_loss = float('inf')
    best_state = None
    counter = 0
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for c, n, t in train_loader:
            c, n, t = c.to(device), n.to(device), t.to(device)
            optimizer.zero_grad()
            loss = criterion(model(c, n), t)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for c, n, t in val_loader:
                c, n, t = c.to(device), n.to(device), t.to(device)
                val_loss += criterion(model(c, n), t).item()
        
        val_loss /= len(val_loader)
        scheduler.step(val_loss) # Снижаем LR, если лосс застрял
        
        if val_loss < best_loss:
            best_loss, best_state, counter = val_loss, copy.deepcopy(model.state_dict()), 0
        else:
            counter += 1
            
        if (epoch + 1) % 20 == 0:
            print(f"Epoch {epoch+1} | Val Loss: {val_loss:.4f} | LR: {optimizer.param_groups[0]['lr']:.6e}")
            
        if counter >= patience:
            print(f"Early stop at epoch {epoch+1}")
            break
            
    model.load_state_dict(best_state)
    return model

# Сплит по времени (как в CatBoost)
train_parts, val_parts = [], []
for _, group in train.groupby('dt'):
    group = group.sample(frac=1, random_state=SEED)
    idx = int(len(group) * 0.8)
    train_parts.append(group.iloc[:idx]); val_parts.append(group.iloc[idx:])

tr_df, val_df = pd.concat(train_parts), pd.concat(val_parts)

# Модели
loaders = {
    'tr_low': DataLoader(TabularDataset(tr_df, cat_features, num_features, 'price_p05'), batch_size=4096, shuffle=True),
    'val_low': DataLoader(TabularDataset(val_df, cat_features, num_features, 'price_p05'), batch_size=4096),
    'tr_high': DataLoader(TabularDataset(tr_df, cat_features, num_features, 'price_p95'), batch_size=4096, shuffle=True),
    'val_high': DataLoader(TabularDataset(val_df, cat_features, num_features, 'price_p95'), batch_size=4096)
}

print("Обучение NN моделей...")
nn_low = train_nn(loaders['tr_low'], loaders['val_low'], 0.05)
nn_high = train_nn(loaders['tr_high'], loaders['val_high'], 0.95)

# 6. CONFORMAL CALIBRATION (РАЗДЕЛЬНЫЕ АЛЬФЫ)
def get_preds(model, loader):
    model.eval()
    all_p = []
    with torch.no_grad():
        for c, n, _ in loader:
            all_p.append(model(c.to(device), n.to(device)).cpu().numpy())
    return np.concatenate(all_p)

calib_low = get_preds(nn_low, loaders['val_low'])
calib_high = get_preds(nn_high, loaders['val_high'])

nonconf_low = calib_low - val_df['price_p05'].values
nonconf_high = val_df['price_p95'].values - calib_high

def calculate_iou(l_t, u_t, l_p, u_p):
    inter = np.maximum(0, np.minimum(u_t, u_p) - np.maximum(l_t, l_p))
    union = (u_t - l_t) + (u_p - l_p) - inter
    return np.mean(inter / (union + 1e-6))

print("Поиск оптимальных q...")
best_iou, best_qs = -1, (0, 0)
for al in np.linspace(0.1, 0.9, 41):
    ql = np.quantile(nonconf_low, 1 - al/2)
    for ah in np.linspace(0.1, 0.9, 41):
        qh = np.quantile(nonconf_high, 1 - ah/2)
        score = calculate_iou(val_df['price_p05'].values, val_df['price_p95'].values, calib_low - ql, np.maximum(calib_high + qh, calib_low - ql + 0.001))
        if score > best_iou:
            best_iou, best_qs = score, (ql, qh)

q_low, q_high = best_qs
print(f"Best IoU: {best_iou:.4f} with q_low: {q_low:.4f}, q_high: {q_high:.4f}")

# 7. SUBMISSION
test_loader = DataLoader(TabularDataset(test, cat_features, num_features), batch_size=4096, shuffle=False)
def predict_test(model, loader):
    model.eval()
    all_p = []
    with torch.no_grad():
        for c, n in loader: all_p.append(model(c.to(device), n.to(device)).cpu().numpy())
    return np.concatenate(all_p)

test['price_p05'] = predict_test(nn_low, test_loader) - q_low
test['price_p95'] = predict_test(nn_high, test_loader) + q_high
test['price_p95'] = np.maximum(test['price_p95'], test['price_p05'] + 0.001)

test[['row_id', 'price_p05', 'price_p95']].to_csv('submission_nn_conformal.csv', index=False)

Обучение NN моделей...
Epoch 20 | Val Loss: 0.3087 | LR: 5.000000e-04
Early stop at epoch 27
Epoch 20 | Val Loss: 0.4980 | LR: 1.000000e-03
Epoch 40 | Val Loss: 0.4945 | LR: 1.000000e-03
Epoch 60 | Val Loss: 0.4887 | LR: 5.000000e-04
Early stop at epoch 74
Поиск оптимальных q...
Best IoU: 0.2391 with q_low: -0.1171, q_high: -0.0627
