# In [None]:
import pandas as pd
import numpy as np
import random
import torch
import torch.nn as nn
import copy
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder

# ==========================================
# 0. SETTINGS AND CONFIGURATION
# ==========================================
# Seed passed to the random number generators and to the validation split.
SEED = 993

# Hyper-parameters of the tabular network and its training loop.
NN_CONFIG = dict(
    learning_rate=0.001,
    batch_size=4096,
    # Generous upper bound; early stopping is expected to end training sooner.
    epochs=1000,
    # Number of epochs to wait for an improvement before stopping.
    patience=50,
    hidden_layers=[512, 256, 128, 64, 32],
    dropout=0.2,
    # Author's note: 'Quantile' is the recommended loss for this task.
    loss_type='MAE',
    device='cuda' if torch.cuda.is_available() else 'cpu',
)

def seed_everything(seed):
    """Seed every random number generator used by the pipeline.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current device; this call is a
    # silent no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can select different kernels from run to run, which
    # would undermine the deterministic flag above, so switch it off.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

# ==========================================
# 1. DATA LOADING AND PREPARATION
# ==========================================
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
# Keep only rows with a strictly positive lower price bound. The explicit
# copy makes `train` an independent frame, so the column assignments that
# follow do not write into a slice of the raw data and do not raise pandas'
# SettingWithCopyWarning.
train = train[train['price_p05'] > 0].copy()
train['dt'] = pd.to_datetime(train['dt'])
test['dt'] = pd.to_datetime(test['dt'])

# 2. FEATURE ENGINEERING
def create_smart_features(df, train_ref=None):
    """Attach price-mean, store-density, weather and assortment features.

    ``df`` is modified in place and also returned.

    Args:
        df: Frame with ``product_id``, ``third_category_id``, ``n_stores``,
            ``avg_temperature``, ``avg_humidity`` and ``dt`` columns.
        train_ref: Optional frame with a ``price_p05`` column. When given, its
            per-product and per-category mean ``price_p05`` are mapped onto
            ``df``; ids that do not occur in ``train_ref`` receive NaN.

    Returns:
        The same ``df`` object with the new columns added.
    """
    if train_ref is not None:
        # NOTE(review): when train_ref is the frame being transformed, each
        # row's own target contributes to its mean — confirm this is intended.
        target_mean_features = (
            ('product_id', 'global_prod_avg'),
            ('third_category_id', 'global_cat_avg'),
        )
        for key, feature in target_mean_features:
            mean_price = train_ref.groupby(key)['price_p05'].mean().to_dict()
            df[feature] = df[key].map(mean_price)

    # Store count relative to the average store count of the row's category;
    # the small constant guards against division by zero.
    mean_stores = df.groupby('third_category_id')['n_stores'].transform('mean')
    df['store_density_ratio'] = df['n_stores'] / (mean_stores + 1e-6)

    # Temperature weighted by humidity expressed as a fraction.
    humidity_fraction = df['avg_humidity'] / 100
    df['temp_hum_index'] = df['avg_temperature'] * humidity_fraction

    # Number of distinct products sharing the row's date and category.
    per_day_category = df.groupby(['dt', 'third_category_id'])
    df['category_breadth'] = per_day_category['product_id'].transform('nunique')
    return df

# Build the engineered features for both frames; the price means always come
# from `train`, since it is passed as train_ref in both calls.
# NOTE(review): for `train` itself the product/category means include each
# row's own target value — this looks like target leakage; confirm whether
# out-of-fold means are needed.
train = create_smart_features(train, train_ref=train)
test = create_smart_features(test, train_ref=train)

def add_cyclical_features(df):
    """Add sine/cosine encodings of the ``dow`` and ``month`` columns.

    ``dow`` is encoded with a period of 7 and ``month`` with a period of 12.
    ``df`` is modified in place and also returned.

    Args:
        df: Frame containing numeric ``dow`` and ``month`` columns.

    Returns:
        The same ``df`` with ``dow_sin``, ``dow_cos``, ``month_sin`` and
        ``month_cos`` columns added.
    """
    for column, period in (('dow', 7), ('month', 12)):
        angle = 2 * np.pi * df[column] / period
        df[f'{column}_sin'] = np.sin(angle)
        df[f'{column}_cos'] = np.cos(angle)
    return df

train = add_cyclical_features(train)
test = add_cyclical_features(test)

# ==========================================
# 3. PREPARATION FOR THE NEURAL NETWORK
# ==========================================
# Categorical columns are label-encoded below and sized for embedding layers;
# numeric columns are imputed and standardized.
# NOTE(review): dow_sin / dow_cos are created by add_cyclical_features but are
# not listed in num_cols — confirm whether that omission is intentional.
cat_cols = ['management_group_id', 'first_category_id', 'activity_flag', 'product_id', 'third_category_id']
num_cols = [
    'n_stores', 'precpt', 'avg_temperature', 'avg_humidity', 
    'avg_wind_level', 'week_of_year', 'month_sin', 'month_cos',
    'global_prod_avg', 'global_cat_avg', 'store_density_ratio',
    'temp_hum_index', 'category_breadth'
]

# 3.1 Missing-value imputation
# Numeric gaps in both frames are filled with the training-set column means;
# categorical gaps are replaced by the sentinel value -1.
train[num_cols] = train[num_cols].fillna(train[num_cols].mean())
test[num_cols] = test[num_cols].fillna(train[num_cols].mean())
train[cat_cols] = train[cat_cols].fillna(-1)
test[cat_cols] = test[cat_cols].fillna(-1)

# 3.2 Label encoding
# Each encoder is fitted on the string form of the train and test values
# combined, so every value present in test has an index.
label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    full_col = pd.concat([train[col].astype(str), test[col].astype(str)], axis=0)
    le.fit(full_col)
    train[col] = le.transform(train[col].astype(str))
    test[col] = le.transform(test[col].astype(str))
    label_encoders[col] = le

# 3.3 Scaling
# The scaler is fitted on train only and then applied unchanged to test.
scaler = StandardScaler()
train[num_cols] = scaler.fit_transform(train[num_cols])
test[num_cols] = scaler.transform(test[num_cols])

# 3.4 Embedding size rule
# Each categorical feature gets half its cardinality (rounded up) as the
# embedding width, capped at 50.
embedding_sizes = []
for col in cat_cols:
    num_unique = len(label_encoders[col].classes_)
    emb_dim = min(50, (num_unique + 1) // 2)
    embedding_sizes.append((num_unique, emb_dim))
    print(f"Feature '{col}': {num_unique} unique -> embedding size {emb_dim}")

# ==========================================
# 4. NEURAL NETWORK MODEL AND UTILITIES
# ==========================================

class TabularDataset(Dataset):
    """Dataset over the categorical, numeric and optional target columns of a frame.

    Args:
        df: Source frame.
        cat_cols: Names of the categorical columns; stored as an int64 array.
        num_cols: Names of the numeric columns; stored as a float32 array.
        target: Name of the target column, or None for unlabeled data.
    """

    def __init__(self, df, cat_cols, num_cols, target=None):
        self.cats = df[cat_cols].values.astype(np.int64)
        self.nums = df[num_cols].values.astype(np.float32)
        if target is None:
            self.target = None
        else:
            self.target = df[target].values.astype(np.float32)

    def __len__(self):
        """Return the number of rows."""
        return self.cats.shape[0]

    def __getitem__(self, idx):
        """Return ``(cats, nums)`` or, when a target exists, ``(cats, nums, target)``."""
        features = (self.cats[idx], self.nums[idx])
        if self.target is None:
            return features
        return features + (self.target[idx],)

class TabularNN(nn.Module):
    """Feed-forward network over embedded categorical and continuous inputs.

    Every categorical column has its own embedding table. The embeddings are
    concatenated with the continuous features and passed through a stack of
    Linear -> ReLU -> BatchNorm1d -> Dropout blocks, followed by a
    single-output linear layer.

    Args:
        embedding_sizes: Iterable of ``(num_embeddings, embedding_dim)`` pairs,
            one per categorical column, in column order.
        n_cont: Number of continuous input features.
        hidden_layers: Widths of the hidden layers, in order.
        dropout: Dropout probability applied after every hidden block.
    """

    def __init__(self, embedding_sizes, n_cont, hidden_layers, dropout=0.2):
        super().__init__()
        self.embeddings = nn.ModuleList(
            [nn.Embedding(cardinality, width) for cardinality, width in embedding_sizes]
        )
        self.n_emb = sum(table.embedding_dim for table in self.embeddings)
        self.n_cont = n_cont

        # Consecutive entries of `widths` give the in/out sizes of each hidden block.
        widths = [self.n_emb + self.n_cont, *hidden_layers]
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [
                nn.Linear(fan_in, fan_out),
                nn.ReLU(),
                nn.BatchNorm1d(fan_out),
                nn.Dropout(dropout),
            ]
        stack.append(nn.Linear(widths[-1], 1))
        self.layers = nn.Sequential(*stack)

    def forward(self, x_cat, x_cont):
        """Return one prediction per row.

        Args:
            x_cat: Integer tensor whose column ``i`` indexes embedding table ``i``.
            x_cont: Tensor of continuous features, one row per sample.

        Returns:
            The network output with its trailing size-1 dimension squeezed away.
        """
        embedded = torch.cat(
            [table(x_cat[:, col]) for col, table in enumerate(self.embeddings)], 1
        )
        features = torch.cat([embedded, x_cont], 1)
        return self.layers(features).squeeze(1)

# EARLY STOPPING HELPER
class EarlyStopping:
    """Track the best validation loss and signal when training should stop.

    Call the instance once per epoch with the current validation loss and the
    model. Whenever the loss improves, a deep copy of the model's state dict is
    stored in ``best_model_state``.

    Args:
        patience: Number of consecutive non-improving epochs after which
            ``early_stop`` becomes True.
        min_delta: Amount by which the loss must drop below the best loss to
            count as an improvement.

    Attributes:
        counter: Consecutive epochs without improvement.
        best_loss: Lowest loss seen so far (None before the first call).
        early_stop: True once ``patience`` has been exhausted.
        best_model_state: Deep copy of the state dict at the best epoch.
    """

    def __init__(self, patience=20, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.best_model_state = None

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss.

        A NaN loss is never treated as an improvement: it cannot replace the
        stored snapshot and it counts against ``patience``.

        Args:
            val_loss: Validation loss of the current epoch.
            model: Object exposing ``state_dict()``; snapshotted on improvement.
        """
        if self.best_loss is None:
            # Always keep a first snapshot so best_model_state is usable, but
            # never let NaN (the only value unequal to itself) become the
            # reference: every later comparison against NaN would be False.
            is_nan = val_loss != val_loss
            self.best_loss = float('inf') if is_nan else val_loss
            self.best_model_state = copy.deepcopy(model.state_dict())
            return

        # Written as a positive test so that NaN falls through to the
        # "no improvement" branch instead of overwriting the best weights.
        if val_loss <= self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.best_model_state = copy.deepcopy(model.state_dict())
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

class QuantileLoss(nn.Module):
    """Pinball (quantile) loss averaged over all elements.

    Args:
        quantile: Target quantile used to weight under- and over-prediction.
    """

    def __init__(self, quantile):
        super().__init__()
        self.quantile = quantile

    def forward(self, preds, target):
        """Return the mean pinball loss of ``preds`` against ``target``."""
        residual = target - preds
        # Under-prediction (residual > 0) is weighted by q,
        # over-prediction (residual < 0) by 1 - q.
        over_penalty = (self.quantile - 1) * residual
        under_penalty = self.quantile * residual
        pinball = torch.max(over_penalty, under_penalty)
        return pinball.abs().mean()

def get_loss_fn(name, quantile=None):
    """Build the training criterion selected by ``name``.

    Args:
        name: One of ``'MSE'``, ``'MAE'`` or ``'Quantile'``.
        quantile: Target quantile for ``'Quantile'``; must lie strictly
            between 0 and 1. Ignored for the other loss types.

    Returns:
        ``nn.MSELoss``, ``nn.L1Loss`` or ``QuantileLoss`` instance.

    Raises:
        ValueError: If ``name`` is unknown, or ``'Quantile'`` is requested
            without a valid ``quantile``.
    """
    if name == 'MSE':
        return nn.MSELoss()
    if name == 'MAE':
        return nn.L1Loss()
    if name == 'Quantile':
        # Fail here with a clear message: a None quantile would otherwise
        # surface as a TypeError in the first forward pass, and a value
        # outside (0, 1) would silently yield a meaningless loss.
        if quantile is None or not 0 < quantile < 1:
            raise ValueError(
                f"Quantile loss requires 0 < quantile < 1, got {quantile!r}"
            )
        return QuantileLoss(quantile)
    raise ValueError("Unknown Loss Type")

# Single-epoch training step
def train_epoch(model, loader, optimizer, criterion, device):
    """Run one optimization pass over ``loader`` and return the mean batch loss.

    Args:
        model: Network called as ``model(cats, nums)``.
        loader: Iterable of ``(cats, nums, targets)`` batches supporting ``len()``.
        optimizer: Optimizer updating the model's parameters.
        criterion: Loss called as ``criterion(preds, targets)``.
        device: Device the batch tensors are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(loader)``.
    """
    model.train()
    running = 0
    for batch in loader:
        cat_x, num_x, y = (tensor.to(device) for tensor in batch)
        optimizer.zero_grad()
        batch_loss = criterion(model(cat_x, num_x), y)
        batch_loss.backward()
        optimizer.step()
        running += batch_loss.item()
    n_batches = len(loader)
    return running / n_batches

# Validation-loss helper (used for early stopping)
def validate_loss(model, loader, criterion, device):
    """Return the mean per-batch loss over the labeled batches of ``loader``.

    Batches that do not carry targets (fewer than three elements) are skipped
    and are excluded from the average as well.

    Args:
        model: Network called as ``model(cats, nums)``; switched to eval mode.
        loader: Iterable of ``(cats, nums, targets)`` batches.
        criterion: Loss called as ``criterion(preds, targets)``.
        device: Device the batch tensors are moved to.

    Returns:
        Sum of the per-batch losses divided by the number of scored batches.

    Raises:
        ValueError: If no batch in ``loader`` carries targets.
    """
    model.eval()
    total_loss = 0.0
    n_scored = 0
    with torch.no_grad():
        for batch in loader:
            if len(batch) != 3:
                continue  # no target in this batch, so no loss can be computed
            cats, nums, targets = (tensor.to(device) for tensor in batch)
            total_loss += criterion(model(cats, nums), targets).item()
            n_scored += 1
    # Dividing by len(loader) would count skipped batches in the denominator
    # and report a perfect 0.0 when nothing was scored at all.
    if n_scored == 0:
        raise ValueError("validate_loss: loader produced no batches with targets")
    return total_loss / n_scored

# Prediction helper
def predict(model, loader, device):
    """Return the model's predictions for every batch of ``loader`` as one array.

    Args:
        model: Network called as ``model(cats, nums)``; switched to eval mode.
        loader: Iterable of ``(cats, nums)`` or ``(cats, nums, targets)`` batches.
        device: Device the input tensors are moved to.

    Returns:
        NumPy array with the per-batch predictions concatenated in loader order.
    """
    model.eval()
    chunks = []
    with torch.no_grad():
        for batch in loader:
            # Labeled batches carry a third element (the target); it is ignored.
            cat_x, num_x = batch[:2] if len(batch) == 3 else batch
            outputs = model(cat_x.to(device), num_x.to(device))
            chunks.append(outputs.cpu().numpy())
    return np.concatenate(chunks)

# ==========================================
# 5. TRAINING AND VALIDATION
# ==========================================
# Within every date, shuffle the rows with the fixed seed and hold out the
# last 20% for validation.
train_parts, val_parts = [], []
for _, group in train.groupby('dt'):
    group = group.sample(frac=1, random_state=SEED).reset_index(drop=True)
    split_idx = int(len(group) * 0.8)
    train_parts.append(group.iloc[:split_idx])
    val_parts.append(group.iloc[split_idx:])

# ignore_index=True gives both frames a fresh 0..n-1 index. Without it every
# per-date group contributes its own 0..k labels, leaving heavily duplicated
# index labels that break any later label-based alignment or assignment.
train_part = pd.concat(train_parts, ignore_index=True)
val_part = pd.concat(val_parts, ignore_index=True)

# Datasets: one pair for the lower bound (price_p05), one for the upper (price_p95).
train_ds_low = TabularDataset(train_part, cat_cols, num_cols, 'price_p05')
val_ds_low = TabularDataset(val_part, cat_cols, num_cols, 'price_p05')
train_ds_high = TabularDataset(train_part, cat_cols, num_cols, 'price_p95')
val_ds_high = TabularDataset(val_part, cat_cols, num_cols, 'price_p95')

# Validation loaders are not shuffled, so predictions come back in val_part
# row order; they use a doubled batch size.
loaders = {
    'train_low': DataLoader(train_ds_low, batch_size=NN_CONFIG['batch_size'], shuffle=True),
    'val_low': DataLoader(val_ds_low, batch_size=NN_CONFIG['batch_size']*2),
    'train_high': DataLoader(train_ds_high, batch_size=NN_CONFIG['batch_size'], shuffle=True),
    'val_high': DataLoader(val_ds_high, batch_size=NN_CONFIG['batch_size']*2)
}

device = NN_CONFIG['device']
print(f"Using device: {device}")

def train_and_validate_with_es(target_name, quantile, train_loader, val_loader):
    """Train a fresh ``TabularNN`` with early stopping and return it.

    The architecture, optimizer settings, loss type, epoch limit and patience
    are read from the module-level ``NN_CONFIG``; ``embedding_sizes``,
    ``num_cols`` and ``device`` are module-level as well.

    Args:
        target_name: Label used only in the progress output.
        quantile: Quantile forwarded to ``get_loss_fn``.
        train_loader: Loader of labeled training batches.
        val_loader: Loader of labeled batches used for the early-stopping loss.

    Returns:
        The trained model with the weights of its best validation epoch loaded.
    """
    print(f"\n=== Training Model for {target_name} (Loss: {NN_CONFIG['loss_type']}) ===")

    net = TabularNN(
        embedding_sizes,
        len(num_cols),
        NN_CONFIG['hidden_layers'],
        NN_CONFIG['dropout'],
    ).to(device)
    adam = torch.optim.Adam(net.parameters(), lr=NN_CONFIG['learning_rate'])
    loss_fn = get_loss_fn(NN_CONFIG['loss_type'], quantile)
    stopper = EarlyStopping(patience=NN_CONFIG['patience'])

    for epoch in range(NN_CONFIG['epochs']):
        # One optimization pass, then score the validation loader and let the
        # stopper decide whether this epoch is a new best.
        train_loss = train_epoch(net, train_loader, adam, loss_fn, device)
        val_loss = validate_loss(net, val_loader, loss_fn, device)
        stopper(val_loss, net)

        stop_now = stopper.early_stop
        # Report every tenth epoch and always the epoch that triggers the stop.
        if stop_now or (epoch+1) % 10 == 0:
            print(f"Epoch {epoch+1}/{NN_CONFIG['epochs']} | Train Loss: {train_loss:.5f} | Val Loss: {val_loss:.5f}")
        if stop_now:
            print(f"Early stopping triggered at epoch {epoch+1}!")
            break

    # Restore the snapshot taken at the best validation epoch.
    print("Loading best model weights...")
    net.load_state_dict(stopper.best_model_state)
    return net

# Train the validation models: one network per interval bound, each fitted on
# the training split and early-stopped on the validation split.
# The 0.05 / 0.95 arguments are forwarded to get_loss_fn, which uses them only
# when NN_CONFIG['loss_type'] is 'Quantile'.
model_low_val = train_and_validate_with_es('price_p05', 0.05, loaders['train_low'], loaders['val_low'])
model_high_val = train_and_validate_with_es('price_p95', 0.95, loaders['train_high'], loaders['val_high'])

# METRIC: INTERVAL IoU
def calculate_iou(lower_true, upper_true, lower_pred, upper_pred, epsilon=1e-6):
    """Return the mean intersection-over-union of true and predicted intervals.

    Inputs are compared position by position: element ``i`` of every argument
    describes interval ``i``. Any array-like is accepted (lists, NumPy arrays,
    pandas Series); pandas index labels are ignored.

    Args:
        lower_true: Lower bounds of the true intervals.
        upper_true: Upper bounds of the true intervals.
        lower_pred: Lower bounds of the predicted intervals.
        upper_pred: Upper bounds of the predicted intervals.
        epsilon: Small constant added to each interval width so the union is
            never zero for degenerate intervals.

    Returns:
        Mean of ``intersection / union`` over all intervals.
    """
    # Convert to plain arrays so the arithmetic is strictly positional; Series
    # with different or duplicated index labels would otherwise be aligned by
    # label and yield NaNs or errors.
    lower_true, upper_true, lower_pred, upper_pred = (
        np.asarray(bound)
        for bound in (lower_true, upper_true, lower_pred, upper_pred)
    )
    # Overlap length, clamped at zero for disjoint intervals.
    intersection = np.maximum(0, np.minimum(upper_true, upper_pred) - np.maximum(lower_true, lower_pred))
    union = (upper_true - lower_true + epsilon) + (upper_pred - lower_pred + epsilon) - intersection
    return np.mean(intersection / union)

print("Calculating Validation Metrics...")
# Score the held-out split with both validation models. The validation loaders
# are not shuffled, so the predictions line up with the rows of val_part.
preds_low = predict(model_low_val, loaders['val_low'], device)
preds_high = predict(model_high_val, loaders['val_high'], device)
val_part['pred_p05'] = preds_low
# Force the predicted upper bound to sit at least 0.001 above the lower one.
val_part['pred_p95'] = np.maximum(preds_high, preds_low + 0.001)

iou_score = calculate_iou(
    val_part['price_p05'], val_part['price_p95'],
    val_part['pred_p05'], val_part['pred_p95']
)
print(f"\n>>> VALIDATION IoU SCORE: {iou_score:.5f} <<<")
print("-" * 40)

# ==========================================
# 6. FINAL TRAINING (FULL TRAIN)
# ==========================================
print("Retraining on FULL dataset...")
full_ds_low = TabularDataset(train, cat_cols, num_cols, 'price_p05')
full_ds_high = TabularDataset(train, cat_cols, num_cols, 'price_p95')

full_loader_low = DataLoader(full_ds_low, batch_size=NN_CONFIG['batch_size'], shuffle=True)
full_loader_high = DataLoader(full_ds_high, batch_size=NN_CONFIG['batch_size'], shuffle=True)

# HACK: for the final fit the training set doubles as the validation set.
# This lets training stop once the model no longer improves (convergence)
# and selects the best point of convergence.
# NOTE(review): early stopping here watches the loss on the training data
# itself, so it measures convergence, not generalization.
final_model_low = train_and_validate_with_es('Final Low', 0.05, full_loader_low, full_loader_low)
final_model_high = train_and_validate_with_es('Final High', 0.95, full_loader_high, full_loader_high)

# ==========================================
# 7. PREDICTION
# ==========================================
print("Generating submission...")
# The test loader is unshuffled, so predictions keep the row order of `test`.
test_ds = TabularDataset(test, cat_cols, num_cols, None)
test_loader = DataLoader(test_ds, batch_size=NN_CONFIG['batch_size']*2, shuffle=False)

test['price_p05'] = predict(final_model_low, test_loader, device)
test['price_p95'] = predict(final_model_high, test_loader, device)

# Same ordering guard as on validation: upper bound >= lower bound + 0.001.
test['price_p95'] = np.maximum(test['price_p95'], test['price_p05'] + 0.001)

# Write the three submission columns ordered by row_id.
submission = test[['row_id', 'price_p05', 'price_p95']].sort_values('row_id')
submission.to_csv('submission.csv', index=False)
print(f"Готово! Результаты зафиксированы с seed {SEED}.")