In [70]:
import numpy as np

class EpsilonLossObjective(object):
    """Asymmetric epsilon-insensitive power loss for use as a CatBoost custom objective.

    With residual ``r = approx - target`` the per-object loss is::

        0                            if |r| <= epsilon
        alpha * ( r - epsilon) ** p  if  r  >  epsilon   (over-estimation)
        beta  * (-r - epsilon) ** p  if  r  < -epsilon   (under-estimation)

    Args:
        alpha: Penalty multiplier applied to over-estimation.
        beta: Penalty multiplier applied to under-estimation.
        epsilon: Half-width of the zero-loss band around the target.
        p: Exponent applied to the distance beyond the band.
    """

    def __init__(self, alpha=1.2, beta=0.9, epsilon=1.0, p=2):
        self.alpha = alpha
        self.beta = beta
        self.epsilon = epsilon
        self.p = p

    def calc_ders_range(self, approxes, targets, weights):
        """Return per-object ``(der1, der2)`` pairs in CatBoost's sign convention.

        CatBoost treats a custom objective as a function to be *maximized*
        (its documented RMSE example returns ``der1 = target - approx`` and
        ``der2 = -1``), so the values returned here are the derivatives of the
        *negated* loss with respect to the approx.

        Args:
            approxes: Indexed container of current predictions.
            targets: Indexed container of target values, same length.
            weights: Optional indexed container of object weights, or None.

        Returns:
            List of ``(der1, der2)`` tuples, one per object.
        """
        residuals = (
            np.asarray(approxes, dtype=np.float64)
            - np.asarray(targets, dtype=np.float64)
        )

        overestimation = residuals > self.epsilon
        underestimation = residuals < -self.epsilon

        # Distance beyond the epsilon band; strictly positive by construction.
        over_excess = residuals[overestimation] - self.epsilon
        under_excess = -residuals[underestimation] - self.epsilon

        # Objects inside the band keep zero derivatives.
        der1 = np.zeros_like(residuals)
        der2 = np.zeros_like(residuals)

        # First derivative of the negated loss.
        der1[overestimation] = -self.alpha * self.p * over_excess ** (self.p - 1)
        der1[underestimation] = self.beta * self.p * under_excess ** (self.p - 1)

        # Second derivative of the negated loss (only defined for p > 1).
        if self.p > 1:
            curvature = self.p * (self.p - 1)
            der2[overestimation] = -self.alpha * curvature * over_excess ** (self.p - 2)
            der2[underestimation] = -self.beta * curvature * under_excess ** (self.p - 2)

        # Object weights scale both derivatives, as in the CatBoost examples.
        if weights is not None:
            object_weights = np.asarray(weights, dtype=np.float64)
            der1 = der1 * object_weights
            der2 = der2 * object_weights

        # Guard against numerical instability (e.g. 1 < p < 2 with a tiny excess).
        der1 = np.nan_to_num(der1, nan=0.0, posinf=0.0, neginf=0.0)
        der2 = np.nan_to_num(der2, nan=0.0, posinf=0.0, neginf=0.0)

        return list(zip(der1, der2))

In [72]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor, Pool
from time import time

train = pd.read_csv('train_contest.csv')
test = pd.read_csv('test_contest.csv')

# Data preparation: every column except the label is used as a feature.
features = list(train.drop('target', axis=1).columns)

# Categorical columns are detected once, from the training frame, and cast to
# the pandas 'category' dtype in both frames.
cat_features = train.select_dtypes(include=['object', 'category']).columns.tolist()

for feat in cat_features:
    train[feat] = train[feat].astype('category')
    test[feat] = test[feat].astype('category')

# Single hold-out split (cheaper than KFold).
X_train, X_valid, y_train, y_valid = train_test_split(
    train[features], train['target'],
    test_size=0.2,
    random_state=42
)

# Stop once the validation metric has not improved for this many iterations.
# Without it the full 2000 iterations run even after the validation RMSE
# plateaus, which previously had to be cut short with a manual interrupt
# (leaving `model` unfitted for the prediction cell).
EARLY_STOPPING_ROUNDS = 200

# Model parameters.
params = {
    'learning_rate': 0.09652479858727847,
    'depth': 7,
    'l2_leaf_reg': 4.130523663958776,
    'random_strength': 0.07304403301548891,
    'bagging_temperature': 0.5696363378933689,
    'leaf_estimation_iterations': 7,
    'eval_metric': 'RMSE',
    'random_seed': 42,

    'loss_function': 'RMSE',
    'iterations': 2000,  # upper bound; early stopping usually ends sooner
    'thread_count': -1,  # use all cores
    'task_type': 'CPU',  # train on CPU explicitly
    'used_ram_limit': '16gb',  # memory cap for stability
    'verbose': 100  # log every 100 iterations
}

# Build the Pool objects once.
train_pool = Pool(X_train, y_train, cat_features=cat_features)
valid_pool = Pool(X_valid, y_valid, cat_features=cat_features)

# Train and report wall-clock time.
print("Training model...")
training_started = time()
model = CatBoostRegressor(**params)
model.fit(
    train_pool,
    eval_set=valid_pool,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
    use_best_model=True
)
print(f"Training finished in {time() - training_started:.1f}s")

# Prediction and the submission file are produced in the next cell.

Training model...
0:	learn: 2778.2309960	test: 2763.4402625	best: 2763.4402625 (0)	total: 363ms	remaining: 12m 5s
100:	learn: 1798.7265879	test: 1887.6999207	best: 1887.6999207 (100)	total: 49.1s	remaining: 15m 23s
200:	learn: 1735.7990165	test: 1869.6643861	best: 1869.6161934 (198)	total: 1m 48s	remaining: 16m 11s
300:	learn: 1698.5916040	test: 1864.1541251	best: 1864.1541251 (300)	total: 2m 44s	remaining: 15m 30s
400:	learn: 1668.4820843	test: 1860.8949010	best: 1860.4152419 (393)	total: 3m 58s	remaining: 15m 51s
500:	learn: 1642.6974292	test: 1858.5335379	best: 1858.0505328 (487)	total: 5m 21s	remaining: 16m 2s
600:	learn: 1622.3976849	test: 1857.1296150	best: 1856.8989825 (585)	total: 6m 55s	remaining: 16m 6s
700:	learn: 1602.7143331	test: 1857.4166120	best: 1856.7944616 (652)	total: 8m 36s	remaining: 15m 57s
800:	learn: 1584.7231998	test: 1856.3627768	best: 1855.8825750 (785)	total: 10m 21s	remaining: 15m 30s
900:	learn: 1567.6902371	test: 1855.9805061	best: 1855.8825750 (785)	tot

KeyboardInterrupt: 

In [None]:
# Score the test frame with the fitted model, store the predictions in a new
# 'target' column, and write the submission file.
test_pool = Pool(test[features], cat_features=cat_features)
test_predictions = model.predict(test_pool)
test['target'] = test_predictions

submission_columns = ['index', 'target']
test.loc[:, submission_columns].to_csv('custom_cat.csv', index=False)