In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
from sklearn.metrics import mean_squared_error, r2_score, median_absolute_error, mean_absolute_error
import numpy as np
import torch
import torch.nn as nn
from pytorch_tabnet.tab_model import TabNetRegressor
from pytorch_tabnet.metrics import Metric
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.compose import TransformedTargetRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, InputLayer
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
import time

import warnings
warnings.filterwarnings("ignore", message="'pin_memory' argument is set as true but not supported on MPS")

In [2]:
df = pd.read_csv('final_data.csv', low_memory=False)
df.salary_gross.fillna(False, inplace=True)
df.shape

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df.salary_gross.fillna(False, inplace=True)
  df.salary_gross.fillna(False, inplace=True)


(709524, 43)

In [3]:
def culc_metrics(y_test, y_pred):
    test_mse = mean_squared_error(y_test, y_pred)
    rmse = test_mse**0.5
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    def symmetric_mean_absolute_percentage_error(y_true, y_pred):
        y_true = np.array(y_true)
        y_pred = np.array(y_pred)
        smape = 100 * np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred)))
        return smape

    smape = symmetric_mean_absolute_percentage_error(y_test, y_pred)

    medae = median_absolute_error(y_test, y_pred)

    print(f'Корень из среднеквадратичной ошибки (RMSE): {rmse}')
    print(f"R² Score: {r2}")
    print(f"Средняя абсолютная ошибка (MAE): {mae}")
    print(f"Средняя абсолютная процентная ошибка (SMAPE): {smape:.2f}%")
    print(f"Медианная абсолютная ошибка (MedAE): {medae}")

In [4]:
cat_columns = ['premium', 'has_test', 'response_letter_required', 'area_name', 'salary_currency', 'salary_gross', 'type_name', 'address_city', 'address_metro_station_name', 'address_metro_line_name', 'address_metro_stations_0_line_name', 'archived', 'employer_name', 'employer_accredited_it_employer', 'employer_trusted', 'schedule_name', 'accept_temporary', 'professional_roles_0_name', 'accept_incomplete_resumes', 'experience_name', 'employment_name', 'address_metro_stations_3_station_name', 'address_metro_stations_3_line_name', 'working_time_intervals_0_name', 'working_time_modes_0_name', 'working_days_0_name', 'branding_type', 'branding_tariff', 'department_name', 'insider_interview_id', 'brand_snippet_logo', 'brand_snippet_picture', 'brand_snippet_background_color', 'brand_snippet_background_gradient_angle', 'brand_snippet_background_gradient_color_list_0_position', 'brand_snippet_background_gradient_color_list_1_position', 'category']
text_columns = ['name', 'snippet_requirement', 'snippet_responsibility']
num_columns = ['name_length', 'length']

In [5]:
scaler = StandardScaler()
num_df = pd.DataFrame(scaler.fit_transform(df[num_columns]), columns=num_columns)

In [6]:
label_columns = []
ohe_columns = []

for column in cat_columns:
    if df[column].nunique() > 10:
        label_columns.append(column)
    else:
        ohe_columns.append(column)

to_bool = list(df[cat_columns].select_dtypes(include=['bool']).columns)
df[['salary_gross', 'employer_accredited_it_employer']] = df[['salary_gross', 'employer_accredited_it_employer']].astype(bool).astype(int)
df[to_bool] = df[to_bool].astype(int)

ohe = OneHotEncoder(sparse_output=False, drop='first')
ohe_encoded = ohe.fit_transform(df[ohe_columns])
ohe_feature_names = ohe.get_feature_names_out(ohe_columns).tolist()
encoded_ohe_data = pd.DataFrame(ohe_encoded, columns=ohe_feature_names)

embedding_dim = 5
embeddings = {}

for col in label_columns:
    unique_values = df[col].unique()
    value_to_idx = {v: i for i, v in enumerate(unique_values)}
    df[col+'_idx'] = df[col].map(value_to_idx)

    num_embeddings = len(unique_values)
    embedding_layer = nn.Embedding(num_embeddings, embedding_dim)

    embeddings[col] = {
        'value_to_idx': value_to_idx,
        'embedding': embedding_layer,
        'num_embeddings': num_embeddings
    }

embedded_data = []
for col in label_columns:
    indices = torch.tensor(df[col+'_idx'].values, dtype=torch.long)
    embedded = embeddings[col]['embedding'](indices).detach().numpy()
    embedded_cols = [f"{col}_embed_{i}" for i in range(embedding_dim)]
    embedded_df = pd.DataFrame(embedded, columns=embedded_cols)
    embedded_data.append(embedded_df)

embedded_data = pd.concat(embedded_data, axis=1)
final_data = pd.concat([encoded_ohe_data, embedded_data], axis=1)

for col in label_columns:
    df.drop(col+'_idx', axis=1, inplace=True)

In [7]:
final_data.shape

(709524, 111)

In [11]:
final_data

Unnamed: 0,premium_1,has_test_1,response_letter_required_1,salary_currency_BYR,salary_currency_EUR,salary_currency_GEL,salary_currency_KGS,salary_currency_KZT,salary_currency_RUR,salary_currency_USD,salary_currency_UZS,salary_gross_1,type_name_Закрытая,type_name_Открытая,type_name_Рекламная,archived_1,employer_trusted_1,schedule_name_Гибкий график,schedule_name_Полный день,schedule_name_Сменный график,schedule_name_Удаленная работа,accept_temporary_1,accept_incomplete_resumes_1,experience_name_Нет опыта,experience_name_От 1 года до 3 лет,experience_name_От 3 до 6 лет,employment_name_Полная занятость,employment_name_Проектная работа,employment_name_Стажировка,employment_name_Частичная занятость,working_time_intervals_0_name_Можно сменами по 4-6 часов в день,working_time_modes_0_name_С началом дня после 16:00,working_days_0_name_По субботам и воскресеньям,branding_type_MAKEUP,branding_type_Unknown,branding_tariff_Unknown,insider_interview_id_1,brand_snippet_logo_Unknown,brand_snippet_picture_Unknown,brand_snippet_background_color_#EF3124,brand_snippet_background_color_#FF5B29,brand_snippet_background_color_Unknown,brand_snippet_background_gradient_angle_134.0,brand_snippet_background_gradient_angle_200.0,brand_snippet_background_gradient_angle_206.43,brand_snippet_background_gradient_angle_67.0,brand_snippet_background_gradient_angle_Unknown,brand_snippet_background_gradient_color_list_0_position_0.0,brand_snippet_background_gradient_color_list_0_position_0.52,brand_snippet_background_gradient_color_list_0_position_6.96,brand_snippet_background_gradient_color_list_0_position_Unknown,brand_snippet_background_gradient_color_list_1_position_40.0,brand_snippet_background_gradient_color_list_1_position_88.86,brand_snippet_background_gradient_color_list_1_position_90.95,brand_snippet_background_gradient_color_list_1_position_94.48,brand_snippet_background_gradient_color_list_1_position_Unknown,area_name_embed_0,area_name_embed_1,area_name_embed_2,area_name_embed_3,area_name_embed_4,address_city_embed_0,address_city_embed_1,address_city_embed_2,address_city_embed_3,address_city_embed_4,address_metro_station_name_embed_0,address_metro_station_name_embed_1,address_metro_station_name_embed_2,address_metro_station_name_embed_3,address_metro_station_name_embed_4,address_metro_line_name_embed_0,address_metro_line_name_embed_1,address_metro_line_name_embed_2,address_metro_line_name_embed_3,address_metro_line_name_embed_4,address_metro_stations_0_line_name_embed_0,address_metro_stations_0_line_name_embed_1,address_metro_stations_0_line_name_embed_2,address_metro_stations_0_line_name_embed_3,address_metro_stations_0_line_name_embed_4,employer_name_embed_0,employer_name_embed_1,employer_name_embed_2,employer_name_embed_3,employer_name_embed_4,professional_roles_0_name_embed_0,professional_roles_0_name_embed_1,professional_roles_0_name_embed_2,professional_roles_0_name_embed_3,professional_roles_0_name_embed_4,address_metro_stations_3_station_name_embed_0,address_metro_stations_3_station_name_embed_1,address_metro_stations_3_station_name_embed_2,address_metro_stations_3_station_name_embed_3,address_metro_stations_3_station_name_embed_4,address_metro_stations_3_line_name_embed_0,address_metro_stations_3_line_name_embed_1,address_metro_stations_3_line_name_embed_2,address_metro_stations_3_line_name_embed_3,address_metro_stations_3_line_name_embed_4,department_name_embed_0,department_name_embed_1,department_name_embed_2,department_name_embed_3,department_name_embed_4,category_embed_0,category_embed_1,category_embed_2,category_embed_3,category_embed_4
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.205201,-1.375760,-2.024955,2.048965,-0.709423,0.374483,-0.416950,-1.000886,0.399107,0.078709,1.456029,-1.921303,1.127689,-0.017080,0.770425,-0.016991,0.039101,-0.289342,0.754083,0.581808,0.810224,0.717039,0.311768,-0.422995,-0.413073,0.331369,0.166283,-0.121262,-0.815128,0.145091,-1.161078,1.031893,1.355590,0.399013,-0.396810,-0.992398,0.643361,0.437380,-0.381886,0.808164,0.572118,0.968845,-0.180532,-1.329549,1.193094,1.429785,-0.12721,-0.095787,-1.280430,0.523068,0.002329,-1.117917,0.386405,0.278955,-0.008147
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.205201,-1.375760,-2.024955,2.048965,-0.709423,0.374483,-0.416950,-1.000886,0.399107,0.078709,-0.125601,0.868352,-0.494118,-0.454782,0.634682,0.700545,-0.450044,-0.571051,0.491362,1.003180,1.101166,1.218887,-0.223936,0.053249,0.781730,0.291983,-0.264579,0.164269,1.197892,-0.031116,-0.123503,-0.740737,0.142345,0.747123,-0.297275,-0.992398,0.643361,0.437380,-0.381886,0.808164,0.572118,0.968845,-0.180532,-1.329549,1.193094,1.429785,-0.12721,-0.095787,-1.280430,0.523068,1.791193,-1.104536,0.068570,0.142405,0.714003
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.205201,-1.375760,-2.024955,2.048965,-0.709423,0.374483,-0.416950,-1.000886,0.399107,0.078709,1.004128,1.144209,0.621498,1.698946,1.542607,1.912553,0.262874,0.682985,-0.222405,0.096435,0.857687,2.283147,0.500787,1.910633,-1.026238,0.690896,-0.883298,-1.122004,0.043918,0.589827,-0.123503,-0.740737,0.142345,0.747123,-0.297275,-0.992398,0.643361,0.437380,-0.381886,0.808164,0.572118,0.968845,-0.180532,-1.329549,1.193094,1.429785,-0.12721,-0.095787,-1.280430,0.523068,-1.764932,1.048306,-0.173701,-0.270792,-0.818727
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,-0.076298,1.466375,-0.765011,-0.231589,0.158975,0.374483,-0.416950,-1.000886,0.399107,0.078709,0.677289,-0.482713,1.445673,-0.624957,1.506250,0.546866,0.160707,1.100105,-0.818940,2.223594,0.879781,-1.116648,-1.571925,0.943941,-0.535612,1.545366,0.094847,0.021632,-2.082036,0.756443,-0.123503,-0.740737,0.142345,0.747123,-0.297275,0.120368,-0.611202,-0.460401,-1.332100,0.592302,-0.234597,0.188549,1.281139,0.405284,-0.070858,1.429785,-0.12721,-0.095787,-1.280430,0.523068,0.002329,-1.117917,0.386405,0.278955,-0.008147
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,-0.314204,-0.015904,-0.415464,1.478273,-0.576059,0.374483,-0.416950,-1.000886,0.399107,0.078709,0.480114,0.392200,0.635667,0.610692,0.782685,-0.348038,-0.518628,-0.128981,0.418283,-0.617086,-1.453323,2.179576,0.317885,0.223374,0.640807,-0.511499,1.253941,0.580151,-1.178894,-1.327576,0.618574,2.244796,1.224353,-0.066122,-1.038721,-0.992398,0.643361,0.437380,-0.381886,0.808164,0.572118,0.968845,-0.180532,-1.329549,1.193094,1.429785,-0.12721,-0.095787,-1.280430,0.523068,-1.014353,-1.595344,1.735746,-0.232897,1.159243
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
709519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.094439,0.556053,-0.147310,0.668464,-0.019185,-0.089407,-0.479558,0.717473,-0.418876,-1.045492,0.410558,0.081635,0.110028,-0.526280,1.652590,-0.168085,-2.549698,-1.356583,-0.726098,0.369500,0.587124,-0.813821,1.021916,1.691159,1.534646,0.865447,1.244169,1.467285,0.063808,-1.063296,1.368929,0.890770,-0.182672,-0.940307,-0.841482,-0.992398,0.643361,0.437380,-0.381886,0.808164,0.572118,0.968845,-0.180532,-1.329549,1.193094,0.917495,-0.12210,-0.628140,1.085993,0.416683,0.140733,0.234820,-0.372194,0.416987,0.175681
709520,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.094439,0.556053,-0.147310,0.668464,-0.019185,-0.089407,-0.479558,0.717473,-0.418876,-1.045492,1.618188,-2.324020,-0.723260,-0.047075,-0.129131,1.336465,1.228576,0.195029,1.610126,-0.613384,-2.319588,-0.750993,-0.825062,0.280801,0.143363,0.338411,-0.533607,-0.735669,-0.969695,0.436879,0.397344,-1.214816,-0.931455,-0.334845,1.383814,-0.992398,0.643361,0.437380,-0.381886,0.808164,0.572118,0.968845,-0.180532,-1.329549,1.193094,1.429785,-0.12721,-0.095787,-1.280430,0.523068,-0.385330,-0.936141,2.482763,-0.252721,1.277278
709521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.205201,-1.375760,-2.024955,2.048965,-0.709423,0.374483,-0.416950,-1.000886,0.399107,0.078709,0.410558,0.081635,0.110028,-0.526280,1.652590,-0.168085,-2.549698,-1.356583,-0.726098,0.369500,0.587124,-0.813821,1.021916,1.691159,1.534646,0.878010,0.289776,0.853992,0.804243,0.484368,1.368929,0.890770,-0.182672,-0.940307,-0.841482,-0.992398,0.643361,0.437380,-0.381886,0.808164,0.572118,0.968845,-0.180532,-1.329549,1.193094,1.429785,-0.12721,-0.095787,-1.280430,0.523068,0.140733,0.234820,-0.372194,0.416987,0.175681
709522,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,-0.008302,0.563011,0.594524,-0.224431,-1.871617,-0.136895,-2.544892,1.548013,0.406414,0.943872,-0.228299,-1.137940,1.040894,0.155791,-0.614703,1.491590,-1.318055,1.086710,-1.061597,-0.481346,-0.242944,-0.406439,0.594328,0.064523,-1.332344,-1.229636,0.227100,0.150392,0.617529,0.994449,-1.161078,1.031893,1.355590,0.399013,-0.396810,-0.992398,0.643361,0.437380,-0.381886,0.808164,0.572118,0.968845,-0.180532,-1.329549,1.193094,1.429785,-0.12721,-0.095787,-1.280430,0.523068,0.002329,-1.117917,0.386405,0.278955,-0.008147


In [8]:
X_train, X_test_val, y_train, y_test_val, = train_test_split(final_data, df['salary'], test_size=0.4, random_state=12345)
X_test, X_val, y_test, y_val = train_test_split(X_test_val, y_test_val, test_size=0.5, random_state=12345)

print(f'Размеры выборок: Обучающая {X_train.shape}, Валидационная {X_test.shape}, Тестовая {X_val.shape}')

Размеры выборок: Обучающая (425714, 111), Валидационная (141905, 111), Тестовая (141905, 111)


### Случайный лес с эмбедингами

In [None]:
model_dtr = DecisionTreeRegressor(random_state=12345)

regressor = TransformedTargetRegressor(
    regressor=model_dtr,
    func=np.log,
    inverse_func=np.exp
)


param_grid = {
    'regressor__max_depth': [10, 11, 12, 13, 14, 15, 16],
    'regressor__min_samples_split': [2, 5],
    'regressor__min_samples_leaf': [1, 2]
}

grid_search = GridSearchCV(
    estimator=regressor,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=2
)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Лучшие параметры: {best_params}')


y_pred = best_model.predict(X_test)
culc_metrics(y_test, y_pred)

Fitting 3 folds for each of 28 candidates, totalling 84 fits
Лучшие параметры: {'regressor__max_depth': 15, 'regressor__min_samples_leaf': 2, 'regressor__min_samples_split': 2}
Корень из среднеквадратичной ошибки (RMSE): 48970.264067754295
R² Score: 0.5416319993914924
Средняя абсолютная ошибка (MAE): 25268.431367611895
Средняя абсолютная процентная ошибка (SMAPE): 28.27%
Медианная абсолютная ошибка (MedAE): 14970.763403016885


Случайному лесу создание эмбедингов не принесло никакой информации и никак не улучшило обобщающую способность. Продолжем использщовать изначальный DF.

#Полносвязная нейронная сеть

Создадим свою нейронную сеть основаную на **Sequentia**
И протестируем на разных вариантах архитектур

### Полносвязная нейронная сеть с эмбедингами

In [None]:
def build_and_train_model(architecture, X_train, y_train, X_test, y_test, epochs=100, batch_size=32):
    """
    Строит и обучает модель с заданной архитектурой

    Параметры:
    architecture - список, определяющий архитектуру сети (количество нейронов в каждом слое)
    X_train, y_train - обучающие данные
    X_test, y_test - тестовые данные
    epochs - количество эпох обучения
    batch_size - размер батча

    Возвращает:
    model - обученная модель
    history - история обучения
    metrics - словарь с метриками на тестовых данных
    train_time - время обучения
    """

    input_shape = X_train.shape[1]

    model = Sequential()

    model.add(InputLayer(shape=(input_shape,)))
    model.add(Dense(architecture[0], activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    for neurons in architecture[1:]:
        model.add(Dense(neurons, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))

    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping],
        verbose=0
    )
    train_time = time.time() - start_time

    y_pred = model.predict(X_test).flatten()

    metrics = {
        'MAE': mean_absolute_error(y_test, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred)),
        'R2': r2_score(y_test, y_pred)
    }

    return model, history, y_pred, train_time, metrics

In [None]:
architectures = {
    'small': [64, 32],
    'medium': [128, 64, 32],
    'large': [256, 128, 64, 32],
    'wide': [512, 256],
    'deep': [64, 64, 64, 64, 64]
}

results = {}

for name, arch in architectures.items():
    print(f"\nTraining {name} architecture: {arch}")
    model, history, y_pred, train_time, metrics = build_and_train_model(
        arch, X_train, y_train, X_test, y_test
    )

    results[name] = {
        'architecture': arch,
        'train_time': train_time,
        'metrics': metrics,
        'epochs_trained': len(history.history['loss'])
    }

    print(f"Training time: {train_time:.2f}s")
    culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
[1m4435/4435[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step
Training time: 1012.70s
Корень из среднеквадратичной ошибки (RMSE): 50941.297970951884
R² Score: 0.5039911649265851
Средняя абсолютная ошибка (MAE): 29103.112701003913
Средняя абсолютная процентная ошибка (SMAPE): 32.33%
Медианная абсолютная ошибка (MedAE): 20343.0625

Training medium architecture: [128, 64, 32]
[1m4435/4435[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step
Training time: 1239.07s
Корень из среднеквадратичной ошибки (RMSE): 63997.72743782658
R² Score: 0.2171500930664486
Средняя абсолютная ошибка (MAE): 29133.000331506486
Средняя абсолютная процентная ошибка (SMAPE): 31.22%
Медианная абсолютная ошибка (MedAE): 18446.55078125

Training large architecture: [256, 128, 64, 32]
[1m4435/4435[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step
Training time: 903.88s
Корень из среднеквадратичной ошибки (RMSE): 99553.35289036723
R² Score: 

## Попробуем улучшить нейронную сеть

In [None]:
def build_and_train_model(architecture, X_train, y_train, X_test, y_test,
                          epochs=50,
                          batch_size=32,
                          norm='batch',
                          optimizer='adam',
                          learning_rate=0.001,
                          dropout=0.2):

    input_shape = X_train.shape[1]
    model = Sequential()

    model.add(InputLayer(shape=(input_shape,)))
    model.add(Dense(architecture[0], activation='relu'))

    if norm == 'batch':
        model.add(BatchNormalization())
    elif norm == 'layer':
        model.add(LayerNormalization())

    model.add(Dropout(dropout))

    for neurons in architecture[1:]:
        model.add(Dense(neurons, activation='relu'))
        if norm == 'batch':
            model.add(BatchNormalization())
        elif norm == 'layer':
            model.add(LayerNormalization())
        model.add(Dropout(dropout))

    model.add(Dense(1))

    if optimizer == 'adam':
        optimizer = Adam(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        optimizer = SGD(learning_rate=learning_rate, momentum=0.9, clipnorm=1)

    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping],
        verbose=1
    )
    train_time = time.time() - start_time

    y_pred = model.predict(X_test).flatten()

    metrics = {
        'MAE': mean_absolute_error(y_test, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred)),
        'R2': r2_score(y_test, y_pred)
    }

    return model, history, y_pred, train_time, metrics

1. Попробуем нормализацию по батчам и по слоям

In [None]:
print(f"\nTraining small architecture: [64, 32]")
model, history, y_pred, train_time, metrics = build_and_train_model([64, 32], X_train, y_train, X_test, y_test,
                          batch_size=32, norm='batch',
                          optimizer='adam', learning_rate=0.001)

print(f"Training time: {train_time:.2f}s")
culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
Epoch 1/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 3ms/step - loss: 14569798656.0000 - mae: 88045.1875 - val_loss: 11746809856.0000 - val_mae: 83408.5000
Epoch 2/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 3ms/step - loss: 20008044544.0000 - mae: 78276.3984 - val_loss: 8272483328.0000 - val_mae: 64880.9570
Epoch 3/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 3ms/step - loss: 22606891008.0000 - mae: 59471.1953 - val_loss: 4919746560.0000 - val_mae: 42189.9961
Epoch 4/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 3ms/step - loss: 11112892416.0000 - mae: 38715.8320 - val_loss: 3031486208.0000 - val_mae: 28739.7422
Epoch 5/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 3ms/step - loss: 11658013696.0000 - mae: 30686.9883 - val_loss: 2912188416.0000 - val_mae: 27899.6777
Epoch 6/50
[1m13304/13304[0m [32m━━━━

In [None]:
print(f"\nTraining small architecture: [64, 32]")
model, history, y_pred, train_time, metrics = build_and_train_model([64, 32], X_train, y_train, X_test, y_test,
                          batch_size=32, norm='layer',
                          optimizer='adam', learning_rate=0.001)

print(f"Training time: {train_time:.2f}s")
culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
Epoch 1/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 3ms/step - loss: 14577665024.0000 - mae: 87028.7031 - val_loss: 10671672320.0000 - val_mae: 73856.6094
Epoch 2/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 3ms/step - loss: 23649177600.0000 - mae: 67305.3828 - val_loss: 6778035712.0000 - val_mae: 45056.0742
Epoch 3/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 3ms/step - loss: 12787627008.0000 - mae: 42728.3906 - val_loss: 5265594880.0000 - val_mae: 40915.8008
Epoch 4/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3ms/step - loss: 8096053760.0000 - mae: 39109.3477 - val_loss: 4332970496.0000 - val_mae: 32079.2090
Epoch 5/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 3ms/step - loss: 9174734848.0000 - mae: 32802.5391 - val_loss: 3982244864.0000 - val_mae: 29937.6270
Epoch 6/50
[1m13304/13304[0m [32m━━━━━━

лучше рузультат у batchnorm

2. Вместо adam оптимизатора попробуем sgd

In [None]:
print(f"\nTraining small architecture: [64, 32]")
model, history, y_pred, train_time, metrics = build_and_train_model([64, 32], X_train, y_train, X_test, y_test,
                          batch_size=32, norm='batch',
                          optimizer='sgd', learning_rate=0.001)
print(f"Training time: {train_time:.2f}s")
culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
Epoch 1/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 4ms/step - loss: 18110072832.0000 - mae: 87330.8906 - val_loss: 9499155456.0000 - val_mae: 71680.7422
Epoch 2/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 3ms/step - loss: 8890734592.0000 - mae: 61910.0352 - val_loss: 4389327360.0000 - val_mae: 31759.1758
Epoch 3/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 3ms/step - loss: 12017816576.0000 - mae: 32611.0312 - val_loss: 3899975424.0000 - val_mae: 29252.4141
Epoch 4/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 3ms/step - loss: 9159672832.0000 - mae: 31949.0430 - val_loss: 3853399808.0000 - val_mae: 28825.4316
Epoch 5/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 3ms/step - loss: 12084412416.0000 - mae: 31608.4805 - val_loss: 3848325632.0000 - val_mae: 28606.1992
Epoch 6/50
[1m13304/13304[0m [32m━━━━━━━

рузультат хуже

In [None]:
def build_and_train_model(architecture, X_train, y_train, X_test, y_test,
                          epochs=50,
                          batch_size=32,
                          norm='batch',
                          optimizer='adam',
                          learning_rate=0.001,
                          dropout=0.2,
                          initializer='he_normal'):

    input_shape = X_train.shape[1]
    model = Sequential()

    model.add(InputLayer(shape=(input_shape,)))
    model.add(Dense(architecture[0], activation='relu',
                    kernel_initializer=initializer))

    if norm == 'batch':
        model.add(BatchNormalization())
    elif norm == 'layer':
        model.add(LayerNormalization())

    model.add(Dropout(dropout))

    for neurons in architecture[1:]:
        model.add(Dense(neurons, activation='relu', kernel_initializer=initializer))
        if norm == 'batch':
            model.add(BatchNormalization())
        elif norm == 'layer':
            model.add(LayerNormalization())
        model.add(Dropout(dropout))

    model.add(Dense(1))

    if optimizer == 'adam':
        optimizer = Adam(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        optimizer = SGD(learning_rate=learning_rate, momentum=0.9, clipnorm=1)

    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping],
        verbose=1
    )
    train_time = time.time() - start_time

    y_pred = model.predict(X_test).flatten()

    metrics = {
        'MAE': mean_absolute_error(y_test, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred)),
        'R2': r2_score(y_test, y_pred)
    }

    return model, history, y_pred, train_time, metrics

3. Попробуем добавить инициализацию весов

In [None]:
print(f"\nTraining small architecture: [64, 32]")
model, history, y_pred, train_time, metrics = build_and_train_model([64, 32], X_train, y_train, X_test, y_test,
                          batch_size=32, norm='batch',
                          optimizer='adam', learning_rate=0.001,
                          initializer='he_normal')

print(f"Training time: {train_time:.2f}s")
culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
Epoch 1/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 3ms/step - loss: 17217370112.0000 - mae: 88269.1797 - val_loss: 11391233024.0000 - val_mae: 81347.8750
Epoch 2/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 3ms/step - loss: 15828764672.0000 - mae: 77875.5156 - val_loss: 7955078656.0000 - val_mae: 64135.5664
Epoch 3/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3ms/step - loss: 10046084096.0000 - mae: 59220.5156 - val_loss: 5109677056.0000 - val_mae: 42636.3945
Epoch 4/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 3ms/step - loss: 13028814848.0000 - mae: 38886.4297 - val_loss: 3536107264.0000 - val_mae: 28978.2988
Epoch 5/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 3ms/step - loss: 12185419776.0000 - mae: 30524.8418 - val_loss: 3464095488.0000 - val_mae: 28364.0312
Epoch 6/50
[1m13304/13304[0m [32m━━━━

In [None]:
print(f"\nTraining small architecture: [64, 32]")
model, history, y_pred, train_time, metrics = build_and_train_model([64, 32], X_train, y_train, X_test, y_test,
                          batch_size=32, norm='layer',
                          optimizer='adam', learning_rate=0.001,
                          initializer='he_normal')

print(f"Training time: {train_time:.2f}s")
culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
Epoch 1/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 5ms/step - loss: 15399426048.0000 - mae: 86962.5000 - val_loss: 10698916864.0000 - val_mae: 74038.7344
Epoch 2/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 4ms/step - loss: 28157966336.0000 - mae: 67430.0312 - val_loss: 6773771776.0000 - val_mae: 45026.3945
Epoch 3/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 3ms/step - loss: 9473038336.0000 - mae: 42577.0352 - val_loss: 5262173184.0000 - val_mae: 40980.4102
Epoch 4/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 4ms/step - loss: 8028119040.0000 - mae: 39915.4453 - val_loss: 4342133248.0000 - val_mae: 32681.6816
Epoch 5/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 4ms/step - loss: 10630411264.0000 - mae: 32699.3203 - val_loss: 3955458048.0000 - val_mae: 29368.4082
Epoch 6/50
[1m13304/13304[0m [32m━━━━━━

In [None]:
print(f"\nTraining small architecture: [64, 32]")
model, history, y_pred, train_time, metrics = build_and_train_model([64, 32], X_train, y_train, X_test, y_test,
                          batch_size=32, norm='batch',
                          optimizer='adam', learning_rate=0.001,
                          initializer='glorot_uniform')

print(f"Training time: {train_time:.2f}s")
culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
Epoch 1/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 3ms/step - loss: 16198064128.0000 - mae: 88131.8984 - val_loss: 11275617280.0000 - val_mae: 80690.5469
Epoch 2/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 3ms/step - loss: 11789763584.0000 - mae: 77674.0000 - val_loss: 7910936064.0000 - val_mae: 62578.3477
Epoch 3/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 3ms/step - loss: 11138646016.0000 - mae: 58718.6602 - val_loss: 5198035968.0000 - val_mae: 42467.7773
Epoch 4/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 3ms/step - loss: 12154558464.0000 - mae: 38897.1055 - val_loss: 3833204480.0000 - val_mae: 30121.7305
Epoch 5/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 3ms/step - loss: 11615273984.0000 - mae: 30703.7129 - val_loss: 3500268288.0000 - val_mae: 28966.8809
Epoch 6/50
[1m13304/13304[0m [32m━━━━

In [None]:
print(f"\nTraining small architecture: [64, 32]")
model, history, y_pred, train_time, metrics = build_and_train_model([64, 32], X_train, y_train, X_test, y_test,
                          batch_size=32, norm='layer',
                          optimizer='adam', learning_rate=0.001,
                          initializer='glorot_uniform')

print(f"Training time: {train_time:.2f}s")
culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
Epoch 1/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 3ms/step - loss: 16716733440.0000 - mae: 87155.6953 - val_loss: 10677915648.0000 - val_mae: 73898.3984
Epoch 2/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 3ms/step - loss: 16449483776.0000 - mae: 66940.8281 - val_loss: 6772810240.0000 - val_mae: 45019.7266
Epoch 3/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 3ms/step - loss: 11030131712.0000 - mae: 42757.9141 - val_loss: 5258724352.0000 - val_mae: 41041.5000
Epoch 4/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 3ms/step - loss: 25328199680.0000 - mae: 38429.4453 - val_loss: 4257460992.0000 - val_mae: 31322.4180
Epoch 5/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 3ms/step - loss: 9073805312.0000 - mae: 32374.2070 - val_loss: 3920040704.0000 - val_mae: 29525.3828
Epoch 6/50
[1m13304/13304[0m [32m━━━━━

In [None]:
print(f"\nTraining small architecture: [64, 32]")
model, history, y_pred, train_time, metrics = build_and_train_model([64, 32], X_train, y_train, X_test, y_test,
                          batch_size=32, norm='layer',
                          optimizer='sgd', learning_rate=0.001,
                          initializer='he_normal')

print(f"Training time: {train_time:.2f}s")
culc_metrics(y_test, y_pred)


Training small architecture: [64, 32]
Epoch 1/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 6ms/step - loss: 43797434368.0000 - mae: 79837.6172 - val_loss: 5153621504.0000 - val_mae: 35720.8477
Epoch 2/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 3ms/step - loss: 11087872000.0000 - mae: 44770.1523 - val_loss: 4228056576.0000 - val_mae: 30778.9922
Epoch 3/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3ms/step - loss: 14701904896.0000 - mae: 38874.1016 - val_loss: 4137486848.0000 - val_mae: 30012.8730
Epoch 4/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3ms/step - loss: 29279082496.0000 - mae: 37047.8789 - val_loss: 3996696576.0000 - val_mae: 29638.1836
Epoch 5/50
[1m13304/13304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3ms/step - loss: 9247617024.0000 - mae: 35282.5547 - val_loss: 3985769472.0000 - val_mae: 29373.1445
Epoch 6/50
[1m13304/13304[0m [32m━━━━━━

### TabNet с эмбедингами

In [None]:
X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
X_test = X_test.to_numpy()

y_train = y_train.to_numpy().reshape(-1, 1)
y_val = y_val.to_numpy().reshape(-1, 1)
y_test = y_test.to_numpy().reshape(-1, 1)

class SMAPE(Metric):
    def __init__(self):
        self._name = "smape"
        self._maximize = False

    def __call__(self, y_true, y_pred):
        return 100 * np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred)))

device = 'cuda' if torch.cuda.is_available() else 'cpu'

tabnet_params = {
    "n_d": 8,
    "n_a": 8,
    "n_steps": 3,
    "gamma": 1.3,
    "lambda_sparse": 1e-3,
    "optimizer_fn": torch.optim.Adam,
    "optimizer_params": dict(lr=2e-2),
    "mask_type": "sparsemax",
    "scheduler_params": dict(
        mode="min",
        patience=5,
        min_lr=1e-5,
        factor=0.9,
    ),
    "scheduler_fn": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "seed": 42,
    "verbose": 10
}

model = TabNetRegressor(**tabnet_params, device_name=device)

model.fit(
    X_train=X_train,
    y_train=y_train,
    eval_set=[(X_train, y_train), (X_val, y_val)],
    eval_name=['train', 'val'],
    eval_metric=['rmse', 'mae', SMAPE],
    max_epochs=50,
    patience=20,
    batch_size=1024,
    virtual_batch_size=128,
    num_workers=0,
    drop_last=False,
    loss_fn=torch.nn.functional.mse_loss,
)

y_pred = model.predict(X_test)

culc_metrics(y_test, y_pred)



epoch 0  | loss: 20458232520.29283| train_rmse: 141366.73394| train_mae: 85954.93103| train_smape: 184.45024| val_rmse: 115550.20318| val_mae: 85792.98921| val_smape: 184.45371|  0:00:27s
epoch 10 | loss: 10776253501.539| train_rmse: 104593.90374| train_mae: 28305.01043| train_smape: 31.42144| val_rmse: 65855.12264| val_mae: 28382.34076| val_smape: 31.6171 |  0:04:42s
epoch 20 | loss: 10656618285.35748| train_rmse: 99652.1698| train_mae: 26755.24988| train_smape: 29.83397| val_rmse: 58971.46058| val_mae: 26900.41583| val_smape: 29.99048|  0:08:50s
epoch 30 | loss: 10250648907.63026| train_rmse: 100125.58855| train_mae: 31679.34288| train_smape: 34.87886| val_rmse: 56611.51132| val_mae: 31739.57858| val_smape: 35.01109|  0:13:07s
epoch 40 | loss: 10356946640.01888| train_rmse: 99670.80071| train_mae: 27671.52445| train_smape: 30.61571| val_rmse: 57740.39816| val_mae: 27902.08827| val_smape: 30.82985|  0:17:24s
Stop training because you reached max_epochs = 50 with best_epoch = 44 and be



Корень из среднеквадратичной ошибки (RMSE): 48567.14367565235
R² Score: 0.5491474560800674
Средняя абсолютная ошибка (MAE): 26383.14714622922
Средняя абсолютная процентная ошибка (SMAPE): 29.67%
Медианная абсолютная ошибка (MedAE): 17413.78125


: 

#### Вывод
С эмбедингами лучше
На cpu быстрее чем на mps

### Оптимизация TabNet

In [None]:
param_grid = {
    'n_d': [8, 16, 32],
    'n_a': [8, 16, 32],
}

from sklearn.base import BaseEstimator, RegressorMixin

class TabNetWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, n_d=8, n_a=8, n_steps=3, gamma=1.3, lambda_sparse=1e-3):
        self.n_d = n_d
        self.n_a = n_a
        self.n_steps = n_steps
        self.gamma = gamma
        self.lambda_sparse = lambda_sparse
        self.model = None

    def fit(self, X, y):
        self.model = TabNetRegressor(
            n_d=self.n_d,
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            lambda_sparse=self.lambda_sparse
        )
        self.model.fit(X, y.reshape(-1, 1))
        return self

    def predict(self, X):
        return self.model.predict(X).flatten()

model = TabNetWrapper()

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Лучшие параметры: {best_params}')


y_pred = best_model.predict(X_test)
culc_metrics(y_test, y_pred)

Fitting 3 folds for each of 9 candidates, totalling 27 fits




epoch 0  | loss: 22587540284.0722|  0:00:11s
epoch 0  | loss: 14788413876.21661|  0:00:11s
epoch 0  | loss: 24342798154.85921|  0:00:11s
epoch 0  | loss: 14777960170.74368|  0:00:12s
epoch 0  | loss: 24233168999.50903|  0:00:12s
epoch 0  | loss: 22585867312.05776|  0:00:12s
epoch 0  | loss: 24335206610.7148|  0:00:12s
epoch 0  | loss: 14653618634.39711|  0:00:12s
epoch 0  | loss: 22515549468.64982|  0:00:12s
epoch 0  | loss: 24042292837.66066|  0:00:14s
epoch 0  | loss: 22269967555.9278|  0:00:14s
epoch 0  | loss: 14492965089.5018|  0:00:14s
epoch 1  | loss: 21886076292.15885|  0:00:22s
epoch 1  | loss: 14046154230.75812|  0:00:23s
epoch 1  | loss: 23565393128.8953|  0:00:23s
epoch 1  | loss: 13529392978.25271|  0:00:24s
epoch 1  | loss: 21928937020.99639|  0:00:24s
epoch 1  | loss: 23646492727.45126|  0:00:24s
epoch 1  | loss: 23107719014.58486|  0:00:24s
epoch 1  | loss: 14098606494.0361|  0:00:24s
epoch 1  | loss: 21392681104.17329|  0:00:24s
epoch 1  | loss: 11411986149.19856|  0:0



epoch 98 | loss: 3731624963.4657|  0:19:18s
epoch 97 | loss: 13342032494.44043|  0:19:21s
epoch 91 | loss: 11258647704.02888|  0:19:22s
epoch 91 | loss: 13060198895.36462|  0:19:24s
epoch 76 | loss: 4747083958.98917|  0:19:25s
epoch 77 | loss: 12240111558.46932|  0:19:25s
epoch 93 | loss: 3637731686.12274|  0:19:25s
epoch 91 | loss: 12724124681.24188|  0:19:25s
epoch 78 | loss: 11248172145.90614|  0:19:26s
epoch 90 | loss: 11446747203.00361|  0:19:27s
epoch 92 | loss: 3522880634.45487|  0:19:28s
epoch 99 | loss: 3812422182.81588|  0:19:29s
epoch 0  | loss: 22469755774.61372|  0:00:15s
epoch 98 | loss: 13328110463.07581|  0:19:33s
epoch 92 | loss: 11257455728.05776|  0:19:35s
epoch 92 | loss: 13003553261.05415|  0:19:37s
epoch 94 | loss: 3531443824.28881|  0:19:38s
epoch 92 | loss: 12576258092.82311|  0:19:38s
epoch 77 | loss: 4766113147.37906|  0:19:40s
epoch 78 | loss: 12263723579.84116|  0:19:40s
epoch 79 | loss: 11238618932.21661|  0:19:41s
epoch 91 | loss: 11483212338.36824|  0:19:



epoch 81 | loss: 11233078687.65343|  0:20:14s
epoch 80 | loss: 12171098556.99639|  0:20:15s
epoch 79 | loss: 4785495146.28159|  0:20:15s
epoch 95 | loss: 11295952698.68592|  0:20:19s
epoch 97 | loss: 3588067896.83754|  0:20:21s
epoch 95 | loss: 12950035744.80867|  0:20:21s
epoch 95 | loss: 12875829846.1805|  0:20:22s
epoch 94 | loss: 11419768595.40794|  0:20:25s
epoch 96 | loss: 3569794218.97473|  0:20:25s
epoch 3  | loss: 17897269756.30325|  0:01:10s
[CV] END .......................................n_a=8, n_d=8; total time=20.5min




epoch 0  | loss: 14653702887.04694|  0:00:16s
epoch 82 | loss: 11179808645.08303|  0:20:29s
epoch 81 | loss: 12136536281.6462|  0:20:30s
epoch 80 | loss: 4799935494.93141|  0:20:31s
epoch 96 | loss: 11329043449.06859|  0:20:32s
epoch 98 | loss: 3565662079.07581|  0:20:33s
epoch 96 | loss: 13042945705.12635|  0:20:34s
epoch 96 | loss: 12957958732.2455|  0:20:34s
epoch 97 | loss: 3537623509.02527|  0:20:38s
epoch 95 | loss: 11490368374.29603|  0:20:38s
epoch 4  | loss: 16472620286.15164|  0:01:25s
epoch 0  | loss: 24241415851.89892|  0:00:15s
epoch 83 | loss: 11130201125.4296|  0:20:44s
epoch 1  | loss: 13361289325.05415|  0:00:32s
epoch 97 | loss: 11312409896.20216|  0:20:44s
epoch 82 | loss: 12394681301.94946|  0:20:45s
epoch 99 | loss: 3561374475.7834|  0:20:46s
epoch 81 | loss: 4764607405.2852|  0:20:46s
epoch 97 | loss: 12959958812.64982|  0:20:47s
epoch 97 | loss: 12525717013.48736|  0:20:47s
epoch 98 | loss: 3608420052.10109|  0:20:51s
epoch 96 | loss: 11433777779.52346|  0:20:51s



epoch 85 | loss: 12315485962.62816|  0:21:36s
epoch 99 | loss: 11427826627.4657|  0:21:36s
epoch 84 | loss: 4803666196.33213|  0:21:38s
epoch 7  | loss: 14095527612.5343|  0:02:24s
epoch 4  | loss: 8907319670.29603|  0:01:31s
epoch 3  | loss: 19601339804.18772|  0:01:15s
epoch 87 | loss: 11185097660.99639|  0:21:52s
epoch 86 | loss: 12264207606.29603|  0:21:55s
epoch 85 | loss: 4783313536.46209|  0:21:56s
epoch 0  | loss: 22332517693.92058|  0:00:27s
epoch 8  | loss: 13635001445.19857|  0:02:47s
epoch 5  | loss: 7874757889.84838|  0:01:53s
epoch 4  | loss: 18342428195.11914|  0:01:37s
epoch 88 | loss: 11209187528.77978|  0:22:10s
epoch 87 | loss: 12226432649.93502|  0:22:14s
epoch 86 | loss: 4806413165.51625|  0:22:14s
[CV] END ......................................n_a=16, n_d=8; total time=22.3min




[CV] END ......................................n_a=8, n_d=16; total time=22.5min




epoch 9  | loss: 13317317153.73286|  0:03:11s
epoch 6  | loss: 7064550939.72563|  0:02:16s
epoch 5  | loss: 17340270213.08304|  0:02:00s
epoch 89 | loss: 11057891827.98556|  0:22:29s
[CV] END ......................................n_a=8, n_d=16; total time=22.5min
[CV] END ......................................n_a=16, n_d=8; total time=22.5min




epoch 1  | loss: 20262603217.79061|  0:00:54s
epoch 87 | loss: 4795434691.4657|  0:22:33s
epoch 88 | loss: 12055501442.31046|  0:22:34s
epoch 0  | loss: 14541767328.80866|  0:00:23s
[CV] END ......................................n_a=16, n_d=8; total time=22.7min




epoch 10 | loss: 13023129606.00722|  0:03:28s
epoch 90 | loss: 11799271322.5704|  0:22:45s
epoch 6  | loss: 16468685608.66426|  0:02:17s
epoch 7  | loss: 6444478786.54152|  0:02:34s
epoch 0  | loss: 24089733330.7148|  0:00:18s
epoch 0  | loss: 14777786626.77256|  0:00:17s
epoch 0  | loss: 22582694412.93863|  0:00:17s
epoch 88 | loss: 4795510721.15523|  0:22:49s
epoch 89 | loss: 12343657066.51263|  0:22:49s
epoch 2  | loss: 17894057606.93141|  0:01:13s
epoch 1  | loss: 12533369904.05776|  0:00:40s
epoch 0  | loss: 24258735274.05054|  0:00:17s
epoch 91 | loss: 11214404453.8917|  0:23:01s
epoch 11 | loss: 12842628348.30325|  0:03:44s
epoch 7  | loss: 15833553621.48735|  0:02:32s
epoch 8  | loss: 5963389580.93863|  0:02:50s
epoch 1  | loss: 21935691169.73286|  0:00:36s
epoch 90 | loss: 12100984517.77617|  0:23:04s
epoch 1  | loss: 14097036217.76174|  0:00:33s
epoch 89 | loss: 4808711878.70036|  0:23:04s
epoch 1  | loss: 21898590991.71119|  0:00:34s
epoch 3  | loss: 15941998311.97112|  0:01



epoch 99 | loss: 4791219352.02888|  0:25:46s
epoch 10 | loss: 14356831697.79061|  0:03:19s
epoch 18 | loss: 4892343074.65704|  0:05:35s
epoch 11 | loss: 5225360332.70758|  0:03:26s
epoch 11 | loss: 4877133448.31769|  0:03:42s
epoch 10 | loss: 15303058110.84477|  0:03:19s
epoch 22 | loss: 12418492910.90253|  0:06:46s
epoch 12 | loss: 12692005014.6426|  0:04:28s
epoch 18 | loss: 14091799865.76173|  0:05:37s
epoch 11 | loss: 13475838185.35738|  0:03:36s
[CV] END ......................................n_a=8, n_d=32; total time=26.2min




epoch 19 | loss: 4817460973.51624|  0:05:57s
epoch 11 | loss: 14281510695.27796|  0:03:42s
epoch 0  | loss: 22499835674.80144|  0:00:27s
epoch 12 | loss: 4994841813.94946|  0:03:48s
epoch 12 | loss: 4765708656.2888|  0:04:04s
epoch 23 | loss: 12405738908.18773|  0:07:05s
epoch 11 | loss: 14862215805.22744|  0:03:41s
epoch 19 | loss: 14088621021.80506|  0:05:57s
epoch 13 | loss: 12581356479.30686|  0:04:51s
epoch 12 | loss: 13170535234.07942|  0:03:58s
epoch 20 | loss: 4774974569.8195|  0:06:17s
epoch 12 | loss: 14342244423.62456|  0:04:04s
epoch 0  | loss: 14712623159.45126|  0:00:23s
[CV] END ......................................n_a=8, n_d=32; total time=26.6min




epoch 1  | loss: 21409401377.27076|  0:00:49s
epoch 13 | loss: 4814743996.0722|  0:04:06s
epoch 24 | loss: 12373786395.72563|  0:07:21s
epoch 13 | loss: 4734441132.8231|  0:04:23s
epoch 12 | loss: 14461103829.94947|  0:03:59s
epoch 20 | loss: 14046484574.72924|  0:06:13s
epoch 21 | loss: 4732178748.0722|  0:06:32s
epoch 13 | loss: 12900564112.63537|  0:04:14s
epoch 14 | loss: 12493874997.14079|  0:05:09s
epoch 13 | loss: 14298746141.574|  0:04:21s
epoch 1  | loss: 13679694293.48736|  0:00:42s
epoch 0  | loss: 24269590823.74008|  0:00:18s
epoch 14 | loss: 4668441721.53069|  0:04:21s
epoch 25 | loss: 12414967283.98556|  0:07:36s
epoch 2  | loss: 19853471997.22742|  0:01:08s
epoch 14 | loss: 4697951042.07942|  0:04:39s
epoch 13 | loss: 14283536813.2852|  0:04:15s
epoch 21 | loss: 13997714648.25993|  0:06:28s
epoch 22 | loss: 4703513782.52707|  0:06:48s
epoch 14 | loss: 12727800931.35017|  0:04:31s
epoch 15 | loss: 12432914110.38268|  0:05:26s
epoch 14 | loss: 14236848740.73646|  0:04:38s




epoch 84 | loss: 10794283325.22744|  0:26:16s
epoch 63 | loss: 15679101189.08302|  0:23:07s
epoch 96 | loss: 3492451049.58845|  0:28:42s
epoch 63 | loss: 3301955029.25632|  0:22:45s
epoch 60 | loss: 12750646887.97112|  0:22:24s
epoch 84 | loss: 2687765646.55596|  0:26:45s
epoch 97 | loss: 12263050531.11913|  0:28:33s
epoch 87 | loss: 3138862150.23827|  0:26:31s
epoch 84 | loss: 12145364159.76895|  0:26:20s
epoch 81 | loss: 11524084913.90613|  0:26:36s
epoch 80 | loss: 10409910527.5379|  0:27:28s
epoch 85 | loss: 10983959038.38267|  0:26:35s
epoch 0  | loss: 22322577870.09386|  0:00:24s
epoch 97 | loss: 3216364904.89531|  0:28:59s
epoch 64 | loss: 11464402055.3935|  0:23:29s
epoch 64 | loss: 3179208626.83032|  0:23:07s
epoch 61 | loss: 12702863997.22744|  0:22:46s
epoch 98 | loss: 12729665842.36822|  0:28:51s
epoch 88 | loss: 3105038030.787|  0:26:49s
epoch 85 | loss: 2816807548.5343|  0:27:04s
epoch 85 | loss: 12106024130.31047|  0:26:40s
epoch 82 | loss: 11479298683.14802|  0:26:56s
e



epoch 88 | loss: 2802279412.44765|  0:28:13s
epoch 3  | loss: 16155873995.3213|  0:01:44s
epoch 88 | loss: 12219524193.03971|  0:27:48s
epoch 67 | loss: 3185633170.7148|  0:24:25s
epoch 85 | loss: 11793339928.02888|  0:28:07s
epoch 67 | loss: 11231107764.6787|  0:24:49s
epoch 89 | loss: 10744828163.46571|  0:28:05s
epoch 64 | loss: 12503839951.01806|  0:24:03s
[CV] END .....................................n_a=16, n_d=16; total time=30.4min




epoch 84 | loss: 10548677753.76173|  0:29:02s




epoch 92 | loss: 2767922178.07942|  0:28:15s
epoch 89 | loss: 2691022442.74368|  0:28:32s
epoch 89 | loss: 12309395971.69676|  0:28:08s
epoch 0  | loss: 14519083229.80505|  0:00:24s
epoch 4  | loss: 14894406348.24549|  0:02:06s
epoch 86 | loss: 11387691676.18772|  0:28:27s
epoch 90 | loss: 10731106182.9314|  0:28:24s
epoch 68 | loss: 3603337661.92057|  0:24:47s
epoch 65 | loss: 12322482860.36101|  0:24:24s
epoch 68 | loss: 10837730938.91697|  0:25:11s
epoch 85 | loss: 10793810483.52347|  0:29:21s
epoch 0  | loss: 24055495957.25632|  0:00:19s
epoch 93 | loss: 2842637679.59567|  0:28:34s
epoch 90 | loss: 3282305873.32852|  0:28:51s
epoch 90 | loss: 11889896440.37545|  0:28:26s
epoch 5  | loss: 13976317770.3971|  0:02:28s
epoch 87 | loss: 10594023253.71841|  0:28:46s
epoch 1  | loss: 12566370913.9639|  0:00:47s
epoch 91 | loss: 10696142696.20217|  0:28:43s
epoch 69 | loss: 3530326288.63538|  0:25:07s
epoch 86 | loss: 10583217366.87365|  0:29:40s
epoch 66 | loss: 12448811678.0361|  0:24:45



epoch 0  | loss: 19820568640.1542|  0:00:20s
epoch 1  | loss: 16241483879.63374|  0:00:41s
epoch 2  | loss: 13343661688.90602|  0:01:02s
epoch 3  | loss: 11734186776.98314|  0:01:23s
epoch 4  | loss: 11111544325.24339|  0:01:44s
epoch 5  | loss: 10945840894.76625|  0:02:05s
epoch 6  | loss: 10825096565.51324|  0:02:26s
epoch 7  | loss: 10810686125.95663|  0:02:48s
epoch 8  | loss: 10695388129.15662|  0:03:09s
epoch 9  | loss: 10674908346.60241|  0:03:30s
epoch 10 | loss: 10679847414.13012|  0:03:51s
epoch 11 | loss: 10550010714.67952|  0:04:12s
epoch 12 | loss: 10477058194.81446|  0:04:33s
epoch 13 | loss: 10418448725.43614|  0:04:54s
epoch 14 | loss: 10368084292.16384|  0:05:15s
epoch 15 | loss: 10309924791.51806|  0:05:36s
epoch 16 | loss: 10343394457.59999|  0:05:57s
epoch 17 | loss: 10290882369.38794|  0:06:18s
epoch 18 | loss: 10270283683.46988|  0:06:39s
epoch 19 | loss: 10386261066.33253|  0:06:59s
epoch 20 | loss: 10239995017.56144|  0:07:20s
epoch 21 | loss: 10240650845.76385|

**Лучшие параметры: {'n_a': 16, 'n_d': 32}**

In [9]:
X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
X_test = X_test.to_numpy()

y_train = y_train.to_numpy().reshape(-1, 1)
y_val = y_val.to_numpy().reshape(-1, 1)
y_test = y_test.to_numpy().reshape(-1, 1)

class SMAPE(Metric):
    def __init__(self):
        self._name = "smape"
        self._maximize = False

    def __call__(self, y_true, y_pred):
        return 100 * np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred)))

device = 'cuda' if torch.cuda.is_available() else 'cpu'

tabnet_params = {
    "n_d": 32,
    "n_a": 16,
    "n_steps": 3,
    "gamma": 1.3,
    "lambda_sparse": 1e-3,
    "optimizer_fn": torch.optim.Adam,
    "optimizer_params": dict(lr=2e-2),
    "mask_type": "sparsemax",
    "scheduler_params": dict(
        mode="min",
        patience=5,
        min_lr=1e-5,
        factor=0.9,
    ),
    "scheduler_fn": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "seed": 42,
    "verbose": 10
}

model = TabNetRegressor(**tabnet_params, device_name=device)

model.fit(
    X_train=X_train,
    y_train=y_train,
    eval_set=[(X_train, y_train), (X_val, y_val)],
    eval_name=['train', 'val'],
    eval_metric=['rmse', 'mae', SMAPE],
    max_epochs=50,
    patience=20,
    batch_size=1024,
    virtual_batch_size=128,
    num_workers=0,
    drop_last=False,
    loss_fn=torch.nn.functional.mse_loss,
)

y_pred = model.predict(X_test)

culc_metrics(y_test, y_pred)



epoch 0  | loss: 19820567183.3192| train_rmse: 135170.19315| train_mae: 77867.30842| train_smape: 154.20288| val_rmse: 107906.39375| val_mae: 77731.56598| val_smape: 154.34046|  0:00:35s
epoch 10 | loss: 10496793579.30539| train_rmse: 104344.39755| train_mae: 33103.92994| train_smape: 36.18144| val_rmse: 66377.65427| val_mae: 33287.79646| val_smape: 36.37324|  0:06:20s
epoch 20 | loss: 10003760335.19862| train_rmse: 97519.3647| train_mae: 26248.48871| train_smape: 29.30686| val_rmse: 56525.99217| val_mae: 26885.14425| val_smape: 29.73279|  0:12:07s
epoch 30 | loss: 9397602219.5805| train_rmse: 95882.75162| train_mae: 25912.13837| train_smape: 29.05449| val_rmse: 56032.58119| val_mae: 26569.36553| val_smape: 29.50505|  0:17:57s
epoch 40 | loss: 9488302470.75909| train_rmse: 99105.53911| train_mae: 24811.23621| train_smape: 27.64644| val_rmse: 63068.17146| val_mae: 25691.85036| val_smape: 28.25153|  0:24:00s
Stop training because you reached max_epochs = 50 with best_epoch = 40 and best_



Корень из среднеквадратичной ошибки (RMSE): 52115.99526369916
R² Score: 0.4808516580807556
Средняя абсолютная ошибка (MAE): 25400.790932474767
Средняя абсолютная процентная ошибка (SMAPE): 28.22%
Медианная абсолютная ошибка (MedAE): 15666.015625


**n_steps**

In [10]:
param_grid = {
    'n_steps': [3, 5, 7],
}

from sklearn.base import BaseEstimator, RegressorMixin

class TabNetWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, n_d=32, n_a=16, n_steps=3, gamma=1.3, lambda_sparse=1e-3):
        self.n_d = n_d
        self.n_a = n_a
        self.n_steps = n_steps
        self.gamma = gamma
        self.lambda_sparse = lambda_sparse
        self.model = None

    def fit(self, X, y):
        self.model = TabNetRegressor(
            n_d=self.n_d,
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            lambda_sparse=self.lambda_sparse
        )
        self.model.fit(X, y.reshape(-1, 1))
        return self

    def predict(self, X):
        return self.model.predict(X).flatten()

model = TabNetWrapper()

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Лучшие параметры: {best_params}')

Fitting 3 folds for each of 3 candidates, totalling 9 fits




epoch 0  | loss: 22307453353.12635|  0:00:12s
epoch 0  | loss: 14545542853.77618|  0:00:12s
epoch 0  | loss: 24063471752.77978|  0:00:12s
epoch 0  | loss: 14211333236.44765|  0:00:19s
epoch 0  | loss: 23716392074.62816|  0:00:19s
epoch 0  | loss: 22035906833.55956|  0:00:19s
epoch 1  | loss: 20336620939.55234|  0:00:25s
epoch 1  | loss: 12534460543.53791|  0:00:25s
epoch 1  | loss: 22058236759.79783|  0:00:25s
epoch 0  | loss: 13843569699.11914|  0:00:28s
epoch 0  | loss: 23365600200.54874|  0:00:28s
epoch 0  | loss: 21634232000.23104|  0:00:28s
epoch 2  | loss: 18283962499.23466|  0:00:38s
epoch 2  | loss: 19692934515.52346|  0:00:38s
epoch 2  | loss: 10076036471.22022|  0:00:38s
epoch 1  | loss: 20159978052.38989|  0:00:39s
epoch 1  | loss: 18783244204.82311|  0:00:40s
epoch 1  | loss: 11116918460.5343|  0:00:40s
epoch 3  | loss: 17789606642.13718|  0:00:51s
epoch 3  | loss: 16454063958.87365|  0:00:51s
epoch 3  | loss: 8059575203.58123|  0:00:51s
epoch 1  | loss: 9643488258.77256|  



epoch 0  | loss: 19798510200.90601|  0:00:20s
epoch 1  | loss: 16434878344.32771|  0:00:40s
epoch 2  | loss: 13642607870.14939|  0:01:00s
epoch 3  | loss: 11952457842.42892|  0:01:20s
epoch 4  | loss: 11151475562.40964|  0:01:41s
epoch 5  | loss: 10784836736.30843|  0:02:01s
epoch 6  | loss: 10717388125.76386|  0:02:21s
epoch 7  | loss: 10654315685.32049|  0:02:41s
epoch 8  | loss: 10573024017.27229|  0:03:01s
epoch 9  | loss: 10489241668.47227|  0:18:47s
epoch 10 | loss: 10482920371.81687|  0:19:06s
epoch 11 | loss: 10426711410.12049|  0:19:26s
epoch 12 | loss: 10326472952.28914|  0:19:46s
epoch 13 | loss: 10295719259.91325|  0:20:06s
epoch 14 | loss: 10236787039.61446|  0:20:27s
epoch 15 | loss: 10175074291.97108|  0:20:46s
epoch 16 | loss: 10176022440.09637|  0:21:06s
epoch 17 | loss: 10184252299.41205|  0:36:32s
epoch 18 | loss: 10052009060.5494|  0:36:52s
epoch 19 | loss: 10160187771.99037|  0:37:12s
epoch 20 | loss: 10134898314.48675|  0:37:32s
epoch 21 | loss: 10027762167.82651|

**gamma**

In [11]:
param_grid = {
    'gamma': [1.0, 1.3, 1.5],
}

from sklearn.base import BaseEstimator, RegressorMixin

class TabNetWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, n_d=32, n_a=16, n_steps=3, gamma=1.3, lambda_sparse=1e-3):
        self.n_d = n_d
        self.n_a = n_a
        self.n_steps = n_steps
        self.gamma = gamma
        self.lambda_sparse = lambda_sparse
        self.model = None

    def fit(self, X, y):
        self.model = TabNetRegressor(
            n_d=self.n_d,
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            lambda_sparse=self.lambda_sparse
        )
        self.model.fit(X, y.reshape(-1, 1))
        return self

    def predict(self, X):
        return self.model.predict(X).flatten()

model = TabNetWrapper()

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Лучшие параметры: {best_params}')

Fitting 3 folds for each of 3 candidates, totalling 9 fits




epoch 0  | loss: 22346778790.3538|  0:00:11s
epoch 0  | loss: 14545542853.77618|  0:00:11s
epoch 0  | loss: 22307453353.12635|  0:00:11s
epoch 0  | loss: 14527079076.50542|  0:00:11s
epoch 0  | loss: 24063471752.77978|  0:00:11s
epoch 0  | loss: 24076429478.35379|  0:00:11s
epoch 0  | loss: 22334245503.53791|  0:00:11s
epoch 0  | loss: 24048653016.25993|  0:00:12s
epoch 0  | loss: 14537697997.16968|  0:00:12s
epoch 1  | loss: 12534460543.53791|  0:00:23s
epoch 1  | loss: 20354248996.04333|  0:00:23s
epoch 1  | loss: 22058236759.79783|  0:00:24s
epoch 1  | loss: 20336620939.55234|  0:00:24s
epoch 1  | loss: 12469965500.5343|  0:00:24s
epoch 1  | loss: 21975119554.07942|  0:00:24s
epoch 1  | loss: 20373384800.11552|  0:00:24s
epoch 1  | loss: 21936093749.60288|  0:00:24s
epoch 1  | loss: 12550505634.65704|  0:00:24s
epoch 2  | loss: 10076036471.22022|  0:16:01s
epoch 2  | loss: 17874883112.66425|  0:16:01s
epoch 2  | loss: 19692934515.52346|  0:16:01s
epoch 2  | loss: 19569905519.82671| 



epoch 0  | loss: 19858774012.29878|  0:00:24s
epoch 1  | loss: 16626380329.94698|  0:00:46s
epoch 2  | loss: 13734856187.06506|  0:01:09s
epoch 3  | loss: 11967469119.84578|  0:01:31s
epoch 4  | loss: 11140862823.3253|  0:01:53s
epoch 5  | loss: 10767987757.64819|  0:02:15s
epoch 6  | loss: 10700816300.7229|  0:02:38s
epoch 7  | loss: 10649708791.98072|  0:03:01s
epoch 8  | loss: 10556293503.0747|  0:03:25s
epoch 9  | loss: 10462526060.87711|  0:03:47s
epoch 10 | loss: 10468601938.66024|  0:04:09s
epoch 11 | loss: 10407115454.30362|  0:04:31s
epoch 12 | loss: 10359140638.53494|  0:04:54s
epoch 13 | loss: 10258373208.82891|  0:05:17s
epoch 14 | loss: 10230304208.80964|  0:05:39s
epoch 15 | loss: 10156730491.06505|  0:06:00s
epoch 16 | loss: 10047060157.37831|  0:06:22s
epoch 17 | loss: 10011075499.33494|  0:06:44s
epoch 18 | loss: 9998148906.25542|  0:07:07s
epoch 19 | loss: 9939550527.22891|  0:07:29s
epoch 20 | loss: 9878296922.67951|  0:07:51s
epoch 21 | loss: 9831174171.14216|  0:08

**lambda_sparse**

In [13]:
param_grid = {
    'lambda_sparse': [0, 1e-4, 1e-3, 1e-2],
}

from sklearn.base import BaseEstimator, RegressorMixin

class TabNetWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, n_d=32, n_a=16, n_steps=3, gamma=1.0, lambda_sparse=1e-3):
        self.n_d = n_d
        self.n_a = n_a
        self.n_steps = n_steps
        self.gamma = gamma
        self.lambda_sparse = lambda_sparse
        self.model = None

    def fit(self, X, y):
        self.model = TabNetRegressor(
            n_d=self.n_d,
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            lambda_sparse=self.lambda_sparse
        )
        self.model.fit(X, y.reshape(-1, 1))
        return self

    def predict(self, X):
        return self.model.predict(X).flatten()

model = TabNetWrapper()

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Лучшие параметры: {best_params}')

Fitting 3 folds for each of 4 candidates, totalling 12 fits




epoch 0  | loss: 22344680285.34296|  0:00:15s
epoch 0  | loss: 24039331440.7509|  0:00:15s
epoch 0  | loss: 24061082694.23826|  0:00:15s
epoch 0  | loss: 22317042872.83754|  0:00:15s
epoch 0  | loss: 14529660092.5343|  0:00:15s
epoch 0  | loss: 14521496309.83393|  0:00:15s
epoch 0  | loss: 14537697997.16968|  0:00:15s
epoch 0  | loss: 14505098849.9639|  0:00:15s
epoch 0  | loss: 22330558123.89892|  0:00:16s
epoch 0  | loss: 24048653016.25993|  0:00:15s
epoch 0  | loss: 24051531276.93863|  0:00:16s
epoch 0  | loss: 22334245503.53791|  0:00:16s
epoch 1  | loss: 12608565467.95668|  0:00:31s
epoch 1  | loss: 20353156079.36462|  0:00:31s
epoch 1  | loss: 12550505634.65704|  0:00:32s
epoch 1  | loss: 21934225640.8953|  0:00:32s
epoch 1  | loss: 22016248443.84115|  0:00:32s
epoch 1  | loss: 12411272044.12996|  0:00:32s
epoch 1  | loss: 12567084891.49459|  0:00:32s
epoch 1  | loss: 21936093749.60288|  0:00:32s
epoch 1  | loss: 21857595196.0722|  0:00:32s
epoch 1  | loss: 20287179753.81951|  0:



epoch 0  | loss: 19851063456.38556|  0:00:20s
epoch 1  | loss: 16694416971.25784|  0:00:41s
epoch 2  | loss: 13972356959.61445|  0:01:02s
epoch 3  | loss: 12090253807.96144|  0:01:23s
epoch 4  | loss: 11174662577.04096|  0:01:45s
epoch 5  | loss: 10869676251.91326|  0:02:06s
epoch 6  | loss: 10714546408.86746|  0:02:26s
epoch 7  | loss: 10668124206.26506|  0:02:48s
epoch 8  | loss: 10581056220.22169|  0:03:09s
epoch 9  | loss: 10505884036.93493|  0:03:30s
epoch 10 | loss: 10494014905.98554|  0:03:50s
epoch 11 | loss: 10415766627.0072|  0:04:11s
epoch 12 | loss: 10375458902.36144|  0:04:33s
epoch 13 | loss: 10345457373.3012|  0:04:54s
epoch 14 | loss: 10240338012.2217|  0:05:15s
epoch 15 | loss: 10217753312.53977|  0:05:36s
epoch 16 | loss: 10176488550.09156|  0:05:57s
epoch 17 | loss: 10206419699.66264|  0:06:18s
epoch 18 | loss: 10349568345.44578|  0:06:39s
epoch 19 | loss: 10217385968.26989|  0:06:59s
epoch 20 | loss: 10024965479.3253|  0:07:20s
epoch 21 | loss: 9989253723.29637|  0:

In [None]:
class R2Score(Metric):
    def __init__(self):
        self._name = "r2"
        self._maximize = True  # R² нужно максимизировать

    def __call__(self, y_true, y_pred):
        return r2_score(y_true, y_pred)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

tabnet_params = {
    "n_d": 32,
    "n_a": 16,
    "n_steps": 3,
    "gamma": 1.0,
    "lambda_sparse": 1e-3,
    "optimizer_fn": torch.optim.Adam,
    "optimizer_params": dict(lr=2e-2),
    "mask_type": "sparsemax",
    "scheduler_params": dict(
        mode="min",
        patience=5,
        min_lr=1e-5,
        factor=0.9,
    ),
    "scheduler_fn": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "seed": 42,
    "verbose": 10
}

model = TabNetRegressor(**tabnet_params, device_name=device)

model.fit(
    X_train=X_train,
    y_train=y_train,
    eval_set=[(X_train, y_train), (X_val, y_val)],
    eval_name=['train', 'val'],
    eval_metric=['rmse', R2Score, SMAPE],
    max_epochs=50,
    patience=20,
    batch_size=1024,
    virtual_batch_size=128,
    num_workers=0,
    drop_last=False,
    loss_fn=torch.nn.functional.mse_loss,
)

y_pred = model.predict(X_test)

culc_metrics(y_test, y_pred)



epoch 0  | loss: 19790739692.96751| train_rmse: 134188.25805| train_r2: -0.41325| train_smape: 149.04689| val_rmse: 106700.17219| val_r2: -0.85555| val_smape: 149.19094|  0:00:37s


### Подготовка данных без эмбедингов

In [None]:
scaler = StandardScaler()
num_df = pd.DataFrame(scaler.fit_transform(df[num_columns]), columns=num_columns)

label_columns = []
ohe_columns = []

for column in cat_columns:
    if df[column].nunique() > 10:
        label_columns.append(column)
    else:
        ohe_columns.append(column)

to_bool = list(df[cat_columns].select_dtypes(include=['bool']).columns)
df[['salary_gross', 'employer_accredited_it_employer']] = df[['salary_gross', 'employer_accredited_it_employer']].astype(bool).astype(int)
df[to_bool] = df[to_bool].astype(int)

ohe = OneHotEncoder(sparse_output=False, drop='first')
ohe_encoded = ohe.fit_transform(df[ohe_columns])
ohe_feature_names = ohe.get_feature_names_out(ohe_columns).tolist()
encoded_ohe_data = pd.DataFrame(ohe_encoded, columns=ohe_feature_names)

label_encoder = LabelEncoder()
for col in label_columns:
    df[col] = label_encoder.fit_transform(df[col])

X = pd.concat([df[label_columns], encoded_ohe_data, num_df], axis=1)
y = df['salary']

X_2_train, X_2_test_val, y_2_train, y_2_test_val, = train_test_split(X, y, test_size=0.4, random_state=12345)
X_2_test, X_2_val, y_2_test, y_2_val = train_test_split(X_2_test_val, y_2_test_val, test_size=0.5, random_state=12345)

print(f'Размеры выборок: Обучающая {X_2_train.shape}, Валидационная {X_2_test.shape}, Тестовая {X_2_val.shape}')

Размеры выборок: Обучающая (425714, 69), Валидационная (141905, 69), Тестовая (141905, 69)


### Полносвязная нейронная сеть без эмбедингов

In [None]:
results_2 = {}

architectures = {
    'small': [64, 32],
    'medium': [128, 64, 32],
    'large': [256, 128, 64, 32],
    'wide': [512, 256],
    'deep': [64, 64, 64, 64, 64]
}

for name, arch in architectures.items():
    print(f"\nTraining {name} architecture: {arch}")
    model, history, y_pred, train_time, metrics = build_and_train_model(
        arch, X_2_train, y_2_train, X_2_test, y_2_test
    )

    results_2[name] = {
        'architecture': arch,
        'train_time': train_time,
        'metrics': metrics,
        'epochs_trained': len(history.history['loss'])
    }

    print(f"Training time: {train_time:.2f}s")
    culc_metrics(y_2_test, y_pred)


Training small architecture: [64, 32]
[1m4435/4435[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step
Training time: 702.78s
Корень из среднеквадратичной ошибки (RMSE): 71249.22691526295
R² Score: 0.029691776178032314
Средняя абсолютная ошибка (MAE): 41372.06422014223
Средняя абсолютная процентная ошибка (SMAPE): 45.38%
Медианная абсолютная ошибка (MedAE): 33547.609375

Training medium architecture: [128, 64, 32]
[1m4435/4435[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step
Training time: 1053.81s
Корень из среднеквадратичной ошибки (RMSE): 70796.97617631152
R² Score: 0.041970643708062916
Средняя абсолютная ошибка (MAE): 38817.90827763207
Средняя абсолютная процентная ошибка (SMAPE): 42.65%
Медианная абсолютная ошибка (MedAE): 28399.53125

Training large architecture: [256, 128, 64, 32]
[1m4435/4435[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step
Training time: 1138.94s
Корень из среднеквадратичной ошибки (RMSE): 75041.47628239205
R² Score: 

### TabNet без эмбедингов

In [None]:
X_train = X_2_train.to_numpy()
X_val = X_2_val.to_numpy()
X_test = X_2_test.to_numpy()

y_train = y_2_train.to_numpy().reshape(-1, 1)
y_val = y_2_val.to_numpy().reshape(-1, 1)
y_test = y_2_test.to_numpy().reshape(-1, 1)

class SMAPE(Metric):
    def __init__(self):
        self._name = "smape"
        self._maximize = False

    def __call__(self, y_true, y_pred):
        return 100 * np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred)))

device = 'cuda' if torch.cuda.is_available() else 'cpu'

tabnet_params = {
    "n_d": 8,
    "n_a": 8,
    "n_steps": 3,
    "gamma": 1.3,
    "lambda_sparse": 1e-3,
    "optimizer_fn": torch.optim.Adam,
    "optimizer_params": dict(lr=2e-2),
    "mask_type": "sparsemax",
    "scheduler_params": dict(
        mode="min",
        patience=5,
        min_lr=1e-5,
        factor=0.9,
    ),
    "scheduler_fn": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "seed": 42,
    "verbose": 10,
    "device_name": device
}

model = TabNetRegressor(**tabnet_params)

model.fit(
    X_train=X_train,
    y_train=y_train,
    eval_set=[(X_train, y_train), (X_val, y_val)],
    eval_name=['train', 'val'],
    eval_metric=['rmse', 'mae', SMAPE],
    max_epochs=50,
    patience=20,
    batch_size=512,
    virtual_batch_size=128,
    num_workers=0,
    drop_last=False,
    loss_fn=torch.nn.functional.mse_loss,
    pin_memory=False
)

y_pred = model.predict(X_test)

culc_metrics(y_test, y_pred)

epoch 0  | loss: 19872069664.19348| train_rmse: 136917.37655| train_mae: 79243.44531| train_smape: 154.26673| val_rmse: 110076.88833| val_mae: 79087.0 | val_smape: 154.32972|  0:00:58s
epoch 10 | loss: 11264595244.16502| train_rmse: 106016.77754| train_mae: 29198.63867| train_smape: 32.39938| val_rmse: 68291.12645| val_mae: 29137.13281| val_smape: 32.44966|  0:10:57s
epoch 20 | loss: 11220968340.13476| train_rmse: 112126.81278| train_mae: 33202.75781| train_smape: 36.70336| val_rmse: 78464.1925| val_mae: 33103.00391| val_smape: 36.7279 |  0:20:47s
epoch 30 | loss: 11206899306.1923| train_rmse: 114423.6865| train_mae: 38514.47266| train_smape: 41.57988| val_rmse: 80557.57692| val_mae: 38373.82812| val_smape: 41.61062|  0:30:39s

Early stopping occurred at epoch 30 with best_epoch = 10 and best_val_smape = 32.44966




Корень из среднеквадратичной ошибки (RMSE): 61374.38162621274
R² Score: 0.28001469373703003
Средняя абсолютная ошибка (MAE): 29087.1015625
Средняя абсолютная процентная ошибка (SMAPE): 32.53%
Медианная абсолютная ошибка (MedAE): 19507.56640625


## Выводы

Лучший результат дает RandomForestRegressor

| Model                               |    RMSE |      R2 |     MAE |   SMAPE (%) |   MedAE |
|:------------------------------------|--------:|--------:|--------:|------------:|--------:|
| RF                                  | 44484.3 |  0.6218 | 21484.4 |       24.32 | 12514.5 |
| DT (с эмбеддингами)                 | 48970.3 |  0.5416 | 25268.4 |       28.27 | 14970.8 |
| TabNet + Embeddings                 | 49171.3 |  0.5379 | 26366.7 |       29.62 | 17047.4 |
| DNN [64,32] + Embeddings            | 50941.3 |  0.504  | 29103.1 |       32.33 | 20343.1 |
| DNN [64,32] + BN + Adam             | 51853.1 |  0.4861 | 28105   |       31.24 | 18523.9 |
| DNN [64,32] + BN + SGD              | 56739.9 |  0.3846 | 27046.5 |       29.85 | 17030.6 |
| DNN [64,32] + BN + Adam + Glorot    | 57458.3 |  0.369  | 28906.6 |       32.1  | 19763   |
| DNN [64,32] + LN + Adam + He        | 57648.2 |  0.3648 | 26941.2 |       30.1  | 17837.7 |
| DNN [64,32] + BN + Adam + He        | 57850.2 |  0.3603 | 28789   |       31.83 | 19358.6 |
| DNN [64,32] + LN + Adam + Glorot    | 57900.8 |  0.3592 | 27254.7 |       30.34 | 17967.6 |
| DNN [64,32] + LN + Adam             | 57955.4 |  0.358  | 27787.1 |       30.88 | 18780.5 |
| DNN [64x5]                          | 58303.6 |  0.3503 | 28460.1 |       31.2  | 18813.3 |
| DNN [512,256]                       | 58451.3 |  0.347  | 29016.8 |       31.54 | 19087.2 |
| DNN [64,32] + LN + SGD + He         | 58823.4 |  0.3386 | 27388.2 |       30.6  | 18350.5 |
| TabNet без эмбеддингов              | 61374.4 |  0.28   | 29087.1 |       32.53 | 19507.6 |
| DNN [128,64,32]                     | 63997.7 |  0.2172 | 29133   |       31.22 | 18446.5 |
| DNN [128, 64, 32] без эмбеддингов   | 70797   |  0.042  | 38817.9 |       42.65 | 28399.5 |
| DNN [64, 32] без эмбеддингов        | 71249.2 |  0.0297 | 41372.1 |       45.38 | 33547.6 |
| DNN Large [256,...] без эмбеддингов | 75041.5 | -0.0763 | 43487.6 |       47.14 | 36524.2 |
| DNN [256,128,64,32]                 | 99553.4 | -0.8944 | 62541.9 |      107.8  | 48945.9 |
