In [None]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd


# Separate the label column from the training features.
lgb_y = tr_features["target"]
lgb_X = tr_features.drop(columns=["target"])

# Select the test columns by training column name so the test matrix has
# exactly the training features in the training order. A training feature
# that is absent from tst_features raises KeyError here rather than
# surfacing later at predict time.
lgb_X_test = tst_features[lgb_X.columns]

# Take categorical columns from the feature matrix, not from tr_features:
# the latter still holds "target", which must never be listed as a feature.
# NOTE(review): LightGBM's pandas path handles 'category' dtype; raw 'object'
# columns are likely rejected at fit time -- confirm none reach the model.
lgb_categorical_features = lgb_X.select_dtypes(include=["category", "object"]).columns.tolist()

# Stratified 80/20 train/validation split with a fixed seed.
lgb_X_train, lgb_X_val, lgb_y_train, lgb_y_val = train_test_split(
    lgb_X, lgb_y, test_size=0.2, random_state=42, stratify=lgb_y
)

# Native LightGBM datasets for the two splits; free_raw_data=False keeps the
# underlying frames attached to the Dataset objects.
lgb_train_data = lgb.Dataset(lgb_X_train, label=lgb_y_train, categorical_feature=lgb_categorical_features, free_raw_data=False)
lgb_val_data = lgb.Dataset(lgb_X_val, label=lgb_y_val, categorical_feature=lgb_categorical_features, free_raw_data=False)

# Parameters for a native LightGBM multiclass booster over 8 classes.
# metric="None" (the string) switches off the built-in metric, so evaluation
# has to come from a custom function.
lgb_params = dict(
    objective="multiclass",
    num_class=8,
    metric="None",
    learning_rate=0.05,
    num_leaves=64,
    max_depth=7,
    verbosity=-1,
    seed=42,
)

# Error weight for each integer class label 0..7, used by the WMAE metric.
# The values for classes 0-6 equal 0.72**k rounded to two decimals; class 7
# has weight 0, so errors on class-7 rows do not contribute to the metric.
weights_by_class = dict(enumerate((
    1.00,
    0.72,
    0.52,
    0.37,
    0.27,
    0.19,
    0.14,
    0.00,
)))


In [None]:
import numpy as np

def feval_wmae_sklearn(y_true, y_pred):
    """Weighted MAE as a custom eval metric for LightGBM's scikit-learn API.

    Each row's absolute error is multiplied by the weight that
    ``weights_by_class`` assigns to the row's true class, and the products
    are averaged over all rows.

    The signature must stay two-argument: LightGBM's scikit-learn wrapper
    decides how to call a custom metric from its parameter count.

    Args:
        y_true: array-like of true labels, same length as ``y_pred``; values
            are truncated to int before the weight lookup.
        y_pred: array-like of predicted values.

    Returns:
        Tuple ``(eval_name, eval_result, is_higher_better)``, i.e.
        ``('wmae', <weighted MAE>, False)``.

    Raises:
        ValueError: if a label has no entry in ``weights_by_class``.
    """
    targets = np.asarray(y_true, dtype=float)
    preds = np.asarray(y_pred, dtype=float)

    # Resolve the weight once per distinct label and broadcast it back to the
    # rows; this avoids a Python-level dict lookup per row on every boosting
    # round and reports unmapped labels explicitly.
    classes, inverse = np.unique(targets.astype(int), return_inverse=True)
    unknown = [c for c in classes.tolist() if c not in weights_by_class]
    if unknown:
        raise ValueError(f"labels without an entry in weights_by_class: {unknown}")
    class_weights = np.array([weights_by_class[c] for c in classes.tolist()], dtype=float)
    weights = class_weights[inverse].reshape(targets.shape)

    loss = np.mean(weights * np.abs(targets - preds))
    # Return the tuple (name, value, is_higher_better).
    return 'wmae', loss, False


# Treat the ordinal classes 0-7 as a regression target.
reg = lgb.LGBMRegressor(
    objective="regression",
    metric="None",  # disable the built-in metric; only the custom WMAE is evaluated
    n_estimators=1000,
    learning_rate=0.05,
    num_leaves=64,
    max_depth=7,
    reg_alpha=0.5,
    # reg_lambda=4 is commented out, so LightGBM's default applies
    random_state=42,
)

# Evaluate on both the training and the validation split.
reg_eval_sets = [
    (lgb_X_train, lgb_y_train),
    (lgb_X_val, lgb_y_val),
]
# Early stopping with 50 stopping rounds; evaluation log every 100 iterations.
reg_callbacks = [
    lgb.early_stopping(50),
    lgb.log_evaluation(period=100),
]
reg.fit(
    lgb_X_train,
    lgb_y_train,
    eval_set=reg_eval_sets,
    eval_metric=feval_wmae_sklearn,
    callbacks=reg_callbacks,
)

# Continuous validation predictions, then the nearest integer class
# restricted to the valid label range 0-7.
y_pred_cont = reg.predict(lgb_X_val)
y_pred_cls = np.rint(y_pred_cont).clip(0, 7).astype(int)

def compute_wmae(y_true, y_pred):
    """Return the class-weighted mean absolute error.

    Each row's absolute error is multiplied by the weight that
    ``weights_by_class`` assigns to the row's true class, and the products
    are averaged over all rows. Inputs are compared positionally.

    Args:
        y_true: array-like of true labels, same length as ``y_pred``; values
            are truncated to int before the weight lookup.
        y_pred: array-like of predictions (rounded classes or raw values).

    Returns:
        The weighted MAE as a numpy float.

    Raises:
        ValueError: if a label has no entry in ``weights_by_class``.
    """
    targets = np.asarray(y_true, dtype=float)
    preds = np.asarray(y_pred, dtype=float)

    # Resolve the weight once per distinct label and broadcast it back to the
    # rows, instead of a Python-level dict lookup per row; unmapped labels
    # are reported explicitly.
    classes, inverse = np.unique(targets.astype(int), return_inverse=True)
    unknown = [c for c in classes.tolist() if c not in weights_by_class]
    if unknown:
        raise ValueError(f"labels without an entry in weights_by_class: {unknown}")
    class_weights = np.array([weights_by_class[c] for c in classes.tolist()], dtype=float)
    weights = class_weights[inverse].reshape(targets.shape)

    return np.mean(weights * np.abs(targets - preds))

# True validation labels as a numpy array.
y_val_np = lgb_y_val.to_numpy()

# WMAE for the rounded class predictions, then for the raw regression output.
wmae_score, cont_wmae_score = (
    compute_wmae(y_val_np, preds) for preds in (y_pred_cls, y_pred_cont)
)

# Report both scores.
print(f"📊 Validation WMAE: {wmae_score:.5f}")
print(f"📊 Cont validation WMAE: {cont_wmae_score:.5f}")

In [None]:
# Predict on the test features and round to the nearest class within 0-7.
test_pred_cont = reg.predict(lgb_X_test)
test_pred_labels = np.rint(test_pred_cont).clip(0, 7).astype(int)

# Two-column submission: the client identifier and the predicted class.
submission = tst_features[["client_num"]].assign(target=test_pred_labels)

submission.to_csv("submission_regression.csv", index=False)
print("Saved submission_regression.csv")
